ó
ÊIc           @   sL  d  d l  Z  d  d l Z d  d l m Z d  d l m Z d  d l m Z m Z d f  d „  ƒ  YZ	 d e	 f d „  ƒ  YZ
 d	 e	 f d
 „  ƒ  YZ e d k rHd  d l m Z d Z d Z d Z d GHd GHe ƒ  Z x" e j e e ƒ ƒ D] Z e GHqä We ƒ  Z x1 e j e e ƒ d d d d f ƒD] Z e GHq!We ƒ  Z x9 e j e e d e ƒ d d d d f ƒD] Z e GHqfWHd GHd GHe ƒ  Z xA e j e e ƒ d e ƒD]$ Z e j Ge j Ge j GHe j GHq¥We ƒ  Z xA e j e e ƒ d e ƒD]$ Z e j Ge j Ge j GHe j GHqòWe ƒ  Z xA e j e e ƒ d e ƒD]$ Z e j Ge j Ge j GHe j GHq?We ƒ  Z xA e j e e ƒ d e ƒD]$ Z e j Ge j Ge j GHe j GHqŒWHd GHd GHe
 ƒ  Z x" e j e e ƒ ƒ D] Z e GHqÞWHd GHd GHe
 ƒ  Z xA e j e e ƒ d e ƒD]$ Z e j Ge j Ge j GHe j GHqWn  d S(   iÿÿÿÿN(   t   Seq(   t	   SeqRecord(   t   generic_alphabett   generic_proteint   InsdcScannerc           B   sÝ   e  Z d  Z d Z d Z d g Z d g Z d Z d Z d g Z	 d d „ Z
 d „  Z d	 „  Z d
 „  Z e d „ Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z e d „ Z e d „ Z e d „ Z e d d „ Z RS(   s¤  Basic functions for breaking up a GenBank/EMBL file into sub sections.

    The International Nucleotide Sequence Database Collaboration (INSDC)
    between the DDBJ, EMBL, and GenBank.  These organisations all use the
    same "Feature Table" layout in their plain text flat file formats.

    However, the header and sequence sections of an EMBL file are very
    different in layout to those produced by GenBank/DDBJ.t   XXXi   s   XXX***FEATURES***XXXs   XXX***END FEATURES***XXXi    t    c         C   s~   t  |  j ƒ |  j k s t ‚ x) |  j D] } | | j ƒ  k s( t ‚ q( Wt  |  j ƒ |  j k sh t ‚ | |  _ d  |  _
 d  S(   N(   t   lent   RECORD_STARTt   HEADER_WIDTHt   AssertionErrort   SEQUENCE_HEADERSt   rstript   FEATURE_QUALIFIER_SPACERt   FEATURE_QUALIFIER_INDENTt   debugt   Nonet   line(   t   selfR   t   marker(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyt   __init__4   s    	c         C   s   | |  _  d |  _ d  S(   NR   (   t   handleR   (   R   R   (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyt
   set_handle<   s    	c         C   s
  xú t  rü |  j r' |  j } d |  _ n |  j j ƒ  } | sQ |  j rM d GHn  d	 S| |  j  |  j k r† |  j d k r‚ d | GHn  Pn  | j ƒ  } | d k r¸ |  j d k rù d GHqù q | d k rÞ |  j d k rù d GHqù q |  j d k r d | GHq q W| |  _ | S(
   s°   Read in lines until find the ID/LOCUS line, which is returned.
        
        Any preamble (such as the header used by the NCBI on *.seq.gz archives)
        will we ignored.R   s   End of filei   s   Found the start of a record:
s   //s&   Skipping // marking end of last records!   Skipping blank line before records$   Skipping header line before record:
N(	   t   TrueR   R   t   readlineR   R   R	   R   R   (   R   R   (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyt
   find_start@   s0    				    	c         C   sò   |  j  |  j  |  j k s% t d ƒ ‚ g  } x· t rä |  j j ƒ  } | sX t d ƒ ‚ n  | j ƒ  } | |  j	 k rˆ |  j
 r„ d GHn  Pn  | |  j  j ƒ  |  j k r¹ |  j
 rµ d GHn  Pn  | d k rÔ t d ƒ ‚ n  | j | ƒ q. W| |  _  | S(   s˜   Return list of strings making up the header

        New line characters are removed.

        Assumes you have just read in the ID/LOCUS line.
        s   Not at start of records*   Premature end of line during sequence datas   Found header tables   Found start of sequences   //s0   Premature end of sequence data marker '//' found(   R   R	   R   R
   R   R   R   t
   ValueErrorR   t   FEATURE_START_MARKERSR   R   t   append(   R   t   header_linesR   (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyt   parse_header]   s*    			 	 	c         C   s@  |  j  j ƒ  |  j k r- |  j r) d GHn  g  Sx. |  j  j ƒ  |  j k r] |  j j ƒ  |  _  q0 Wg  } |  j  } xÃt r2| s‹ t d ƒ ‚ n  | |  j  j ƒ  |  j	 k r¼ |  j r¸ d GHn  Pn  | j ƒ  } | d k rã t d ƒ ‚ n  | |  j
 k r|  j rd GHn  |  j j ƒ  } Pn  | d |  j !j ƒ  d k rEt d	 | ƒ ‚ n  | r‰|  j j ƒ  } xÒ | |  j  |  j k r…|  j j ƒ  } q]Wqp | d |  j !j ƒ  } | |  j g } |  j j ƒ  } xU | |  j  |  j k sé| j ƒ  d k r| j | |  j j ƒ  ƒ |  j j ƒ  } qÁW| j |  j | | ƒ ƒ qp W| |  _  | S(
   s›  Return list of tuples for the features (if present)

        Each feature is returned as a tuple (key, location, qualifiers)
        where key and location are strings (e.g. "CDS" and
        "complement(join(490883..490885,1..879))") while qualifiers
        is a list of two string tuples (feature qualifier keys and values).

        Assumes you have already read to the start of the features table.
        s   Didn't find any feature tables+   Premature end of line during features tables   Found start of sequences   //s2   Premature end of features table, marker '//' founds   Found end of featuresi   R   s)   Expected a feature qualifier in line '%s'(   R   R   R   R   R   R   R   R   R	   R   t   FEATURE_END_MARKERSR   t   stripR   R   t   parse_feature(   R   t   skipt   featuresR   t   feature_keyt   feature_lines(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyt   parse_features|   sN    
	 			 	 	c   
      C   s$  t  t d | ƒ ƒ } yØ| j ƒ  } | j ƒ  } x* | d d k r\ | | j ƒ  j ƒ  7} q3 Wg  } xy| D]q} | d d k r‰| j d ƒ } | d | !} | | d }	 | d k rÖ | d } | j | d f ƒ qÛ|	 d d k rs|	 d d k s|	 d k r0xX |	 d d k r,|	 d | j ƒ  7}	 qWn- |	 d k sBt ‚ |  j r]d	 | |	 f GHn  | j | |	 f ƒ qÛ| j | |	 f ƒ qj t	 | ƒ d k s¡t ‚ | | d d k s»t ‚ | | d d d | f | d <qj W| | | f SWn0 t
 k
 rt d
 | d j | ƒ f ƒ ‚ n Xd S(   sq
  Expects a feature as a list of strings, returns a tuple (key, location, qualifiers)

        For example given this GenBank feature:

             CDS             complement(join(490883..490885,1..879))
                             /locus_tag="NEQ001"
                             /note="conserved hypothetical [Methanococcus jannaschii];
                             COG1583:Uncharacterized ACR; IPR001472:Bipartite nuclear
                             localization signal; IPR002743: Protein of unknown
                             function DUF57"
                             /codon_start=1
                             /transl_table=11
                             /product="hypothetical protein"
                             /protein_id="NP_963295.1"
                             /db_xref="GI:41614797"
                             /db_xref="GeneID:2732620"
                             /translation="MRLLLELKALNSIDKKQLSNYLIQGFIYNILKNTEYSWLHNWKK
                             EKYFNFTLIPKKDIIENKRYYLIISSPDKRFIEVLHNKIKDLDIITIGLAQFQLRKTK
                             KFDPKLRFPWVTITPIVLREGKIVILKGDKYYKVFVKRLEELKKYNLIKKKEPILEEP
                             IEISLNQIKDGWKIIDVKDRYYDFRNKSFSAFSNWLRDLKEQSLRKYNNFCGKNFYFE
                             EAIFEGFTFYKTVSIRIRINRGEAVYIGTLWKELNVYRKLDKEEREFYKFLYDCGLGS
                             LNSMGFGFVNTKKNSAR"

        Then should give input key="CDS" and the rest of the data as a list of strings
        lines=["complement(join(490883..490885,1..879))", ..., "LNSMGFGFVNTKKNSAR"]
        where the leading spaces and trailing newlines have been removed.

        Returns tuple containing: (key as string, location string, qualifiers as list)
        as follows for this example:

        key = "CDS", string
        location = "complement(join(490883..490885,1..879))", string
        qualifiers = list of string tuples:

        [('locus_tag', '"NEQ001"'),
         ('note', '"conserved hypothetical [Methanococcus jannaschii];
COG1583:..."'),
         ('codon_start', '1'),
         ('transl_table', '11'),
         ('product', '"hypothetical protein"'),
         ('protein_id', '"NP_963295.1"'),
         ('db_xref', '"GI:41614797"'),
         ('db_xref', '"GeneID:2732620"'),
         ('translation', '"MRLLLELKALNSIDKKQLSNYLIQGFIYNILKNTEYSWLHNWKK
EKYFNFT..."')]

        In the above example, the "note" and "translation" were edited for compactness,
        and they would contain multiple new line characters (displayed above as 
)

        If a qualifier is quoted (in this case, everything except codon_start and
        transl_table) then the quotes are NOT removed.

        Note that no whitespace is removed.
        iÿÿÿÿt   ,i    t   /t   =i   t   "s   
s   Quoted line %s:%ss   Problem with '%s' feature:
%sN(   t   itert   filterR   t   nextR    t   findR   R
   R   R   t   StopIterationR   t   join(
   R   R$   t   linest   iteratorR   t   feature_locationt
   qualifierst   it   keyt   value(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyR!   °   s>    6
	 $c         C   s  |  j  |  j k rw xb |  j  |  j  j ƒ  |  j k rs |  j j ƒ  |  _  |  j  s^ t d ƒ ‚ n  |  j  j ƒ  |  _  q Wn  |  j  |  j  j ƒ  |  j k s¢ t d ƒ ‚ xJ t	 rî |  j j ƒ  } | sÏ t d ƒ ‚ n  | j ƒ  } | d k r¥ Pq¥ q¥ W| |  _  g  d f S(   sG   returns a tuple containing a list of any misc strings, and the sequences   Premature end of files   Not at start of sequences*   Premature end of line during sequence datas   //R   (
   R   R   R	   R   R   R   R   R   R
   R   (   R   R   (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyt   parse_footer  s"    "	"		  	c         C   s   d S(   sÕ   Handle the LOCUS/ID line, passing data to the comsumer
        
        This should be implemented by the EMBL / GenBank specific subclass
        
        Used by the parse_records() and parse() methods.
        N(    (   R   t   consumerR   (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyt   _feed_first_line,  s    c         C   s   d S(   sæ   Handle the header lines (list of strings), passing data to the comsumer
        
        This should be implemented by the EMBL / GenBank specific subclass
        
        Used by the parse_records() and parse() methods.
        N(    (   R   R9   R1   (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyt   _feed_header_lines5  s    c         C   s‘   | j  ƒ  x€ | D]x \ } } } | j | ƒ | j | ƒ xL | D]D \ } } | j | g ƒ | d k	 rA | j | j d d ƒ ƒ qA qA Wq Wd S(   s’   Handle the feature table (list of tuples), passing data to the comsumer
        
        Used by the parse_records() and parse() methods.
        s   
t    N(   t   start_feature_tableR$   t   locationt   feature_qualifier_nameR   t   feature_qualifier_descriptiont   replace(   R   R9   t   feature_tuplesR$   t   location_stringR4   t   q_keyt   q_value(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyt   _feed_feature_table?  s    
c         C   s   d S(   sý   Handle any lines between features and sequence (list of strings), passing data to the consumer
        
        This should be implemented by the EMBL / GenBank specific subclass
        
        Used by the parse_records() and parse() methods.
        N(    (   R   R9   R1   (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyt   _feed_misc_linesM  s    c         C   sÙ   |  j  | ƒ |  j ƒ  s& d | _ t S|  j | |  j ƒ |  j | |  j ƒ  ƒ | rt |  j	 | |  j
 d t ƒ ƒ n |  j
 d t ƒ |  j ƒ  \ } } |  j | | ƒ | j | ƒ | j d ƒ |  j d k sÕ t ‚ t S(   sæ  Feed a set of data into the consumer.

        This method is intended for use with the "old" code in Bio.GenBank

        Arguments:
        handle - A handle with the information to parse.
        consumer - The consumer that should be informed of events.
        do_features - Boolean, should the features be parsed?
                      Skipping the features can be much faster.

        Return values:
        true  - Passed a record
        false - Did not find a record
        R"   s   //N(   R   R   R   t   datat   FalseR:   R   R;   R   RF   R&   R   R8   RG   t   sequencet
   record_endR
   (   R   R   R9   t   do_featurest
   misc_linest   sequence_string(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyt   feedV  s    	c         C   sY   d d l  m } d d l m } | d d d | ƒ  ƒ } |  j | | ƒ rQ | j Sd Sd S(   s   Returns a SeqRecord (with SeqFeatures if do_features=True)

        See also the method parse_records() for use on multi-record files.
        iÿÿÿÿ(   t   _FeatureConsumer(   t   FeatureValueCleanert   use_fuzzinessi   t   feature_cleanerN(   t   Bio.GenBankRP   t   Bio.GenBank.utilsRQ   RO   RH   R   (   R   R   RL   RP   RQ   R9   (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyt   parseˆ  s    c         c   st   xm t  ro |  j | ƒ } | d k r( Pn  | j d k	 s= t ‚ | j d k sR t ‚ | j d k sg t ‚ | Vq Wd S(   s  Returns a SeqRecord object iterator

        Each record (from the ID/LOCUS line to the // line) becomes a SeqRecord

        The SeqRecord objects include SeqFeatures if do_features=True
        
        This method is intended for use in Bio.SeqIO
        s   <unknown name>s   <unknown description>N(   R   RV   R   t   idR
   t   namet   description(   R   R   RL   t   record(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyt   parse_records™  s    
	 t
   protein_idt	   locus_tagt   productc         c   sw  |  j  | ƒ xc|  j ƒ  rr|  j ƒ  |  j ƒ  } x7 t rk |  j j ƒ  } | sT Pn  | d  d k r5 Pq5 q5 W| j ƒ  |  _ xñ| D]é\ } } } | d k r‚ t	 d d ƒ }	 |	 j }
 | j d d ƒ |
 d <x| D]\ } } | d k	 r| d d	 k r| d
 d	 k r| d d
 !} n  | d k rb|	 j d k sAt d ƒ ‚ t | j d d ƒ | ƒ |	 _ qÒ | d k r|	 j j | ƒ qÒ | d k	 r®| j d d ƒ j d d ƒ } n  y |
 | c d | 7<WqÒ t k
 rã| |
 | <qÒ XqÒ Wy |
 | d |	 _ Wn t k
 rn Xy |
 | d |	 _ Wn t k
 r9n Xy |
 | d |	 _ Wn t k
 rbn X|	 Vq‚ q‚ Wq Wd S(   sW  Returns SeqRecord object iterator

        Each CDS feature becomes a SeqRecord.

        alphabet - Used for any sequence found in a translation field.
        tags2id  - Tupple of three strings, the feature keys to use
                   for the record id, name and description,

        This method is intended for use in Bio.SeqIO
        i   s   //t   CDSt   seqR<   R   t   raw_locationi    R*   iÿÿÿÿi   t   translations   Multiple translations!s   
t   db_xrefs     N(   R   R   R   R&   R   R   R   R   R   R   R   t   annotationsRA   R`   R
   R    t   dbxrefsR   t   KeyErrorRW   RX   RY   (   R   R   t   alphabett   tags2idRB   R   R6   RC   R4   RZ   Rd   t   qualifier_namet   qualifier_data(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyt   parse_cds_features«  sX    
	  	 !!(   R\   R]   R^   (   t   __name__t
   __module__t   __doc__R   R	   R   R   R   R   R   R   R   R   R   RI   R&   R!   R8   R:   R;   RF   RG   R   RO   RV   R[   R   Rk   (    (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyR   !   s0   						4	g				
			2t   EmblScannerc           B   s   e  Z d  Z d Z d Z d d g Z d g Z d Z d d e d	 Z d
 g Z	 d „  Z
 d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z RS(   s2   For extracting chunks of information in EMBL filess   ID   i   s(   FH   Key             Location/Qualifierst   FHt   XXi   t   FTR<   i   t   SQc         C   sÐ  |  j  |  j  j ƒ  |  j k s2 t d |  j  ƒ ‚ g  } xo |  j  |  j  j ƒ  |  j k r© | j |  j  ƒ |  j j ƒ  |  _  |  j  s” t d ƒ ‚ n  |  j  j ƒ  |  _  q; W|  j  |  j  d |  j k sñ |  j  j	 ƒ  d k sñ t t
 |  j  ƒ ƒ ‚ g  } |  j  } x± t r³| st d ƒ ‚ n  | j	 ƒ  } | s?t d ƒ ‚ n  | d k rOPn  |  j  |  j  d |  j k st t
 |  j  ƒ ƒ ‚ | j d j | j ƒ  d  ƒ ƒ |  j j ƒ  } qW| |  _  | d j | ƒ f S(	   sG   returns a tuple containing a list of any misc strings, and the sequences   Eh? '%s's   Premature end of fileR<   s   //s&   Premature end of file in sequence datas   Blank line in sequence dataR   iÿÿÿÿ(   R   R	   R   R   R
   R   R   R   R   R    t   reprR   R0   t   split(   R   RM   t	   seq_linesR   (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyR8     s6    ""	*		  	c         C   s‘   | |  j   j ƒ  d k s t ‚ | |  j  j d ƒ d k rN |  j | | ƒ n? | |  j  j d ƒ d k r} |  j | | ƒ n t d | ƒ ‚ d  S(   Nt   IDt   ;i   i   s&   Did not recognise the ID line layout:
(   R	   R   R
   t   countt   _feed_first_line_newt   _feed_first_line_oldR   (   R   R9   R   (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyR:   '  s    c         C   sÖ   | |  j   j ƒ  d k s t ‚ | |  j  j d  d ƒ d g } | j | |  j  j d  d ƒ d j d ƒ ƒ g  | D] } | j ƒ  ^ qs } | j | d ƒ | j | d ƒ | j	 | d ƒ |  j
 | | d ƒ d  S(   NRw   i   i    Rx   i   i   i   (   R	   R   R
   Ru   R   t   extendR    t   locust   residue_typet   data_file_divisiont   _feed_seq_length(   R   R9   R   t   fieldst   entry(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyR{   2  s     -c         C   s*  | |  j   j ƒ  d k s t ‚ g  | |  j  j ƒ  j d ƒ D] } | j ƒ  ^ q< } t | ƒ d k sl t ‚ | j | d ƒ | j | d ƒ | d j ƒ  } t | ƒ d k rä | d d k rä | d j ƒ  rä | j	 | d ƒ n  | j
 d j | d d	 !ƒ ƒ | j | d
 ƒ |  j | | d ƒ d  S(   NRw   Rx   i   i    i   i   t   SVR<   i   i   i   (   R	   R   R
   R    Ru   R   R}   t	   accessiont   isdigitt   version_suffixR~   R0   R   R€   (   R   R9   R   RH   R   t   version_parts(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyRz   I  s    5c         C   sU   | j  ƒ  } t | ƒ d k s$ t ‚ | d j ƒ  d k s@ t ‚ | j | d ƒ d  S(   Ni   i   t   BPs   BP.i    (   Rˆ   s   BP.(   Ru   R   R
   t   uppert   size(   R   R9   t   textt   length_parts(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyR€   r  s    c         C   sq  |  j  } d | } i	 d d 6d d 6d d 6d d	 6d
 d 6d d 6d d 6d d 6d d 6} t d  | ƒ } t | ƒ } yÚxÓt rKy | j ƒ  } Wn t k
 r£ Pn X| s® Pn  | |  j ƒ  } | | j ƒ  }	 | d k rÝ qy | d k r)|	 d d k r|	 d d k r|	 d d !}	 n  | j |	 ƒ qy | d k rt|	 j	 d ƒ d k sPt
 ‚ | j d |	 j d d ƒ d ƒ qy | d  k rß|	 j d! d ƒ \ }
 } | j d" ƒ r´| d  } n  | j ƒ  } |
 d# k rH| j | ƒ qHqy | d k rþ| j |	 g ƒ qy | d$ k rqy | | k r3t | | | ƒ |	 ƒ qy |  j ry d% | GHqy qy WWn t k
 rlt d& ƒ ‚ n Xd  S('   NR<   R„   t   ACt   versionRƒ   t
   definitiont   DEt   authorst   RAt   titlet   RTt   journalt   RLt   organismt   OSt   taxonomyt   OCt   commentt   CCRq   t   RNi    t   [iÿÿÿÿt   ]i   t   RPt   -s   (bases s    to t   )t   RXRx   t   .t   PUBMEDt   DRs   Ignoring EMBL header line:
%ss   Problem with header(   R	   R,   R   R+   R   R-   R/   R    t   reference_numRy   R
   t   reference_basesRA   Ru   t   endswitht	   pubmed_idR›   t   getattrR   R   (   R   R9   R1   t   EMBL_INDENTt   EMBL_SPACERt   consumer_dictt	   line_iterR   t	   line_typeRH   R6   R7   (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyR;   x  sd    	

	   $ 		c         C   s   d  S(   N(    (   R   R9   R1   (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyRG   Ñ  s    (   Rl   Rm   Rn   R   R	   R   R   R   R   R   R8   R:   R{   Rz   R€   R;   RG   (    (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyRo   ú  s   			"			)		Yt   GenBankScannerc           B   sr   e  Z d  Z d Z d Z d d g Z g  Z d Z d e Z d d d	 d
 g Z	 d „  Z
 d „  Z d „  Z d „  Z RS(   s5   For extracting chunks of information in GenBank filess   LOCUS       i   s(   FEATURES             Location/Qualifierst   FEATURESi   R<   t   CONTIGt   ORIGINs
   BASE COUNTt   WGSc         C   s  |  j  |  j  j ƒ  |  j k s2 t d |  j  ƒ ‚ g  } x· |  j  |  j  j ƒ  |  j k sŠ |  j  |  j  d |  j k sŠ d |  j  d  k rñ |  j  j t j ƒ |  _  | j |  j  ƒ |  j j	 ƒ  |  _  |  j  sÜ t
 d ƒ ‚ n  |  j  j ƒ  |  _  q; W|  j  |  j  j ƒ  |  j k s$t d |  j  ƒ ‚ g  } |  j  } xÍ t r| sQt
 d ƒ ‚ n  | j ƒ  } | srt
 d ƒ ‚ n  | d k r‚Pn  | j d	 ƒ d
 k r›Pn  t | ƒ d k rÓ| d d !d k rÓt
 d | ƒ ‚ n  | j | d j d d ƒ ƒ |  j j	 ƒ  } q6W| |  _  | d j | ƒ f S(   sG   returns a tuple containing a list of any misc strings, and the sequences   Eh? '%s'R<   Rµ   i   s   Premature end of files&   Premature end of file in sequence datas   Blank line in sequence datas   //R³   i    i	   i
   s   Sequence line mal-formed, '%s'R   (   R   R	   R   R   R
   t   ost   linesepR   R   R   R   R   R.   R   RA   R0   (   R   RM   Rv   R   (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyR8   à  s@    ""	"		%	c         C   s<  |  j  } d | } | d | !d k s6 t d | ƒ ‚ | d d !dH k ru| d d !dI k sl t d
 | ƒ ‚ | d d !d k s t d | ƒ ‚ | d d !j ƒ  dJ k s¸ t d | ƒ ‚ | d d !d k sÛ t d | ƒ ‚ | d d !d k sþ t d | ƒ ‚ | d d !d k s!t d | ƒ ‚ | d d !d k sDt d | ƒ ‚ | | d !} x+ | j d  ƒ d! k r~| j d  d ƒ } qTW| j d ƒ } t | ƒ d" k s°t d# | ƒ ‚ t | ƒ d$ k sÒt d% | ƒ ‚ | j | d ƒ | j | d$ ƒ | d d !j ƒ  d k r0| d d !d k r0| j	 d& ƒ n | j	 | d d !j ƒ  ƒ | j
 | d d !ƒ | j | d d' !ƒ nÃ| d( d) !dK k rU| d( d) !dL k s«t d
 | ƒ ‚ | d) d* !dM k sÎt d/ | ƒ ‚ | d* d0 !j ƒ  d k s;| d* d0 !j ƒ  j d1 ƒ d! k s;| d* d0 !j ƒ  j d2 ƒ d! k s;t d3 | ƒ ‚ | d0 d !d k s^t d4 | ƒ ‚ | d d5 !j ƒ  dN k s‡t d | ƒ ‚ | d5 d !d k sªt d6 | ƒ ‚ | d7 d !d k sÍt d8 | ƒ ‚ | d9 d: !d k sðt d; | ƒ ‚ | d< d= !d k st d> | ƒ ‚ | | d( !} x+ | j d  ƒ d! k rM| j d  d ƒ } q#W| j d ƒ } t | ƒ d" k st d# | ƒ ‚ t | ƒ d$ k s¡t d% | ƒ ‚ | j | d ƒ | j | d$ ƒ | d) d0 !j ƒ  d k r| d( d) !d k r| j	 d? | d0 d5 !j ƒ  ƒ n | j	 | d) d5 !j ƒ  ƒ | j
 | d d7 !ƒ | j | d d@ !ƒ nã | | j ƒ  j d ƒ d k r¸| | j ƒ  d k r¤| j | | j ƒ  ƒ q8t j dA | IJn€ t | j ƒ  ƒ dB k r(| j ƒ  dC dO k r(| j | j ƒ  d$ ƒ | j | j ƒ  d" ƒ t j dF | IJn t dG | ƒ ‚ d  S(P   NR<   i    s   LOCUS       s%   LOCUS line does not start correctly:
i   i!   s    bp s    aa s    rc s=   LOCUS line does not contain size units at expected position:
i)   i*   s2   LOCUS line does not contain space at position 42:
i3   R   t   lineart   circularsA   LOCUS line does not contain valid entry (linear, circular, ...):
i4   s2   LOCUS line does not contain space at position 52:
i7   i>   s          s;   LOCUS line does not contain spaces from position 56 to 62:
i@   iA   R¡   s6   LOCUS line does not contain - at position 65 in date:
iD   iE   s6   LOCUS line does not contain - at position 69 in date:
s     iÿÿÿÿi   s4   Cannot parse the name and length in the LOCUS line:
i   s+   Name and length collide in the LOCUS line:
t   PROTEINiI   i(   i,   i/   s      s   ss-s   ds-s   ms-sC   LOCUS line does not have valid strand type (Single stranded, ...):
i6   t   DNAt   RNAsA   LOCUS line does not contain valid sequence type (DNA, RNA, ...):
s2   LOCUS line does not contain space at position 55:
i?   s2   LOCUS line does not contain space at position 64:
iC   s2   LOCUS line does not contain space at position 68:
iF   iG   s6   LOCUS line does not contain - at position 71 in date:
iJ   iK   s6   LOCUS line does not contain - at position 75 in date:
s   PROTEIN iO   s5   Warning: Minimal LOCUS line found - is this correct?
i   i   t   aat   bps7   Warning: Malformed LOCUS line found - is this correct?
s)   Did not recognise the LOCUS line layout:
(   s    bp s    aa s    rc (   s    bp s    aa s    rc (   R   R¸   R¹   (   s    bp s    aa s    rc (   s    bp s    aa s    rc (   s      s   ss-s   ds-s   ms-(   R   R¸   R¹   (   R½   R¾   (   R	   R
   R    R.   RA   Ru   R   R}   RŠ   R~   R   t   dateRy   t   syst   stderrR   (   R   R9   R   t   GENBANK_INDENTt   GENBANK_SPACERt   name_and_length_strt   name_and_length(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyR:     sž    	
,"%,!.c         C   s˜  |  j  } d | } i d d 6d d 6d d 6d d	 6d
 d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d  d! 6d" d# 6} t d  | ƒ } | j d$ ƒ t | ƒ } y¼| j ƒ  } x©t rr| sÚ Pn  | |  j ƒ  } | | j ƒ  }	 | d% k r×x+ |	 j d& ƒ d' k r3|	 j	 d& d ƒ }	 q	W|	 j d( ƒ d' k rY| j
 |	 ƒ no |  j r”d) |	 j d( ƒ d* d+ |	 j d( ƒ d, d- GHn  | j
 |	 j d( ƒ d* ƒ | j |	 j d( ƒ d, ƒ | j ƒ  } qÊ | d. k r^|  j d, k rd/ |	 d- GHn  |	 j ƒ  }	 x[ t rk| j ƒ  } | |  | k rg|	 d | | 7}	 |  j d, k rhd0 |	 d- GHqhqPqWx+ |	 j d& ƒ d' k r™|	 j	 d& d ƒ }	 qoW|	 j d ƒ d' k rÞ|  j d1 k rÎd2 |	 d3 GHn  | j |	 ƒ qo|  j d1 k r#d2 |	 |	 j d ƒ  d4 |	 |	 j d ƒ d, d3 GHn  | j |	 |	 j d ƒ  ƒ | j |	 |	 j d ƒ d, ƒ qÊ | d5 k r7|	 }
 d$ } xl t rä| j ƒ  } | d* | !| k rà| s°d6 | k rÅ| d | | 7} qá|
 d | | j ƒ  7}
 qyPqyW| j |
 ƒ | j ƒ  d$ k r|  j d, k rd7 GHn  | j | j ƒ  ƒ ~
 ~ qÊ | d8 k ræ|  j d, k rZd9 GHn  g  } | j |	 ƒ xc t rÒ| j ƒ  } | d* | !| k rÎ| | }	 | j |	 ƒ |  j d1 k rÏd: |	 d- GHqÏqpPqpW| j | ƒ ~ qÊ | | k rNxz t rJ| j ƒ  } | d* | !| k r/|	 d | | 7}	 qõt | | | ƒ |	 ƒ PqõWqÊ |  j rcd; | GHn  | j ƒ  } qÊ WWn t k
 r“t d< ƒ ‚ n Xd  S(=   NR<   R   t
   DEFINITIONR„   t	   ACCESSIONt   nidt   NIDt   pidt   PIDt	   db_sourcet   DBSOURCEt   keywordst   KEYWORDSt   segmentt   SEGMENTt   sourcet   SOURCER‘   t   AUTHORSt   consrtmt   CONSRTMt   projectt   PROJECTt   dblinkt   DBLINKR“   t   TITLER•   t   JOURNALt
   medline_idt   MEDLINERª   R¥   t   remarkt   REMARKR   t   VERSIONs     iÿÿÿÿs    GI:s	   Version [i    s   ], gi [i   RŸ   t	   REFERENCEs   Found reference [s   Extended reference text [i   s   Reference number "s   "s   ", "t   ORGANISMRx   s!   Taxonomy line(s) missing or blankt   COMMENTs   Found comments   Comment continuation [s   Ignoring GenBank header line:
s   Problem in header(   R	   R,   R   R   R+   R-   R   R    R.   RA   RŽ   R   Ru   t   giR§   R¨   R—   R™   R›   R«   R/   R   (   R   R9   R1   RÂ   RÃ   R®   R¯   R   R°   RH   t   organism_datat   lineage_datat   comment_list(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyR;   ´  sÎ    	

	 	 2 	   6!		!	 	
 		c         C   s  |  j  } d | } | j d ƒ t | ƒ } y¾x³| D]«} | j d ƒ d k r | d j ƒ  } | r |  j r| d | GHn  | j | ƒ q n  | j d ƒ d k râ | d j ƒ  } | râ |  j rÏ d	 | GHn  | j | ƒ qâ n  | j d
 ƒ d k r| d j ƒ  } | j | ƒ n  | j d ƒ d k rL| d j ƒ  } | j	 | ƒ n  | j d ƒ d k r6 | d j ƒ  } | } xW t
 rÐ| j ƒ  } | s–Pqz| |  | k r½| | | j ƒ  7} qzt d | ƒ ‚ qzW| j | ƒ q6 q6 Wd  SWn t k
 r	t d ƒ ‚ n Xd  S(   NR<   R   s
   BASE COUNTi    i
   s   base_count = R´   i   s   origin_name = s   WGS i   t
   WGS_SCAFLDR³   s(   Expected CONTIG continuation line, got:
s%   Problem in misc lines before sequence(   R	   R   R+   R.   R    R   t
   base_countt   origin_namet   wgst   add_wgs_scafldR   R-   R   R   t   contig_locationR/   (   R   R9   R1   RÂ   RÃ   R¯   R   Rî   (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyRG   N  sL    	
	 	 	(   Rl   Rm   Rn   R   R	   R   R   R   R   R   R8   R:   R;   RG   (    (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyR±   Õ  s   
	+	©	št   __main__(   t   StringIOs,)  LOCUS       SCU49845     5028 bp    DNA             PLN       21-JUN-1999
DEFINITION  Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p
            (AXL2) and Rev7p (REV7) genes, complete cds.
ACCESSION   U49845
VERSION     U49845.1  GI:1293613
KEYWORDS    .
SOURCE      Saccharomyces cerevisiae (baker's yeast)
  ORGANISM  Saccharomyces cerevisiae
            Eukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;
            Saccharomycetales; Saccharomycetaceae; Saccharomyces.
REFERENCE   1  (bases 1 to 5028)
  AUTHORS   Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.
  TITLE     Cloning and sequence of REV7, a gene whose function is required for
            DNA damage-induced mutagenesis in Saccharomyces cerevisiae
  JOURNAL   Yeast 10 (11), 1503-1509 (1994)
  PUBMED    7871890
REFERENCE   2  (bases 1 to 5028)
  AUTHORS   Roemer,T., Madden,K., Chang,J. and Snyder,M.
  TITLE     Selection of axial growth sites in yeast requires Axl2p, a novel
            plasma membrane glycoprotein
  JOURNAL   Genes Dev. 10 (7), 777-793 (1996)
  PUBMED    8846915
REFERENCE   3  (bases 1 to 5028)
  AUTHORS   Roemer,T.
  TITLE     Direct Submission
  JOURNAL   Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, New
            Haven, CT, USA
FEATURES             Location/Qualifiers
     source          1..5028
                     /organism="Saccharomyces cerevisiae"
                     /db_xref="taxon:4932"
                     /chromosome="IX"
                     /map="9"
     CDS             <1..206
                     /codon_start=3
                     /product="TCP1-beta"
                     /protein_id="AAA98665.1"
                     /db_xref="GI:1293614"
                     /translation="SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEA
                     AEVLLRVDNIIRARPRTANRQHM"
     gene            687..3158
                     /gene="AXL2"
     CDS             687..3158
                     /gene="AXL2"
                     /note="plasma membrane glycoprotein"
                     /codon_start=1
                     /function="required for axial budding pattern of S.
                     cerevisiae"
                     /product="Axl2p"
                     /protein_id="AAA98666.1"
                     /db_xref="GI:1293615"
                     /translation="MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESF
                     TFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFN
                     VILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNE
                     VFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPE
                     TSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYV
                     YLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYG
                     DVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQ
                     DHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSA
                     NATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIA
                     CGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLN
                     NPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQ
                     SQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDS
                     YGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTK
                     HRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRL
                     VDFSNKSNVNVGQVKDIHGRIPEML"
     gene            complement(3300..4037)
                     /gene="REV7"
     CDS             complement(3300..4037)
                     /gene="REV7"
                     /codon_start=1
                     /product="Rev7p"
                     /protein_id="AAA98667.1"
                     /db_xref="GI:1293616"
                     /translation="MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQ
                     FVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVD
                     KDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNR
                     RVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEK
                     LISGDDKILNGVYSQYEEGESIFGSLF"
ORIGIN
        1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg
       61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct
      121 ctgcatctga agccgctgaa gttctactaa gggtggataa catcatccgt gcaagaccaa
      181 gaaccgccaa tagacaacat atgtaacata tttaggatat acctcgaaaa taataaaccg
      241 ccacactgtc attattataa ttagaaacag aacgcaaaaa ttatccacta tataattcaa
      301 agacgcgaaa aaaaaagaac aacgcgtcat agaacttttg gcaattcgcg tcacaaataa
      361 attttggcaa cttatgtttc ctcttcgagc agtactcgag ccctgtctca agaatgtaat
      421 aatacccatc gtaggtatgg ttaaagatag catctccaca acctcaaagc tccttgccga
      481 gagtcgccct cctttgtcga gtaattttca cttttcatat gagaacttat tttcttattc
      541 tttactctca catcctgtag tgattgacac tgcaacagcc accatcacta gaagaacaga
      601 acaattactt aatagaaaaa ttatatcttc ctcgaaacga tttcctgctt ccaacatcta
      661 cgtatatcaa gaagcattca cttaccatga cacagcttca gatttcatta ttgctgacag
      721 ctactatatc actactccat ctagtagtgg ccacgcccta tgaggcatat cctatcggaa
      781 aacaataccc cccagtggca agagtcaatg aatcgtttac atttcaaatt tccaatgata
      841 cctataaatc gtctgtagac aagacagctc aaataacata caattgcttc gacttaccga
      901 gctggctttc gtttgactct agttctagaa cgttctcagg tgaaccttct tctgacttac
      961 tatctgatgc gaacaccacg ttgtatttca atgtaatact cgagggtacg gactctgccg
     1021 acagcacgtc tttgaacaat acataccaat ttgttgttac aaaccgtcca tccatctcgc
     1081 tatcgtcaga tttcaatcta ttggcgttgt taaaaaacta tggttatact aacggcaaaa
     1141 acgctctgaa actagatcct aatgaagtct tcaacgtgac ttttgaccgt tcaatgttca
     1201 ctaacgaaga atccattgtg tcgtattacg gacgttctca gttgtataat gcgccgttac
     1261 ccaattggct gttcttcgat tctggcgagt tgaagtttac tgggacggca ccggtgataa
     1321 actcggcgat tgctccagaa acaagctaca gttttgtcat catcgctaca gacattgaag
     1381 gattttctgc cgttgaggta gaattcgaat tagtcatcgg ggctcaccag ttaactacct
     1441 ctattcaaaa tagtttgata atcaacgtta ctgacacagg taacgtttca tatgacttac
     1501 ctctaaacta tgtttatctc gatgacgatc ctatttcttc tgataaattg ggttctataa
     1561 acttattgga tgctccagac tgggtggcat tagataatgc taccatttcc gggtctgtcc
     1621 cagatgaatt actcggtaag aactccaatc ctgccaattt ttctgtgtcc atttatgata
     1681 cttatggtga tgtgatttat ttcaacttcg aagttgtctc cacaacggat ttgtttgcca
     1741 ttagttctct tcccaatatt aacgctacaa ggggtgaatg gttctcctac tattttttgc
     1801 cttctcagtt tacagactac gtgaatacaa acgtttcatt agagtttact aattcaagcc
     1861 aagaccatga ctgggtgaaa ttccaatcat ctaatttaac attagctgga gaagtgccca
     1921 agaatttcga caagctttca ttaggtttga aagcgaacca aggttcacaa tctcaagagc
     1981 tatattttaa catcattggc atggattcaa agataactca ctcaaaccac agtgcgaatg
     2041 caacgtccac aagaagttct caccactcca cctcaacaag ttcttacaca tcttctactt
     2101 acactgcaaa aatttcttct acctccgctg ctgctacttc ttctgctcca gcagcgctgc
     2161 cagcagccaa taaaacttca tctcacaata aaaaagcagt agcaattgcg tgcggtgttg
     2221 ctatcccatt aggcgttatc ctagtagctc tcatttgctt cctaatattc tggagacgca
     2281 gaagggaaaa tccagacgat gaaaacttac cgcatgctat tagtggacct gatttgaata
     2341 atcctgcaaa taaaccaaat caagaaaacg ctacaccttt gaacaacccc tttgatgatg
     2401 atgcttcctc gtacgatgat acttcaatag caagaagatt ggctgctttg aacactttga
     2461 aattggataa ccactctgcc actgaatctg atatttccag cgtggatgaa aagagagatt
     2521 ctctatcagg tatgaataca tacaatgatc agttccaatc ccaaagtaaa gaagaattat
     2581 tagcaaaacc cccagtacag cctccagaga gcccgttctt tgacccacag aataggtctt
     2641 cttctgtgta tatggatagt gaaccagcag taaataaatc ctggcgatat actggcaacc
     2701 tgtcaccagt ctctgatatt gtcagagaca gttacggatc acaaaaaact gttgatacag
     2761 aaaaactttt cgatttagaa gcaccagaga aggaaaaacg tacgtcaagg gatgtcacta
     2821 tgtcttcact ggacccttgg aacagcaata ttagcccttc tcccgtaaga aaatcagtaa
     2881 caccatcacc atataacgta acgaagcatc gtaaccgcca cttacaaaat attcaagact
     2941 ctcaaagcgg taaaaacgga atcactccca caacaatgtc aacttcatct tctgacgatt
     3001 ttgttccggt taaagatggt gaaaattttt gctgggtcca tagcatggaa ccagacagaa
     3061 gaccaagtaa gaaaaggtta gtagattttt caaataagag taatgtcaat gttggtcaag
     3121 ttaaggacat tcacggacgc atcccagaaa tgctgtgatt atacgcaacg atattttgct
     3181 taattttatt ttcctgtttt attttttatt agtggtttac agatacccta tattttattt
     3241 agtttttata cttagagaca tttaatttta attccattct tcaaatttca tttttgcact
     3301 taaaacaaag atccaaaaat gctctcgccc tcttcatatt gagaatacac tccattcaaa
     3361 attttgtcgt caccgctgat taatttttca ctaaactgat gaataatcaa aggccccacg
     3421 tcagaaccga ctaaagaagt gagttttatt ttaggaggtt gaaaaccatt attgtctggt
     3481 aaattttcat cttcttgaca tttaacccag tttgaatccc tttcaatttc tgctttttcc
     3541 tccaaactat cgaccctcct gtttctgtcc aacttatgtc ctagttccaa ttcgatcgca
     3601 ttaataactg cttcaaatgt tattgtgtca tcgttgactt taggtaattt ctccaaatgc
     3661 ataatcaaac tatttaagga agatcggaat tcgtcgaaca cttcagtttc cgtaatgatc
     3721 tgatcgtctt tatccacatg ttgtaattca ctaaaatcta aaacgtattt ttcaatgcat
     3781 aaatcgttct ttttattaat aatgcagatg gaaaatctgt aaacgtgcgt taatttagaa
     3841 agaacatcca gtataagttc ttctatatag tcaattaaag caggatgcct attaatggga
     3901 acgaactgcg gcaagttgaa tgactggtaa gtagtgtagt cgaatgactg aggtgggtat
     3961 acatttctat aaaataaaat caaattaatg tagcatttta agtataccct cagccacttc
     4021 tctacccatc tattcataaa gctgacgcaa cgattactat tttttttttc ttcttggatc
     4081 tcagtcgtcg caaaaacgta taccttcttt ttccgacctt ttttttagct ttctggaaaa
     4141 gtttatatta gttaaacagg gtctagtctt agtgtgaaag ctagtggttt cgattgactg
     4201 atattaagaa agtggaaatt aaattagtag tgtagacgta tatgcatatg tatttctcgc
     4261 ctgtttatgt ttctacgtac ttttgattta tagcaagggg aaaagaaata catactattt
     4321 tttggtaaag gtgaaagcat aatgtaaaag ctagaataaa atggacgaaa taaagagagg
     4381 cttagttcat cttttttcca aaaagcaccc aatgataata actaaaatga aaaggatttg
     4441 ccatctgtca gcaacatcag ttgtgtgagc aataataaaa tcatcacctc cgttgccttt
     4501 agcgcgtttg tcgtttgtat cttccgtaat tttagtctta tcaatgggaa tcataaattt
     4561 tccaatgaat tagcaatttc gtccaattct ttttgagctt cttcatattt gctttggaat
     4621 tcttcgcact tcttttccca ttcatctctt tcttcttcca aagcaacgat ccttctaccc
     4681 atttgctcag agttcaaatc ggcctctttc agtttatcca ttgcttcctt cagtttggct
     4741 tcactgtctt ctagctgttg ttctagatcc tggtttttct tggtgtagtt ctcattatta
     4801 gatctcaagt tattggagtc ttcagccaat tgctttgtat cagacaattg actctctaac
     4861 ttctccactt cactgtcgag ttgctcgttt ttagcggaca aagatttaat ctcgttttct
     4921 ttttcagtgt tagattgctc taattctttg agctgttctc tcagctcctc atatttttct
     4981 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgatc
//sŸ  LOCUS       AAD51968                 143 aa            linear   BCT 21-AUG-2001
DEFINITION  transcriptional regulator RovA [Yersinia enterocolitica].
ACCESSION   AAD51968
VERSION     AAD51968.1  GI:5805369
DBSOURCE    locus AF171097 accession AF171097.1
KEYWORDS    .
SOURCE      Yersinia enterocolitica
  ORGANISM  Yersinia enterocolitica
            Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacteriales;
            Enterobacteriaceae; Yersinia.
REFERENCE   1  (residues 1 to 143)
  AUTHORS   Revell,P.A. and Miller,V.L.
  TITLE     A chromosomally encoded regulator is required for expression of the
            Yersinia enterocolitica inv gene and for virulence
  JOURNAL   Mol. Microbiol. 35 (3), 677-685 (2000)
  MEDLINE   20138369
   PUBMED   10672189
REFERENCE   2  (residues 1 to 143)
  AUTHORS   Revell,P.A. and Miller,V.L.
  TITLE     Direct Submission
  JOURNAL   Submitted (22-JUL-1999) Molecular Microbiology, Washington
            University School of Medicine, Campus Box 8230, 660 South Euclid,
            St. Louis, MO 63110, USA
COMMENT     Method: conceptual translation.
FEATURES             Location/Qualifiers
     source          1..143
                     /organism="Yersinia enterocolitica"
                     /mol_type="unassigned DNA"
                     /strain="JB580v"
                     /serotype="O:8"
                     /db_xref="taxon:630"
     Protein         1..143
                     /product="transcriptional regulator RovA"
                     /name="regulates inv expression"
     CDS             1..143
                     /gene="rovA"
                     /coded_by="AF171097.1:380..811"
                     /note="regulator of virulence"
                     /transl_table=11
ORIGIN      
        1 mestlgsdla rlvrvwrali dhrlkplelt qthwvtlhni nrlppeqsqi qlakaigieq
       61 pslvrtldql eekglitrht candrrakri klteqsspii eqvdgvicst rkeilggisp
      121 deiellsgli dklerniiql qsk
//
sU  ID   X56734; SV 1; linear; mRNA; STD; PLN; 1859 BP.
XX
AC   X56734; S46826;
XX
DT   12-SEP-1991 (Rel. 29, Created)
DT   25-NOV-2005 (Rel. 85, Last updated, Version 11)
XX
DE   Trifolium repens mRNA for non-cyanogenic beta-glucosidase
XX
KW   beta-glucosidase.
XX
OS   Trifolium repens (white clover)
OC   Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
OC   Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons; rosids;
OC   eurosids I; Fabales; Fabaceae; Papilionoideae; Trifolieae; Trifolium.
XX
RN   [5]
RP   1-1859
RX   PUBMED; 1907511.
RA   Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
RT   "Nucleotide and derived amino acid sequence of the cyanogenic
RT   beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
RL   Plant Mol. Biol. 17(2):209-219(1991).
XX
RN   [6]
RP   1-1859
RA   Hughes M.A.;
RT   ;
RL   Submitted (19-NOV-1990) to the EMBL/GenBank/DDBJ databases.
RL   Hughes M.A., University of Newcastle Upon Tyne, Medical School, Newcastle
RL   Upon Tyne, NE2 4HH, UK
XX
FH   Key             Location/Qualifiers
FH
FT   source          1..1859
FT                   /organism="Trifolium repens"
FT                   /mol_type="mRNA"
FT                   /clone_lib="lambda gt10"
FT                   /clone="TRE361"
FT                   /tissue_type="leaves"
FT                   /db_xref="taxon:3899"
FT   CDS             14..1495
FT                   /product="beta-glucosidase"
FT                   /EC_number="3.2.1.21"
FT                   /note="non-cyanogenic"
FT                   /db_xref="GOA:P26204"
FT                   /db_xref="InterPro:IPR001360"
FT                   /db_xref="InterPro:IPR013781"
FT                   /db_xref="UniProtKB/Swiss-Prot:P26204"
FT                   /protein_id="CAA40058.1"
FT                   /translation="MDFIVAIFALFVISSFTITSTNAVEASTLLDIGNLSRSSFPRGFI
FT                   FGAGSSAYQFEGAVNEGGRGPSIWDTFTHKYPEKIRDGSNADITVDQYHRYKEDVGIMK
FT                   DQNMDSYRFSISWPRILPKGKLSGGINHEGIKYYNNLINELLANGIQPFVTLFHWDLPQ
FT                   VLEDEYGGFLNSGVINDFRDYTDLCFKEFGDRVRYWSTLNEPWVFSNSGYALGTNAPGR
FT                   CSASNVAKPGDSGTGPYIVTHNQILAHAEAVHVYKTKYQAYQKGKIGITLVSNWLMPLD
FT                   DNSIPDIKAAERSLDFQFGLFMEQLTTGDYSKSMRRIVKNRLPKFSKFESSLVNGSFDF
FT                   IGINYYSSSYISNAPSHGNAKPSYSTNPMTNISFEKHGIPLGPRAASIWIYVYPYMFIQ
FT                   EDFEIFCYILKINITILQFSITENGMNEFNDATLPVEEALLNTYRIDYYYRHLYYIRSA
FT                   IRAGSNVKGFYAWSFLDCNEWFAGFTVRFGLNFVD"
FT   mRNA            1..1859
FT                   /experiment="experimental evidence, no additional details
FT                   recorded"
XX
SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
     aaacaaacca aatatggatt ttattgtagc catatttgct ctgtttgtta ttagctcatt        60
     cacaattact tccacaaatg cagttgaagc ttctactctt cttgacatag gtaacctgag       120
     tcggagcagt tttcctcgtg gcttcatctt tggtgctgga tcttcagcat accaatttga       180
     aggtgcagta aacgaaggcg gtagaggacc aagtatttgg gataccttca cccataaata       240
     tccagaaaaa ataagggatg gaagcaatgc agacatcacg gttgaccaat atcaccgcta       300
     caaggaagat gttgggatta tgaaggatca aaatatggat tcgtatagat tctcaatctc       360
     ttggccaaga atactcccaa agggaaagtt gagcggaggc ataaatcacg aaggaatcaa       420
     atattacaac aaccttatca acgaactatt ggctaacggt atacaaccat ttgtaactct       480
     ttttcattgg gatcttcccc aagtcttaga agatgagtat ggtggtttct taaactccgg       540
     tgtaataaat gattttcgag actatacgga tctttgcttc aaggaatttg gagatagagt       600
     gaggtattgg agtactctaa atgagccatg ggtgtttagc aattctggat atgcactagg       660
     aacaaatgca ccaggtcgat gttcggcctc caacgtggcc aagcctggtg attctggaac       720
     aggaccttat atagttacac acaatcaaat tcttgctcat gcagaagctg tacatgtgta       780
     taagactaaa taccaggcat atcaaaaggg aaagataggc ataacgttgg tatctaactg       840
     gttaatgcca cttgatgata atagcatacc agatataaag gctgccgaga gatcacttga       900
     cttccaattt ggattgttta tggaacaatt aacaacagga gattattcta agagcatgcg       960
     gcgtatagtt aaaaaccgat tacctaagtt ctcaaaattc gaatcaagcc tagtgaatgg      1020
     ttcatttgat tttattggta taaactatta ctcttctagt tatattagca atgccccttc      1080
     acatggcaat gccaaaccca gttactcaac aaatcctatg accaatattt catttgaaaa      1140
     acatgggata cccttaggtc caagggctgc ttcaatttgg atatatgttt atccatatat      1200
     gtttatccaa gaggacttcg agatcttttg ttacatatta aaaataaata taacaatcct      1260
     gcaattttca atcactgaaa atggtatgaa tgaattcaac gatgcaacac ttccagtaga      1320
     agaagctctt ttgaatactt acagaattga ttactattac cgtcacttat actacattcg      1380
     ttctgcaatc agggctggct caaatgtgaa gggtttttac gcatggtcat ttttggactg      1440
     taatgaatgg tttgcaggct ttactgttcg ttttggatta aactttgtag attagaaaga      1500
     tggattaaaa aggtacccta agctttctgc ccaatggtac aagaactttc tcaaaagaaa      1560
     ctagctagta ttattaaaag aactttgtag tagattacag tacatcgttt gaagttgagt      1620
     tggtgcacct aattaaataa aagaggttac tcttaacata tttttaggcc attcgttgtg      1680
     aagttgttag gctgttattt ctattatact atgttgtagt aataagtgca ttgttgtacc      1740
     agaagctatg atcataacta taggttgatc cttcatgtat cagtttgatg ttgagaatac      1800
     tttgaattaa aagtcttttt ttattttttt aaaaaaaaaa aaaaaaaaaa aaaaaaaaa       1859
//
s   GenBank CDS Iterations   =====================Rh   t   geneR]   R^   s   
s   GenBank Iterations   =================RL   s   EMBL CDS Iterations   ==================s   EMBL Iterations   ==============(   RÀ   R¶   t   Bio.SeqR    t   Bio.SeqRecordR   t   Bio.AlphabetR   R   R   Ro   R±   Rl   Rð   t   gbk_examplet   gbk_example2t   embl_examplet   gRk   RZ   R[   RI   RW   RX   RY   R`   R   t   e(    (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/GenBank/Scanner.pyt   <module>   sv   ÿ ÚÛÿ £§1b							"	"	"	"			"