ó
ö.ÓIc           @   s4  d  Z  d d l m Z d d l m Z m Z d d l m Z d d l m	 Z	 d „  Z
 d „  Z e j d „ Z e j d	 „ Z d
 e	 f d „  ƒ  YZ e d k r0d GHd d l Z d d l m Z d „  Z xx e j d ƒ D]g Z e j d ƒ re j d ƒ rqØ n  e GHe d e ƒ Z e e
 e ƒ ƒ Z e j ƒ  e e ƒ qØ Wxg e j d ƒ D]V Z e j d ƒ snqSn  e GHe d e ƒ Z e e e ƒ ƒ Z e j ƒ  e e ƒ qSWd d l m Z xp e j d ƒ D]\ Z e j d ƒ sèqÍn  e GHe d e ƒ Z e e j e d ƒ ƒ Z e j ƒ  e e ƒ qÍWn  d S(   sí  Bio.SeqIO support for the "genbank" and "embl" file formats.

You are expected to use this module via the Bio.SeqIO functions.
Note that internally this module calls Bio.GenBank to do the actual
parsing of both GenBank and EMBL files.

See also:

International Nucleotide Sequence Database Collaboration
http://www.insdc.org/
 
GenBank
http://www.ncbi.nlm.nih.gov/Genbank/

EMBL Nucleotide Sequence Database
http://www.ebi.ac.uk/embl/

DDBJ (DNA Data Bank of Japan)
http://www.ddbj.nig.ac.jp/
iÿÿÿÿ(   t
   UnknownSeq(   t   GenBankScannert   EmblScanner(   t   Alphabet(   t   SequentialSequenceWriterc         C   s   t  d d ƒ j |  ƒ S(   s  Breaks up a Genbank file into SeqRecord objects.

    Every section from the LOCUS line to the terminating // becomes
    a single SeqRecord with associated annotation and features.
    
    Note that for genomes or chromosomes, there is typically only
    one record.t   debugi    (   R   t   parse_records(   t   handle(    (    s„   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqIO/InsdcIO.pyt   GenBankIterator'   s    	c         C   s   t  d d ƒ j |  ƒ S(   s
  Breaks up an EMBL file into SeqRecord objects.

    Every section from the LOCUS line to the terminating // becomes
    a single SeqRecord with associated annotation and features.
    
    Note that for genomes or chromosomes, there is typically only
    one record.R   i    (   R   R   (   R   (    (    s„   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqIO/InsdcIO.pyt   EmblIterator2   s    	c         C   s   t  d d ƒ j |  | ƒ S(   s  Breaks up a Genbank file into SeqRecord objects for each CDS feature.

    Every section from the LOCUS line to the terminating // can contain
    many CDS features.  These are returned as with the stated amino acid
    translation sequence (if given).
    R   i    (   R   t   parse_cds_features(   R   t   alphabet(    (    s„   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqIO/InsdcIO.pyt   GenBankCdsFeatureIterator=   s    c         C   s   t  d d ƒ j |  | ƒ S(   sþ   Breaks up a EMBL file into SeqRecord objects for each CDS feature.

    Every section from the LOCUS line to the terminating // can contain
    many CDS features.  These are returned as with the stated amino acid
    translation sequence (if given).
    R   i    (   R   R
   (   R   R   (    (    s„   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqIO/InsdcIO.pyt   EmblCdsFeatureIteratorG   s    t   GenBankWriterc           B   sY   e  Z d  Z d Z d „  Z d „  Z d „  Z d e d „ Z d „  Z	 d „  Z
 d	 „  Z RS(
   i   iP   c         C   s‰   t  | ƒ |  j k  s t ‚ t  | ƒ |  j |  j k  sS t d t | ƒ | f ƒ ‚ |  j j d | j |  j ƒ | j d d ƒ f ƒ d S(   s0   Used in the the 'header' of each GenBank record.s"   Annotation %s too long for %s lines   %s%s
s   
t    N(	   t   lent   HEADER_WIDTHt   AssertionErrort	   MAX_WIDTHt   reprR   t   writet   ljustt   replace(   t   selft   tagt   text(    (    s„   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqIO/InsdcIO.pyt   _write_single_lineU   s
    c         C   sÂ  |  j  |  j } t | ƒ |  j k  s+ t ‚ | j ƒ  } t | ƒ | k  r] |  j | | ƒ d S| j ƒ  } t g  | D] } t | ƒ ^ qs ƒ | k  s  t d ƒ ‚ d } xQ | rù t | ƒ d t | d ƒ | k  rù | d | j d ƒ 7} | j ƒ  } q© Wt | ƒ | k  st ‚ |  j | | ƒ xŒ | r°d } xQ | r„t | ƒ d t | d ƒ | k  r„| d | j d ƒ 7} | j ƒ  } q4Wt | ƒ | k  st ‚ |  j d | ƒ q%W| s¾t ‚ d S(   s0   Used in the the 'header' of each GenBank record.Ns2   Your description cannot be broken into nice lines!t    i   i    R   (	   R   R   R   R   t   stripR   t   splitt   maxt   pop(   R   R   R   t   max_lent   wordst   w(    (    s„   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqIO/InsdcIO.pyt   _write_multi_line]   s.    .	-	-c         C   sa  | j  } | s | d k r( | j } n  | s; | d k rV |  j | d d t ƒ} n  t | ƒ d k r t d t | ƒ ƒ ‚ n  t | ƒ d k r¢ t d ƒ ‚ n  t j | j	 j
 ƒ } t | t j ƒ sØ t d	 ƒ ‚ nB t | t j ƒ ró d
 } n' t | t j ƒ rd } n t d ƒ ‚ t | t j ƒ r5d } nB t | t j ƒ rPd } n' t | t j ƒ rkd } n t d ƒ ‚ y | j d } Wn t k
 r¡d } n X| dS k r·d } n  t | ƒ d% k sÏt ‚ t | ƒ d& k sçt ‚ d' | j d ƒ t t | ƒ ƒ j d( ƒ | | j d) ƒ | f } t | ƒ dT k sKt t | ƒ ƒ ‚ | d, d- !j ƒ  | k stt d. | ƒ ‚ | d- d/ !d0 k st ‚ | d/ d1 !j ƒ  t t | ƒ ƒ k sÂt d2 | ƒ ‚ | d1 d3 !dU k såt d6 | ƒ ‚ | d3 d7 !dV k st d< | ƒ ‚ | d7 d= !j ƒ  d k su| d7 d= !j ƒ  j d ƒ d> k su| d7 d= !j ƒ  j d ƒ d> k sut d? | ƒ ‚ | d= d@ !d0 k s˜t dA | ƒ ‚ | d@ dB !j ƒ  dW k sÁt dE | ƒ ‚ | dB dF !d0 k sät dG | ƒ ‚ | dH dI !d0 k st dJ | ƒ ‚ | dK dL !dM k s*t dN | ƒ ‚ | dO dP !dM k sMt dQ | ƒ ‚ |  j j | ƒ dR S(X   s   Write the LOCUS line.s   <unknown name>s   <unknown id>t	   accessiont
   just_firsti   s   Locus identifier %s is too longIÿçvH   s   Sequence too long!s   Invalid alphabett   bpt   aas%   Need a Nucleotide or Protein alphabetR   t   DNAt   RNAs#   Need a DNA, RNA or Protein alphabett   data_file_divisiont   UNKt   PRIt   RODt   MAMt   VRTt   INVt   PLNt   BCTt   VRLt   PHGt   SYNt   UNAt   ESTt   PATt   STSt   GSSt   HTGt   HTCt   ENVi   i   s4   LOCUS       %s %s %s    %s           %s 01-JAN-1980
i   i   iO   i   i   i   s@   LOCUS line does not contain the locus at the expected position:
i   R   i(   sA   LOCUS line does not contain the length at the expected position:
i,   s    bp s    aa s=   LOCUS line does not contain size units at expected position:
i/   s      s   ss-s   ds-s   ms-sC   LOCUS line does not have valid strand type (Single stranded, ...):
i6   iÿÿÿÿsA   LOCUS line does not contain valid sequence type (DNA, RNA, ...):
i7   s2   LOCUS line does not contain space at position 55:
i?   t   lineart   circularsA   LOCUS line does not contain valid entry (linear, circular, ...):
i@   s2   LOCUS line does not contain space at position 64:
iC   iD   s2   LOCUS line does not contain space at position 68:
iF   iG   t   -s6   LOCUS line does not contain - at position 71 in date:
iJ   iK   s6   LOCUS line does not contain - at position 75 in date:
N(   R-   R.   R/   R0   R1   R2   R3   R4   R5   R6   R7   R8   R9   R:   R;   R<   R=   R>   iP   (   s    bp s    aa (   s      s   ss-s   ds-s   ms-(   R   R?   R@   (   t   namet   idt   _get_annotation_strt   TrueR   t
   ValueErrorR   R   t   _get_base_alphabett   seqR   t
   isinstancet	   TypeErrort   ProteinAlphabett   NucleotideAlphabett   DNAAlphabett   RNAAlphabett   annotationst   KeyErrorR   R   t   strt   rjustt   rstript   lstripR   t   findR   R   (   R   t   recordt   locust   at   unitst   mol_typet   divisiont   line(    (    s„   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqIO/InsdcIO.pyt   _write_the_first_liney   s†    						
 		
$("%t   .c         C   sr   y | j  | } Wn t k
 r% | SXt | t ƒ rd | sV t | ƒ d k sV t ‚ n  t | d ƒ St | ƒ Sd S(   sú   Get an annotation dictionary entry (as a string).

        Some entries are lists, in which case if just_first=True the first entry
        is returned.  If just_first=False (default) this verifies there is only
        one entry before returning it.i   i    N(   RO   RP   RI   t   listR   R   RQ   (   R   RV   t   keyt   defaultR&   t   answer(    (    s„   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqIO/InsdcIO.pyRD   Ö   s     c         C   sÚ   d } d } t  | j t ƒ r" d  S|  j | ƒ } t | ƒ } x– t d | | ƒ D]‚ } |  j j t | d ƒ j	 | ƒ ƒ xF t | t
 | | | ƒ d ƒ D]% } |  j j d | | | d !ƒ q™ W|  j j d ƒ qP Wd  S(   Ni<   i	   i    i   i
   s    %ss   
(   RI   RH   R    t   _get_seq_stringR   t   rangeR   R   RQ   RR   t   min(   R   RV   t   LETTERS_PER_LINEt   SEQUENCE_INDENTt   datat   seq_lent   line_numberR"   (    (    s„   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqIO/InsdcIO.pyt   _write_sequenceæ   s    #&#c         C   s‘  |  j  } |  j | ƒ |  j | d | j j d d ƒ d d t ƒ} | } | j j | d ƒ r¤ y- d | t | j j d d ƒ d ƒ f } Wq¤ t k
 r  q¤ Xn  |  j | d d t ƒ} | j	 } | d k rÚ d } n  |  j
 d	 | ƒ |  j d
 | ƒ | d k r#|  j d d | | f ƒ n |  j d d | ƒ y d j | j d ƒ } Wn t k
 rjd } n X|  j
 d | ƒ |  j
 d |  j | d ƒ ƒ |  j | d ƒ } t | ƒ |  j |  j k rä| |  j |  j d  d } n  |  j d | ƒ y d j | j d ƒ }	 Wn t k
 r'd }	 n X|  j
 d |	 ƒ | j d ƒ x | j D] }
 |  j |
 ƒ qOW| j d ƒ |  j | ƒ | j d ƒ d S(   s)   Write a single record to the output file.R%   R^   i   i    R&   s   %s.%it   gis   <unknown description>t
   DEFINITIONt	   ACCESSIONt   VERSIONs	   %s  GI:%ss   %ss   ; t   keywordst   KEYWORDSt   SOURCEt   sourcet   organismi   s   ...s
     ORGANISMt   taxonomyR   s)   FEATURES             Location/Qualifiers
s   ORIGIN
s   //
N(   R   R]   RD   RC   R   RE   t
   startswitht   intRF   t   descriptionR$   R   t   joinRO   RP   R   R   R   R   t   featurest   _write_featureRk   (   R   RV   R   R%   t   acc_with_versionRl   t   descrRp   t   orgRu   t   feature(    (    s„   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqIO/InsdcIO.pyt   write_recordù   sV    		*	 	
	
c         C   s   d S(   sÁ   Write a single SeqFeature object to features table.

        Not implemented yet, but this stub exists in the short term to
        facilitate working on writing GenBank files with a sub-class.N(    (   R   R   (    (    s„   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqIO/InsdcIO.pyR{   1  s    (   t   __name__t
   __module__R   R   R   R$   R]   t   FalseRD   Rk   R€   R{   (    (    (    s„   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqIO/InsdcIO.pyR   Q   s   			]		8t   __main__s   Quick self testN(   t   StringIOc         C   sè  t  ƒ  } t | ƒ j |  ƒ | j d ƒ t t | ƒ ƒ } t |  ƒ t | ƒ k sY t ‚ xˆt |  | ƒ D]w\ } } | j	 j
 d d ƒ | j	 k s™ t ‚ | j | j k s± t ‚ | j | j k sÉ t ‚ t | j ƒ t | j ƒ k sí t ‚ xR d d d d g D]> } | | j k r | j | | j | k s>t | ƒ ‚ q q Wx› d g D] } | | j k rL| j | } | j | } t | t ƒ r™t | t ƒ sŸt ‚ | | k sÜ| j d	 ƒ rÍ| j | d
  ƒ sÜt | ƒ ‚ qLqLWqi Wd  S(   Ni    s   
R   Rl   Rp   Rs   Ru   Rt   s   ...iýÿÿÿ(   R…   R   t
   write_filet   seekR_   R   R   R   t   zipRx   R   RC   RB   RQ   RH   RO   RI   t   endswithRv   (   t   recordsR   t   records2t   r1t   r2R`   t   v1t   v2(    (    s„   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqIO/InsdcIO.pyt   check_genbank_writer>  s(    	$$-$s   ../../Tests/GenBanks   .gbks   .gbs   ../../Tests/GenBank/%ss   ../../Tests/EMBLs   .embls   ../../Tests/EMBL/%s(   t   SeqIOs   ../../Tests/SwissPrott   sps   ../../Tests/SwissProt/%st   swiss(   t   __doc__t   Bio.SeqR    t   Bio.GenBank.ScannerR   R   t   BioR   t
   InterfacesR   R   R	   t   generic_proteinR   R   R   R   t   osR…   R   t   listdirt   filenameR‰   t   openR   R_   RŠ   t   closeR‘   Rv   t   parse(    (    (    s„   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqIO/InsdcIO.pyt   <module>   sN   		

è	 


