ó
’;ïHc           @   s)  d  Z  d d l m Z d d l m Z m Z d e f d „  ƒ  YZ d e f d „  ƒ  YZ e d k r%d	 GHd
 Z	 d Z
 d d l m Z e e e e	 ƒ ƒ ƒ Z d e e ƒ k sµ t ‚ e d j d k sÎ t ‚ e d j ƒ  Z d e e ƒ k sö t ‚ e d j d k st ‚ e d j d k s(t ‚ e d j j ƒ  d d d d d k sWt ‚ e e e e
 ƒ ƒ ƒ Z d e e ƒ k s‡t ‚ e d j d k s t ‚ e d j ƒ  Z d e e ƒ k sÈt ‚ e d j d k sát ‚ e d j j ƒ  d d d k st ‚ x? e e e
 e	 ƒ ƒ D]' Z d e e j ƒ  ƒ e j ƒ  f GHqWd GHd e e e e d  ƒ ƒ ƒ ƒ k syt ‚ d! GHe e e e	 ƒ ƒ ƒ e e e e
 ƒ ƒ ƒ d Z e ƒ  Z e e ƒ j e ƒ e j d ƒ xB e e e ƒ ƒ D]. \ Z Z e j ƒ  e e j ƒ  k sìt ‚ qìWe j d ƒ d" GHe j d d !e _ e ƒ  Z e e ƒ j e g ƒ e j d ƒ x\ e e e ƒ ƒ D]H \ Z Z e j ƒ  e j ƒ  k s¬t ‚ e e j ƒ  ƒ d k s‚t ‚ q‚Wd# Z e e e e ƒ ƒ ƒ Z d e e ƒ k st ‚ e d j d$ k st ‚ d% GHn  d& S('   sæ   
Bio.AlignIO support for the "clustal" output from CLUSTAL W and other tools.

You are expected to use this module via the Bio.AlignIO functions (or the
Bio.SeqIO functions if you want to work directly with the gapped sequences).
iÿÿÿÿ(   t	   Alignment(   t   AlignmentIteratort   SequentialAlignmentWritert   ClustalWriterc           B   s   e  Z d  Z d „  Z RS(   s   Clustalw alignment writer.c   	      C   sÉ  t  | j ƒ  ƒ d k r' t d ƒ ‚ n  y t | j ƒ } Wn t k
 rS d } n X| sc d } n  | j d ƒ r d | } n
 d | } d } t  | j d j ƒ } | d k rÀ t d ƒ ‚ n  xî | | k r°| d	 | k rì | | } n d	 } x_ | j D]T } | j	 d d
 !j
 d d ƒ j d ƒ } | | j j | | | !7} | | d 7} qü Wt | d ƒ r™| j d k r™| d d | j | | | !d 7} n  | d 7} | | 7} qÃ W|  j j | d ƒ d S(   s=   Use this to write (another) single alignment to an open file.i    s   Must have at least one sequencet    s   1.81s   2.s)   CLUSTAL %s multiple sequence alignment


s-   CLUSTAL X (%s) multiple sequence alignment


s    Non-empty sequences are requiredi2   i   t    t   _i$   s   
t
   _star_infoN(   t   lent   get_all_seqst
   ValueErrort   strt   _versiont   AttributeErrort
   startswitht   _recordst   seqt   idt   replacet   ljustt   datat   hasattrR   t   handlet   write(	   t   selft	   alignmentt   versiont   outputt   cur_chart
   max_lengtht   show_numt   recordt   line(    (    sˆ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/ClustalIO.pyt   write_alignment   s:    
	
%$
(   t   __name__t
   __module__t   __doc__R!   (    (    (    sˆ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/ClustalIO.pyR      s   t   ClustalIteratorc           B   s   e  Z d  Z d „  Z RS(   s   Clustalw alignment iterator.c         C   s­  |  j  } y |  j } |  ` Wn t k
 r; | j ƒ  } n X| sF d  S| d  d k re t d ƒ ‚ n  d  } x] | j ƒ  D]O } | d d k r® | d d k r® | d d !} n  | d d	 k rx | } qx qx W| j ƒ  } x" | j ƒ  d
 k rû | j ƒ  } qÚ Wg  } g  } d
 } d  } xrt rˆ| d d k r°| j ƒ  d
 k r°| j	 ƒ  j ƒ  }	 t
 |	 ƒ d k  sut
 |	 ƒ d k rˆt d | ƒ ‚ n  | j |	 d ƒ | j |	 d ƒ | d  k rt
 |	 d ƒ | t
 |	 d ƒ j |	 d ƒ }
 |
 t
 |	 d ƒ } t |
 | ƒ } ~
 ~ n  |	 d | | k s+t ‚ t
 |	 ƒ d k roy t |	 d ƒ } Wn! t k
 rtt d | ƒ ‚ n Xt
 |	 d j d d
 ƒ ƒ | k r­t d | ƒ ‚ q­qon¿ | d d k rnt
 | ƒ t
 | ƒ k sÞt ‚ t
 | ƒ d k söt ‚ | d  k	 st ‚ | | } | | j  j ƒ  s,t ‚ | | j j ƒ  sFt ‚ | j ƒ  } | j ƒ  d
 k sjt ‚ Pn P| j ƒ  } | sPqqW| j ƒ  d
 k s¡t ‚ | d  k	 s³t ‚ x0 | D]( } t
 | ƒ t
 | d ƒ k sºt ‚ qºW| rt
 | ƒ t
 | d ƒ k st ‚ n  t } x1| sJx3 | s<| j ƒ  d
 k rU| j ƒ  } | s#Pq#q#W| s`Pn  | d  d k rƒt } | |  _ Pn  x t t
 | ƒ ƒ D]} | d d k sÂt d t | ƒ ƒ ‚ | j	 ƒ  j ƒ  }	 t
 |	 ƒ d k  søt
 |	 ƒ d k rt d t | ƒ ƒ ‚ n  |	 d | | k rFt d |	 d | | f ƒ ‚ n  |	 d | | k rÚt
 |	 d ƒ | t
 |	 d ƒ j |	 d ƒ }
 |
 | j k s®t d | |
 f ƒ ‚ |
 t
 |	 d ƒ } t |
 | ƒ } ~
 ~ n  | | c |	 d 7<t
 | | ƒ t
 | d ƒ k st ‚ t
 |	 ƒ d k r–y t |	 d ƒ } Wn! t k
 r]t d | ƒ ‚ n Xt
 | | j d d
 ƒ ƒ | k r–t d | ƒ ‚ q–n  | j ƒ  } q–W| r| d d k sÂt ‚ | d  k	 sÔt ‚ | | | 7} t
 | ƒ t
 | d ƒ k st ‚ | | j  j ƒ  st ‚ | | j j ƒ  s8t ‚ | j ƒ  } qqWt
 | ƒ t
 | ƒ k sit ‚ t
 | ƒ d k s‘t
 | d ƒ d k r•d  S|  j d  k	 rÛ|  j t
 | ƒ k rÛt d t
 | ƒ |  j f ƒ ‚ n  t |  j ƒ } t
 | d ƒ } xW t t
 | ƒ ƒ D]C } t
 | | ƒ | k r8t d ƒ ‚ n  | j | | | | ƒ qW| rf| | _ n  | r©t
 | ƒ | k st d | t
 | ƒ | f ƒ ‚ | | _ n  | S(   Ni   t   CLUSTALs   Did not find CLUSTAL headeri    t   (iÿÿÿÿt   )i   t
   0123456789R   R   i   i   s   Could not parse line:
%ss-   Could not parse line, bad sequence number:
%st   -s1   Could not parse line, invalid sequence number:
%ss   Unexpected line:
%ss4   Identifiers out of order? Got '%s' but expected '%s's   Old location %s -> %i:XXs5   Found %i records in this alignment, told to expect %is8   Error parsing alignment - sequences of different length?s4   Alignment length is %i, consensus length is %i, '%s'(   R   t   _headerR   t   readlinet   NoneR
   t   splitt   stript   Truet   rstripR   t   appendt   findt   slicet   AssertionErrort   intR   t   startt   stopt   Falset   ranget   reprt   records_per_alignmentR    t   alphabett   add_sequenceR   R   (   R   R   R    R   t   wordt   idst   seqst	   consensust   seq_colst   fieldsR7   t   endt   letterst   st   donet   iR   t   alignment_length(    (    sˆ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/ClustalIO.pyt   nextQ   s    		
 	"$/	"
 &%	  	&$/%	&""((   R"   R#   R$   RK   (    (    (    sˆ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/ClustalIO.pyR%   N   s   t   __main__s   Running a quick self-tests  CLUSTAL W (1.81) multiple sequence alignment


gi|4959044|gb|AAD34209.1|AF069      MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNN 50
gi|671626|emb|CAA85685.1|           ---------MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFR 41
                                              * *: ::    :.   :*  :  :. : . :*  ::   .

gi|4959044|gb|AAD34209.1|AF069      LLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDW 100
gi|671626|emb|CAA85685.1|           VTPQPG-----------------VPPEEAGAAVAAESSTGT--------- 65
                                    :   **                  **:...   *.*** ..         

gi|4959044|gb|AAD34209.1|AF069      LNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQT 150
gi|671626|emb|CAA85685.1|           WTTVWTDGLTSLDRYKG-----RCYHIEPVPG------------------ 92
                                     .:*   * *: .* :*        : :* .*                  

gi|4959044|gb|AAD34209.1|AF069      SENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTE 200
gi|671626|emb|CAA85685.1|           -EKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIP 141
                                     *::.  .    .:: :*..*  :* .*   .. .  :    .  :    

gi|4959044|gb|AAD34209.1|AF069      VPTTRAQRRA 210
gi|671626|emb|CAA85685.1|           VAYVKTFQGP 151
                                    *. .:: : .
                                     
sL	  CLUSTAL X (1.83) multiple sequence alignment


V_Harveyi_PATH                 --MKNWIKVAVAAIA--LSAA------------------TVQAATEVKVG
B_subtilis_YXEM                MKMKKWTVLVVAALLAVLSACG------------NGNSSSKEDDNVLHVG
B_subtilis_GlnH_homo_YCKK      MKKALLALFMVVSIAALAACGAGNDNQSKDNAKDGDLWASIKKKGVLTVG
YA80_HAEIN                     MKKLLFTTALLTGAIAFSTF-----------SHAGEIADRVEKTKTLLVG
FLIY_ECOLI                     MKLAHLGRQALMGVMAVALVAG---MSVKSFADEG-LLNKVKERGTLLVG
E_coli_GlnH                    --MKSVLKVSLAALTLAFAVS------------------SHAADKKLVVA
Deinococcus_radiodurans        -MKKSLLSLKLSGLLVPSVLALS--------LSACSSPSSTLNQGTLKIA
HISJ_E_COLI                    MKKLVLSLSLVLAFSSATAAF-------------------AAIPQNIRIG
HISJ_E_COLI                    MKKLVLSLSLVLAFSSATAAF-------------------AAIPQNIRIG
                                         : .                                 : :.

V_Harveyi_PATH                 MSGRYFPFTFVKQ--DKLQGFEVDMWDEIGKRNDYKIEYVTANFSGLFGL
B_subtilis_YXEM                ATGQSYPFAYKEN--GKLTGFDVEVMEAVAKKIDMKLDWKLLEFSGLMGE
B_subtilis_GlnH_homo_YCKK      TEGTYEPFTYHDKDTDKLTGYDVEVITEVAKRLGLKVDFKETQWGSMFAG
YA80_HAEIN                     TEGTYAPFTFHDK-SGKLTGFDVEVIRKVAEKLGLKVEFKETQWDAMYAG
FLIY_ECOLI                     LEGTYPPFSFQGD-DGKLTGFEVEFAQQLAKHLGVEASLKPTKWDGMLAS
E_coli_GlnH                    TDTAFVPFEFKQG--DKYVGFDVDLWAAIAKELKLDYELKPMDFSGIIPA
Deinococcus_radiodurans        MEGTYPPFTSKNE-QGELVGFDVDIAKAVAQKLNLKPEFVLTEWSGILAG
HISJ_E_COLI                    TDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPS
HISJ_E_COLI                    TDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPS
                                     **       .:  *::::.   : :.   .        ..:   

V_Harveyi_PATH                 LETGRIDTISNQITMTDARKAKYLFADPYVVDG-AQI
B_subtilis_YXEM                LQTGKLDTISNQVAVTDERKETYNFTKPYAYAG-TQI
B_subtilis_GlnH_homo_YCKK      LNSKRFDVVANQVG-KTDREDKYDFSDKYTTSR-AVV
YA80_HAEIN                     LNAKRFDVIANQTNPSPERLKKYSFTTPYNYSG-GVI
FLIY_ECOLI                     LDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQAL
E_coli_GlnH                    LQTKNVDLALAGITITDERKKAIDFSDGYYKSG-LLV
Deinococcus_radiodurans        LQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEII
HISJ_E_COLI                    LKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLV
HISJ_E_COLI                    LKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLV
                               *.: . *        .  *     *:          :

(   t   StringIOi   i    s   1.81i   s   gi|4959044|gb|AAD34209.1|AF069s   gi|671626|emb|CAA85685.1|t2   MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNt2   LLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDWt2   LNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTt2   SENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTEt
   VPTTRAQRRAs   1.83i	   t   HISJ_E_COLIs2   MKKLVLSLSLVLAFSSATAAF-------------------AAIPQNIRIGs2   TDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPSt%   LKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVs&   Alignment with %i records of length %is   Checking empty file...R   s   Checking write/read...s5   Testing write/read when there is only one sequence...s  CLUSTAL 2.0.9 multiple sequence alignment


Test1seq             ------------------------------------------------------------
AT3G20900.1-SEQ      ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGGTGAACAAAAAAAAAACTCAATCCAC
AT3G20900.1-CDS      ------------------------------------------------------------
                                                                                 

Test1seq             -----AGTTACAATAACTGACGAAGCTAAGTAGGCTACTAATTAACGTCATCAACCTAAT
AT3G20900.1-SEQ      ATCAAAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAAATTAAAGTCATCAACCTAAT
AT3G20900.1-CDS      ------------------------------------------------------------
                                                                                 

Test1seq             ACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATC
AT3G20900.1-SEQ      ACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATC
AT3G20900.1-CDS      ------------------------------------------------------------
                                                                                 

Test1seq             AATTGATAGTGTAAATCATCGTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGA
AT3G20900.1-SEQ      AATTGATAGTGTAAATCATAGTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGA
AT3G20900.1-CDS      ------------------------------------------------------------
                                                                                 

Test1seq             CTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACA
AT3G20900.1-SEQ      CTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAA
AT3G20900.1-CDS      ------------------------------------------------------------
                                                                                 

Test1seq             AAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATT
AT3G20900.1-SEQ      CAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATT
AT3G20900.1-CDS      ------------------------------------------------------------
                                                                                 

Test1seq             GTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGAT
AT3G20900.1-SEQ      GTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGAT
AT3G20900.1-CDS      ------------------------------------------------------------
                                                                                 

Test1seq             CCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGT
AT3G20900.1-SEQ      CCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGT
AT3G20900.1-CDS      ------------------------------------------------------ATGAAC
                                                                             *   

Test1seq             TCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGT
AT3G20900.1-SEQ      GCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGT
AT3G20900.1-CDS      AAAGTAGCGAGGAAGAACAAAACATC------AGCAAAGAAAACGATCTGTCTCCGTCGT
                         *  *** ***** *   *  **      ****************************

Test1seq             AACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
AT3G20900.1-SEQ      AACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
AT3G20900.1-CDS      AACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
                     ******* **   * ****  ***************************************

Test1seq             GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCT
AT3G20900.1-SEQ      GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCT
AT3G20900.1-CDS      GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCT
                     **************************************** *******************

Test1seq             GCTGGGGATGGAGAGGGAACAGAGTT-
AT3G20900.1-SEQ      GCTGGGGATGGAGAGGGAACAGAGTAG
AT3G20900.1-CDS      GCTGGGGATGGAGAGGGAACAGAGTAG
                     *************************  
s   2.0.9s   The EndN(    R$   t   Bio.Align.GenericR    t
   InterfacesR   R   R   R%   R"   t   aln_example1t   aln_example2RM   t   listt
   alignmentsR   R5   R   R	   t   recordsR   R   t   tostringR   t   get_alignment_lengthR   t
   write_filet   seekt	   enumerateRI   t   aR   t   aln_example3(    (    (    sˆ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/ClustalIO.pyt   <module>   sp   =½**	&	"A