ó
’;ïHc           @   s²  d  Z  y
 e Z Wn! e k
 r3 d d l m Z n Xd d l m Z d d l m Z d d l	 m
 Z
 m Z d e f d „  ƒ  YZ d e
 f d	 „  ƒ  YZ e d
 k r®d GHd Z d d l m Z e e ƒ Z d Z x= e e ƒ D]/ Z x& e j ƒ  D] Z e d Z e j GHqï WqÜ We d k s!t ‚ d j d d ƒ j d d ƒ j ƒ  Z e j j ƒ  j d d ƒ e k slt ‚ d Z d Z e e ƒ Z e  e e ƒ ƒ Z! e j" ƒ  e# e! ƒ d k s¸t ‚ e# e! d j ƒ  ƒ d k sÚt ‚ e e ƒ Z e  e e ƒ ƒ Z$ e j" ƒ  e# e$ ƒ d k st ‚ e# e$ d j ƒ  ƒ d k s<t ‚ x{ e% d d ƒ D]j Z& e! d j ƒ  e& j e$ d j ƒ  e& j k e! d j ƒ  e& j j ƒ  e$ d j ƒ  e& j j ƒ  k qLWd Z' d Z( d Z) e e' ƒ Z e  e e ƒ ƒ Z* e j" ƒ  e# e* ƒ d k st ‚ e# e* d j ƒ  ƒ d k s.t ‚ e e( ƒ Z yU e  e e ƒ ƒ Z+ e# e+ ƒ d k sgt ‚ e# e+ d j ƒ  ƒ d k s‰t ‚ d GHWn e, k
 r§d GHn Xe j" ƒ  e e) ƒ Z e  e e ƒ ƒ Z+ e j" ƒ  e# e+ ƒ d k sòt ‚ e# e* d j ƒ  ƒ d k st ‚ d GHe e' d e' ƒ Z e# e  e e ƒ ƒ ƒ d k sQt ‚ e e d e' d  e ƒ Z e# e  e e ƒ ƒ ƒ d! k s‘t ‚ d" GHd# GHe ƒ  Z e e ƒ j- e+ ƒ e j. d ƒ e  e e ƒ ƒ Z/ e# e+ ƒ e# e/ ƒ k sôt ‚ x¯ e0 e+ e/ ƒ D]ž \ Z1 Z2 e# e1 j ƒ  ƒ e# e2 j ƒ  ƒ k s:t ‚ xe e0 e1 j ƒ  e2 j ƒ  ƒ D]H \ Z3 Z4 e3 j e4 j k szt ‚ e3 j j ƒ  e4 j j ƒ  k sVt ‚ qVWqWd$ GHn  d% S(&   sš  
AlignIO support for the "phylip" format used in Joe Felsenstein's PHYLIP tools.

You are expected to use this module via the Bio.AlignIO functions (or the
Bio.SeqIO functions if you want to work directly with the gapped sequences).

Note
====
In TREE_PUZZLE (Schmidt et al. 2003) and PHYML (Guindon and Gascuel 2003)
a dot/period (".") in a sequence is interpreted as meaning the same
character as in the first sequence.  The PHYLIP 3.6 documentation says:

   "a period was also previously allowed but it is no longer allowed,
   because it sometimes is used in different senses in other programs"

At the time of writing, we do nothing special with a dot/period.
iÿÿÿÿ(   t   Set(   t   single_letter_alphabet(   t	   Alignment(   t   AlignmentIteratort   SequentialAlignmentWritert   PhylipWriterc           B   s   e  Z d  Z d „  Z RS(   s   Phylip alignment writer.c         C   sV  d } | j  ƒ  } |  j } t | ƒ d k r< t d ƒ ‚ n  | j ƒ  } x2 | D]* } | t | j ƒ k rO t d ƒ ‚ qO qO W| d k r˜ t d ƒ ‚ n  t | ƒ t t g  | D] } | j |  ^ q® ƒ ƒ k rß t d ƒ ‚ n  | j d t | ƒ | f ƒ d } xMt	 rQx| D]} | d k r–| j j
 ƒ  }	 x  d D] }
 |	 j |
 d	 ƒ }	 q:Wx  d
 D] }
 |	 j |
 d ƒ }	 q]W| j |	 |  j | ƒ ƒ n | j d | ƒ xh t d d ƒ D]W } | d | d } | j j ƒ  | | d !} | j d | ƒ | d | k r·Pq·q·W| j d ƒ qW| d } | d | k rAPn  | j d ƒ qWd S(   sÞ  Use this to write (another) single alignment to an open file.

        This code will write interlaced alignments (when the sequences are
        longer than 50 characters).

        Note that record identifiers are strictly truncated at 10 characters.

        For more information on the file format, please see:
        http://evolution.genetics.washington.edu/phylip/doc/sequence.html
        http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles
        i
   i    s   Must have at least one sequences%   Sequences must all be the same lengths    Non-empty sequences are requireds/   Repeated identifier, possibly due to truncations    %i %s
s   [](),t    s   :;t   |t    i   i2   s    %ss   
i   N(   t   get_all_seqst   handlet   lent
   ValueErrort   get_alignment_lengtht   seqt   sett   idt   writet   Truet   stript   replacet   ljustt   ranget   tostring(   t   selft	   alignmentt   truncatet   recordsR
   t   length_of_seqst   recordt   rt   blockt   namet   chart   chunkt   it   seq_segment(    (    s‡   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/PhylipIO.pyt   write_alignment#   sH    	8	 
 (   t   __name__t
   __module__t   __doc__R%   (    (    (    s‡   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/PhylipIO.pyR   !   s   t   PhylipIteratorc           B   s    e  Z d  Z d „  Z d „  Z RS(   sÚ  Reads a Phylip alignment file returning an Alignment object iterator.

    Record identifiers are limited to at most 10 characters.

    It only copes with interlaced phylip files!  Sequential files won't work
    where the sequences are split over multiple lines.

    For more information on the file format, please see:
    http://evolution.genetics.washington.edu/phylip/doc/sequence.html
    http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles
    c         C   sx   | j  ƒ  } t d  | j ƒ  ƒ } t | ƒ d k r7 t Sy( t | d ƒ } t | d ƒ } t SWn t k
 rs t SXd  S(   Ni   i    i   (	   R   t   filtert   Nonet   splitR   t   Falset   intR   R   (   R   t   linet   partst   number_of_seqsR   (    (    s‡   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/PhylipIO.pyt
   _is_header‚   s    c         C   s`  |  j  } y |  j } |  ` Wn t k
 r; | j ƒ  } n X| sF d  S| j ƒ  } t d  | j ƒ  ƒ } t | ƒ d k rˆ t	 d ƒ ‚ n  y$ t
 | d ƒ } t
 | d ƒ } Wn t	 k
 rË t	 d ƒ ‚ n X|  j | ƒ sá t ‚ |  j d  k	 r|  j | k rt	 d | |  j f ƒ ‚ n  g  } g  } xf t d | ƒ D]U } | j ƒ  j ƒ  } | j | d  j ƒ  ƒ | j | d j ƒ  j d d ƒ g ƒ q7Wd } xØ t rpx, d | j ƒ  k rÍ| j ƒ  } | s¢Pq¢q¢W| sØPn  |  j | ƒ rô| |  _ Pn  xl t d | ƒ D][ } | | j | j ƒ  j d d ƒ ƒ | j ƒ  } | r| d | k  rt	 d	 ƒ ‚ qqW| s™Pq™q™Wt |  j ƒ }	 xÙ t d | ƒ D]È } d j | | ƒ }
 t |
 ƒ | k rát	 d
 | d t |
 ƒ | f ƒ ‚ n  |	 j | | |
 ƒ |	 j ƒ  d } | | | j k s1| | | j k s1t ‚ | | | _ | | | _ | | | _ qW|	 S(   Ni   s#   First line should have two integersi    i   s5   Found %i records in this alignment, told to expect %ii
   R   R   s   End of file mid-blocks)   Sequence %i length %i, expected length %iiÿÿÿÿ(   R
   t   _headert   AttributeErrort   readlineR   R*   R+   R,   R   R   R.   R2   t   AssertionErrort   records_per_alignmentR   t   rstript   appendR   R   R   t   alphabett   joint   add_sequenceR	   R   t   descriptionR    (   R   R
   R/   R0   R1   R   t   idst   seqsR#   R   R   R   (    (    s‡   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/PhylipIO.pyt   nextŽ   sv    		
 *	  	#  ,(   R&   R'   R(   R2   R@   (    (    (    s‡   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/PhylipIO.pyR)   u   s   	t   __main__s   Running short mini-tests#       8    286
V_Harveyi_ --MKNWIKVA VAAIA--LSA A--------- ---------T VQAATEVKVG 
B_subtilis MKMKKWTVLV VAALLAVLSA CG-------- ----NGNSSS KEDDNVLHVG 
B_subtilis MKKALLALFM VVSIAALAAC GAGNDNQSKD NAKDGDLWAS IKKKGVLTVG 
YA80_HAEIN MKKLLFTTAL LTGAIAFSTF ---------- -SHAGEIADR VEKTKTLLVG 
FLIY_ECOLI MKLAHLGRQA LMGVMAVALV AG---MSVKS FADEG-LLNK VKERGTLLVG 
E_coli_Gln --MKSVLKVS LAALTLAFAV S--------- ---------S HAADKKLVVA 
Deinococcu -MKKSLLSLK LSGLLVPSVL ALS------- -LSACSSPSS TLNQGTLKIA 
HISJ_E_COL MKKLVLSLSL VLAFSSATAA F--------- ---------- AAIPQNIRIG 

           MSGRYFPFTF VKQ--DKLQG FEVDMWDEIG KRNDYKIEYV TANFSGLFGL 
           ATGQSYPFAY KEN--GKLTG FDVEVMEAVA KKIDMKLDWK LLEFSGLMGE 
           TEGTYEPFTY HDKDTDKLTG YDVEVITEVA KRLGLKVDFK ETQWGSMFAG 
           TEGTYAPFTF HDK-SGKLTG FDVEVIRKVA EKLGLKVEFK ETQWDAMYAG 
           LEGTYPPFSF QGD-DGKLTG FEVEFAQQLA KHLGVEASLK PTKWDGMLAS 
           TDTAFVPFEF KQG--DKYVG FDVDLWAAIA KELKLDYELK PMDFSGIIPA 
           MEGTYPPFTS KNE-QGELVG FDVDIAKAVA QKLNLKPEFV LTEWSGILAG 
           TDPTYAPFES KNS-QGELVG FDIDLAKELC KRINTQCTFV ENPLDALIPS 

           LETGRIDTIS NQITMTDARK AKYLFADPYV VDG-AQITVR KGNDSIQGVE 
           LQTGKLDTIS NQVAVTDERK ETYNFTKPYA YAG-TQIVVK KDNTDIKSVD 
           LNSKRFDVVA NQVG-KTDRE DKYDFSDKYT TSR-AVVVTK KDNNDIKSEA 
           LNAKRFDVIA NQTNPSPERL KKYSFTTPYN YSG-GVIVTK SSDNSIKSFE 
           LDSKRIDVVI NQVTISDERK KKYDFSTPYT ISGIQALVKK GNEGTIKTAD 
           LQTKNVDLAL AGITITDERK KAIDFSDGYY KSG-LLVMVK ANNNDVKSVK 
           LQANKYDVIV NQVGITPERQ NSIGFSQPYA YSRPEIIVAK NNTFNPQSLA 
           LKAKKIDAIM SSLSITEKRQ QEIAFTDKLY AADSRLVVAK NSDIQP-TVE 

           DLAGKTVAVN LGSNFEQLLR DYDKDGKINI KTYDT--GIE HDVALGRADA 
           DLKGKTVAAV LGSNHAKNLE SKDPDKKINI KTYETQEGTL KDVAYGRVDA 
           DVKGKTSAQS LTSNYNKLAT N----AGAKV EGVEGMAQAL QMIQQARVDM 
           DLKGRKSAQS ATSNWGKDAK A----AGAQI LVVDGLAQSL ELIKQGRAEA 
           DLKGKKVGVG LGTNYEEWLR QNV--QGVDV RTYDDDPTKY QDLRVGRIDA 
           DLDGKVVAVK SGTGSVDYAK AN--IKTKDL RQFPNIDNAY MELGTNRADA 
           DLKGKRVGST LGSNYEKQLI DTG---DIKI VTYPGAPEIL ADLVAGRIDA 
           SLKGKRVGVL QGTTQETFGN EHWAPKGIEI VSYQGQDNIY SDLTAGRIDA 

           FIMDRLSALE -LIKKT-GLP LQLAGEPFET I-----QNAW PFVDNEKGRK 
           YVNSRTVLIA -QIKKT-GLP LKLAGDPIVY E-----QVAF PFAKDDAHDK 
           TYNDKLAVLN -YLKTSGNKN VKIAFETGEP Q-----STYF TFRKGS--GE 
           TINDKLAVLD -YFKQHPNSG LKIAYDRGDK T-----PTAF AFLQGE--DA 
           ILVDRLAALD -LVKKT-NDT LAVTGEAFSR Q-----ESGV ALRKGN--ED 
           VLHDTPNILY -FIKTAGNGQ FKAVGDSLEA Q-----QYGI AFPKGS--DE 
           AYNDRLVVNY -IINDQ-KLP VRGAGQIGDA A-----PVGI ALKKGN--SA 
           AFQDEVAASE GFLKQPVGKD YKFGGPSVKD EKLFGVGTGM GLRKED--NE 

           LQAEVNKALA EMRADGTVEK ISVKWFGADI TK----
           LRKKVNKALD ELRKDGTLKK LSEKYFNEDI TVEQKH
           VVDQVNKALK EMKEDGTLSK ISKKWFGEDV SK----
           LITKFNQVLE ALRQDGTLKQ ISIEWFGYDI TQ----
           LLKAVNDAIA EMQKDGTLQA LSEKWFGADV TK----
           LRDKVNGALK TLRENGTYNE IYKKWFGTEP K-----
           LKDQIDKALT EMRSDGTFEK ISQKWFGQDV GQP---
           LREALNKAFA EMRADGTYEK LAKKYFDFDV YGG---
(   t   StringIOi    i   i   s+  mkklvlslsl vlafssataa faaipqniri gtdptyapfe sknsqgelvg
    fdidlakelc krintqctfv enpldalips lkakkidaim sslsitekrq qeiaftdkly
    aadsrlvvak nsdiqptves lkgkrvgvlq gttqetfgne hwapkgieiv syqgqdniys
    dltagridaafqdevaaseg flkqpvgkdy kfggpsvkde klfgvgtgmg lrkednelre
    alnkafaemradgtyeklak kyfdfdvyggR   R   s   
t   -sw  5 60
Tax1        CCATCTCACGGTCGGTACGATACACCTGCTTTTGGCAG
Tax2        CCATCTCACGGTCAGTAAGATACACCTGCTTTTGGCGG
Tax3        CCATCTCCCGCTCAGTAAGATACCCCTGCTGTTGGCGG
Tax4        TCATCTCATGGTCAATAAGATACTCCTGCTTTTGGCGG
Tax5        CCATCTCACGGTCGGTAAGATACACCTGCTTTTGGCGG

GAAATGGTCAATATTACAAGGT
GAAATGGTCAACATTAAAAGAT
GAAATCGTCAATATTAAAAGGT
GAAATGGTCAATCTTAAAAGGT
GAAATGGTCAATATTAAAAGGTsq  5 60
Tax1        CCATCTCACGGTCGGTACGATACACCTGCTTTTGGCAGGAAATGGTCAATATTACAAGGT
Tax2        CCATCTCACGGTCAGTAAGATACACCTGCTTTTGGCGGGAAATGGTCAACATTAAAAGAT
Tax3        CCATCTCCCGCTCAGTAAGATACCCCTGCTGTTGGCGGGAAATCGTCAATATTAAAAGGT
Tax4        TCATCTCATGGTCAATAAGATACTCCTGCTTTTGGCGGGAAATGGTCAATCTTAAAAGGT
Tax5        CCATCTCACGGTCGGTAAGATACACCTGCTTTTGGCGGGAAATGGTCAATATTAAAAGGTi   s'    5    42
Turkey    AAGCTNGGGC ATTTCAGGGT
Salmo gairAAGCCTTGGC AGTGCAGGGT
H. SapiensACCGGTTGGC CGTTCAGGGT
Chimp     AAACCCTTGC CGTTACGCTT
Gorilla   AAACCCTTGC CGGTACGCTT

GAGCCCGGGC AATACAGGGT AT
GAGCCGTGGC CGGGCACGGT AT
ACAGGTTGGC CGTTCAGGGT AA
AAACCGAGGC CGGGACACTC AT
AAACCATTGC CGGTACGCTT AAs&    5    42
Turkey    AAGCTNGGGC ATTTCAGGGT
GAGCCCGGGC AATACAGGGT AT
Salmo gairAAGCCTTGGC AGTGCAGGGT
GAGCCGTGGC CGGGCACGGT AT
H. SapiensACCGGTTGGC CGTTCAGGGT
ACAGGTTGGC CGTTCAGGGT AA
Chimp     AAACCCTTGC CGTTACGCTT
AAACCGAGGC CGGGACACTC AT
Gorilla   AAACCCTTGC CGGTACGCTT
AAACCATTGC CGGTACGCTT AAs&    5    42
Turkey    AAGCTNGGGC ATTTCAGGGT GAGCCCGGGC AATACAGGGT AT
Salmo gairAAGCCTTGGC AGTGCAGGGT GAGCCGTGGC CGGGCACGGT AT
H. SapiensACCGGTTGGC CGTTCAGGGT ACAGGTTGGC CGTTCAGGGT AA
Chimp     AAACCCTTGC CGTTACGCTT AAACCGAGGC CGGGACACTC AT
Gorilla   AAACCCTTGC CGGTACGCTT AAACCATTGC CGGTACGCTT AAs   That should have failed...s8   Evil multiline non-interlaced example failed as expectedt   Concatenationi   s   


i   t   OKs   Checking write/readt   DoneN(5   R(   R   t	   NameErrort   setsR    t   Bio.AlphabetR   t   Bio.Align.GenericR   t
   InterfacesR   R   R   R)   R&   t   phylip_textt	   cStringIORB   R
   t   countR   R	   R   R   R6   R   t   uppert   expectedR   R   t   phylip_text2t   phylip_text3t   listt   list2t   closeR   t   list3R   R#   t   phylip_text4t   phylip_text5t   phylip_text5at   list4t   list5R   t
   write_filet   seekt   list6t   zipt   a1t   a2t   r1t   r2(    (    (    s‡   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/PhylipIO.pyt   <module>   sš   
Te8
$'
"
",<
""		

"$$	*(,