ó
Ú=êIc        _   @   s&  d  d l  Z  y
 e Z Wn! e k
 r9 d  d l m Z n Xd  d l m Z d  d l m Z d  d l m	 Z	 d  d l
 m Z m Z d  d l m Z d  d l m Z e j g Z e j g Z e j g Z e j e j e j ƒ g Z d	 d
 d d d d d d d d g
 Z d d d d g Z d	 d
 d d g Z x- e j D]" Z e e k r1e j e ƒ q1q1Wx- e j D]" Z e e k rae j e ƒ qaqaWe j d ƒ d
 e d d f d
 e d d f d
 e d d f d
 e d d f d	 e  d d f d	 e  d d f d	 e  d  d f d	 e  d! d f d	 e  d" d f d	 e  d# d f d	 e  d$ d f d	 e  d% d f d	 e  d& d f d	 e  d' d f d	 e  d( d f d	 e  d) d f d	 e  d* d+ f d	 e  d, d f d	 e  d- d f d	 e  d. d f d	 e d/ d+ f d	 e  d0 d f d1 e d2 d3 f d4 e  d5 d f d4 e  d6 d f d4 e  d7 d f d4 e  d8 d f d4 e  d9 d f d4 e  d: d f d4 e  d; d f d4 e  d< d f d4 e  d= d f d4 e  d> d f d4 e  d? d f d4 e  d@ d f d4 e  dA d f d4 e  dB d f d4 e  dC d f d4 e  dD d f d4 e  dE d f d e  dF d f d e  dG dH f d e  dI d f d e  dJ d f d e  dK d f d e  dL d f d e  dM d f d e  dN d f d e  dO d f d e  dP d f d e  dQ d f d e  dR d f d e  dS d f d e  dT d f d e  dU d+ f d e  dV d f d e  dW d f d e  dX d f d e  dY d f d e  dZ d f d e  d[ d f d e d\ d f d e d] d f d e d^ dH f d e d_ dH f d e d` da f d e db da f d e dc da f d e dd d+ f d e de df f d e dg df f d e  dh da f d e di d f dj e  dk d+ f dj e  dl d f dm e  dn d f dm e  do d f dm e  dp d f d e  dq dr f d e ds dt f d e  du d3 f dv e  dw dx f dv e  dy dz f dv e  d{ df f dv e  d| dH f dv e d} d f d	 e d~ d+ f d e  d d+ f d e d€ d+ f d e d df f d e d‚ d f d e dƒ d f g\ Z! g  d„ f e	 e d… e j ƒ d† d‡ ƒe	 e dˆ e j ƒ d† d‰ ƒe	 e dŠ e j ƒ d† d‹ ƒg dŒ f e	 e d… e j ƒ d† d‡ ƒe	 e d e j ƒ d† dŽ ƒe	 e dˆ e j ƒ d† d‰ ƒg d f e	 e d e j ƒ d† d‘ ƒe	 e d’ e j ƒ d† d“ ƒe	 e d” e j ƒ d† d• ƒg d– f e	 e d e j ƒ d† d‘ d— d˜ ƒe	 e d’ e j ƒ d† d“ d™ dš e  j" ƒe	 e d” e j ƒ d† d• ƒg d› f e	 e d… e j ƒ d† d‡ ƒe	 e d e j ƒ d† dŽ ƒe	 e d e j ƒ d† dŽ ƒe	 e dˆ e j ƒ d† d‰ ƒg dœ f g Z# e# df d d› k s	t$ ‚ d e  j" dž dŸ g e# df d  d j% d¡ <d¢ e  j" d£ e# df d  d j% d¤ <d¥ e# df d  d j% d¦ <d§ „  Z& d¨ d© „ Z' dª „  Z( d¨ d« „ Z) d¨ d¬ „ Z* xJ e j+ D]? Z, e ƒ  Z- e. e j/ e- e, ƒ ƒ Z0 e1 e0 ƒ d  k s¾	t$ ‚ q¾	WxW	e! D]O	\ Z, Z2 Z3 Z4 d­ e, e3 f GHe  j5 j6 e3 ƒ sG
t$ e3 ƒ ‚ e. e j/ d® e7 e3 d¯ ƒ d° e, ƒ ƒ Z0 e1 e0 ƒ e4 k sœ
t$ d± e1 e0 ƒ e4 f ƒ ‚ g  Z8 x6 e j/ d® e7 e3 d¯ ƒ d° e, ƒ D] Z9 e8 j e9 ƒ qÄ
We1 e8 ƒ e4 k só
t$ ‚ g  Z: e j/ d® e7 e3 d¯ ƒ d° e, ƒ Z; xQ e rmy e; j< ƒ  Z9 Wn e= k
 rLd Z9 n Xe9 d k	 rie: j e9 ƒ qPqWe j/ d® e7 e3 d¯ ƒ d° e, ƒ Z; y e; j< ƒ  Z9 Wn e= k
 r¸d Z9 n Xe9 d k	 räe9 g Z? e? j@ e. e; ƒ ƒ n g  Z? e1 e? ƒ e4 k st$ ‚ e j/ d® e7 e3 d¯ ƒ d° e, ƒ Z; y e; j< ƒ  Z9 Wn e= k
 rLd Z9 n Xe9 d k	 rƒe9 g ZA x$ e; D] Z9 eA j e9 ƒ qiWn g  ZA e1 eA ƒ e4 k s¡t$ ‚ xjeB e4 ƒ D]\ZC e0 eC Z9 eD e9 e	 ƒ sÓt$ ‚ e, e k reD e9 jE e ƒ s$eD e9 jE e ƒ s$t$ ‚ n eD e9 jE e ƒ s$t$ ‚ eD e9 jF eG ƒ s<t$ ‚ eD e9 jH eG ƒ sTt$ ‚ eD e9 jI eG ƒ slt$ ‚ e9 jF d² k st$ ‚ d³ e9 j% k re9 j% d³ ZJ x< eJ D]4 ZK eK rÂeK eK jL ƒ  k s¤t$ d´ eM eK ƒ ƒ ‚ q¤We1 e eJ ƒ ƒ e1 eJ ƒ k st$ dµ eM eJ ƒ ƒ ‚ n  x? e9 jN D]4 ZO eO r;eO eO jL ƒ  k st$ d¶ eM eO ƒ ƒ ‚ qWe1 e9 jN ƒ e1 e9 jN ƒ k sŒt$ d· eM e9 jN ƒ ƒ ‚ e& e9 e8 eC ƒ s¥t$ ‚ e& e9 e: eC ƒ s¾t$ ‚ e& e9 e? eC ƒ s×t$ ‚ e& e9 eA eC ƒ sðt$ ‚ eC d+ k  r®e' e9 ƒ GHq®q®We4 df k r"d¸ GHn  e4 d+ k r@e' e0 d  ƒ GHn  e4 d k r‚e jP d® e7 e3 ƒ d° e, ƒ Z9 eD e9 e	 ƒ sÄt$ ‚ nB y. e jP e7 e3 ƒ e, ƒ Z9 e  s¯t$ d¹ ƒ ‚ Wn eQ k
 rÃn Xx_ e0 D]W Z9 e jR e9 jE jS ƒ ZT eD eT e jU ƒ sþt$ ‚ e, e k rËeT e jV k s"t$ ‚ qËqËWeT d k rGg  ZW g  ZX d ZY nÜ eD eT e jZ ƒ rpe ZW e e e ZX n³ eD eT e j[ ƒ r™e e ZW e e ZX nŠ eD eT e j\ ƒ rÂe e ZW e e ZX na eD eT e j] ƒ rãe ZW e ZX n@ e, e k st$ dº eM eT ƒ e, f ƒ ‚ e e e e ZW g  ZX xÖ eW D]Î ZY e jR eY ƒ Z^ xb e j/ e7 e3 ƒ e, eY ƒ D]E Z9 e jR e9 jE jS ƒ ZT eD eT e^ j_ ƒ sŽt$ ‚ eT e^ k s[t$ ‚ q[We4 d k r*e jP e7 e3 ƒ e, eY ƒ Z9 eD eT e^ j_ ƒ sãt$ ‚ eT e^ k søt$ ‚ q*q*Wxh eX D]` ZY yF e j/ e7 e3 ƒ e, eY ƒ j< ƒ  GHe  sNt$ d» eM eY ƒ e3 f ƒ ‚ WqeQ k
 rbqXqW[W [X [Y [T e2 rCd¼ e, e3 f GHe j` e j/ d® e7 e3 d¯ ƒ d° e, ƒ ƒ Za e1 ea jb ƒ  ƒ e4 k sÐt$ ‚ ea jc ƒ  Zd xV eB e4 ƒ D]H ZC e& e0 eC ea jb ƒ  eC ƒ st$ ‚ e1 e0 eC jE ƒ ed k sét$ ‚ qéWe) ea ƒ GHn  e0 je ƒ  e* e0 ƒ q
Wd½ GHHd¾ GHd¿ GHHx®e# D]¦\ Z0 Zf dÀ ef GHxŽe D]†Z dÁ e GHe ƒ  Z- y1 e jg e0 e- e ƒ Zh eh e1 e0 ƒ k s×t$ ‚ Wn% eQ k
 rÿZi dÂ ej ei ƒ GHqn Xe- jk d  ƒ y e. e j/ e- e ƒ ƒ Zl Wn% eQ k
 rPZi dÃ ej ei ƒ GHqn Xe1 el ƒ e1 e0 ƒ k sot$ ‚ x™ em e0 el ƒ D]ˆ \ Z9 Zn e d1 k rËe9 jF en jF k sãen jF jo e9 jF dÄ ƒ sãt$ ‚ n e9 jF en jF k sãt$ ‚ e9 jE jp ƒ  en jE jp ƒ  k st$ ‚ qWe- jq ƒ  qWqsWdÅ GHd S(Æ   iÿÿÿÿN(   t   Set(   t   SeqIO(   t   AlignIO(   t	   SeqRecord(   t   Seqt
   UnknownSeq(   t   StringIO(   t   Alphabett   fastat   clustalt   phylipt   tabt   igt	   stockholmt   embosst   fastqs   fastq-solexat   qualt   genbankt   gbt   embls   Clustalw/cw02.alni   s   Clustalw/opuntia.alni   s   Clustalw/hedgehog.alni   s   Clustalw/odd_consensus.alns   Nucleic/lupine.nui   s   Nucleic/elderberry.nus   Nucleic/phlox.nus   Nucleic/centaurea.nus   Nucleic/wisteria.nus   Nucleic/sweetpea.nus   Nucleic/lavender.nus   Amino/aster.pros   Amino/loveliesbleeding.pros   Amino/rose.pros   Amino/rosemary.pros
   Fasta/f001s
   Fasta/f002i   s
   Fasta/fa01s   GFF/NC_001802.fnas   GFF/NC_001802lc.fnas   GFF/multi.fnas   Registry/seqs.fastat   nexuss   Nexus/test_Nexus_input.nexi	   t   swisss   SwissProt/sp001s   SwissProt/sp002s   SwissProt/sp003s   SwissProt/sp004s   SwissProt/sp005s   SwissProt/sp006s   SwissProt/sp007s   SwissProt/sp008s   SwissProt/sp009s   SwissProt/sp010s   SwissProt/sp011s   SwissProt/sp012s   SwissProt/sp013s   SwissProt/sp014s   SwissProt/sp015s   SwissProt/sp016s   Registry/EDD_RAT.dats   GenBank/noref.gbs   GenBank/cor6_6.gbi   s   GenBank/iro.gbs   GenBank/pri1.gbs   GenBank/arab1.gbs   GenBank/protein_refseq.gbs   GenBank/protein_refseq2.gbs   GenBank/extra_keywords.gbs   GenBank/one_of.gbs   GenBank/NT_019265.gbs   GenBank/origin_line.gbs   GenBank/blank_seq.gbs   GenBank/dbsource_wrap.gbs   GenBank/NC_005816.gbs   GenBank/gbvrl1_start.seqs   GFF/NC_001422.gbks   EMBL/TRBG361.embls   EMBL/DD231055_edited.embls   EMBL/SC10H5.embls   EMBL/U87107.embls   EMBL/AAA03323.embls   Stockholm/simple.sths   Stockholm/funny.sths   Phylip/reference_dna.phys   Phylip/reference_dna2.phys   Phylip/hennigian.phyi
   s   Phylip/horses.phys   Phylip/random.phys   Phylip/interlaced.phys   Phylip/interlaced2.phyi   s   Emboss/alignret.txts   Emboss/needle.txts   Emboss/water.txtt   phds   Phd/phd1s   Phd/phd2t   aces   Ace/contig1.aces   Ace/consed_sample.aces   Ace/seq.cap.aces    IntelliGenetics/TAT_mase_nuc.txti   s    IntelliGenetics/VIF_mase-pro.txti   s"   IntelliGenetics/vpu_nucaligned.txtt   pirs   NBRF/B_nuc.piri¼  s   NBRF/Cw_prot.pirio   s   NBRF/DMA_nuc.pirs   NBRF/DMB_prot.pirs   NBRF/clustalw.pirs   Quality/example.fastas   Quality/example.quals   Quality/example.fastqs   Quality/tricky.fastqs   Quality/solexa.fastqs   Quality/solexa_example.fastqs   zero recordst   CHSMAIKLSSEHNIPSGIANALt   idt   Alphat   HNGFTALEGEIHHLTHGEKVAFt   Gammat   DITHGVGt   deltas#   three peptides of different lengthst   VHGMAHPLGAFYNTPHGVANAIt   Betas   three proteins alignmentt    AATAAACCTTGCTGGCCATTGTGATCCATCCAt   Xt    ACTCAACCTTGCTGGTCATTGTGACCCCAGCAt   Yt    TTTCCTCGGAGGCCAATCTGGATCAAGACCATt   Zs   three DNA sequence alignmentt   names   The
MysterySequece:
Xt   descriptions   an%sevildescription right
heres,   3 DNA seq alignment with CR/LF in name/descrs   alignment with repeated records
   Note%salsos   
has
 evil linebreaks!t   Wowi    t   notes   More%sofs   
these
 evil linebreaks!t   commentg      @t   weightc         C   s†   |  j  | j  k r t S|  j | j k r, t S|  j | j k rB t S|  j d k	 r‚ | j d k	 r‚ |  j j ƒ  | j j ƒ  k r‚ t St S(   s;   This is meant to be a strict comparison for exact agreementN(   R   t   FalseR(   R)   t   seqt   Nonet   tostringt   True(   t
   record_onet
   record_two(    (    s   test_SeqIO.pyt   records_matchÑ   s    t    c         C   sÕ   |  j  |  j k r+ d | |  j  | f } n d | |  j  |  j | f } |  j d	 k rc | d 7} nn t |  j ƒ d k r§ | |  j d  j ƒ  d |  j d j ƒ  7} n | |  j j ƒ  7} | d t |  j ƒ 7} | S(
   s;   Returns a concise summary of a SeqRecord object as a strings   %sID and Name='%s',
%sSeq='s   %sID = '%s', Name='%s',
%sSeq='R0   i2   i(   s   ...iùÿÿÿs   ', length=%iN(   R   R(   R/   R0   t   lenR1   (   t   recordt   indentt   answer(    (    s   test_SeqIO.pyt   record_summaryß   s    /c         C   s.   t  |  ƒ d k  r |  S|  d  d |  d Sd  S(   NiA   i<   s   ...iûÿÿÿ(   R7   (   t   col_text(    (    s   test_SeqIO.pyt   col_summaryï   s    c         C   sÙ   g  } |  j  ƒ  } t |  j ƒ  ƒ } xE t t d | ƒ ƒ D]. } | j | t |  j | ƒ ƒ d | ƒ q: W| d k rÌ | d } | j | t d | ƒ d ƒ | j | t |  j | ƒ ƒ d | ƒ n  d j | ƒ S(   s<   Returns a concise summary of an Alignment object as a stringi   s    alignment column %ii   t   |s    ...s   
(	   t   get_alignment_lengthR7   t   get_all_seqst   ranget   mint   appendR=   t
   get_columnt   join(   t	   alignmentt   indexR:   t   alignment_lent	   rec_countt   i(    (    s   test_SeqIO.pyt   alignment_summaryõ   s    &
%c   	      C   s¿  x¸t  D]°} | t k rN t |  d j t ƒ rN t |  d j ƒ d k rN q n  | d | GHt ƒ  } y: t j d |  d | d | ƒ } | t |  ƒ k s t	 ‚ Wna t
 t f k
 r} d t | ƒ k rÐ d GHn | d	 t | ƒ GH| t k s t	 d
 ƒ ‚ q n X| j ƒ  | j d ƒ y" t t j d | d | ƒ ƒ } WnQ t k
 rŽ} | j d ƒ t d t | ƒ t | j ƒ  ƒ t |  ƒ f ƒ ‚ n Xt | ƒ t k s§t	 ‚ xt |  | ƒ D]ü\ } } | d k rü| j j ƒ  j ƒ  | j j ƒ  k set	 ‚ ni | d k rAt | j t ƒ s t	 ‚ t | ƒ t | ƒ k set	 ‚ n$ | j j ƒ  | j j ƒ  k set	 ‚ | d k r¾| j j d d ƒ j d d ƒ d  | j k s³t	 d | j | j f ƒ ‚ q·| d k r| j j d d ƒ d  | j k s³t	 d | j | j f ƒ ‚ q·| d k rT| j j d d ƒ | j k s³t	 d | j | j f ƒ ‚ q·| d k r…| j j ƒ  d | j k s³t	 ‚ q·| j | j k s·t	 d | j | j f ƒ ‚ q·Wq Wd  S(   Ni    id   s&   Checking can write/read as '%s' formatt	   sequencest   handlet   formats   len()s   Failed: Probably len() of Nones
   Failed: %ss2   Should be able to re-write in the original format!s
   %s

%s

%sR   R   R   R
   t   [t    t   ]i
   s   '%s' vs '%s'R	   R6   t   _i   R   R   (   R   R   (   t!   test_write_read_alignment_formatst   possible_unknown_seq_formatst
   isinstanceR/   R   R7   R   R   t   writet   AssertionErrort	   TypeErrort
   ValueErrort   strt   t_formatt   flusht   seekt   listt   parset   reprt   readt   t_countt   zipR1   t   upperR   t   replacet   split(	   t   recordsR9   RN   RM   t   ct   et   records2t   r1t   r2(    (    s   test_SeqIO.pyt   check_simple_write_read  s\    	
	
"/-!$1%!%s!   Testing reading %s format file %sRM   t   rRN   s    Found %i records but expected %iRP   t
   accessionss    Bad accession in annotations: %ss%   Repeated accession in annotations: %ss"   Bad cross reference in dbxrefs: %ss'   Repeated cross reference in dbxrefs: %ss    ...s&   Bio.SeqIO.read(...) should have faileds   Got %s from %s files,   Forcing wrong alphabet, %s, should fail (%s)s1   Testing reading %s format file %s as an alignments   Finished tested reading filess    Starting testing writing recordss<   (Note that some of these are expected to 'fail' and say why)s   Testing can write/read %ss'    Checking can write/read as '%s' formats    Failed: %ss    FAILED: %ss   .copys   Finished tested writing files(r   t   ost   sett	   NameErrort   setsR    t   BioR   R   t   Bio.SeqRecordR   t   Bio.SeqR   R   R   R   t   generic_proteint   protein_alphast   generic_dnat
   dna_alphast   generic_rnat
   rna_alphast   generic_nucleotidet   Gappedt   nucleotide_alphast   no_alpha_formatsRT   RS   t   _FormatToWriterRN   RC   t   removeR2   R.   t
   test_filest   linesept   test_recordsRW   t   annotationsR5   R;   R=   RK   Rm   t   _FormatToIteratorR[   RM   R^   R_   Rg   R7   t   t_alignmentt
   t_filenameRb   t   patht   isfilet   openRj   R8   t   records3t   seq_iteratort   nextt   StopIterationR0   t   records4t   extendt   records5RA   RJ   RU   R/   R   t
   basestringR(   R)   t   accst   acct   stripR`   t   dbxrefst   refRa   RY   t   _get_base_alphabett   alphabett
   base_alphat   SingleLetterAlphabett   single_letter_alphabett   goodt   badt   given_alphat   ProteinAlphabett   RNAAlphabett   DNAAlphabett   NucleotideAlphabett
   given_baset	   __class__t   to_alignmentRF   R@   R?   RH   t   reverset   descrRV   Rh   Ri   RZ   R]   t   new_recordsRc   t
   new_recordt
   startswithR1   t   close(    (    (    s   test_SeqIO.pyt   <module>   sŒ  

	
 
		T	'(!	
!
	!
	
!!	

	" ##
			"(