ó
lPIc           @   s  d  Z  d d l Z d d l Z d d l Z d d l m Z d d l m Z d d l m Z e j	 j
 e j ƒ  d ƒ Z d d d	 d
 d d d d d d d d d d g Z d d d	 d
 d d d d g Z g  Z x* e D]" Z e j e j	 j
 e e ƒ ƒ qÐ We j d d ƒ Z e j d d ƒ Z e e g Z d GHxŽe D]†Z x}e D]uZ e j	 j e ƒ sfd e GHq?n  e e d ƒ Z e j e e ƒ Z x e j ƒ  Z e d k r¦Pn  e! e e j ƒ r¶d e j" e j	 j# ƒ d GHd Ge$ e j% ƒ GHd Ge j& GHd Ge j' GHd Ge j( GHd GHe j) j* ƒ  Z+ e+ j, ƒ  x^ e+ D]V Z- e- d  k r]d! e- GHd" e j) e- GHq/d# GHx  e j) e- D] Z. e/ e. ƒ GHqpWq/Wd$ GHx e j0 D] Z1 e1 GHq˜Wd% Ge j2 GHqŠe! e e j ƒ rŠd& e j" e j	 j# ƒ d GHd' e3 e j4 ƒ GHd( Ge j5 GHd) Ge j6 GHd* Ge j7 GHx e j8 D] Z. d+ Ge. j9 GHq$Wxi e j0 D][ Z1 d, Ge1 j: GHd- Ge1 j; GHd. Ge3 e1 j< ƒ GHx( e1 j< D] Z= d/ Ge= j: Gd0 Ge= j> GHq~WqDWqŠqŠWe j? ƒ  q?Wq2Wd1 GHd2 „  Z@ d3 „  ZA eA ƒ  d4 „  ZB d5 GHeB ƒ  d6 „  ZC d7 GHd S(8   sF   Test the GenBank parser and make sure everything is working smoothly.
iÿÿÿÿN(   t   File(   t   GenBank(   t   utilsR   s   noref.gbs	   cor6_6.gbs   iro.gbs   pri1.gbs   arab1.gbs   protein_refseq.gbs   extra_keywords.gbs	   one_of.gbs   NT_019265.gbs   origin_line.gbs   blank_seq.gbs   dbsource_wrap.gbs   gbvrl1_start.seqs   NC_005816.gbt   debug_leveli    s   Testing parsers...s   Missing test input file: %st   rs(   ***Record from %s with the FeatureParsers   Seq:s   Id:s   Name:t   Descriptions   Annotations***t
   referencess   Key: %ss	   Value: %ss   References*t   Feauress   DB cross refss'   ***Record from %s with the RecordParsers   sequence length: %is   locus:s   definition:s
   accession:s   reference title:s   feature key:s	   location:s   num qualifiers:s   key:s   value:s!   Testing writing GenBank format...c   	      C   s  t  j |  ƒ } t  j | ƒ } xÞ | j ƒ  } | j ƒ  } | rK | rK Pn  | sd t d | ƒ ‚ n  | s} t d | ƒ ‚ n  d j g  | j ƒ  D] } | r | ^ q ƒ } d j g  | j ƒ  D] } | r¾ | ^ q¾ ƒ } | | k s! t d | | f ƒ ‚ q! Wd S(   s¿   Compare two records to see if they are the same.

    Ths compares the two GenBank record, and will raise an AssertionError
    if two lines do not match, showing the non-matching lines.
    s   Extra info in Test: `%s`s   Extra info in Expected: `%s`t    s6   Expected does not match Test.
Expect:`%s`
Test  :`%s`
N(   t	   cStringIOt   StringIOt   readlinet   AssertionErrort   joint   split(	   t   good_recordt   test_recordt   good_handlet   test_handlet	   good_linet	   test_linet   xt   test_normalizedt   good_normalized(    (    s   test_GenBank.pyt   do_comparisonr   s     ..c    	      C   s  t  j d d ƒ }  xî t D]æ } d t j j | ƒ GHt t j j d | ƒ d ƒ } t t j j d | ƒ d ƒ } t  j | |  ƒ } t  j | ƒ } xa | j	 ƒ  } | j	 ƒ  } | d  k sÄ | d  k rÈ Pn  d | j GHt | ƒ d } t | | ƒ q” W| j ƒ  q Wd  S(   NR   i    s!   Testing GenBank writing for %s...R   R   s   	Testing for %ss   
(   R   t   RecordParsert   write_format_filest   ost   patht   basenamet   openR   t   Iteratort   nextt   Nonet   versiont   strR   t   close(	   t   record_parsert   filet
   cur_handlet   compare_handlet   iteratort   compare_iteratort
   cur_recordt   compare_recordt   output_record(    (    s   test_GenBank.pyt   t_write_format‹   s     c          C   sµ   t  j d t j ƒ  ƒ }  t t j j d d ƒ ƒ } t  j | |  ƒ } | j	 ƒ  } | j
 d } | j d d } | j d ƒ d k s t d	 ƒ ‚ | j d
 ƒ d k s± t d ƒ ‚ d S(   s1   Test the ability to clean up feature values.
    t   feature_cleanerR   s   arab1.gbi   t   translationi    R   iÿÿÿÿs+   Did not clean spaces out of the translations   
s-   Did not clean newlines out of the translationN(   R   t   FeatureParserR   t   FeatureValueCleanerR   R   R   R   R   R    t   featurest
   qualifierst   findR   (   t   parsert   handleR)   t   first_recordt   translation_featuret
   test_trans(    (    s   test_GenBank.pyt   t_cleaning_features¦   s    		s   Testing feature cleaning...c          C   sG  d d l  m }  d d l  m } t j j d d ƒ } t | ƒ } |  d j | ƒ } | j d k sv t	 d | j ƒ ‚ g  } x' | j
 j | ƒ D] } | j | ƒ q Wt | ƒ d	 k s¾ t	 ‚ | d
 j d k sè t	 d | d
 j ƒ ‚ | d
 j d
 d !d k st	 d | d
 j ƒ ‚ | d
 j d k sCt	 d | d
 j ƒ ‚ d S(   sD   Test converting GenBank into different formats using Bioformat.
    iÿÿÿÿ(   t   formats(   t	   SeqRecordR   s   iro.gbt   sequences   genbank-recordss!   Identified format incorrectly: %si   i    s
   AL109817.1s   Unexpected record id: %si
   t
   cacaggcccas   Unexpected sequence: %ssA   Homo sapiens mRNA full length insert cDNA clone EUROIMAGE 125195.s   Unexpected description: %sN(   t   BioR<   R=   R   R   R   R   t   identifyt   nameR   t   iot   readFilet   appendt   lent   idt   seqt   description(   R<   R=   t	   test_fileR   t   formatt   all_recordst   record(    (    s   test_GenBank.pyt   t_bioformat»   s$    
s   Testing format conversions...(D   t   __doc__R   t   copyR	   R@   R    R   t   Bio.GenBankR   R   R   t   getcwdt   gb_file_dirt
   test_filesR   t   files_to_parseR&   RE   R1   t   feature_parserR   R%   t   all_parsersR6   t   filenamet   isfileR   R7   R   R)   R    R+   R!   t
   isinstanceR   t   sept   reprRH   RG   RB   RI   t   annotationst   keyst   ann_keyst   sortt   ann_keyt	   referenceR#   R3   t   featuret   dbxrefsRF   R>   t   locust
   definitiont	   accessionR   t   titlet   keyt   locationR4   t	   qualifiert   valueR$   R   R.   R;   RN   (    (    (    s   test_GenBank.pyt   <module>   sœ   		 	
		&				