ó
PÓIc        8   @   s  d  Z  d d l Z y
 e Z Wn! e k
 r? d d l m Z n Xd d l m Z d d l m Z d d l	 m
 Z
 d d l m Z d d l m Z d d	 l m Z d d
 l m Z d d l m Z yR d d l m Z m Z d d l m Z m Z m Z m Z d d l m Z m Z Wn) e e f k
 r=d Z e e ƒ ‚ n Xd Z d e d d f d e d d f d e d d f d e d d f d e d d f d e d d f d e d d f d e d d f d e d d f d e d d f d e d d f d e d d f d e d d  f d e d! d" f d e d# d f d e  d$ d  f d e d% d" f d& e d' d f d& e d( d f d& e d) d f d& e d* d f d& e d+ d f d& e d, d f d& e d- d f d& e d. d f d& e d/ d f d& e d0 d f d& e d1 d f d& e d2 d f d& e d3 d f d& e d4 d f d& e d5 d f d& e d6 d f d& e d7 d f d8 e d9 d f d8 e d: d; f d8 e d< d f d8 e d= d f d8 e d> d f d8 e d? d f d8 e d@ d f d8 e dA d f d8 e dB d f d8 e dC d f d8 e dD d f d8 e dE d f d8 e dF d f d8 e dG d  f d8 e dH d f dI e dJ d f dI e dK d f dI e dL d f dI e dM d f g5 Z! dN „  Z" dO „  Z# dP „  Z$ dQ „  Z% dR „  Z& dS GHy. e j' dT e dU e dV e dW e dX e ƒ Z( Wn/ e) k
 röZ* dY e+ e* ƒ Z e e ƒ ‚ n XdZ e GHe e( j, ƒ  k r,e( j- e ƒ e( j. ƒ  n  d[ e GHe( j/ e ƒ Z0 x‰e! D]\ Z1 Z2 Z3 Z4 d\ e1 e3 f GHe j5 j6 e3 ƒ s„t7 ‚ e j8 d] e9 e3 d^ ƒ d_ e1 ƒ Z: e0 j; e: ƒ Z< e< e4 k sÆt7 ‚ e( j. ƒ  e j8 d] e9 e3 d^ ƒ d_ e1 ƒ Z: xØe: D]ÐZ= d` e" e= ƒ e= j> f GHe= j? Z@ da e@ Ge0 jA db e@ ƒ ZB e& e= eB ƒ e0 jA dc e@ ƒ ZB e& e= eB ƒ dd GHe= j> Z@ e@ j< de ƒ d k rÐe@ jC de ƒ d jD ƒ  rÐdf e@ Ge0 jA dg e@ ƒ ZB e& e= eB ƒ dd GHn  dh e= jE k rke e= jE dh ƒ ZF xv eF D]k Z@ e@ st7 di eG eF ƒ ƒ ‚ y0 dj e@ Ge0 jA dk e@ ƒ ZB e& e= eB ƒ dd GHWqùeH k
 rcdl GHqùXqùWn  dm e= jE k røe= jE dm Z@ e@ e= j> k rÈdn e@ Ge0 jA do e@ ƒ ZB e& e= eB ƒ dd GHqÈqøqøWqKWdp e GHe( j- e ƒ dq GHe( j. ƒ  dr GHe( jI ƒ  d S(s   sÍ   Testing BioSQL with BioSQL

Uses Bio.SeqIO to parse files, and then loads them into a BioSQL database,
and checks we can retreive them again.

Goals:
    Make sure that BioSQL preserves SeqRecord objects.
iÿÿÿÿN(   t   Set(   t   MissingExternalDependencyError(   t   SeqIO(   t
   UnknownSeq(   t   StringIO(   t   seguid(   t   ExactPosition(   t   BioSeqDatabase(   t   BioSeq(   t   DBDRIVERt   DBTYPE(   t   DBHOSTt   DBUSERt   DBPASSWDt   TESTDB(   t   DBSCHEMAt   SQL_FILEsB   Check settings in Tests/setup_BioSQL.py if you plan to use BioSQL.s   biosql-seqio-testt   fastas   Nucleic/lupine.nui   s   Nucleic/elderberry.nus   Nucleic/phlox.nus   Nucleic/centaurea.nus   Nucleic/wisteria.nus   Nucleic/sweetpea.nus   Nucleic/lavender.nus   Amino/aster.pros   Amino/loveliesbleeding.pros   Amino/rose.pros   Amino/rosemary.pros
   Fasta/f001s
   Fasta/f002i   s
   Fasta/fa01i   s   GFF/NC_001802.fnas   GFF/multi.fnas   Registry/seqs.fastat   swisss   SwissProt/sp001s   SwissProt/sp002s   SwissProt/sp003s   SwissProt/sp004s   SwissProt/sp005s   SwissProt/sp006s   SwissProt/sp007s   SwissProt/sp008s   SwissProt/sp009s   SwissProt/sp010s   SwissProt/sp011s   SwissProt/sp012s   SwissProt/sp013s   SwissProt/sp014s   SwissProt/sp015s   SwissProt/sp016s   Registry/EDD_RAT.datt   genbanks   GenBank/noref.gbs   GenBank/cor6_6.gbi   s   GenBank/iro.gbs   GenBank/pri1.gbs   GenBank/arab1.gbs   GenBank/protein_refseq2.gbs   GenBank/extra_keywords.gbs   GenBank/one_of.gbs   GenBank/NT_019265.gbs   GenBank/origin_line.gbs   GenBank/blank_seq.gbs   GenBank/dbsource_wrap.gbs   GenBank/NC_005816.gbs   GenBank/gbvrl1_start.seqs   GFF/NC_001422.gbkt   embls   EMBL/TRBG361.embls   EMBL/DD231055_edited.embls   EMBL/SC10H5.embls   EMBL/U87107.emblc         C   s‘   t  |  j t ƒ r t |  j ƒ St |  j ƒ d k  rF |  j j ƒ  } n( |  j j ƒ  d  d |  j j ƒ  d } d | t |  j ƒ t |  j ƒ f S(   Ni   i   s   ...iýÿÿÿs   %s [%s] len %i(   t
   isinstancet   seqR   t   reprt   lent   tostringR   (   t   recordt   short(    (    s   test_BioSQL_SeqIO.pyt   checksum_summaryu   s    (c         C   sæ   |  j  | j  k s. t d |  j  | j  f ƒ ‚ |  j | j k s\ t d |  j | j f ƒ ‚ |  j | j k sŠ t d |  j | j f ƒ ‚ |  j | j k s¸ t d |  j | j f ƒ ‚ | j d k sÍ t ‚ | j d k sâ t ‚ d S(   s   Compare two Reference objectss   %s vs %st    N(   t   titlet   AssertionErrort   authorst   journalt
   medline_idt   commentt   consrtm(   t   old_rt   new_r(    (    s   test_BioSQL_SeqIO.pyt   compare_references€   s    c         C   s}  |  j  | j  k s. t d |  j  | j  f ƒ ‚ |  j | j k s\ t d |  j | j f ƒ ‚ |  j | j k sŠ t d |  j | j f ƒ ‚ |  j | j k s¸ t d |  j | j f ƒ ‚ | j d k sÍ t ‚ yJ t |  j ƒ t | j ƒ k st d t |  j ƒ t | j ƒ f ƒ ‚ Wnº t k
 rÓ} t |  j j	 t
 ƒ r\t |  j j t
 ƒ r\| ‚ qÔ|  j j | j j k s–t d |  j j | j j f ƒ ‚ |  j j | j j k sÔt d |  j j | j j f ƒ ‚ n Xt |  j ƒ t | j ƒ k st d t |  j ƒ t | j ƒ f ƒ ‚ xât |  j | j ƒ D]Ë\ } } | j  | j  k sjt d | j  | j  f ƒ ‚ | j | j k s˜t d | j | j f ƒ ‚ | j | j k sÆt d | j | j f ƒ ‚ | j | j k sôt d | j | j f ƒ ‚ yJ t | j ƒ t | j ƒ k s=t d t | j ƒ t | j ƒ f ƒ ‚ Wq0t k
 rú} t | j j	 t
 ƒ rƒt | j j t
 ƒ rƒ| ‚ qû| j j | j j k s½t d | j j | j j f ƒ ‚ | j j | j j k sût d | j j | j j f ƒ ‚ q0Xq0Wt |  j ƒ t | j ƒ k s#t ‚ t |  j j ƒ  ƒ t | j j ƒ  ƒ k sSt ‚ x#|  j j ƒ  D]} t |  j | t ƒ r7t | j | t ƒ r¸|  j | | j | k s4t ‚ qut | j | t ƒ r|  j | g | j | k s4t d t |  j | ƒ t | j | ƒ f ƒ ‚ qut sut d | @ƒ ‚ qc|  j | | j | k sct d |  j | | j | f ƒ ‚ qcWd S(   s   Compare two SeqFeature objectss   %s -> %ss   <unknown id>s    number of sub_features: %s -> %ss%   Problem with feature's '%s' qualifierN(   t   typeR   t   strandt   reft   ref_dbt   idt   strt   locationR   t   startR   t   endt   nofuzzy_startt   nofuzzy_endR   t   sub_featurest   zipt
   qualifierst   sett   keyst   listR   t   False(   t   old_ft   new_ft   et   old_subt   new_subt   key(    (    s   test_BioSQL_SeqIO.pyt   compare_features    s‚    !)			!""!)			$0# c   	   
   C   sJ  t  |  ƒ t  | ƒ k s t ‚ |  j ƒ  | j ƒ  k s< t ‚ t |  t ƒ rc t | t ƒ sy t ‚ n t | t ƒ sy t ‚ t  |  ƒ } |  j ƒ  } t | t ƒ s¦ t ‚ | d k  rÅ t | | ƒ } n? | d
 t | d ƒ d d d t | d ƒ | d | d g	 } xD | D]< } | | } | |  | k s1t ‚ | | | k st ‚ qW| j | ƒ | j | d ƒ x¨| D] } x| D]} | | | !} | |  | | !j ƒ  k sÒt d t	 | ƒ t	 |  | | !ƒ f ƒ ‚ | | | | !j ƒ  k st d t	 | ƒ t	 | | | !ƒ f ƒ ‚ xq d d g D]c } | | | | … } | |  | | | … j ƒ  k s_t ‚ | | | | | … j ƒ  k s!t ‚ q!Wq}W| | } | |  | j ƒ  k s²t ‚ | | | j ƒ  k sÎt ‚ | |  } | |  |  j ƒ  k sôt ‚ | | |  j ƒ  k spt ‚ qpW| |  j ƒ  k s-t ‚ | | j ƒ  k sFt ‚ d	 S(   s    Compare two Seq or DBSeq objectsi2   iÿÿÿÿi   i   i    iè  s   Slice %s vs %si   Ni    (
   R   R   R   R   R   R-   t   ranget   intt   appendR   (	   t   oldt   newt   lt   st   indicest   it   expectedt   jt   step(    (    s   test_BioSQL_SeqIO.pyt   compare_sequences  sH    ?
&&%-

 c         C   sÇ  t  |  j | j ƒ |  j | j k s+ t ‚ |  j | j k sC t ‚ |  j | j k s[ t ‚ |  j | j k s‰ t d |  j | j f ƒ ‚ t |  j ƒ t | j ƒ k s­ t ‚ x0 t	 |  j | j ƒ D] \ } } t
 | | ƒ qÃ Wt | j ƒ j |  j ƒ } | j d d d d d g ƒ } | s9t d d j | ƒ ƒ ‚ t |  j ƒ j | j ƒ } | j d d	 g ƒ } | s‰t d
 d j | ƒ ƒ ‚ x7t |  j j ƒ  ƒ j | j j ƒ  ƒ D]} | d k r-t |  j | ƒ t | j | ƒ k sït ‚ xÍt	 |  j | | j | ƒ D] \ } } t | | ƒ qWq±| d k r.t |  j | t ƒ r~g  |  j | D] }	 |	 j d d ƒ ^ q]}
 n |  j | j d d ƒ g }
 t |
 ƒ t | j | ƒ k sât d t |
 ƒ t | j | ƒ f ƒ ‚ xÚt	 |
 | j | ƒ D]. \ } } | | k sùt d | | f ƒ ‚ qùWq±| d k rot | j | t ƒ s¿t | j | t ƒ s¿t ‚ q±t |  j | ƒ t | j | ƒ k rÙ|  j | | j | k s¿t d | |  j | | j | f ƒ ‚ q±t |  j | t ƒ rLt | j | t ƒ rL|  j | g | j | k s¿t d | |  j | | j | f ƒ ‚ q±t |  j | t ƒ r±t | j | t ƒ r±|  j | | j | g k s¿t d | |  j | | j | f ƒ ‚ q±q±Wd S(   s,   Compare two SeqRecord or DBSeqRecord objectss    dbxrefs mismatch
Old: %s
New: %st   cross_referencest   datest   data_file_divisiont
   ncbi_taxidt   gis"   Unexpected new annotation keys: %ss   , t   contigs(   Unexpectedly missing annotation keys: %st
   referencesR#   s   
t    sF   Number of annotation 'comment's changed by load/retrieve
Was:%s
Now:%ss;   Annotation 'comment' changed by load/retrieve
Was:%s
Now:%st   taxonomyt   organismt   sources6   Annotation '%s' changed by load/retrieve
Was:%s
Now:%sN(   RV   RW   RX   (   RM   R   R,   R   t   namet   descriptiont   dbxrefsR   t   featuresR4   R@   R6   t   annotationst
   differencet   joinR7   t   intersectionR'   R   R8   t   replaceR-   R(   (   RD   RE   R:   R;   t   new_keyst   missing_keysR?   R%   R&   t   commt   old_commentt   old_comt   new_com(    (    s   test_BioSQL_SeqIO.pyt   compare_recordsJ  sl    $"	.,*,"##&$ $ s   Connecting to databaset   drivert   usert   passwdt   hostt   dbsX   Connection failed, check settings in Tests/setup_BioSQL.py if you plan to use BioSQL: %ss/   Removing existing sub-database '%s' (if exists)s$   (Re)creating empty sub-database '%s's&   Testing loading from %s format file %st   handlet   rt   formats	    - %s, %ss&    - Retrieving by name/display_id '%s',RY   t
   display_idt   OKt   .s    - Retrieving by version '%s',t   versiont
   accessionss    Blank accession in annotation %ss     - Retrieving by accession '%s',t	   accessiont   FailedRR   s    - Retrieving by GI '%s',t
   primary_ids   Removing (deleting) '%s's   Committing remaining changess   Closing connection(J   t   __doc__t   osR6   t	   NameErrort   setsR    t   BioR   R   t   Bio.SeqR   R   t   Bio.SeqUtils.CheckSumR   t   Bio.SeqFeatureR   t   BioSQLR   R   t   setup_BioSQLR	   R
   R   R   R   R   R   R   t   ImportErrort   messaget   db_nameR9   t   Truet
   test_filesR   R'   R@   RM   Rh   t   open_databaset   servert	   ExceptionR<   R-   R7   t   remove_databaset   committ   new_databaseRm   t   t_formatt   t_alignmentt
   t_filenamet   t_countt   patht   isfileR   t   parset   opent   iteratort   loadt   countR   R,   RY   R?   t   lookupt   db_rect   splitt   isdigitR]   t   accsR   t
   IndexErrort   close(    (    (    s   test_BioSQL_SeqIO.pyt   <module>   s  
"		 	o	;	W		!
!		.		
