
PIc           @   s   d  Z  d d l m Z d d l m Z m Z d d l m Z d d l m Z d e f d     YZ	 d   Z
 d	   Z d
   Z d   Z d   Z d   Z d   Z d   Z d   Z d e f d     YZ d S(   s   Implementations of Biopython-like Seq objects on top of BioSQL.

This allows retrival of items stored in a BioSQL database using
a biopython-like Seq interface.
i(   t   Alphabet(   t   Seqt
   UnknownSeq(   t	   SeqRecord(   t
   SeqFeaturet   DBSeqc           B   sb   e  Z d    Z d   Z d   Z d   Z d   Z e e d d Z d   Z	 d   Z
 d	   Z RS(
   c         C   s1   | |  _  | |  _ | |  _ | |  _ | |  _ d S(   s   Create a new DBSeq object referring to a BioSQL entry.

        You wouldn't normally create a DBSeq object yourself, this is done
        for you when retreiving a DBSeqRecord object from the database.
        N(   t
   primary_idt   adaptort   alphabett   _lengtht   start(   t   selfR   R   R   R
   t   length(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   __init__   s
    				c         C   s   |  j  S(   N(   R	   (   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   __len__"   s    c         C   s\  t  | t  r | } | d k  rP | |  j k r@ t |   n  | |  j } n | |  j k rn t |   n  |  j j |  j |  j | |  j | d  St  | t  s t	 d   n  | j d  k r d } n	 | j } | d k  r| |  j k rt |   n  | |  j } n | |  j k r.|  j } n  | j d  k rI|  j } n	 | j } | d k  r| |  j k r}t |   n  | |  j } n | |  j k r|  j } n  | | k rt d |  j  S| j d  k s| j d k r|  j |  j |  j |  j |  j | | |  S|  j j |  j |  j | |  j |  } t | d  d  | j  |  j  Sd  S(   Ni    i   s   Unexpected index typet    (   t
   isinstancet   intR	   t
   IndexErrorR   t   get_subseq_as_stringR   R
   t   slicet
   ValueErrort   Nonet   stopR   R   t   stept	   __class__(   R   t   indext   it   jt   full(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   __getitem__%   sP    
			
c         C   s&   |  j  j |  j |  j |  j |  j  S(   s   Returns the full sequence as a python string.

        Although not formally deprecated, you are now encouraged to use
        str(my_seq) instead of my_seq.tostring().(   R   R   R   R
   R	   (   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   tostringa   s    c         C   s&   |  j  j |  j |  j |  j |  j  S(   s-   Returns the full sequence as a python string.(   R   R   R   R
   R	   (   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   __str__i   s    t   docs   Sequence as string (DEPRECATED)c         C   s   t  t |   |  j  S(   s*   Returns the full sequence as a Seq object.(   R   t   strR   (   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   toseqq   s    c         C   s   |  j    | S(   N(   R#   (   R   t   other(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   __add__v   s    c         C   s   | |  j    S(   N(   R#   (   R   R$   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   __radd__z   s    (   t   __name__t
   __module__R   R   R   R   R    t   propertyt   dataR#   R%   R&   (    (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyR      s   			<				c   	      C   s  |  j  d | f  } | s d  St |  d k s7 t  | d \ } } } y4 t |  } t |  } | | k sw t  t } Wn t k
 r| d  k s t  |  j  d | f  } t |  d k s t  | d \ } } } | d  k s | d k s t  t |  } t } ~ n X~ | j   } | d k r>t	 j
 } nX | d k rVt	 j } n@ | d k rnt	 j } n( | d	 k rt	 j } n t d
 |   | rt | |  | d t |   St | |  Sd  S(   NsL   SELECT alphabet, length, length(seq) FROM biosequence WHERE bioentry_id = %si   i    sD   SELECT alphabet, length, seq FROM biosequence WHERE bioentry_id = %sR   t   dnat   rnat   proteint   unknowns   Unknown moltype: %s(   t   execute_and_fetchallt   lent   AssertionErrorR   t   Truet	   TypeErrorR   t   Falset   lowerR    t   generic_dnat   generic_rnat   generic_proteint   single_letter_alphabetR   R   (	   R   R   t   seqst   moltypet   given_lengthR   t   have_seqt   seqR   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   _retrieve_seq   sJ     
c         C   s{   g  } |  j  d | f  } xY | D]Q \ } } } | rV | d k rV d | | f } n | } | j d | | f  q" W| S(   s8   Retrieve the database cross references for the sequence.sy   SELECT dbname, accession, version FROM bioentry_dbxref join dbxref using (dbxref_id) WHERE bioentry_id = %s ORDER BY rankt   0s   %s.%ss   %s:%s(   R/   t   append(   R   R   t   _dbxrefst   dbxrefst   dbnamet	   accessiont   versiont   v(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   _retrieve_dbxrefs   s    c         C   s  d } |  j  | | f  } g  } x| D]\ } } } |  j  d | f  } i  }	 x- | D]% \ }
 } |	 j |
 g   j |  qY W|  j  d | f  } x= | D]5 \ }
 } d |
 | f } |	 j d g   j |  q W|  j  d | f  } g  } x[ | D]S \ } } } } | r| d 8} n  | d k r3d  } n  | j | | | | f  q W|  j  d	 | f  } i  } xj | D]b \ } } } } | r| d
 k rd | | f } n | } | d k rd  } n  | | f | | <qrWt j d |  } | | _ |	 | _ t |  d k rnt |  d k r| d \ } } } } t |  |  | _	 | j
 | d  \ } } t j | |  | _ | | _ | | _ | | _ n#| j g  k st  x | D] } | \ } } } } | j
 | d  \ } } t j   } | | _ t |  |  | _	 | j	 sd | _	 n  t j | |  | _ | | _ | | _ | | _ | j j |  qW| j d j	 | _	 | d d } | d d } t j | |  | _ | j d j | _ | j |  q( W| S(   Ns   SELECT seqfeature_id, type.name, rank FROM seqfeature join term type on (type_term_id = type.term_id) WHERE bioentry_id = %s ORDER BY rankst   SELECT name, value FROM seqfeature_qualifier_value  join term using (term_id) WHERE seqfeature_id = %s ORDER BY ranks   SELECT dbxref.dbname, dbxref.accession FROM dbxref join seqfeature_dbxref using (dbxref_id) WHERE seqfeature_dbxref.seqfeature_id = %s ORDER BY ranks   %s:%st   db_xrefsc   SELECT location_id, start_pos, end_pos, strand FROM location WHERE seqfeature_id = %s ORDER BY ranki   i    ss   SELECT location_id, dbname, accession, version FROM location join dbxref using (dbxref_id) WHERE seqfeature_id = %sR@   s   %s.%sR   t   typet   joinii   (   NN(   NN(   R/   t
   setdefaultRA   R   R   t   _seqfeature_idt
   qualifiersR0   t"   _retrieve_location_qualifier_valuet   location_operatort   gett   FeatureLocationt   locationt   strandt   ref_dbt   reft   sub_featuresR1   RJ   (   R   R   t   sqlt   resultst   seq_feature_listt   seqfeature_idt   seqfeature_typet   seqfeature_rankt   qvsRN   t   qv_namet   qv_valuet   valuet	   locationst   location_idR
   t   endRT   t   remote_resultst   lookupRD   RE   RF   RG   t   featureRS   t
   subfeature(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   _retrieve_features   s    											c         C   s:   |  j  d | f  } y | d SWn t k
 r5 d SXd  S(   NsA   SELECT value FROM location_qualifier_value WHERE location_id = %si    R   (   t   execute_and_fetch_col0R   (   R   Rc   Ra   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyRO   4  s    c         C   se   i  } | j  t |  |   | j  t |  |   | j  t |  | |   | j  t |  |   | S(   N(   t   updatet   _retrieve_qualifier_valuet   _retrieve_referencet   _retrieve_taxont   _retrieve_comment(   R   R   t   taxon_idt   annotations(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   _retrieve_annotations=  s    c         C   s   |  j  d | f  } i  } xl | D]d \ } } | d k rC d } n* | d k rX d } n | d k rm d } n  | j | g   j |  q" W| S(   Nso   SELECT name, value FROM bioentry_qualifier_value JOIN term USING (term_id) WHERE bioentry_id = %s ORDER BY rankt   keywordt   keywordst   date_changedt   datest   secondary_accessiont
   accessions(   R/   RL   RA   (   R   R   R^   RN   t   nameRa   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyRl   E  s     	 	 	c         C   s   |  j  d | f  } g  } x | D] \ } } } } } }	 }
 t j   } | r\ | d 8} n  t j | |  g | _ | r | | _ n  | r | | _ n  | | _ |	 d k r |
 | _ n |	 d k r |
 | _	 n  | j
 |  q" W| r i | d 6Si  Sd  S(   Ns   SELECT start_pos, end_pos,  location, title, authors, dbname, accession FROM bioentry_reference JOIN reference USING (reference_id) LEFT JOIN dbxref USING (dbxref_id) WHERE bioentry_id = %s ORDER BY ranki   t   PUBMEDt   MEDLINEt
   references(   R/   R   t	   ReferenceRR   RS   t   authorst   titlet   journalt	   pubmed_idt
   medline_idRA   (   R   R   t   refsR|   R
   Rd   RS   R   R~   RD   RE   t	   reference(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyRm   S  s,    "   	c         C   s!  i  } |  j  d | f  } | r2 | d | d <n  |  j  d | f  } | r^ | d | d <n  |  j  d | f  } | r | d r | d d k r | d | d <n  g  } x] | r	|  j d	 | f  \ } }	 }
 | |
 k r Pn  |	 d
 k r | j d |  n  |
 } q W| r| | d <n  | S(   NsV   SELECT name FROM taxon_name WHERE taxon_id = %s AND name_class = 'genbank common name'i    t   sourcesR   SELECT name FROM taxon_name WHERE taxon_id = %s AND name_class = 'scientific name't   organisms3   SELECT ncbi_taxon_id FROM taxon WHERE taxon_id = %sR@   t
   ncbi_taxids   SELECT taxon_name.name, taxon.node_rank, taxon.parent_taxon_id FROM taxon, taxon_name WHERE taxon.taxon_id=taxon_name.taxon_id AND taxon_name.name_class='scientific name' AND taxon.taxon_id = %ss   no rankt   taxonomy(   Rj   t   execute_onet   insert(   R   R   Rp   t   at   common_namest   scientific_namest   ncbi_taxidsR   Ry   t   rankt   parent_taxon_id(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyRn   r  s8     	
c         C   sK   |  j  d | f  } g  | D] } | d ^ q } | rC i | d 6Si  Sd  S(   NsC   SELECT comment_text FROM comment WHERE bioentry_id=%s ORDER BY ranki    t   comment(   R/   (   R   R   R^   t   commt   comments(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyRo     s    t   DBSeqRecordc           B   s   e  Z d  Z d   Z d   Z d   Z d   Z e e e e d  Z d   Z	 d   Z
 d   Z e e	 e
 e d	  Z d
   Z d   Z d   Z e e e e d  Z d   Z d   Z d   Z e e e e d  Z RS(   s9   BioSQL equivalent of the biopython SeqRecord object.
    c      	   C   s   | |  _  | |  _ |  j  j d |  j f  \ |  _ |  _ |  _ } } |  _ |  _ |  _ | r | d k r d | | f |  _	 n	 | |  _	 d  S(   Ns   SELECT biodatabase_id, taxon_id, name, accession, version, identifier, division, description FROM bioentry WHERE bioentry_id = %sR@   s   %s.%s(
   t   _adaptort   _primary_idR   t   _biodatabase_idt	   _taxon_idRy   t   _identifiert	   _divisiont   descriptiont   id(   R   R   R   RE   RF   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyR     s    			9c         C   s1   t  |  d  s* t |  j |  j  |  _ n  |  j S(   Nt   _seq(   t   hasattrR?   R   R   R   (   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt	   __get_seq  s    c         C   s   | |  _  d  S(   N(   R   (   R   R>   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt	   __set_seq  s    c         C   s
   |  `  d  S(   N(   R   (   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt	   __del_seq  s    s
   Seq objectc         C   s1   t  |  d  s* t |  j |  j  |  _ n  |  j S(   NRB   (   R   RH   R   R   RB   (   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   __get_dbxrefs  s    c         C   s   | |  _  d  S(   N(   RB   (   R   RC   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   __set_dbxrefs  s    c         C   s
   |  `  d  S(   N(   RB   (   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   __del_dbxrefs  s    s   Database cross referencesc         C   s1   t  |  d  s* t |  j |  j  |  _ n  |  j S(   Nt	   _features(   R   Ri   R   R   R   (   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   __get_features  s    	c         C   s   | |  _  d  S(   N(   R   (   R   t   features(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   __set_features  s    c         C   s
   |  `  d  S(   N(   R   (   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   __del_features  s    t   Featuresc         C   so   t  |  d  sh t |  j |  j |  j  |  _ |  j rI |  j |  j d <n  |  j rh |  j |  j d <qh n  |  j S(   Nt   _annotationst   git   data_file_division(   R   Rr   R   R   R   R   R   R   (   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   __get_annotations  s    			c         C   s   | |  _  d  S(   N(   R   (   R   Rq   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   __set_annotations  s    c         C   s
   |  `  d  S(   N(   R   (   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   __del_annotations  s    t   Annotations(   R'   R(   t   __doc__R   t   _DBSeqRecord__get_seqt   _DBSeqRecord__set_seqt   _DBSeqRecord__del_seqR)   R>   t   _DBSeqRecord__get_dbxrefst   _DBSeqRecord__set_dbxrefst   _DBSeqRecord__del_dbxrefsRC   t   _DBSeqRecord__get_featurest   _DBSeqRecord__set_featurest   _DBSeqRecord__del_featuresR   t   _DBSeqRecord__get_annotationst   _DBSeqRecord__set_annotationst   _DBSeqRecord__del_annotationsRq   (    (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyR     s*   													
			N(   R   t   BioR    t   Bio.SeqR   R   t   Bio.SeqRecordR   R   R   R?   RH   Ri   RO   Rr   Rl   Rm   Rn   Ro   R   (    (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/BioSeq.pyt   <module>   s   j	5		p						2	