
PIc           @   s   d  Z  d d l m Z m Z d d l m Z d d l m Z d d l m Z d d l	 m
 Z
 d f  d     YZ d	 f  d
     YZ d S(   s'  Load biopython objects into a BioSQL database for persistent storage.

This code makes it possible to store biopython objects in a relational
database and then retrieve them back. You shouldn't use any of the
classes in this module directly. Rather, call the load() method on
a database object.
i(   t   gmtimet   strftime(   t   Alphabet(   t   crc64(   t   Entrez(   t
   UnknownSeqt   DatabaseLoaderc           B   s  e  Z d  Z e d  Z d   Z d d  Z d d d d  Z d   Z	 d   Z
 d   Z d d d  Z d	   Z d
   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z RS(   s=   Object used to load SeqRecord objects into a BioSQL database.c         C   s   | |  _  | |  _ | |  _ d S(   s!  Initialize with connection information for the database.

        Creating a DatabaseLoader object is normally handled via the
        BioSeqDatabase DBServer object, for example:

        from BioSQL import BioSeqDatabase
        server = BioSeqDatabase.open_database(driver="MySQLdb", user="gbrowse",
                         passwd = "biosql", host = "localhost", db="test_biosql")
        try :
            db = server["test"]
        except KeyError :
            db = server.new_database("test", description="For testing GBrowse")
        N(   t   adaptort   dbidt   fetch_NCBI_taxonomy(   t   selfR   R   R	   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyt   __init__   s    		c         C   s   |  j  |  } |  j | |  |  j | |  |  j | |  |  j | |  | j j d d  } x< t | t t	 |    D] \ } } |  j
 | | |  q W|  j | |  x= t t	 | j   D]& } | j | } |  j | | |  q Wd S(   s6   Load a Biopython SeqRecord into the database.
        t
   referencesN(    (   t   _load_bioentry_tablet   _load_bioentry_datet   _load_biosequencet   _load_commentt   _load_dbxrefst   annotationst   gett   zipt   ranget   lent   _load_referencet   _load_annotationst   featurest   _load_seqfeature(   R
   t   recordt   bioentry_idR   t	   referencet   rankt   seq_feature_numt   seq_feature(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyt   load_seqrecord/   s    (c         C   sO   |  j  j d | f  } | r& | d S|  j  j d | | f  |  j  j d  S(   s}  Returns the identifier for the named ontology (PRIVATE).

        This looks through the onotology table for a the given entry name.
        If it is not found, a row is added for this ontology (using the
        definition if supplied).  In either case, the id corresponding to
        the provided name is returned, so that you can reference it in
        another table.
        s0   SELECT ontology_id FROM ontology WHERE name = %si    s6   INSERT INTO ontology(name, definition) VALUES (%s, %s)t   ontology(   R   t   execute_and_fetch_col0t   executet   last_id(   R
   t   namet
   definitiont   oids(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyt   _get_ontology_id?   s    			c         C   s   d } | g } | r/ | d 7} | j  |  n  |  j j | |  } t |  d k ro t d | | f   nS t |  d k r | d d Sd } |  j j | | | | | f  |  j j d  Sd S(	   s  Get the id that corresponds to a term (PRIVATE).

        This looks through the term table for a the given term. If it
        is not found, a new id corresponding to this term is created.
        In either case, the id corresponding to that term is returned, so
        that you can reference it in another table.

        The ontology_id should be used to disambiguate the term.
        s(   SELECT term_id FROM term WHERE name = %ss    AND ontology_id = %si   s   Multiple term ids for %s: %ri    sT   INSERT INTO term (name, definition, identifier, ontology_id) VALUES (%s, %s, %s, %s)t   termN(   t   appendR   t   execute_and_fetchallR   t
   ValueErrorR$   R%   (   R
   R&   t   ontology_idR'   t
   identifiert   sqlt   fieldst
   id_results(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyt   _get_term_idS   s    	
c         C   s,   |  j  j d | | | f  |  j  j d  S(   s"   Insert a dbxref and return its id.sB   INSERT INTO dbxref(dbname, accession, version) VALUES (%s, %s, %s)t   dbxref(   R   R$   R%   (   R
   t   dbnamet	   accessiont   version(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyt   _add_dbxrefx   s    	c         C   s  d } d | j k rk t | j d t  r[ t | j d  d k rh | j d d } qh qk | j d } n  | s x | j D]~ } | j d k r t | d i   } d | k r x; | j d D]) } | j	 d  r t
 | d  } Pq q Wq n  | r{ Pq{ q{ Wn  y | j d	 d
  } Wn t k
 r.d } n Xy | j d d
  } Wn t k
 r]d } n X| rw|  j | | |  S| r| rd S| r|  j j d | f  } | r| d Sn  | r|  j j d | f  } t |  d k rt d t |  | f   n  | r| d Sn  g  }	 x3 | j j d g   D] }
 |	 j d d |
 g  q4W|	 rkd |	 d d <n  |	 j d d | j d	 g  d | j k r|	 j d d | j d g  n  d | j k r|	 j d d | j d g  n  | |	 d d <|  j j d  d } | sd } n  | d 7} |  j j d  d } | sHd } n  | d t |	  d } d } x |	 D]} } |  j j d | | d | d | | f  |  j j d  } |  j j d | | d d
  f  | d 7} | d 8} | } qmW| r|  j j d | | f  n  | S(   s  Get the taxon id for this record (PRIVATE).

        record - a SeqRecord object

        This searches the taxon/taxon_name tables using the
        NCBI taxon ID, scientific name and common name to find
        the matching taxon table entry's id.
        
        If the species isn't in the taxon table, and we have at
        least the NCBI taxon ID, scientific name or common name,
        at least a minimal stub entry is created in the table.

        Returns the taxon id (database key for the taxon table,
        not an NCBI taxon ID), or None if the taxonomy information
        is missing.

        See also the BioSQL script load_ncbi_taxonomy.pl which
        will populate and update the taxon/taxon_name tables
        with the latest information from the NCBI.
        t
   ncbi_taxidi   i    t   sourcet
   qualifierst   db_xrefs   taxon:i   t   organismi   sR   SELECT taxon_id FROM taxon_name WHERE name_class = 'scientific name' AND name = %ss8   SELECT DISTINCT taxon_id FROM taxon_name WHERE name = %ss   Taxa: %d species have name %rt   taxonomyt   genusit   speciest
   subspeciest   variantt   varietass!   SELECT MAX(left_value) FROM taxons"   SELECT MAX(right_value) FROM taxoni   sq   INSERT INTO taxon(parent_taxon_id, ncbi_taxon_id, node_rank, left_value, right_value) VALUES (%s, %s, %s, %s, %s)t   taxonsT   INSERT INTO taxon_name(taxon_id, name, name_class)VALUES (%s, %s, 'scientific name')sP   INSERT INTO taxon_name(taxon_id, name, name_class)VALUES (%s, %s, 'common name')N(   t   NoneR   t
   isinstancet   listR   R   t   typet   getattrR;   t
   startswitht   intt   KeyErrort    _get_taxon_id_from_ncbi_taxon_idR   R#   R-   R   R+   t   execute_oneR$   R%   (   R
   R   t   ncbi_taxon_idt   ft   qualsR<   t   scientific_namet   common_namet   taxat   lineaget   ct
   left_valuet   right_start_valuet   right_valuet   parent_taxon_idRD   t   taxon_id(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyt   _get_taxon_id   s     

						
			
	


	c         C   sS   d   } d j  g  | D] } | |  ^ q  j   } | | j   k sO t  | S(   sb  Map Entrez name terms to those used in taxdump (PRIVATE).

        We need to make this conversion to match the taxon_name.name_class
        values used by the BioSQL load_ncbi_taxonomy.pl script.
        
        e.g.
        "ScientificName" -> "scientific name",
        "EquivalentName" -> "equivalent name",
        "Synonym" -> "synonym",
        c         S   s"   |  j    r d |  j   S|  Sd  S(   Nt    (   t   isuppert   lower(   t   letter(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyt	   add_space;  s    t    (   t   joint   stripR_   t   AssertionError(   R
   t   entrez_nameRa   R`   t   answer(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyt   _fix_name_class'  s    	.c      	   C   s  | s t   |  j j d | f  } | r2 | d Sd } d } d } d } g  }	 | rl |	 j d | f  n  | r |	 j d | f  n  |  j rt j d d d | d	 d
  }
 t j |
  } t	 |  d k r| d d t
 |  k st  d | d d | f   |  j | d d  } | d d } | d d d } | d d d } d | d d f g }	 y x | d d j   D]l \ } } |  j |  } t | t  s| g } n  x3 | D]+ } t | t  r|	 j | | f  qqWqWWqt k
 rqXqn  |  j j d | | | | | d d f  |  j j d  } x4 |	 D], \ } } |  j j d | | d  | f  qLW| S(   s  Get the taxon id for this record from the NCBI taxon ID (PRIVATE).

        ncbi_taxon_id - string containing an NCBI taxon id
        scientific_name - string, used if a stub entry is recorded
        common_name - string, used if a stub entry is recorded
        
        This searches the taxon table using ONLY the NCBI taxon ID
        to find the matching taxon table entry's ID (database key).
        
        If the species isn't in the taxon table, and the fetch_NCBI_taxonomy
        flag is true, Biopython will attempt to go online using Bio.Entrez
        to fetch the official NCBI lineage, recursing up the tree until an
        existing entry is found in the database or the full lineage has been
        fetched.

        Otherwise the NCBI taxon ID, scientific name and common name are
        recorded as a minimal stub entry in the taxon and taxon_name tables.
        Any partial information about the lineage from the SeqRecord is NOT
        recorded.  This should mean that (re)running the BioSQL script
        load_ncbi_taxonomy.pl can fill in the taxonomy lineage.

        Returns the taxon id (database key for the taxon table, not
        an NCBI taxon ID).
        s3   SELECT taxon_id FROM taxon WHERE ncbi_taxon_id = %si    R@   s   scientific names   common namet   dbR>   t   idt   retmodet   XMLi   t   TaxIds   %s versus %st	   LineageExt   Rankt   GeneticCodet   GCIdt   MitoGeneticCodet   MGCIdt   ScientificNamet
   OtherNamess   INSERT INTO taxon(parent_taxon_id, ncbi_taxon_id, node_rank, genetic_code, mito_genetic_code, left_value, right_value) VALUES (%s, %s, %s, %s, %s, %s, %s)RD   sF   INSERT INTO taxon_name(taxon_id, name, name_class) VALUES (%s, %s, %s)i   N(   Re   R   R#   RE   R+   R	   R   t   efetcht   readR   t   strt   _get_taxon_id_from_ncbi_lineaget	   iteritemsRh   RF   RG   t
   basestringRL   R$   R%   (   R
   RO   RR   RS   R[   RZ   R   t   genetic_codet   mito_genetic_codet   species_namest   handlet   taxonomic_recordt
   name_classt   namesR&   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyRM   D  sp    		!"
	
	c         C   sr  | d d } |  j  j d |  } | r` t | t  rY t |  d k sQ t  | d S| Sn  t |  d k r |  j | d   } t | t  s t | t  s t t	 |    n d } | d j d d  } |  j  j d | | | f  |  j  j d  } t | t  s2t | t  s2t t	 |    | d j d	 d  } | rn|  j  j d
 | | d  f  n  | S(   sH  This is recursive! (PRIVATE).

        taxonomic_lineage - list of taxonomy dictionaries from Bio.Entrez

        First dictionary in list is the taxonomy root, highest would be the species.
        Each dictionary includes:
        - TaxID (string, NCBI taxon id)
        - Rank (string, e.g. "species", "genus", ..., "phylum", ...)
        - ScientificName (string)
        (and that is all at the time of writing)

        This method will record all the lineage given, returning the the taxon id
        (database key, not NCBI taxon id) of the final entry (the species).
        iRm   s1   SELECT taxon_id FROM taxon WHERE ncbi_taxon_id=%si   i    Ro   sP   INSERT INTO taxon(ncbi_taxon_id, parent_taxon_id, node_rank) VALUES (%s, %s, %s)RD   Rt   sU   INSERT INTO taxon_name(taxon_id, name, name_class) VALUES (%s, %s, 'scientific name')i   N(   R   R#   RF   RG   R   Re   Ry   RK   t   longt   reprRE   R   R$   R%   (   R
   t   taxonomic_lineageRO   R[   RZ   R   RR   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyRy     s4    	
3	0	c   
   
   C   sf  | j  j d  d k rf | j  j d  \ } } y t |  } Wqu t k
 rb | j  } d } qu Xn | j  } d } d | j k r t | j d t  r | j d r | j d d } n  |  j |  } d | j k r | j d } n	 | j  } t	 | d d
  } | j j d d
  } d } |  j j | |  j | | j | | | | | f  |  j j d	  }	 |	 S(   s   Fill the bioentry table with sequence information (PRIVATE).

        record - SeqRecord object to add to the database.
        t   .i   i    t
   accessionst   git   descriptiont   data_file_divisions7  
        INSERT INTO bioentry (
         biodatabase_id,
         taxon_id,
         name,
         accession,
         identifier,
         division,
         description,
         version)
        VALUES (
         %s,
         %s,
         %s,
         %s,
         %s,
         %s,
         %s,
         %s)t   bioentryN(   Rj   t   countt   splitRK   R-   R   RF   RG   R\   RI   RE   R   R   R$   R   R&   R%   (
   R
   R   R6   R7   R[   R/   R   t   divisionR0   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR     s<    			
c         C   sn   | j  j d t d t    j    } |  j d  } |  j d |  } d } |  j j | | | | f  d S(   s   Add the effective date of the entry into the database.

        record - a SeqRecord object with an annotated date
        bioentry_id - corresponding database identifier
        t   dates   %d-%b-%Ys   Annotation Tagst   date_changeds_   INSERT INTO bioentry_qualifier_value (bioentry_id, term_id, value, rank) VALUES (%s, %s, %s, 1)N(	   R   R   R   R    t   upperR)   R3   R   R$   (   R
   R   R   R   t   annotation_tags_idt   date_idR0   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR   3  s    c         C   s   | j  d k r d St | j  j t j  r4 d } nH t | j  j t j  rU d } n' t | j  j t j  rv d } n d } t | j  t  r d } n t	 | j   } d } |  j
 j | | t | j   | | f  d S(   s   Record a SeqRecord's sequence and alphabet in the database (PRIVATE).

        record - a SeqRecord object with a seq property
        bioentry_id - corresponding database identifier
        Nt   dnat   rnat   proteint   unknowns`   INSERT INTO biosequence (bioentry_id, version, length, seq, alphabet) VALUES (%s, 0, %s, %s, %s)(   t   seqRE   RF   t   alphabetR   t   DNAAlphabett   RNAAlphabett   ProteinAlphabetR   Rx   R   R$   R   (   R
   R   R   R   t   seq_strR0   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR   D  s"    				c         C   s   | j  j d  } | s d St | t  s7 | g } n  xR t |  D]D \ } } | j d d  } d } |  j j | | | | d f  qD Wd S(   s   Record a SeqRecord's annotated comment in the database (PRIVATE).

        record - a SeqRecord object with an annotated comment
        bioentry_id - corresponding database identifier
        t   commentNs   
R]   sI   INSERT INTO comment (bioentry_id, comment_text, rank) VALUES (%s, %s, %s)i   (   R   R   RF   RG   t	   enumeratet   replaceR   R$   (   R
   R   R   t   commentst   indexR   R0   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR   f  s    c         C   s"  d } d } |  j  d  } x | j j   D] \ } } | d k rI q+ n  |  j | d | } t | t  r d }	 x | D]V }
 t |
 t  s t |
 t  rz |	 d	 7}	 |  j j	 | | | t |
  |	 f  qz qz Wq+ t | t  s t | t  r+ |  j j	 | | | t |  f  q+ q+ Wd
 S(   s  Record a SeqRecord's misc annotations in the database (PRIVATE).

        The annotation strings are recorded in the bioentry_qualifier_value
        table, except for special cases like the reference, comment and
        taxonomy which are handled with their own tables.

        record - a SeqRecord object with an annotations dictionary
        bioentry_id - corresponding database identifier
        sU   INSERT INTO bioentry_qualifier_value(bioentry_id, term_id, value) VALUES (%s, %s, %s)s_   INSERT INTO bioentry_qualifier_value(bioentry_id, term_id, value, rank) VALUES (%s, %s, %s, %s)s   Annotation TagsR   R   R9   R.   i    i   N(   R   R   R9   (
   R)   R   Rz   R3   RF   RG   Rx   RK   R   R$   (   R
   R   R   t   mono_sqlt   many_sqlt   tag_ontology_idt   keyt   valuet   term_idR   t   entry(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR   {  s&    

c         C   s  d } | j r- |  j j d | j f  } n  | r[ | j r[ |  j j d | j f  } n  | s g  } x3 | j | j | j f D] } | j | p d  q Wt	 d j
 |   } |  j j d | f  } n  | s| j r |  j d | j d  } n* | j r|  j d | j d  } n d } | j p-d }	 | j p<d }
 | j pKd } |  j j d	 | | |
 |	 | f  |  j j d
  } n
 | d } | j rd t t | j d j   } t t | j d j   } n d } d } d } |  j j | | | | | | d f  d S(   s   Record a SeqRecord's annotated references in the database (PRIVATE).

        record - a SeqRecord object with annotated references
        bioentry_id - corresponding database identifier
        sm   SELECT reference_id  FROM reference JOIN dbxref USING (dbxref_id) WHERE dbname = 'MEDLINE' AND accession = %ssl   SELECT reference_id  FROM reference JOIN dbxref USING (dbxref_id) WHERE dbname = 'PUBMED' AND accession = %ss   <undef>Rb   s1   SELECT reference_id FROM reference WHERE crc = %st   MEDLINEi    t   PUBMEDs\   INSERT INTO reference (dbxref_id, location, title, authors, crc) VALUES (%s, %s, %s, %s, %s)R   i   sp   INSERT INTO bioentry_reference (bioentry_id, reference_id, start_pos, end_pos, rank) VALUES (%s, %s, %s, %s, %s)N(   RE   t
   medline_idR   R#   t	   pubmed_idt   authorst   titlet   journalR+   R   Rc   R8   R$   R%   t   locationRK   Rx   t   startt   end(   R
   R   R   R   t   refst   sRP   t   crct	   dbxref_idR   R   R   t   reference_idR   R   R0   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR     sT    										
	 c         C   s?   |  j  | j | |  } |  j | |  |  j | j |  d S(   sA   Load a biopython SeqFeature into the database (PRIVATE).
        N(   t   _load_seqfeature_basicRH   t   _load_seqfeature_locationst   _load_seqfeature_qualifiersR;   (   R
   t   featuret   feature_rankR   t   seqfeature_id(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR     s    	c   
      C   s   |  j  d  } |  j | d | } |  j  d  } |  j d d | } d } |  j j | | | | | d f  |  j j d  }	 |	 S(   s   Load the first tables of a seqfeature and returns the id (PRIVATE).

        This loads the "key" of the seqfeature (ie. CDS, gene) and
        the basic seqfeature table itself.
        s   SeqFeature KeysR.   s   SeqFeature Sourcess   EMBL/GenBank/SwissProts`   INSERT INTO seqfeature (bioentry_id, type_term_id, source_term_id, rank) VALUES (%s, %s, %s, %s)i   t
   seqfeature(   R)   R3   R   R$   R%   (
   R
   t   feature_typeR   R   R.   t   seqfeature_key_idt   source_cat_idt   source_term_idR0   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR     s    		c         C   sZ   | j  s |  j | d |  n7 x4 t | j   D]# \ } } |  j | | d |  q/ Wd S(   s1  Load all of the locations for a SeqFeature into tables (PRIVATE).

        This adds the locations related to the SeqFeature into the
        seqfeature_location table. Fuzzies are not handled right now.
        For a simple location, ie (1..2), we have a single table row
        with seq_start = 1, seq_end = 2, location_rank = 1.

        For split locations, ie (1..2, 3..4, 5..6) we would have three
        row tables with:
            start = 1, end = 2, rank = 1
            start = 3, end = 4, rank = 2
            start = 5, end = 6, rank = 3
        i   N(   t   sub_featurest   _insert_seqfeature_locationR   (   R
   R   R   R   t   cur_feature(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR     s    		c   
   	   C   s   | j  j d } | j  j } | j p( d } d } | j r[ |  j | j pL d | j  } n d } d }	 |  j j	 |	 | | | | | | | f  d S(   s   Add a location of a SeqFeature to the seqfeature_location table (PRIVATE).

        TODO - Add location_operators to location_qualifier_value.
        i   i    Rb   s}   INSERT INTO location (seqfeature_id, dbxref_id, term_id,start_pos, end_pos, strand, rank) VALUES (%s, %s, %s, %s, %s, %s, %s)N(
   R   t   nofuzzy_startt   nofuzzy_endt   strandRE   t   reft   _get_dbxref_idt   ref_dbR   R$   (
   R
   R   R   R   R   R   R   t   loc_term_idR   R0   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR   !  s    	!c   
      C   s   |  j  d  } x | j   D] } | d k r |  j | d | } | | } t | t  sh | g } n  xd t t |   D]9 } | | } d }	 |  j j |	 | | | d | f  q{ Wq |  j	 | | |  q Wd S(   s   Insert the (key, value) pair qualifiers relating to a feature (PRIVATE).

        Qualifiers should be a dictionary of the form:
            {key : [value1, value2]}
        s   Annotation TagsR<   R.   se   INSERT INTO seqfeature_qualifier_value  (seqfeature_id, term_id, rank, value) VALUES (%s, %s, %s, %s)i   N(
   R)   t   keysR3   RF   RG   R   R   R   R$   t   _load_seqfeature_dbxref(
   R
   R;   R   R   t   qualifier_keyt   qualifier_key_idt   entriest   qual_value_rankt   qualifier_valueR0   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR   W  s"    	

c   
      C   s   x t  |  D] \ } } y? | j d d  j d d  j d  } | d } | d } Wn t d |   n Xx7 | D]/ } |  j | |  }	 |  j | |	 | d  qy Wq Wd S(	   s  Add database crossreferences of a SeqFeature to the database (PRIVATE).

            o dbxrefs           List, dbxref data from the source file in the
                                format <database>:<accession>

            o seqfeature_id     Int, the identifier for the seqfeature in the
                                seqfeature table

            Insert dbxref qualifier data for a seqfeature into the
            seqfeature_dbxref and, if required, dbxref tables.
            The dbxref_id qualifier/value sets go into the dbxref table
            as dbname, accession, version tuples, with dbxref.dbxref_id
            being automatically assigned, and into the seqfeature_dbxref
            table as seqfeature_id, dbxref_id, and rank tuples
        R]   Rb   s   
t   :i    i   s   Parsing of db_xref failed: '%s'N(   R   R   R   R-   R   t   _get_seqfeature_dbxref(
   R
   t   dbxrefsR   R   R   t   dbxref_dataRi   R   R6   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR   ~  s    '
c         C   sB   d } |  j  j | | | f  } | r/ | d S|  j | | d  S(   s   _get_dbxref_id(self, db, accession) -> Int

            o db          String, the name of the external database containing
                          the accession number

            o accession   String, the accession of the dbxref data

            Finds and returns the dbxref_id for the passed data.  The method
            attempts to find an existing record first, and inserts the data
            if there is no record.
        sA   SELECT dbxref_id FROM dbxref WHERE dbname = %s AND accession = %si    (   R   R#   R8   (   R
   Ri   R6   R0   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR     s
    c         C   s>   d } |  j  j | | | f  } | r+ | S|  j | | |  S(   s    Check for a pre-existing seqfeature_dbxref entry with the passed
            seqfeature_id and dbxref_id.  If one does not exist, insert new
            data

        sf   SELECT seqfeature_id, dbxref_id FROM seqfeature_dbxref WHERE seqfeature_id = '%s' AND dbxref_id = '%s'(   R   R#   t   _add_seqfeature_dbxref(   R
   R   R   R   R0   t   result(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR     s    c         C   s,   d } |  j  j | | | | f  | | f S(   s_    Insert a seqfeature_dbxref row and return the seqfeature_id and
            dbxref_id
        sQ   INSERT INTO seqfeature_dbxref (seqfeature_id, dbxref_id, rank) VALUES(%s, %s, %s)(   R   R$   (   R
   R   R   R   R0   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR     s    c         C   s   x t  | j  D] \ } } | j d  d k s7 t  y4 | j d d  \ } } | j   } | j   } Wn t d |   n X|  j | |  } |  j | | | d  q Wd S(   si   Load any sequence level cross references into the database (PRIVATE).

        See table bioentry_dbxref.s   
i    R   i   s$   Parsing of dbxrefs list failed: '%s'N(	   R   R   R   Re   R   Rd   R-   R   t   _get_bioentry_dbxref(   R
   R   R   R   R   Ri   R6   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR     s    c         C   s>   d } |  j  j | | | f  } | r+ | S|  j | | |  S(   s    Check for a pre-existing bioentry_dbxref entry with the passed
            seqfeature_id and dbxref_id.  If one does not exist, insert new
            data

        s`   SELECT bioentry_id, dbxref_id FROM bioentry_dbxref WHERE bioentry_id = '%s' AND dbxref_id = '%s'(   R   R#   t   _add_bioentry_dbxref(   R
   R   R   R   R0   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR     s    c         C   s,   d } |  j  j | | | | f  | | f S(   s]    Insert a bioentry_dbxref row and return the seqfeature_id and
            dbxref_id
        sL   INSERT INTO bioentry_dbxref (bioentry_id,dbxref_id,rank) VALUES (%s, %s, %s)(   R   R$   (   R
   R   R   R   R0   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR     s    N(    t   __name__t
   __module__t   __doc__t   FalseR   R!   RE   R)   R3   R8   R\   Rh   RM   Ry   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   (    (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR      s>   	!			m	7	I		"		,	A				6	'	%			
		t   DatabaseRemoverc           B   s    e  Z d  Z d   Z d   Z RS(   s;  Complement the Loader functionality by fully removing a database.

    This probably isn't really useful for normal purposes, since you
    can just do a:
        DROP DATABASE db_name
    and then recreate the database. But, it's really useful for testing
    purposes.

    YB: now use the cascaded deletions
    c         C   s   | |  _  | |  _ d S(   s>   Initialize with a database id and adaptor connection.
        N(   R   R   (   R
   R   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR     s    	c         C   sB   d } |  j  j | |  j f  d } |  j  j | |  j f  d S(   s<   Remove everything related to the given database id.
        s.   DELETE FROM bioentry WHERE biodatabase_id = %ss1   DELETE FROM biodatabase WHERE biodatabase_id = %sN(   R   R$   R   (   R
   R0   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyt   remove  s    (   R   R   R   R   R   (    (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyR     s   
	N(   R   t   timeR    R   t   BioR   t   Bio.SeqUtils.CheckSumR   R   t   Bio.SeqR   R   R   (    (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/BioSQL/Loader.pyt   <module>   s      