ó
óVêNc           @   sU  y d  d l  m Z Wn d  d l m Z n Xd  d l Z d  d l Z d  d l Z d  d l m Z e j d ƒ rƒ e j d ƒ Z	 n d Z	 d e	 Z
 e j d ƒ r´ e j d ƒ Z n d Z e e _ i d	 d
 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d  6d! d" 6d# d$ 6d% d& 6Z d' d* d( „  ƒ  YZ e
 d) „ Z d S(+   iÿÿÿÿ(   t   dbapi2N(   t   environt   CISTEMATIC_ROOTs   /proj/genomes   %s/db/gene_info.dbt   CISTEMATIC_TEMPs   /tmpt	   athalianat   3702t   scerevisiaet   4932t   celeganst   6239t   dmelanogastert   7227t   spurpuratust   7668t   dreriot   7955t   xtropicalist   8364t   ggallust   9031t   hsapienst   9606t   cfamiliarist   9615t	   ecaballust   9796t   btaurust   9913t	   mmusculust   10090t   rnorvegicust   10116t
   mdomesticat   13616t
   geneinfoDBc           B   sƒ   e  Z d  Z d Z g  Z d Z g  e d „ Z d „  Z d „  Z	 d „  Z
 d „  Z d „  Z d d	 „ Z d
 „  Z d „  Z d „  Z RS(   sE    The geneinfoDB class allows for the querying of NCBI gene data.
    t    c         C   s    | |  _  | r |  j ƒ  n  d S(   s`    initialize the geneinfoDB object with a target genome and cache database, if desired. 
        N(   t   targetGenomest   cacheDB(   t   selft   tGenomest   cache(    (    sS   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/core/geneinfo.pyt   __init__N   s    	c         C   s    |  j  d k r |  j ƒ  n  d S(   s3    cleanup copy in local cache, if present. 
        R#   N(   t   cachedDBt	   uncacheDB(   R&   (    (    sS   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/core/geneinfo.pyt   __del__V   s    c         C   s*   d t  j ƒ  |  _ t j t |  j ƒ d S(   s,    copy geneinfoDB to a local cache. 
        s   %s.dbN(   t   tempfilet   mktempR*   t   shutilt   copyfilet   dbPath(   R&   (    (    sS   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/core/geneinfo.pyR%   ]   s    c         C   sS   |  j  d k rO y t j |  j  ƒ Wn t k
 rB d |  j  GHn Xd |  _  n  d S(   s.    delete geneinfoDB from local cache. 
        R#   s   could not delete %sN(   R*   t   ost   removet   OSError(   R&   (    (    sS   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/core/geneinfo.pyR+   d   s    c         C   s4   t  } |  j d k r! |  j } n  t j | d d ƒS(   s+    return a handle to the database. 
        R#   t   timeouti<   (   R1   R*   t   sqlitet   connect(   R&   t   path(    (    sS   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/core/geneinfo.pyt	   connectDBq   s    c         C   s¿   |  j  ƒ  } | j ƒ  } g  } | \ } } | j d t ƒ  ƒ | j ƒ  } | j ƒ  | j ƒ  yJ | \ } }	 }
 } } t | ƒ t |	 ƒ t |
 ƒ t | ƒ t | ƒ f SWn t k
 rº n X| S(   sk    returns a list of one or more (symbol, locustag, dbxrefs, chromosome, map_location) for a geneID.
        sm   select symbol, locustag, dbxrefs, chromosome, map_location from gene_info where genome = :gen and gID = :gid (   R9   t   cursort   executet   localst   fetchonet   closet   strt
   ValueError(   R&   t   geneIDt   dbR:   t   emptyRest   gent   gidt   entryt   symbolt   locustagt   dbxrefst
   chromosomet   map_location(    (    sS   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/core/geneinfo.pyt   getGeneInfo{   s    

5RE   c         C   sY  |  j  ƒ  } | j ƒ  } i  } | j d t ƒ  ƒ | j ƒ  } | j ƒ  | j ƒ  x| D]ù \ } } }	 }
 } } | d k rè t |	 ƒ | k r¡ g  | t |	 ƒ <n  | t |	 ƒ j t | ƒ t | ƒ t |
 ƒ t | ƒ t | ƒ f ƒ qX t | ƒ | k rg  | t | ƒ <n  | t | ƒ j t | ƒ t |	 ƒ t |
 ƒ t | ƒ t | ƒ f ƒ qX W| S(   s®    returns a dictionary of one or more (symbol, locustag, dbxrefs, chromosome, map_location) per gID.
            acceptable infoKey arguments are: 'locus', and 'gid'.
        se   select gid, symbol, locustag, dbxrefs, chromosome, map_location from gene_info where genome = :genomet   locus(   R9   R:   R;   R<   t   fetchallR>   R?   t   append(   R&   t   genomet   infoKeyRB   R:   t   resDictt   resultsRE   RG   RH   RI   RJ   RK   (    (    sS   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/core/geneinfo.pyt   getallGeneInfo   s     

GHc   	      C   s‰   |  j  ƒ  } | j ƒ  } g  } | \ } } | j d t ƒ  ƒ | j ƒ  } | j ƒ  | j ƒ  x% | D] } | j t | d ƒ ƒ qd W| S(   sG    returns a list of one or more gene description for a geneID. 
        sK   select description from gene_description where genome = :gen and gID = :gidi    (   R9   R:   R;   R<   RN   R>   RO   R?   (	   R&   RA   RB   R:   RS   RD   RE   t   entriesRF   (    (    sS   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/core/geneinfo.pyt   getDescription©   s    

c   	      C   s‰   |  j  ƒ  } | j ƒ  } g  } | \ } } | j d t ƒ  ƒ | j ƒ  } | j ƒ  | j ƒ  x% | D] } | j t | d ƒ ƒ qd W| S(   s3    returns a list of synonyms for a geneID. 
        sD   select synonym from gene_synonyms where genome = :gen and gID = :gidi    (   R9   R:   R;   R<   RN   R>   RO   R?   (	   R&   RA   RB   R:   RS   RD   RE   RU   RF   (    (    sS   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/core/geneinfo.pyt   geneIDSynonymsº   s    

c         C   s	  |  j  ƒ  } | j ƒ  } g  } | j d t ƒ  ƒ | j ƒ  } | rk | j ƒ  | j ƒ  | t | d ƒ f S| j d t ƒ  ƒ | j ƒ  } | r¸ | j ƒ  | j ƒ  | t | d ƒ f S| j d t ƒ  ƒ | j ƒ  } | j ƒ  | j ƒ  | r| t | d ƒ f S| S(   s9    returns a geneID given a genome and a synonym. 
        sD   select gID from gene_info where genome= :genome and symbol= :synonymi    sK   select gID from gene_synonyms where genome = :genome and synonym = :synonymsH   select gID from gene_info where genome = :genome and locustag = :synonym(   R9   R:   R;   R<   R=   R>   R?   (   R&   RP   t   synonymRB   R:   RS   RF   (    (    sS   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/core/geneinfo.pyt	   getGeneIDË   s,    





(   t   __name__t
   __module__t   __doc__t   startingGenomeR$   R*   t   FalseR)   R,   R%   R+   R9   RL   RT   RV   RW   RY   (    (    (    sS   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/core/geneinfo.pyR"   F   s   				
			c      
   C   sª  t  |  d ƒ } t ƒ  } | j ƒ  } | j ƒ  } | j d ƒ | j d ƒ | j d ƒ t j ƒ  } xÇ| D]¿} | j d d ƒ } | j d ƒ } | d | k rj yet | d }	 d	 |	 | d
 | d | d | d | d | d f }
 | j |
 ƒ | d j	 ƒ  } t
 | ƒ d
 k rId |	 | d
 | d j	 ƒ  f } | j | ƒ n  d |	 | d
 | d
 j	 ƒ  f } | j | ƒ | d j d ƒ } xx | D]p } yS | d k ré| | d
 j	 ƒ  k réd |	 | d
 | j	 ƒ  f } | j | ƒ n  Wq‘t j k
 r q‘Xq‘WWq)t j k
 r%d | GHq)Xqj qj W| j d ƒ | j d ƒ | j d ƒ | j d ƒ | j d ƒ | j d ƒ | j d ƒ | j ƒ  | j ƒ  | j ƒ  d S(   s=    populate geneinfo database from NCBI gene information. 
    t   rs¨   create table gene_info(ID INTEGER PRIMARY KEY, genome varchar, gID varchar, symbol varchar, locustag varchar, dbxrefs varchar, chromosome varchar, map_location varchar)sg   create table gene_description(ID INTEGER PRIMARY KEY, genome varchar, gID varchar, description varchar)s`   create table gene_synonyms(ID INTEGER PRIMARY KEY, genome varchar, gID varchar, synonym varchar)t   't   primes   	i    s“   INSERT into gene_info(ID, genome, gID, symbol, locustag, dbxrefs, chromosome, map_location) values (NULL, '%s', '%s', '%s', '%s', '%s', '%s', '%s')i   i   i   i   i   i   i   sZ   INSERT into gene_description(ID, genome, gID, description) values (NULL, '%s', '%s', '%s')sS   INSERT into gene_synonyms(ID, genome, gID, synonym) values (NULL, '%s', '%s', '%s')i   t   |t   -s   could not register %ss)   create index genIdx1 on gene_info(genome)s0   create index genIdx2 on gene_description(genome)s-   create index genIdx3 on gene_synonyms(genome)s&   create index gIDIdx1 on gene_info(gID)s-   create index gIDIdx2 on gene_description(gID)s*   create index gIDIdx3 on gene_synonyms(gID)s-   create index synIdx on gene_synonyms(synonym)N(   t   openR"   R9   R:   R;   t
   speciesMapt   keyst   replacet   splitt   stript   lenR6   t   OperationalErrort   commitR>   (   t   datafileR8   t   inFilet   idbRB   R:   t
   genomeKeyst   linet   fieldRP   t   sqlstmtt   descrt   sqlstmt2t   sqlstmt3t   synonymsRF   (    (    sS   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/core/geneinfo.pyt   buildgeneinfoDBé   sT    	7!!"

(    (   t	   pysqlite2R    R6   t   sqlite3R-   R/   R2   R   t   gett   cisRootR1   t   cisTempt   tempdirRe   R"   Rx   (    (    (    sS   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/core/geneinfo.pyt   <module>   s<   $
	

£