ó
‘Mc           @   s¶   d  d l  Z  d  d l Z d  d l m Z d  d l m Z d  d l m Z e j d ƒ ri e j d ƒ Z n d Z d e Z	 d „  Z
 d	 „  Z d
 „  Z d „  Z d „  Z e	 d „ Z d S(   iÿÿÿÿN(   t   Genome(   t
   geneinfoDB(   t   environt   CISTEMATIC_ROOTs   /proj/genomes   %s/D_rerio/drerio.genedbc         C   sÅ  g  } d } t  d d |  ƒ} t j | ƒ } x‘| D]‰} t d | | f d ƒ } | j ƒ  }	 xQ|	 d k r²g  } d } |	 j ƒ  d }
 | j ƒ  } xU | d k rí | d d k rí | j ƒ  } | t | ƒ 7} | j | ƒ | j ƒ  } q™ Wt j	 | d ƒ } | d	 k  rMd
 |
 GH| j
 d |
 f | d t | ƒ ƒ | j |
 |
 d ƒ n\ d | |
 f } t d t | f d ƒ } | j | ƒ | j ƒ  d | GH| j |
 | d ƒ | }	 qb W| j ƒ  q4 Wd  S(   Ni    t   dreriot   dbFiles   %s/%st   rt    i   t   >iÐ s   Added contig %s to databaset
   chromosomet   dbs   %s%s.bins   %s%st   ws    Added contig file %s to databaset   file(   R    t   ost   listdirt   opent   readlinet   stript   lent   appendt   stringt   joint   addSequencet   strt   addChromosomeEntryt   cisRoott   writet   close(   R
   t	   chromPatht   chromOutPatht   seqArrayt   seqLent   drGenomet   filest   filenamet   inFilet   headert   chromIDt   currentLinet   lineSeqt   seqt   outFileNamet   outFile(    (    sK   /woldlab/castor/data00/home/georgi/erange-4.0a/cistematic/genomes/drerio.pyt   loadChromosome,   s<    	"
	
c      	   C   sŽ  g  } g  } t  d d |  ƒ} t | d ƒ } t ƒ  } x5| D]-} | j d ƒ } | d }	 yJ | j d |	 ƒ }
 t |
 ƒ sŒ d |	 GHw= n  | j |
 ƒ } | d }	 Wn
 q= n X|	 d k rÅ q= n  t | d	 ƒ } t | d
 ƒ } | d } | d } | d k rd } n d } d |	 f } | | k r5d } n d } | j | ƒ | j | | | | | d | f ƒ q= Wd t | ƒ GH| j	 | ƒ d  S(   NR   R   R   s   	i    s   could not find %si   R   i   i   i   i   t   -t   Rt   Ft   2t   1t   genes   Adding %d gene entries(
   R    R   R   t   splitt	   getGeneIDR   t   getGeneInfot   intR   t   addGeneEntryBatch(   R
   t   gFilet   geneEntriest   seenGIDsR    t   geneFilet   idbt   linet   colst   gidt   tempIDt   geneInfot   startt   stopt   senset   chromt   geneIDt
   gidVersion(    (    sK   /woldlab/castor/data00/home/georgi/erange-4.0a/cistematic/genomes/drerio.pyt   loadGeneEntriesQ   sD    	
	

		&c         C   sT  t  | d ƒ } t ƒ  } g  } i d d 6d d 6d d 6} g  } xÖ| D]Î} | j d ƒ } t | d ƒ }	 | d	 j d
 ƒ }
 | d j d
 ƒ } | d } | | d } t | d ƒ d } t | d ƒ d } | d } yJ | j d | ƒ } t | ƒ sd | GHwF n  | j | ƒ } | d } Wn
 qF n X| d k rDqF n  d | f } | | k red } n d } x¦t |	 ƒ D]˜} t |
 | ƒ d } t | | ƒ d } | | k rã| | k rã| j | | | | | | d f ƒ qx| | k r/| d k rd } n d } | j | | | | | | | f ƒ qx| | k r{| d k rPd } n d } | j | | | | | | | f ƒ qx| | k rù| | k rù| d k r¨d } n d } | j | | | | | | d f ƒ | j | | | | d | | | f ƒ qx| | k  rw| | k rw| d k r&d } n d } | j | | | | | d | | f ƒ | j | | | | | | d f ƒ qx| d k r’d } d } n d } d } | j | | | | | d | | f ƒ | j | | | | | | d f ƒ | j | | | | d | d | | f ƒ qxWqF W| j	 ƒ  t
 d d |  ƒ} d t | ƒ GH| j | ƒ d  S(   NR   R.   t   +R-   R,   t   .s   	i   i	   t   ,i
   i   i   i   i   i   i    R   s   could not find %sR   R/   R0   t   CDSt   5UTRt   3UTRR   s   Adding %d features(   R   R   R2   R5   R3   R   R4   t   rangeR   R   R    t   addFeatureEntryBatch(   R
   t   gfileR:   R;   R9   t
   senseArrayt   insertArrayt   geneLinet
   geneFieldst   exonNumt
   exonStartst	   exonStopsRD   RC   t   gstartt   gstopR>   R?   R@   RE   RF   t   indext   estartt   estopt   fTypet   fType1t   fType2R    (    (    sK   /woldlab/castor/data00/home/georgi/erange-4.0a/cistematic/genomes/drerio.pyt   loadGeneFeatures~   sŽ    	



		%	%	%	")	&%	&"2
c         C   s#   t  d d |  ƒ} | j |  ƒ d  S(   NR   R   (   R    t   createGeneDB(   R
   R    (    (    sK   /woldlab/castor/data00/home/georgi/erange-4.0a/cistematic/genomes/drerio.pyt   createDBFileÛ   s    c         C   s    t  d d |  ƒ} | j ƒ  d  S(   NR   R   (   R    t   createIndices(   R
   R    (    (    sK   /woldlab/castor/data00/home/georgi/erange-4.0a/cistematic/genomes/drerio.pyt   createDBindicesà   s    c         C   sT   d t  } d } d |  GHt |  ƒ d GHt |  | | ƒ d GHt |  ƒ d |  GHd S(   s1    genes and annotations are from UCSC (dr3). 
    s   %s/download/dr3s	   /D_rerio/s   Creating database %ss   Loading chromosomess   Creating Indicess   Finished creating database %sN(   R   Rb   R+   Rd   (   R
   t
   chromoPatht   chromoOutPath(    (    sK   /woldlab/castor/data00/home/georgi/erange-4.0a/cistematic/genomes/drerio.pyt   buildDrerioDBå   s    
	

(   R   R   t   cistematic.genomesR    t   cistematic.core.geneinfoR   R   t   getR   t   geneDBR+   RG   R`   Rb   Rd   Rg   (    (    (    sK   /woldlab/castor/data00/home/georgi/erange-4.0a/cistematic/genomes/drerio.pyt   <module>   s   
	%	-	]		