ó
ųVźNc           @   s¶   d  d l  Z  d  d l Z d  d l m Z d  d l m Z d  d l m Z e j d  ri e j d  Z n d Z d e Z	 d   Z
 d	   Z d
   Z d   Z d   Z e	 d  Z d S(   i’’’’N(   t   Genome(   t
   geneinfoDB(   t   environt   CISTEMATIC_ROOTs   /proj/genomes   %s/D_rerio/drerio.genedbc         C   sÅ  g  } d } t  d d |  } t j |  } x| D]} t d | | f d  } | j   }	 xQ|	 d k r²g  } d } |	 j   d }
 | j   } xU | d k rķ | d d k rķ | j   } | t |  7} | j |  | j   } q Wt j	 | d  } | d	 k  rMd
 |
 GH| j
 d |
 f | d t |   | j |
 |
 d  n\ d | |
 f } t d t | f d  } | j |  | j   d | GH| j |
 | d  | }	 qb W| j   q4 Wd  S(   Ni    t   dreriot   dbFiles   %s/%st   rt    i   t   >iŠ s   Added contig %s to databaset
   chromosomet   dbs   %s%s.bins   %s%st   ws    Added contig file %s to databaset   file(   R    t   ost   listdirt   opent   readlinet   stript   lent   appendt   stringt   joint   addSequencet   strt   addChromosomeEntryt   cisRoott   writet   close(   R
   t	   chromPatht   chromOutPatht   seqArrayt   seqLent   drGenomet   filest   filenamet   inFilet   headert   chromIDt   currentLinet   lineSeqt   seqt   outFileNamet   outFile(    (    sT   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/drerio.pyt   loadChromosome,   s<    	"
	
c      	   C   s  g  } g  } t  d d |  } t | d  } t   } x5| D]-} | j d  } | d }	 yJ | j d |	  }
 t |
  s d |	 GHw= n  | j |
  } | d }	 Wn
 q= n X|	 d k rÅ q= n  t | d	  } t | d
  } | d } | d } | d k rd } n d } d |	 f } | | k r5d } n d } | j |  | j | | | | | d | f  q= Wd t |  GH| j	 |  d  S(   NR   R   R   s   	i    s   could not find %si   R   i   i   i   i   t   -t   Rt   Ft   2t   1t   genes   Adding %d gene entries(
   R    R   R   t   splitt	   getGeneIDR   t   getGeneInfot   intR   t   addGeneEntryBatch(   R
   t   gFilet   geneEntriest   seenGIDsR    t   geneFilet   idbt   linet   colst   gidt   tempIDt   geneInfot   startt   stopt   senset   chromt   geneIDt
   gidVersion(    (    sT   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/drerio.pyt   loadGeneEntriesQ   sD    	
	

		&c      
   C   sT  t  | d  } t   } g  } i d d 6d d 6d d 6} g  } xÖ| D]Ī} | j d  } t | d  }	 | d	 j d
  }
 | d j d
  } | d } | | d } t | d  d } t | d  d } | d } yJ | j d |  } t |  sd | GHwF n  | j |  } | d } Wn
 qF n X| d k rDqF n  d | f } | | k red } n d } x¦t |	  D]} t |
 |  d } t | |  d } | | k rć| | k rć| j | | | | | | d f  qx| | k r/| d k rd } n d } | j | | | | | | | f  qx| | k r{| d k rPd } n d } | j | | | | | | | f  qx| | k rł| | k rł| d k rØd } n d } | j | | | | | | d f  | j | | | | d | | | f  qx| | k  rw| | k rw| d k r&d } n d } | j | | | | | d | | f  | j | | | | | | d f  qx| d k rd } d } n d } d } | j | | | | | d | | f  | j | | | | | | d f  | j | | | | d | d | | f  qxWqF W| j	   t
 d d |  } d t |  GH| j |  d  S(   NR   R.   t   +R-   R,   t   .s   	i   i	   t   ,i
   i   i   i   i   i   i    R   s   could not find %sR   R/   R0   t   CDSt   5UTRt   3UTRR   s   Adding %d features(   R   R   R2   R5   R3   R   R4   t   rangeR   R   R    t   addFeatureEntryBatch(   R
   t   gfileR:   R;   R9   t
   senseArrayt   insertArrayt   geneLinet
   geneFieldst   exonNumt
   exonStartst	   exonStopsRD   RC   t   gstartt   gstopR>   R?   R@   RE   RF   t   indext   estartt   estopt   fTypet   fType1t   fType2R    (    (    sT   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/drerio.pyt   loadGeneFeatures~   s    	



		%	%	%	")	&%	&"2
c         C   s#   t  d d |  } | j |   d  S(   NR   R   (   R    t   createGeneDB(   R
   R    (    (    sT   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/drerio.pyt   createDBFileŪ   s    c         C   s    t  d d |  } | j   d  S(   NR   R   (   R    t   createIndices(   R
   R    (    (    sT   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/drerio.pyt   createDBindicesą   s    c         C   sT   d t  } d } d |  GHt |   d GHt |  | |  d GHt |   d |  GHd S(   s1    genes and annotations are from UCSC (dr3). 
    s   %s/download/dr3s	   /D_rerio/s   Creating database %ss   Loading chromosomess   Creating Indicess   Finished creating database %sN(   R   Rb   R+   Rd   (   R
   t
   chromoPatht   chromoOutPath(    (    sT   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/drerio.pyt   buildDrerioDBå   s    
	

(   R   R   t   cistematic.genomesR    t   cistematic.core.geneinfoR   R   t   getR   t   geneDBR+   RG   R`   Rb   Rd   Rg   (    (    (    sT   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/drerio.pyt   <module>   s   
	%	-	]		