ó
ÿfmSc           @   sh  d  d l  Z  d  d l m Z d  d l m Z e j d ƒ rM e j d ƒ Z n d Z d e Z i d d 6d	 d
 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d  6d! d" 6d# d$ 6d% d& 6d' d( 6d) d* 6d+ d, 6d- d. 6d/ d0 6d1 d2 6d3 d4 6d5 d6 6d7 d8 6d9 d: 6Z d; „  Z	 d< „  Z
 d= „  Z d> „  Z d? „  Z d@ „  Z dA „  Z e dB „ Z d S(C   iÿÿÿÿN(   t   Genome(   t   environt   CISTEMATIC_ROOTs   /proj/genomes&   %s/D_melanogaster/dmelanogaster.genedbt   1t   At   2t   Bt   3t   Ct   4t   Dt   5t   Et   6t   Ft   7t   Gt   8t   Ht   9t   It   10t   Jt   11t   Kt   12t   Lt   13t   Mt   14t   Nt   15t   Ot   16t   Pt   17t   Qt   18t   Rt   19t   St   20t   Tt   21t   Ut   22t   Vt   23t   Wt   24t   Xt   25t   Yt   26t   Zc         C   sÙ   g  } t  d d |  ƒ} t | d ƒ } | j ƒ  } x! | D] } | j | j ƒ  ƒ q: Wt j | d ƒ } t | ƒ }	 |	 d k  r‰ d GHn  d | GHt d t | f d	 ƒ }
 |
 j	 | ƒ |
 j
 ƒ  | j | | d
 ƒ d  S(   Nt   dmelanogastert   dbFilet   rt    i   s#   Problems reading sequence from files   writing to file %ss   %s%st   wt   file(   R    t   opent   readlinet   appendt   stript   stringt   joint   lent   cisRoott   writet   closet   addChromosomeEntry(   t   dbt   chromIDt	   chromPatht   chromOutt   seqArrayt   dmGenomet   inFilet   linet   seqt   seqLent   outFile(    (    sj   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/dmelanogaster.pyt   loadChromosomeF   s    	
c      	   C   s$  g  } t  d d |  ƒ} t | d ƒ } xÚ | D]Ò } | j d ƒ } | d j d ƒ } | d } t | d ƒ }	 t | d	 ƒ }
 | d
 } | d d
 } | d k r­ d } n d } d | f } y t | d } Wn
 q. n X| j | | |	 |
 | d | f ƒ q. Wd t | ƒ GH| j | ƒ d  S(   NR7   R8   R9   s   	i   s   -Ri    i   i   i   i   t   -R&   R   t   genes   Adding %d gene entries(   R    R=   t   splitt   intt   versionR?   RC   t   addGeneEntryBatch(   RH   t   gFilet   geneEntriesRM   t   geneFileRO   t   colst   namet   gidt   startt   stopt   senset   chromt   geneIDt
   gidVersion(    (    sj   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/dmelanogaster.pyt   loadGeneEntriesZ   s,    

	&c      
   C   sñ  t  | d ƒ } i d d 6d d 6d d 6} g  } x‚| D]z} | j d ƒ } t | d ƒ } | d	 j d
 ƒ } | d j d
 ƒ }	 | d d }
 | | d } t | d ƒ d } t | d ƒ d } | d j d ƒ } d | d f } y t | d } Wn
 q7 n Xx¦t | ƒ D]˜} t | | ƒ d } t |	 | ƒ d } | | k r€| | k r€| j | | |
 | | | d f ƒ q| | k rÌ| d k r¡d } n d } | j | | |
 | | | | f ƒ q| | k r| d k ríd } n d } | j | | |
 | | | | f ƒ q| | k r–| | k r–| d k rEd } n d } | j | | |
 | | | d f ƒ | j | | |
 | d | | | f ƒ q| | k  r| | k r| d k rÃd } n d } | j | | |
 | | d | | f ƒ | j | | |
 | | | d f ƒ q| d k r/d } d } n d } d } | j | | |
 | | d | | f ƒ | j | | |
 | | | d f ƒ | j | | |
 | d | d | | f ƒ qWq7 W| j ƒ  t d d |  ƒ} d t | ƒ GH| j	 | ƒ d  S(   NR9   R   t   +R&   RT   t   .s   	i   i	   t   ,i
   i   i   i   i   i   s   -RR7   i    t   CDSt   5UTRt   3UTRR8   s   Adding %d features(
   R=   RV   RW   RX   t   rangeR?   RF   R    RC   t   addFeatureEntryBatch(   RH   t   gfileR\   t
   senseArrayt   insertArrayt   geneLinet
   geneFieldst   exonNumt
   exonStartst	   exonStopsRc   Rb   t   gstopt   gstartR^   Rd   Re   t   indext   estartt   estopt   fTypet   fType1t   fType2RM   (    (    sj   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/dmelanogaster.pyt   loadGeneFeaturesx   sv    

%	%	%	")	&%	&"2
c   	      C   sû   g  } t  | d ƒ } t d d |  ƒ} x± | D]© } y™ | j d ƒ } | d d k r\ w. n  | d } d | k r | d	 } n  | d
 } t | ƒ d k rÌ | j d | f t j | j ƒ  d d ƒ f ƒ n  Wq. q. Xq. Wd t | ƒ GH| j | ƒ d  S(   NR9   R7   R8   s   	i    t   7227i   t   Dmel_i   i   t   't   ps   Adding %d annotations(	   R=   R    RV   RC   R?   RA   t   replaceR@   t   addAnnotationBatch(	   RH   t	   annotPatht   geneAnnotationst	   annotFileRM   RO   R]   t   locIDt   geneDesc(    (    sj   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/dmelanogaster.pyt   loadGeneAnnotationsÅ   s$    

5c         C   sX  t  d d |  ƒ} t | d ƒ } t | d ƒ } t | d ƒ } | j ƒ  } | j ƒ  | j ƒ  }	 i  }
 i  } g  } xR |	 D]J } | d d k rz | j d ƒ } | d | d j ƒ  f |
 | d <qz qz W| j ƒ  } x€ | D]x } yh | j d ƒ } | d d	 k r	wÛ n  | d
 j ƒ  } | d j ƒ  } t | ƒ d k rH| | | <n  WqÛ qÛ XqÛ WxÞ | D]Ö } | d d k rzq^n  | d  d	 k rq^n  yš | j d ƒ } | d j ƒ  } | | } d | k rÕ| d } n  | d } | j d | f | d | d t j	 |
 | d d d ƒ |
 | d d f ƒ Wq^q^Xq^Wd t | ƒ GH| j
 | ƒ d  S(   NR7   R8   R9   i    t   !s   	i   i   R€   i   i   R   i   R:   R‚   Rƒ   s   adding %d go entries(   R    R=   t	   readlinesRF   RV   R@   RC   R?   RA   R„   t   addGoInfoBatch(   RH   t   goPatht	   goDefPathR†   RM   t	   goDefFilet   goFileRˆ   t   annotEntriest   goDefEntriest   goDefst   locust   goArrayt
   goDefEntryR]   t	   goEntriest
   annotEntryR‰   t   geneNamet   entryt   fieldst   GOID(    (    sj   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/dmelanogaster.pyt   loadGeneOntologyÝ   sV    
)

Nc         C   s#   t  d d |  ƒ} | j |  ƒ d  S(   NR7   R8   (   R    t   createGeneDB(   RH   RM   (    (    sj   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/dmelanogaster.pyt   createDBFile  s    c         C   s    t  d d |  ƒ} | j ƒ  d  S(   NR7   R8   (   R    t   createIndices(   RH   RM   (    (    sj   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/dmelanogaster.pyt   createDBindices  s    c         C   s‹  d t  } d t  } d t  } d t  } i d t  d 6d t  d 6d	 t  d
 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d  6d! t  d" 6} d# |  GHt |  ƒ d$ GHt |  | ƒ d% GHt |  | ƒ d& GHt |  | ƒ d' GHt |  | | | ƒ x8 | j ƒ  D]* } d( | GHt |  | | | d) | ƒ qAWd* GHt |  ƒ d+ |  GHd, S(-   sY    genes and annotations are from UCSC. GO association file is from geneontology.org. 
    s   %s/download/flyBaseGene.txts   %s/download/gene_infos   %s/download/GO.terms_and_idss   %s/download/gene2gos   %s/download/chr2L.fat   2Ls   %s/download/chr2R.fat   2Rs   %s/download/chr2LHet.fat   2LHets   %s/download/chr2RHet.fat   2RHets   %s/download/chr3L.fat   3Ls   %s/download/chr3LHet.fat   3LHets   %s/download/chr3R.fat   3Rs   %s/download/chr3RHet.fat   3RHets   %s/download/chr4.faR	   s   %s/download/chrX.faR2   s   %s/download/chrXHet.fat   XHets   %s/download/chrYHet.fat   YHets   %s/download/chrU.faR,   s   %s/download/chrUextra.fat   Uextras   %s/download/chrM.faR   s   Creating database %ss   Adding gene entriess   Adding gene featuress   Adding gene annotationss   Adding gene ontologys   Loading chromosome %ss   /D_melanogaster/chromo%s.bins   Creating Indicess   Finished creating database %sN(	   RD   R¡   Rf   R   R‹   RŸ   t   keysRS   R£   (   RH   t   genePathR†   R   R   t   chromosRI   (    (    sj   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/dmelanogaster.pyt   buildDmelanogasterDB  sF    



	
	
(   RA   t   cistematic.genomesR    t   osR   t   getRD   t   geneDBRX   RS   Rf   R   R‹   RŸ   R¡   R£   R²   (    (    (    sj   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/dmelanogaster.pyt   <module>   sP   


			M		5		