ó
˙fmSc           @   sŻ   d  d l  Z  d  d l m Z d  d l m Z e j d  rM e j d  Z n d Z d e Z d   Z d   Z	 d	   Z
 d
   Z d   Z d   Z d   Z e d d  Z d S(   i˙˙˙˙N(   t   Genome(   t   environt   CISTEMATIC_ROOTs   /proj/genomes   %s/C_elegans/celegans.genedbc         C   sŮ   g  } t  d d |  } t | d  } | j   } x! | D] } | j | j    q: Wt j | d  } t |  }	 |	 d k  r d GHn  d | GHt d t | f d	  }
 |
 j	 |  |
 j
   | j | | d
  d  S(   Nt   celeganst   dbFilet   rt    i   s#   Problems reading sequence from files   writing to file %ss   %s%st   wt   file(   R    t   opent   readlinet   appendt   stript   stringt   joint   lent   cisRoott   writet   closet   addChromosomeEntry(   t   dbt   chromIDt	   chromPatht   chromOutt   seqArrayt   ceGenomet   inFilet   linet   seqt   seqLent   outFile(    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/celegans.pyt   loadChromosome+   s    	
c      	   C   s  t  d d |  } t | d  } g  } xÓ| D]Ë} | d d k rJ q. n  | j d  } | d d k r | d d	 k r q. n  | d
 d k rĽ | d
 d k rĽ q. n  | d j d  } | d j d  } | d d t j k r| d d  }	 | d d }
 n | d }	 d }
 d | d |	 f } d | f } d } |
 d k rzy t |
 j    d } Wqzd | d GHq. qzXn  t | d  d } t | d  d } | d } | d j   } | d k rŃd } n d } | j	 | | | | | d | f  q. Wd t
 |  GH| j |  d  S(   NR   R   R   i    t   #s   	i   t   Coding_transcriptt   miRNAi   t
   Transcriptt   miRNA_primary_transcripti   t   "t   .i˙˙˙˙t   as   %s.%si`   s    problem processing %s - skippingi   i   i   t   +t   Ft   Rs   Adding %d gene entries(   R    R	   t   splitR   t   letterst   ordt   lowert   intR   R   R   t   addGeneEntryBatch(   R   t   gffFileR   t   geneFilet   geneEntriesR   t   fieldt   gidrevt   giddotst   gidGenet	   gidLettert   gidt   geneIDt
   gidVersiont   startt   stopt   senset   chrom(    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/celegans.pyt   loadGeneEntries?   sJ      


	&c      	   C   sś  t  d d |  } t | d  } g  } i  } i d d 6d d 6d d	 6} xK| D]C} | d
 d k rk qO n  | j d  } | d d! k r qO n  | d d k rź | d j   | k rź qO n  | d d" k rŐ d }	 n | | d j   }	 | d j d  }
 |
 d j d  } | d d t j k rE| d d  } | d d } n | d } d } d | d
 | f } d | f } d } | d k ržy t | j    d } Wqžd |
 d GHqO qžXn  t | d  d } t | d  d } | d } | d
 j   } | d k rd } n d } | | k r4g  | | <n  | | | |	 f | | k rO | j	 | | | | | | |	 f  | | j	 | | | |	 f  qO qO Wd  t
 |  GH| j |  d  S(#   NR   R   R   t   CDSt   coding_exont   3UTRt   three_prime_UTRt   5UTRt   five_prime_UTRi    R    s   	i   R!   R"   s   tRNAscan-SE-1.23i   i   R%   R&   i˙˙˙˙R'   s   %s.%si`   s    problem processing %s - skippingi   i   i   R(   R)   R*   s   Adding %d feature entries(   s   Coding_transcripts   miRNAs   tRNAscan-SE-1.23(   s   miRNAs   tRNAscan-SE-1.23(   R    R	   R+   R   R   R,   R-   R.   R/   R   R   t   addFeatureEntryBatch(   R   R1   R   t   featureFilet   featureEntriest   seenFeaturest   featureTranslationR   R4   t   featureTypeR5   R6   R7   R8   R9   R:   R;   R<   R=   R>   R?   (    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/celegans.pyt   loadFeatureEntriesp   s`    

&	


	"$c   
      C   sĚ   g  } t  | d  } | j   } | j   t d d |  } xl | D]d } | j d  } yE | d j   } d | d | d f }	 | j d | f |	 f  WqD qD XqD Wd	 t |  GH| j |  d  S(
   NR   R   R   t   ,i   s   %s	%si    i   s   Adding %d annotations(	   R	   t	   readlinesR   R    R+   R   R   R   t   addAnnotationBatch(
   R   t
   geneIDPatht   geneAnnotationst
   geneIDFilet   linesR   R   R4   R9   R:   (    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/celegans.pyt   loadGeneAnnotations˛   s    
c         C   sö  t  d d d d |  } t | d  } t | d  } t | d  } | j   } | j   i  }	 i  }
 xT | D]L } | j d  } t | d j    d k rn | d j   |	 | d <qn qn W| j   } i  } xR | D]J } | d	 d
 k r× | j d  } | d | d j   f | | d	 <q× q× W| j   } g  } x| D]} | d	 d
 k rZq>n  | j d  } | d } | |	 k r|	 | } n  | d d k rŠ| d  } n  | d j d  } | d	 } | d } | d j d  } | } | d } t |  d k r"d | j   | d f } n  y! t j | | d	 d d  } Wn d | GHd } n X| d } | |
 k rg  |
 | <n  | |
 | k r>|
 | j	 |  | j	 d | f | | | | | | | d f  q>q>Wd t |  GH| j
 |  d  S(   NR   t   versiont   WS200R   R   RN   i   i   i    t   !s   	i˙˙˙˙R'   i   t    i   i
   t   |i   s   %s|%st   't   ps!   could no map %s - using GOID onlyR   i	   s   Adding %d GO entries(   R    R	   RO   R   R+   R   R   R   t   replaceR   t   addGoInfoBatch(   R   t   goPatht	   goDefPathRQ   R   RS   t	   goDefFilet   goFileRT   t	   geneIDmapt   seenGOR   R4   t   goDefEntriest   goDefst
   goDefEntryt   colst	   goEntriest   goArrayt   fieldst   namet	   GOIDarrayt   GOIDt   objTypet   objNamet   gIDt   isNott   GOtermt   evidence(    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/celegans.pyt   loadGeneOntologyĹ   sd    
)



!	

6c         C   s)   t  d d d d |  } | j |   d  S(   NR   RV   RW   R   (   R    t   createGeneDB(   R   R   (    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/celegans.pyt   createDBFile  s    c         C   s&   t  d d d d |  } | j   d  S(   NR   RV   RW   R   (   R    t   createIndices(   R   R   (    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/celegans.pyt   createDBindices
  s    R   c         C   s;  | d k r d t  } n  d | } d | } d | } i d | d 6d | d	 6d
 | d 6d | d 6d | d 6d | d 6} d | } d |  GHt |   d GHt |  |  d GHt |  |  d GHt |  |  d GHt |  | | |  x2 | D]* } d | GHt |  | | | d |  qń Wd GHt |   d |  GHd  S(   NR   s   %s/download/s   %sgeneIDs.WS200s   %sGO.terms_and_idss   %sgene_association.wbs   %sCHROMOSOME_I_softmasked.dnat   Is   %sCHROMOSOME_II_softmasked.dnat   IIs   %sCHROMOSOME_III_softmasked.dnat   IIIs   %sCHROMOSOME_IV_softmasked.dnat   IVs   %sCHROMOSOME_V_softmasked.dnat   Vs   %sCHROMOSOME_X_softmasked.dnat   Xs   %selegansWS200.gffs   Creating database %ss   Adding gene entriess   Adding feature entriess   Adding gene annotationss   Adding gene ontologys   Loading chromosome %ss   /C_elegans/chr%s.bins   Creating Indicess   Finished creating database %s(   R   Rw   R@   RM   RU   Ru   R   Ry   (   R   t   downloadRootRQ   R`   R_   t   chromost   gffPathR   (    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/celegans.pyt   buildCelegansDB  s8    



	
	
(   R   t   cistematic.genomesR    t   osR   t   getR   t   geneDBR   R@   RM   RU   Ru   Rw   Ry   R   (    (    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/celegans.pyt   <module>   s   
		1	B		@		