ó
ùVêNc           @   sÃ   d  d l  Z  d  d l m Z d  d l m Z d  d l m Z e j d ƒ r] e j d ƒ Z n d Z d e Z	 d „  Z
 d	 „  Z d
 „  Z d „  Z d „  Z d „  Z d „  Z e	 d e d „ Z d S(   iÿÿÿÿN(   t   Genome(   t
   geneinfoDB(   t   environt   CISTEMATIC_ROOTs   /proj/genomes   %s/M_musculus/mmusculus.genedbc         C   sÙ   g  } t  d d |  ƒ} t | d ƒ } | j ƒ  } x! | D] } | j | j ƒ  ƒ q: Wt j | d ƒ } t | ƒ }	 |	 d k  r‰ d GHn  d | GHt d t | f d	 ƒ }
 |
 j	 | ƒ |
 j
 ƒ  | j | | d
 ƒ d  S(   Nt	   mmusculust   dbFilet   rt    i   s#   Problems reading sequence from files   writing to file %ss   %s%st   wt   file(   R    t   opent   readlinet   appendt   stript   stringt   joint   lent   cisRoott   writet   closet   addChromosomeEntry(   t   dbt   chromIDt	   chromPatht   chromOutt   seqArrayt   mmGenomet   inFilet   linet   seqt   seqLent   outFile(    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/mmusculus.pyt   loadChromosome,   s    	
c      	   C   s„  g  } g  } t  d d |  ƒ} t | d ƒ } x4| D],} | j d ƒ } | d j ƒ  d k s4 | d d k ru q4 n  | d	 j ƒ  }	 |	 | k r— q4 n  | d
 j d ƒ }
 |
 d	 } | d k s4 | | k rÒ q4 n  | j | ƒ t | d ƒ d	 } t | d ƒ d	 } | d } | d k r&d } n d } d | f } d	 } | j | |	 | | | d | f ƒ q4 Wd t | ƒ GH| j | ƒ d S(   s<    FIXME - NEED TO DEAL WITH ALTERNATIVE SPLICING ENTRIES
    R   R   R   s   	i   t   GENEi   s   C57BL/6Ji   i
   t   :R   i   i   i   t   +t   Ft   Rt   genes   Adding %d gene entriesN(   R    R
   t   splitR   R   t   intR   t   addGeneEntryBatch(   R   t   gFilet   cDictt   geneEntriest   alreadySeenR   t   geneFileR   t   colst   chromt   namet   gidt   startt   stopt   senset   geneIDt
   gidVersion(    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/mmusculus.pyt   loadGeneEntries@   s6    &

	&c      	   C   so  g  } t  d d |  ƒ} t | d ƒ } x%| D]} | j d ƒ } | d j ƒ  d k s. | d	 d
 k ro q. n  | d j ƒ  } | | k r‘ q. n  | d }	 | d j d ƒ }
 |
 d } | d k rÊ q. n  t | d ƒ d } t | d ƒ d } | d } | d k rd } n d } d | f } d } | j | | | | | | |	 f ƒ q. Wd t | ƒ GH| j | ƒ d S(   sI    Load gene features such as CDS, UTR, and PSEUDO from the gene file.
    R   R   R   s   	i   t   CDSt   UTRt   PSEUDOi   s   C57BL/6Ji   i
   R"   R   i   i   i   R#   R$   R%   s   Adding %d feature entriesN(   s   CDSs   UTRs   PSEUDO(   R    R
   R'   R   R(   R   R   t   addFeatureEntryBatch(   R   R*   R+   t   featureEntriesR   t   featureFileR   R/   R0   t   fTypeR1   R2   R3   R4   R5   R6   R7   (    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/mmusculus.pyt   loadGeneFeaturesf   s4    &


	&c   
      C   så   g  } t  ƒ  } t d d |  ƒ} | j ƒ  } x• | D] } d | f } | j | ƒ } d } x( | D]  }	 | d 7} | |	 j ƒ  7} qb Wt | ƒ d k r4 | j | t j | d d d ƒ f ƒ q4 q4 Wd	 t | ƒ GH| j	 | ƒ d  S(
   NR   R   R   t   ,i    i   t   't   ps   Adding %d annotations(
   R   R    t   allGIDst   getDescriptionR   R   R   R   t   replacet   addAnnotationBatch(
   R   t   geneAnnotationst   idbR   t   gidListt   locIDt   gIDt   geneDescArrayt   geneDesct   entry(    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/mmusculus.pyt   loadGeneAnnotations‹   s    	
-c         C   só  t  d d |  ƒ} t | d ƒ } t | d ƒ } t ƒ  } i  } g  } xR | D]J }	 |	 d d k rL |	 j d ƒ }
 |
 d |
 d j ƒ  f | |
 d <qL qL W| j ƒ  } d	 } x$| D]} y| j d ƒ } | d d
 k rá w³ n  | d j ƒ  } d | f } | | k rg| } d	 } | j | ƒ } t | ƒ d k r^x+ | D] } | d 7} | | 7} q=Wqgd } n  | j | | d d	 | d d	 t	 j
 | | d d d d ƒ | | d d d	 f ƒ Wq³ d | GHq³ Xq³ Wd t | ƒ GH| j | ƒ d  S(   NR   R   R   i    t   !s   	i   i   R   t   10116RA   t    RB   RC   s   locus ID %s could not be addeds   adding %d go entries(   R    R
   R   R'   R   t	   readlinest   geneIDSynonymsR   R   R   RF   t   addGoInfoBatch(   R   t   goPatht	   goDefPathR   t	   goDefFilet   goFileRI   t   goDefst   goArrayt
   goDefEntryR/   t	   goEntriest   prevGIDRO   t   fieldsRK   RL   t	   gene_namet   synonyms(    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/mmusculus.pyt   loadGeneOntologyŸ   sD    	)
	X	c         C   s#   t  d d |  ƒ} | j |  ƒ d  S(   NR   R   (   R    t   createGeneDB(   R   R   (    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/mmusculus.pyt   createDBFileÉ   s    c         C   s    t  d d |  ƒ} | j ƒ  d  S(   NR   R   (   R    t   createIndices(   R   R   (    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/mmusculus.pyt   createDBindicesÎ   s    s   %s/downloadc         C   sÎ  d | } d | } d | } i d | d 6d | d 6d | d	 6d
 | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d  | d! 6d" | d# 6d$ | d% 6d& | d' 6d( | d) 6d* | d+ 6d, | d- 6d. | d/ 6} d0 |  GHt  |  ƒ d1 GHt |  | | ƒ d2 GHt |  | | ƒ d3 GHt |  ƒ d4 GHt |  | | ƒ x8 | j ƒ  D]* } d5 | GHt |  | | | d6 | ƒ q„Wd7 GHt |  ƒ d8 |  GHd  S(9   Ns   %s/seq_gene.mds   %s/GO.terms_and_idss
   %s/gene2gos
   %s/chr1.fat   1s
   %s/chr2.fat   2s
   %s/chr3.fat   3s
   %s/chr4.fat   4s
   %s/chr5.fat   5s
   %s/chr6.fat   6s
   %s/chr7.fat   7s
   %s/chr8.fat   8s
   %s/chr9.fat   9s   %s/chr10.fat   10s   %s/chr11.fat   11s   %s/chr12.fat   12s   %s/chr13.fat   13s   %s/chr14.fat   14s   %s/chr15.fat   15s   %s/chr16.fat   16s   %s/chr17.fat   17s   %s/chr18.fat   18s   %s/chr19.fat   19s
   %s/chrX.fat   Xs
   %s/chrY.fat   Ys
   %s/chrM.fat   Ms   Creating database %ss   Adding gene entriess   Adding gene featuress   Adding gene annotationss   Adding gene ontologys   Loading chromosome %ss   /M_musculus/chromo%s.bins   Creating Indicess   Finished creating database %s(   Re   R8   R@   RP   Rc   t   keysR    Rg   (   R   t   downloadDirt   genePathRX   RW   t	   chromDictR   (    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/mmusculus.pyt   buildMmusculusDBÓ   sR    


	

	
(   R   t   cistematic.genomesR    t   cistematic.core.geneinfoR   t   osR   t   getR   t   geneDBR    R8   R@   RP   Rc   Re   Rg   R‚   (    (    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/mmusculus.pyt   <module>   s   
		&	%		*		