ó
ùVêNc           @   sÃ   d  d l  Z  d  d l m Z d  d l m Z d  d l m Z e j d ƒ r] e j d ƒ Z n d Z d e Z	 d „  Z
 d	 „  Z d
 „  Z d „  Z d „  Z d „  Z d „  Z e	 d e d „ Z d S(   iÿÿÿÿN(   t   Genome(   t
   geneinfoDB(   t   environt   CISTEMATIC_ROOTs   /proj/genomes"   %s/R_norvegicus/rnorvegicus.genedbc         C   sÙ   g  } t  d d |  ƒ} t | d ƒ } | j ƒ  } x! | D] } | j | j ƒ  ƒ q: Wt j | d ƒ } t | ƒ }	 |	 d k  r‰ d GHn  d | GHt d t | f d	 ƒ }
 |
 j	 | ƒ |
 j
 ƒ  | j | | d
 ƒ d  S(   Nt   rnorvegicust   dbFilet   rt    i   s#   Problems reading sequence from files   writing to file %ss   %s%st   wt   file(   R    t   opent   readlinet   appendt   stript   stringt   joint   lent   cisRoott   writet   closet   addChromosomeEntry(   t   dbt   chromIDt	   chromPatht   chromOutt   seqArrayt   rnGenomet   inFilet   linet   seqt   seqLent   outFile(    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/rnorvegicus.pyt   loadChromosome,   s    	
c      	   C   sA  g  } t  d d |  ƒ} t | d ƒ } x÷ | D]ï } | j d ƒ } | d d k rY q. n  | d d k ro q. n  | d	 j d
 ƒ } | d } t | d ƒ d }	 t | d ƒ d }
 | d } | d j ƒ  } | d k rã d } n d } d | f } d } | j | | |	 |
 | d | f ƒ q. Wd t | ƒ GH| j | ƒ d S(   s<    FIXME - NEED TO DEAL WITH ALTERNATIVE SPLICING ENTRIES
    R   R   R   s   	i   t   GENEi   t   Celerai
   t   :i   i   i   i   t   +t   Ft   Rt   genes   Adding %d gene entriesN(   R    R
   t   splitt   intR   R   R   t   addGeneEntryBatch(   R   t   gFilet   geneEntriesR   t   geneFileR   t   colst   namet   gidt   startt   stopt   senset   chromt   geneIDt
   gidVersion(    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/rnorvegicus.pyt   loadGeneEntries@   s.    

	&c      	   C   sK  g  } t  d d |  ƒ} t | d ƒ } x| D]ù } | j d ƒ } | d d k rY q. n  | d	 d
 k ro q. n  | d } | d j d ƒ } | d }	 t | d ƒ d }
 t | d ƒ d } | d } | d j ƒ  } | d k rí d } n d } d |	 f } d } | j | | | |
 | | | f ƒ q. Wd t | ƒ GH| j | ƒ d S(   sI    Load gene features such as CDS, UTR, and PSEUDO from the gene file.
    R   R   R   s   	i   t   CDSt   UTRt   PSEUDOi   R"   i
   R#   i   i   i   i   R$   R%   R&   s   Adding %d feature entriesN(   s   CDSs   UTRs   PSEUDO(   R    R
   R(   R)   R   R   R   t   addFeatureEntryBatch(   R   R+   t   featureEntriesR   t   featureFileR   R.   t   fTypeR/   R0   R1   R2   R3   R4   R5   R6   (    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/rnorvegicus.pyt   loadGeneFeaturesa   s0    


	&c   
      C   så   g  } t  ƒ  } t d d |  ƒ} | j ƒ  } x• | D] } d | f } | j | ƒ } d } x( | D]  }	 | d 7} | |	 j ƒ  7} qb Wt | ƒ d k r4 | j | t j | d d d ƒ f ƒ q4 q4 Wd	 t | ƒ GH| j	 | ƒ d  S(
   NR   R   R   t   ,i    i   t   't   ps   Adding %d annotations(
   R   R    t   allGIDst   getDescriptionR   R   R   R   t   replacet   addAnnotationBatch(
   R   t   geneAnnotationst   idbR   t   gidListt   locIDt   gIDt   geneDescArrayt   geneDesct   entry(    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/rnorvegicus.pyt   loadGeneAnnotationsƒ   s    	
-c         C   só  t  d d |  ƒ} t | d ƒ } t | d ƒ } t ƒ  } i  } g  } xR | D]J }	 |	 d d k rL |	 j d ƒ }
 |
 d |
 d j ƒ  f | |
 d <qL qL W| j ƒ  } d	 } x$| D]} y| j d ƒ } | d d
 k rá w³ n  | d j ƒ  } d | f } | | k rg| } d	 } | j | ƒ } t | ƒ d k r^x+ | D] } | d 7} | | 7} q=Wqgd } n  | j | | d d	 | d d	 t	 j
 | | d d d d ƒ | | d d d	 f ƒ Wq³ d | GHq³ Xq³ Wd t | ƒ GH| j | ƒ d  S(   NR   R   R   i    t   !s   	i   i   R   t   10090R@   t    RA   RB   s   locus ID %s could not be addeds   adding %d go entries(   R    R
   R   R(   R   t	   readlinest   geneIDSynonymsR   R   R   RE   t   addGoInfoBatch(   R   t   goPatht	   goDefPathR   t	   goDefFilet   goFileRH   t   goDefst   goArrayt
   goDefEntryR.   t	   goEntriest   prevGIDRN   t   fieldsRJ   RK   t	   gene_namet   synonyms(    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/rnorvegicus.pyt   loadGeneOntology—   sD    	)
	X	c         C   s#   t  d d |  ƒ} | j |  ƒ d  S(   NR   R   (   R    t   createGeneDB(   R   R   (    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/rnorvegicus.pyt   createDBFileÁ   s    c         C   s    t  d d |  ƒ} | j ƒ  d  S(   NR   R   (   R    t   createIndices(   R   R   (    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/rnorvegicus.pyt   createDBindicesÆ   s    s   %s/downloadc         C   sÓ  d | } d | } d | } i d | d 6d | d 6d | d	 6d
 | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d  | d! 6d" | d# 6d$ | d% 6d& | d' 6d( | d) 6d* | d+ 6d, | d- 6d. | d/ 6d0 | d1 6} d2 |  GHt  |  ƒ d3 GHt |  | ƒ d4 GHt |  | ƒ d5 GHt |  ƒ d6 GHt |  | | ƒ x8 | j ƒ  D]* } d7 | GHt |  | | | d8 | ƒ q‰Wd9 GHt |  ƒ d: |  GHd  S(;   Ns   %s/seq_gene.mds   %s/GO.terms_and_idss
   %s/gene2gos
   %s/chr1.fat   1s
   %s/chr2.fat   2s
   %s/chr3.fat   3s
   %s/chr4.fat   4s
   %s/chr5.fat   5s
   %s/chr6.fat   6s
   %s/chr7.fat   7s
   %s/chr8.fat   8s
   %s/chr9.fat   9s   %s/chr10.fat   10s   %s/chr11.fat   11s   %s/chr12.fat   12s   %s/chr13.fat   13s   %s/chr14.fat   14s   %s/chr15.fat   15s   %s/chr16.fat   16s   %s/chr17.fat   17s   %s/chr18.fat   18s   %s/chr19.fat   19s   %s/chrUn.fat   Uns
   %s/chrX.fat   Xs   %s/chr20.fat   20s
   %s/chrM.fat   Ms   Creating database %ss   Adding gene entriess   Adding gene featuress   Adding gene annotationss   Adding gene ontologys   Loading chromosome %ss   /R_norvegicus/chromo%s.bins   Creating Indicess   Finished creating database %s(   Rd   R7   R?   RO   Rb   t   keysR    Rf   (   R   t   downloadDirt   genePathRW   RV   t   chromosR   (    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/rnorvegicus.pyt
   buildRatDBË   sT    


	

	
(   R   t   cistematic.genomesR    t   cistematic.core.geneinfoR   t   osR   t   getR   t   geneDBR    R7   R?   RO   Rb   Rd   Rf   R‚   (    (    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/rnorvegicus.pyt   <module>   s   
		!	"		*		