
fmSc           @   s   d  d l  Z  d  d l m Z d  d l m Z d  d l m Z e j d  r] e j d  Z n d Z d e Z	 d   Z
 d	   Z d
   Z d   Z d   Z d   Z e	 d  Z d S(   iN(   t   Genome(   t
   geneinfoDB(   t   environt   CISTEMATIC_ROOTs   /proj/genomes   %s/G_gallus/ggallus.genedbc         C   s   g  } t  d d |  } t | d  } | j   } x! | D] } | j | j    q: Wt j | d  } t |  }	 |	 d k  r d GHn  d | GHt d t | f d	  }
 |
 j	 |  |
 j
   | j | | d
  d  S(   Nt   ggallust   dbFilet   rt    i   s#   Problems reading sequence from files   writing to file %ss   %s%st   wt   file(   R    t   opent   readlinet   appendt   stript   stringt   joint   lent   cisRoott   writet   closet   addChromosomeEntry(   t   dbt   chromIDt	   chromPatht   chromOutt   seqArrayt   ggGenomet   inFilet   linet   seqt   seqLent   outFile(    (    sd   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/ggallus.pyt   loadChromosome,   s    	
c      	   C   s~  g  } g  } t  d d |  } t | d  } | j   x$| D]} d | k rV q> n  | j d  } | d j   d k r q> n  | d j d	  } | d
 }	 |	 d k s> |	 | k r q> n  | j |	  t | d  d
 }
 t | d  d
 } | d } | d
 j   } | d k r d } n d } d |	 f } d
 } | j | | |
 | | d | f  q> Wd t |  GH| j |  d S(   s<    FIXME - NEED TO DEAL WITH ALTERNATIVE SPLICING ENTRIES
    R   R   R   t   |s   	i   t   GENEi
   t   :i   R   i   i   i   t   +t   Ft   Rt   genes   Adding %d gene entriesN(	   R    R
   R   t   splitR   R   t   intR   t   addGeneEntryBatch(   R   t   gFilet   geneEntriest   alreadySeenR   t   geneFileR   t   colst   namet   gidt   startt   stopt   senset   chromt   geneIDt
   gidVersion(    (    sd   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/ggallus.pyt   loadGeneEntries@   s8    


	&c      	   C   si  g  } t  d d |  } t | d  } | j   x| D]} d | k rP q8 n  | j d  } | d j   d k r{ q8 n  | d } | d
 j d  } | d }	 |	 d k r q8 n  t | d  d }
 t | d  d } | d } | d j   } | d k rd } n d } d |	 f } d } | j | | | |
 | | | f  q8 Wd t |  GH| j |  d S(   sI    Load gene features such as CDS, UTR, and PSEUDO from the gene file.
    R   R   R   R!   s   	i   t   CDSt   UTRt   PSEUDOi
   R#   i   R   i   i   i   R$   R%   R&   s   Adding %d feature entriesN(   s   CDSs   UTRs   PSEUDO(	   R    R
   R   R(   R   R)   R   R   t   addFeatureEntryBatch(   R   R+   t   featureEntriesR   t   featureFileR   R/   t   fTypeR0   R1   R2   R3   R4   R5   R6   R7   (    (    sd   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/ggallus.pyt   loadGeneFeaturesg   s6    



	&c         C   s  t  d d |  } t | d  } t | d  } t   } i  } g  } xR | D]J }	 |	 d d k rL |	 j d  }
 |
 d |
 d j   f | |
 d <qL qL W| j   } d	 } x| D] } y | j d  } | d d
 k r w n  | d j   } d | f } | | k rN| } d	 } | j |  } t |  d k rNt j	 | d  } qNn  | j
 | | d d	 | d	 t j | | d d d d  | | d d d	 f  Wq d | GHq Xq Wd t |  GH| j |  d  S(   NR   R   R   i    t   !s   	i   i   R   t   9031t   ,t   't   ps   locus ID %s could not be addeds   adding %d go entries(   R    R
   R   R(   R   t	   readlinest   geneIDSynonymsR   R   R   R   t   replacet   addGoInfoBatch(   R   t   goPatht	   goDefPathR   t	   goDefFilet   goFilet   idbt   goDefst   goArrayt
   goDefEntryR/   t	   goEntriest   prevGIDt   entryt   fieldst   locIDt   gIDt	   gene_namet   synonyms(    (    sd   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/ggallus.pyt   loadGeneOntology   s>    	)T	c         C   s#   t  d d |  } | j |   d  S(   NR   R   (   R    t   createGeneDB(   R   R   (    (    sd   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/ggallus.pyt   createDBFile   s    c         C   s    t  d d |  } | j   d  S(   NR   R   (   R    t   createIndices(   R   R   (    (    sd   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/ggallus.pyt   createDBindices   s    c         C   s:  d t  } d t  } d t  } i9 d t  d 6d t  d 6d t  d	 6d
 t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d  t  d! 6d" t  d# 6d$ t  d% 6d& t  d' 6d( t  d) 6d* t  d+ 6d, t  d- 6d. t  d/ 6d0 t  d1 6d2 t  d3 6d4 t  d5 6d6 t  d7 6d8 t  d9 6d: t  d; 6d< t  d= 6d> t  d? 6d@ t  dA 6dB t  dC 6dD t  dE 6dF t  dG 6dH t  dI 6dJ t  dK 6dL t  dM 6dN t  dO 6dP t  dQ 6dR t  dS 6dT t  dU 6dV t  dW 6dX t  dY 6dZ t  d[ 6d\ t  d] 6d^ t  d_ 6d` t  da 6db t  dc 6dd t  de 6df t  dg 6dh t  di 6dj t  dk 6dl t  dm 6dn t  do 6dp t  dq 6dr t  ds 6dt t  du 6} dv |  GHt |   dw GHt |  |  dx GHt |  |  x8 | j   D]* } dy | GHt |  | | | dz |  qWd{ GHt |  | |  d| GHt |   d} |  GHd  S(~   Ns   %s/download/seq_gene.mds   %s/download/GO.terms_and_idss   %s/download/gene2gos   %s/download/chr1.fat   1s   %s/download/chr2.fat   2s   %s/download/chr3.fat   3s   %s/download/chr4.fat   4s   %s/download/chr5.fat   5s   %s/download/chr6.fat   6s   %s/download/chr7.fat   7s   %s/download/chr8.fat   8s   %s/download/chr9.fat   9s   %s/download/chr10.fat   10s   %s/download/chr11.fat   11s   %s/download/chr12.fat   12s   %s/download/chr13.fat   13s   %s/download/chr14.fat   14s   %s/download/chr15.fat   15s   %s/download/chr16.fat   16s   %s/download/chr17.fat   17s   %s/download/chr18.fat   18s   %s/download/chr19.fat   19s   %s/download/chr20.fat   20s   %s/download/chr21.fat   21s   %s/download/chr22.fat   22s   %s/download/chr23.fat   23s   %s/download/chr24.fat   24s   %s/download/chr25.fat   25s   %s/download/chr26.fat   26s   %s/download/chr27.fat   27s   %s/download/chr28.fat   28s   %s/download/chr32.fat   32s   %s/download/chrW.fat   Ws   %s/download/chrZ.fat   Zs   %s/download/chrM.fat   Ms"   %s/download/chrE22C19W28_E50C23.fat   E22C19W28_E50C23s   %s/download/chrE64.fat   E64s   %s/download/chr1_random.fat   1_randoms   %s/download/chr2_random.fat   2_randoms   %s/download/chr4_random.fat   4_randoms   %s/download/chr5_random.fat   5_randoms   %s/download/chr6_random.fat   6_randoms   %s/download/chr7_random.fat   7_randoms   %s/download/chr8_random.fat   8_randoms   %s/download/chr10_random.fat	   10_randoms   %s/download/chr11_random.fat	   11_randoms   %s/download/chr12_random.fat	   12_randoms   %s/download/chr13_random.fat	   13_randoms   %s/download/chr16_random.fat	   16_randoms   %s/download/chr17_random.fat	   17_randoms   %s/download/chr18_random.fat	   18_randoms   %s/download/chr20_random.fat	   20_randoms   %s/download/chr22_random.fat	   22_randoms   %s/download/chr25_random.fat	   25_randoms   %s/download/chr28_random.fat	   28_randoms   %s/download/chrUn_random.fat	   Un_randoms   %s/download/chrW_random.fat   W_randoms   %s/download/chrE64_random.fat
   E64_randoms   %s/download/chrZ_random.fat   Z_randoms)   %s/download/chrE22C19W28_E50C23_random.fat   E22C19W28_E50C23_randoms   Creating database %ss   Adding gene entriess   Adding gene featuress   Loading chromosome %ss   /G_gallus/chromo%s.bins   Adding gene ontologys   Creating Indicess   Finished creating database %s(   R   R\   R8   R@   t   keysR    RZ   R^   (   R   t   genePathRK   RJ   t   chromosR   (    (    sd   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/ggallus.pyt   buildChickenDB   s    


	
	
(   R   t   cistematic.genomesR    t   cistematic.core.geneinfoR   t   osR   t   getR   t   geneDBR    R8   R@   RZ   R\   R^   R   (    (    (    sd   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/ggallus.pyt   <module>   s   
		'	&	&		