
fmSc           @   s   d  d l  Z  d  d l m Z d  d l m Z e j d  rM e j d  Z n d Z d e Z d   Z d   Z	 d	   Z
 d
   Z d   Z d   Z d   Z e d  Z d S(   iN(   t   Genome(   t   environt   CISTEMATIC_ROOTs   /proj/genomes"   %s/C_familiaris/cfamiliaris.genedbc         C   s   g  } t  d d |  } t | d  } | j   } x! | D] } | j | j    q: Wt j | d  } t |  }	 |	 d k  r d GHn  d | GHt d t | f d	  }
 |
 j	 |  |
 j
   | j | | d
  d  S(   Nt   cfamiliarist   dbFilet   rt    i   s#   Problems reading sequence from files   writing to file %ss   %s%st   wt   file(   R    t   opent   readlinet   appendt   stript   stringt   joint   lent   cisRoott   writet   closet   addChromosomeEntry(   t   dbt   chromIDt	   chromPatht   chromOutt   seqArrayt   cfGenomet   inFilet   linet   seqt   seqLent   outFile(    (    sh   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/cfamiliaris.pyt   loadChromosome+   s    	
c      	   C   sl  g  } g  } t  d d |  } t | d  } | j   x| D]
} | j d  } | d j   d k ro q> n  | d j d  } | d	 }	 |	 d
 k s> |	 | k r q> n  | j |	  t | d  d	 }
 t | d  d	 } | d } | d	 j   } | d k rd } n d } d |	 f } d	 } | j | | |
 | | d | f  q> Wd t |  GH| j |  d S(   s<    FIXME - NEED TO DEAL WITH ALTERNATIVE SPLICING ENTRIES
    R   R   R   s   	i   t   GENEi
   t   :i   R   i   i   i   t   +t   Ft   Rt   genes   Adding %d gene entriesN(	   R    R	   R
   t   splitR   R   t   intR   t   addGeneEntryBatch(   R   t   gFilet   geneEntriest   alreadySeenR   t   geneFileR   t   colst   namet   gidt   startt   stopt   senset   chromt   geneIDt
   gidVersion(    (    sh   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/cfamiliaris.pyt   loadGeneEntries?   s4    


	&c      	   C   sW  g  } t  d d |  } t | d  } | j   x| D] } | j d  } | d j   d k ri q8 n  | d } | d	 j d
  } | d }	 |	 d k r q8 n  t | d  d }
 t | d  d } | d } | d j   } | d k r d } n d } d |	 f } d } | j | | | |
 | | | f  q8 Wd t |  GH| j |  d S(   sI    Load gene features such as CDS, UTR, and PSEUDO from the gene file.
    R   R   R   s   	i   t   CDSt   UTRt   PSEUDOi
   R!   i   R   i   i   i   R"   R#   R$   s   Adding %d feature entriesN(   s   CDSR8   s   PSEUDO(	   R    R	   R
   R&   R   R'   R   R   t   addFeatureEntryBatch(   R   R)   t   featureEntriesR   t   featureFileR   R-   t   fTypeR.   R/   R0   R1   R2   R3   R4   R5   (    (    sh   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/cfamiliaris.pyt   loadGeneFeaturesc   s2    



	&c   	      C   s   g  } t  | d  } t d d |  } x | D]z } yj | j d  } | d } | d } t |  d k r | j d | f t j | j   d d  f  n  Wq. q. Xq. Wd	 t |  GH| j |  d  S(
   NR   R   R   s   	i    i   t   't   ps   Adding %d annotations(	   R	   R    R&   R   R   R   t   replaceR   t   addAnnotationBatch(	   R   t	   annotPatht   geneAnnotationst	   annotFileR   R   R-   t   locIDt   geneDesc(    (    sh   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/cfamiliaris.pyt   loadGeneAnnotations   s    

5c         C   s  t  d d |  } t | d  } t | d  } t   } i  } g  } xR | D]J }	 |	 d d k rL |	 j d  }
 |
 d |
 d j   f | |
 d <qL qL W| j   } d	 } x$| D]} y| j d  } | d d
 k r w n  | d j   } d | f } | | k rg| } d	 } | j |  } t |  d k r^x+ | D] } | d 7} | | 7} q=Wqgd } n  | j | | d d	 | d d	 t	 j
 | | d d d d  | | d d d	 f  Wq d | GHq Xq Wd t |  GH| j |  d  S(   NR   R   R   i    t   !s   	i   i   R   t   9615t   ,t    R?   R@   s   locus ID %s could not be addeds   adding %d go entries(   R    R	   t
   geneinfoDBR&   R   t	   readlinest   geneIDSynonymsR   R   R   RA   t   addGoInfoBatch(   R   t   goPatht	   goDefPathR   t	   goDefFilet   goFilet   idbt   goDefst   goArrayt
   goDefEntryR-   t	   goEntriest   prevGIDt   entryt   fieldsRF   t   gIDt	   gene_namet   synonyms(    (    sh   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/cfamiliaris.pyt   loadGeneOntology   sD    	)
	X	c         C   s#   t  d d |  } | j |   d  S(   NR   R   (   R    t   createGeneDB(   R   R   (    (    sh   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/cfamiliaris.pyt   createDBFile   s    c         C   s    t  d d |  } | j   d  S(   NR   R   (   R    t   createIndices(   R   R   (    (    sh   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/cfamiliaris.pyt   createDBindices   s    c         C   sV  d t  } i( d t  d 6d t  d 6d t  d 6d t  d	 6d
 t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d  t  d! 6d" t  d# 6d$ t  d% 6d& t  d' 6d( t  d) 6d* t  d+ 6d, t  d- 6d. t  d/ 6d0 t  d1 6d2 t  d3 6d4 t  d5 6d6 t  d7 6d8 t  d9 6d: t  d; 6d< t  d= 6d> t  d? 6d@ t  dA 6dB t  dC 6dD t  dE 6dF t  dG 6dH t  dI 6dJ t  dK 6dL t  dM 6dN t  dO 6dP t  dQ 6} dR |  GHt |   dS GHt |  |  dT GHt |  |  x8 | j   D]* } dU | GHt |  | | | dV |  qWdW GHt |   dX |  GHd  S(Y   Ns   %s/download/seq_gene.mds   %s/download/chr1.fat   1s   %s/download/chr2.fat   2s   %s/download/chr3.fat   3s   %s/download/chr4.fat   4s   %s/download/chr5.fat   5s   %s/download/chr6.fat   6s   %s/download/chr7.fat   7s   %s/download/chr8.fat   8s   %s/download/chr9.fat   9s   %s/download/chr10.fat   10s   %s/download/chr11.fat   11s   %s/download/chr12.fat   12s   %s/download/chr13.fat   13s   %s/download/chr14.fat   14s   %s/download/chr15.fat   15s   %s/download/chr16.fat   16s   %s/download/chr17.fat   17s   %s/download/chr18.fat   18s   %s/download/chr19.fat   19s   %s/download/chr20.fat   20s   %s/download/chr21.fat   21s   %s/download/chr22.fat   22s   %s/download/chr23.fat   23s   %s/download/chr24.fat   24s   %s/download/chr25.fat   25s   %s/download/chr26.fat   26s   %s/download/chr27.fat   27s   %s/download/chr28.fat   28s   %s/download/chr29.fat   29s   %s/download/chr30.fat   30s   %s/download/chr31.fat   31s   %s/download/chr32.fat   32s   %s/download/chr33.fat   33s   %s/download/chr34.fat   34s   %s/download/chr35.fat   35s   %s/download/chr36.fat   36s   %s/download/chr37.fat   37s   %s/download/chr38.fat   38s   %s/download/chrX.fat   Xs   %s/download/chrUn.fat   Uns   Creating database %ss   Adding gene entriess   Adding gene featuress   Loading chromosome %ss   /C_familiaris/chromo%s.bins   Creating Indicess   Finished creating database %s(   R   Rb   R6   R>   t   keysR   Rd   (   R   t   genePatht   chromosR   (    (    sh   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/cfamiliaris.pyt
   buildDogDB   sj    
	
	
(   R   t   cistematic.genomesR    t   osR   t   getR   t   geneDBR   R6   R>   RH   R`   Rb   Rd   R   (    (    (    sh   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/cfamiliaris.pyt   <module>   s   
		$	#		*		