
fmSc           @   s   d  d l  Z  d  d l m Z d  d l m Z d  d l m Z e j d  r] e j d  Z n d Z d e Z	 d   Z
 d	   Z d
   Z d   Z d   Z d   Z d   Z e	 d e d  Z d S(   iN(   t   Genome(   t
   geneinfoDB(   t   environt   CISTEMATIC_ROOTs   /proj/genomes   %s/H_sapiens/hsapiens.genedbc         C   s   g  } t  d d |  } t | d  } | j   } x! | D] } | j | j    q: Wt j | d  } t |  }	 |	 d k  r d GHn  d | GHt d t | f d	  }
 |
 j	 |  |
 j
   | j | | d
  d  S(   Nt   hsapienst   dbFilet   rt    i   s#   Problems reading sequence from files   writing to file %ss   %s%st   wt   file(   R    t   opent   readlinet   appendt   stript   stringt   joint   lent   cisRoott   writet   closet   addChromosomeEntry(   t   dbt   chromIDt	   chromPatht   chromOutt   seqArrayt   hsGenomet   inFilet   linet   seqt   seqLent   outFile(    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/hsapiens.pyt   loadChromosome+   s    	
c      	   C   sK  g  } t  d d |  } t | d  } x| D] } | j d  } | d d k rY q. n  | d d k ro q. n  | d	 j   } | | k r q. n  | d
 j d  }	 |	 d	 }
 t | d  } t | d  } | d } | d k r d } n d } d |
 f } d	 } | j | | | | | d | f  q. Wd t |  GH| j |  d S(   s<    FIXME - NEED TO DEAL WITH ALTERNATIVE SPLICING ENTRIES
    R   R   R   s   	i   t   GENEi   t   Celerai   i
   t   :i   i   i   t   +t   Ft   Rt   genes   Adding %d gene entriesN(   R    R
   t   splitR   t   intR   R   t   addGeneEntryBatch(   R   t   gFilet   cDictt   geneEntriesR   t   geneFileR   t   colst   chromt   namet   gidt   startt   stopt   senset   geneIDt
   gidVersion(    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/hsapiens.pyt   loadGeneEntries>   s2    

	&c      	   C   sU  g  } t  d d |  } t | d  } x| D]} | j d  } | d d k rY q. n  | d	 d
 k ro q. n  | d j   } | | k r q. n  | d }	 | d j d  }
 |
 d } t | d  } t | d  } | d } | d k r d } n d } d | f } d } | j | | | | | | |	 f  q. Wd t |  GH| j |  d S(   sI    Load gene features such as CDS, UTR, and PSEUDO from the gene file.
    R   R   R   s   	i   t   CDSt   UTRt   PSEUDOi   R"   i   i
   R#   i   i   i   R$   R%   R&   s   Adding %d feature entriesN(   s   CDSs   UTRs   PSEUDO(   R    R
   R(   R   R)   R   R   t   addFeatureEntryBatch(   R   R+   R,   t   featureEntriesR   t   featureFileR   R/   R0   t   fTypeR1   R2   R3   R4   R5   R6   R7   (    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/hsapiens.pyt   loadGeneFeaturesb   s4    


	&c   
      C   s   g  } t    } t d d |  } | j   } x | D] } d | f } | j |  } d } x( | D]  }	 | d 7} | |	 j   7} qb Wt |  d k r4 | j | t j | d d d  f  q4 q4 Wd	 t |  GH| j	 |  d  S(
   NR   R   R   t   ,i    i   t   't   ps   Adding %d annotations(
   R   R    t   allGIDst   getDescriptionR   R   R   R   t   replacet   addAnnotationBatch(
   R   t   geneAnnotationst   idbR   t   gidListt   locIDt   gIDt   geneDescArrayt   geneDesct   entry(    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/hsapiens.pyt   loadGeneAnnotations   s    	
-c         C   s"  t  d d |  } t | d  } t | d  } t   } i  } g  } xR | D]J }	 |	 d d k rL |	 j d  }
 |
 d |
 d j   f | |
 d <qL qL Wd	 } d } xY| D]Q} y8| j d  } | d d
 k r w n  | d 7} | d d k rd GH| j |  g  } n  | d j   } d | f } | | k r| } d	 } | j |  } t |  d k rx+ | D] } | d 7} | | 7} qlWqd } n  | j | | d d	 | d d	 t	 j
 | | d d d d  | | d d d	 f  Wq d | GHq Xq Wd t |  GH| j |  d  S(   NR   R   R   i    t   !s   	i   i   R   t   9606i  s   adding 1000 go entriesRA   t    RB   RC   s   locus ID %s could not be addeds   adding %d go entries(   R    R
   R   R(   R   t   addGoInfoBatcht   geneIDSynonymsR   R   R   RF   (   R   t   goPatht	   goDefPathR   t	   goDefFilet   goFileRI   t   goDefst   goArrayt
   goDefEntryR/   t   prevGIDt   indexRO   t   fieldsRK   RL   t	   gene_namet   synonyms(    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/hsapiens.pyt   loadGeneOntology   sN    	)
	
	X	c         C   s#   t  d d |  } | j |   d  S(   NR   R   (   R    t   createGeneDB(   R   R   (    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/hsapiens.pyt   createDBFile   s    c         C   s    t  d d |  } | j   d  S(   NR   R   (   R    t   createIndices(   R   R   (    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/hsapiens.pyt   createDBindices   s    s   %s/downloadc         C   s  d | } d | } d | } i d | d 6d | d 6d | d	 6d
 | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d | d 6d  | d! 6d" | d# 6d$ | d% 6d& | d' 6d( | d) 6d* | d+ 6d, | d- 6d. | d/ 6d0 | d1 6d2 | d3 6} d4 |  GHt  |   d5 GHt |  | |  d6 GHt |  | |  d7 GHt |   d8 GHt |  | |  x8 | j   D]* } d9 | GHt |  | | | d: |  qWd; GHt |   d< |  GHd  S(=   Ns   %s/seq_gene.mds   %s/GO.terms_and_idss
   %s/gene2gos
   %s/chr1.fat   1s
   %s/chr2.fat   2s
   %s/chr3.fat   3s
   %s/chr4.fat   4s
   %s/chr5.fat   5s
   %s/chr6.fat   6s
   %s/chr7.fat   7s
   %s/chr8.fat   8s
   %s/chr9.fat   9s   %s/chr10.fat   10s   %s/chr11.fat   11s   %s/chr12.fat   12s   %s/chr13.fat   13s   %s/chr14.fat   14s   %s/chr15.fat   15s   %s/chr16.fat   16s   %s/chr17.fat   17s   %s/chr18.fat   18s   %s/chr19.fat   19s   %s/chr20.fat   20s   %s/chr21.fat   21s   %s/chr22.fat   22s
   %s/chrX.fat   Xs
   %s/chrY.fat   Ys   Creating database %ss   Adding gene entriess   Adding gene featuress   Adding gene annotationss   Adding gene ontologys   Loading chromosome %ss   /H_sapiens/chromo%s.bins   Creating Indicess   Finished creating database %s(   Rd   R8   R@   RP   Rb   t   keysR    Rf   (   R   t   downloadDirt   genePathRW   RV   t	   chromDictR   (    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/hsapiens.pyt   buildHsapiensDB   sV    


	

	
(   R   t   cistematic.genomesR    t   cistematic.core.geneinfoR   t   osR   t   getR   t   geneDBR    R8   R@   RP   Rb   Rd   Rf   R   (    (    (    se   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM-2014-05-09-fix/cistematic/genomes/hsapiens.pyt   <module>   s   
		$	#		0		