ó
÷VêNc           @   s¬   d  d l  Z  d  d l m Z d  d l m Z e j d ƒ rM e j d ƒ Z n d Z d e Z d „  Z d „  Z	 d	 „  Z
 d
 „  Z d „  Z d „  Z d „  Z e d „ Z d S(   iÿÿÿÿN(   t   Genome(   t   environt   CISTEMATIC_ROOTs   /proj/genomes   %s/B_taurus/btaurus.genedbc         C   s›  g  } d } t  d d |  ƒ} t | d ƒ } | j ƒ  } xQ| d k rŒg  } d } | j ƒ  d } | j ƒ  }	 xU |	 d k rÇ |	 d d k rÇ |	 j ƒ  }
 | t |
 ƒ 7} | j |
 ƒ | j ƒ  }	 qs Wt j | d ƒ } | d k  r'd	 | GH| j d | f | d
 t	 | ƒ ƒ | j
 | | d ƒ n\ d | | f } t d t | f d ƒ } | j | ƒ | j ƒ  d | GH| j
 | | d ƒ |	 } q< W| j ƒ  d  S(   Ni    t   btaurust   dbFilet   rt    i   t   >i ¡ s   Added contig %s to databaset
   chromosomet   dbs   %s%s.bins   %s%st   ws    Added contig file %s to databaset   file(   R    t   opent   readlinet   stript   lent   appendt   stringt   joint   addSequencet   strt   addChromosomeEntryt   cisRoott   writet   close(   R	   t	   chromPatht   chromOutPatht   seqArrayt   seqLent   btGenomet   inFilet   headert   chromIDt   currentLinet   lineSeqt   seqt   outFileNamet   outFile(    (    sU   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/btaurus.pyt   loadChromosome+   s8    	"
	
c      	   C   sô   g  } t  d d |  ƒ} t | d ƒ } xª | D]¢ } | j d ƒ } | d } t | d ƒ } t | d ƒ }	 | d }
 | d	 } |
 d
 k r– d }
 n d }
 d | f } d	 } | j | | | |	 |
 d | f ƒ q. Wd t | ƒ GH| j | ƒ d S(   s<    FIXME - NEED TO DEAL WITH ALTERNATIVE SPLICING ENTRIES
    R   R   R   s   	i    i   i   i   i   t   +t   Ft   Rt   genes   Adding %d gene entriesN(   R    R   t   splitt   intR   R   t   addGeneEntryBatch(   R	   t   gFilet   geneEntriesR   t   geneFilet   linet   colst   gidt   startt   stopt   senset   chromt   geneIDt
   gidVersion(    (    sU   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/btaurus.pyt   loadGeneEntriesO   s$    


	&c   	      C   sÌ   g  } t  | d ƒ } t d d |  ƒ} x‚ | D]z } yj | j d ƒ } | d } | d } t | ƒ d k r | j d | f t j | j ƒ  d d ƒ f ƒ n  Wq. q. Xq. Wd	 t | ƒ GH| j | ƒ d  S(
   NR   R   R   s   	i    i   t   't   ps   Adding %d annotations(	   R   R    R+   R   R   R   t   replaceR   t   addAnnotationBatch(	   R	   t	   annotPatht   geneAnnotationst	   annotFileR   R1   R2   t   locIDt   geneDesc(    (    sU   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/btaurus.pyt   loadGeneAnnotationsj   s    

5c      
   C   s@  t  | d ƒ } i d d 6d d 6d d 6} g  } g  } xË| D]Ã} | j d ƒ } t | d ƒ } | d	 j d
 ƒ }	 | d j d
 ƒ }
 | d } | | d } t | d ƒ d } t | d ƒ d } | d } y d | f } Wn
 q= n Xd } | | k r
d } n | j | ƒ xæt | ƒ D]Ø} t |	 | ƒ d } t |
 | ƒ d } | | k r| | k r| j | | | | | | d f ƒ q$| | k rÛ| d k r°d } n d } | j | d | | | | | f ƒ q$| | k r'| d k rüd } n d } | j | d | | | | | f ƒ q$| | k r™| d k rHd } n d } | j | d | | | | d f ƒ | j | d | | d | | | f ƒ q$| d k r®d } n d } | j | d | | | | d f ƒ | j | d | | | d | | f ƒ q$Wq= W| j ƒ  t d d |  ƒ} d t | ƒ GH| j | ƒ d  S(   NR   R(   R'   R)   t   -t   .s   	i   i   t   ,i	   i   i   i   i   i    R   t   1t   2t   CDSt   5UTRt   3UTRR   s   Adding %d features(	   R   R+   R,   R   t   rangeR   R    R   t   addFeatureEntryBatch(   R	   t   gfileR0   t
   senseArrayt	   seenArrayt   insertArrayt   geneLinet
   geneFieldst   exonNumt
   exonStartst	   exonStopsR7   R6   t   gstopt   gstartt   geneidR8   R9   t   indext   estartt   estopt   fTypeR   (    (    sU   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/btaurus.pyt   loadGeneFeatures|   sl    



	%	%	%	")	".
c         C   s  t  d d |  ƒ} t | d ƒ } t | d ƒ } t | d ƒ } | j ƒ  } | j ƒ  | j ƒ  }	 i  }
 i  } g  } xR |	 D]J } | d d k rz | j d ƒ } | d | d j ƒ  f |
 | d <qz qz W| j ƒ  } xw | D]o } y_ | j d ƒ } | d } | d } | d	 } d
 } t | ƒ d k r?| | | f | | <n  WqÛ qÛ XqÛ Wx¤ | D]œ } yŒ | j d ƒ } | d j ƒ  } | | \ } } } | j d | f | d d
 | d
 t j	 |
 | d d d d ƒ |
 | d d | f ƒ WqUqUXqUWd t | ƒ GH| j
 | ƒ d  S(   NR   R   R   i    t   !s   	i   i   i   R   R;   R<   s   adding %d go entries(   R    R   t	   readlinesR   R+   R   R   R   R   R=   t   addGoInfoBatch(   R	   t   goPatht	   goDefPathR?   R   t	   goDefFilet   goFileRA   t   annotEntriest   goDefEntriest   goDefst   locust   goArrayt
   goDefEntryR2   t	   goEntriest
   annotEntryRB   t   geneNameRC   t   mimIDt   entryt   fieldst	   gene_namet	   gene_desc(    (    sU   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/btaurus.pyt   loadGeneOntologyÅ   sH    
)


Zc         C   s#   t  d d |  ƒ} | j |  ƒ d  S(   NR   R   (   R    t   createGeneDB(   R	   R   (    (    sU   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/btaurus.pyt   createDBFileñ   s    c         C   s    t  d d |  ƒ} | j ƒ  d  S(   NR   R   (   R    t   createIndices(   R	   R   (    (    sU   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/btaurus.pyt   createDBindicesö   s    c         C   s‚   d t  } d t  } d } d |  GHt |  ƒ d GHt |  | ƒ d GHt |  | ƒ d GHt |  | | ƒ d GHt |  ƒ d	 |  GHd  S(
   Ns   %s/download/bt2/genscan.txts$   %s/download/bt2/bosTau2.softmask2.fas
   /B_taurus/s   Creating database %ss   Adding gene entriess   Adding gene featuress   Loading sequencess   Creating Indicess   Finished creating database %s(   R   Rw   R:   R_   R&   Ry   (   R	   t   genePatht
   chromoPatht   chromoOutPath(    (    sU   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/btaurus.pyt   buildBtaurusDBû   s    

	

(   R   t   cistematic.genomesR    t   osR   t   getR   t   geneDBR&   R:   RD   R_   Ru   Rw   Ry   R}   (    (    (    sU   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/btaurus.pyt   <module>   s   
	$			I	,		