
VNc           @   s   d  d l  Z  d  d l m Z d  d l m Z e j d  rM e j d  Z n d Z d e Z d   Z d   Z	 d	   Z
 d
   Z d   Z d   Z e d  Z d S(   iN(   t   Genome(   t   environt   CISTEMATIC_ROOTs   /proj/genomes"   %s/S_cerevisiae/scerevisiae.genedbc         C   s   g  } t  d d |  } t | d  } | j   } x! | D] } | j | j    q: Wt j | d  } t |  }	 |	 d k  r d GHn  d | GHt d t | f d	  }
 |
 j	 |  |
 j
   d } d
 GH| j | | d  d  S(   Nt   scerevisiaet   dbFilet   rt    i   s#   Problems reading sequence from files   writing to file %ss   %s%st   ws   calling scGenome()t   file(   R    t   opent   readlinet   appendt   stript   stringt   joint   lent   cisRoott   writet   closet   addChromosomeEntry(   t   dbt   chromIDt	   chromPatht   chromOutt   seqArrayt   scGenomet   inFilet   linet   seqt   seqLent   outFile(    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/scerevisiae.pyt   loadChromosome+   s"    	
c      	   C   s  g  } g  } t  d d |  } t | d  } xj| D]b} | j d  } | d d k r_ q4 n  | d j   } | d }	 | d	 j   }
 |	 d
 k r d }	 y8 t | d j    d } t | d j    d } Wq@d } d } q@XnT d }	 y8 t | d j    d } t | d j    d } Wn d } d } n Xd | f } d } | j | |
 | | |	 d | f  | j | | |
 | | |	 d f  q4 Wd t |  GH| j |  d t |  GH| j |  d  S(   NR   R   R   s   	i   t   ORFi   i   i   t   Wt   Fi	   i
   i    t   Rt   chromosomal_featuret   CDSs   loading %d gene entriess   loading %d gene features(	   R    R	   t   splitR   t   intR   R   t   addGeneEntryBatcht   addFeatureEntryBatch(   R   t   gFilet   geneEntriest   geneFeaturesR   t   geneFileR   t   fieldt   orfNamet   senset   chromt   startt   stopt   geneIDt
   gidVersion(    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/scerevisiae.pyt   loadGeneEntriesA   sD    

"&c   
      C   s   g  } t  | d  } | j   } | j   t d d |  } x | D] } | j d  } | d d k ro qD n  yL | d j   } | d j   }	 | j d | f t j |	 d	 d
  f  WqD qD XqD Wd t	 |  GH| j
 |  d  S(   NR   R   R   s   	i   R    i   i   t   't   ps   Adding %d annotations(   R	   t	   readlinesR   R    R&   R   R   R   t   replaceR   t   addAnnotationBatch(
   R   t	   annotPatht   geneAnnotationst	   annotFilet   linesR   R   R.   R/   t   description(    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/scerevisiae.pyt   loadGeneAnnotationsj   s"    
,c         C   s  t  d d d d |  } t | d  } t | d  } | j   } i  } xR | D]J } | d d k rO | j d  }	 |	 d	 |	 d
 j   f | |	 d <qO qO W| j   }
 g  } x|
 D] } | d d k r q n  | j d  } | d j d  } | d } | d } | d } | d j d  } | d } | d } y! t j | | d d d  } Wn d | GHd } n X| d } | j d | f | d | | | | | | d	 f  q W| j |  d  S(   NR   t   versiont   SGD1R   R   i    t   !s   	i   i   i
   t   |i   i   i   R7   R8   s   Could not translate %sR   i   (	   R    R	   R9   R&   R   R   R:   R   t   addGoInfoBatch(   R   t   goPatht	   goDefPathR   t	   goDefFilet   goFilet   goDefEntriest   goDefst
   goDefEntryt   colst	   goEntriest   goArrayR   t   fieldst   genest   gIDt   GOIDt   objTypet   objNameArrayt   objNamet   isNott   GOtermt   evidence(    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/scerevisiae.pyt   loadGeneOntology   s<    )




!	

7c         C   s)   t  d d d d |  } | j |   d  S(   NR   RB   RC   R   (   R    t   createGeneDB(   R   R   (    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/scerevisiae.pyt   createDBFile   s    c         C   s&   t  d d d d |  } | j   d  S(   NR   RB   RC   R   (   R    t   createIndices(   R   R   (    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/scerevisiae.pyt   createDBindices   s    c         C   s  d t  } d t  } d t  } i d t  d 6d t  d 6d t  d	 6d
 t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d t  d 6d  t  d! 6d" t  d# 6} d$ |  GHt |   d% GHt |  |  d& GHt |  |  d' GHt |  | |  xb d d d	 d d d d d d d d d d d d! d# g D]* } d( | GHt |  | | | d) |  qWWd* GHt |   d+ |  GHd  S(,   Ns   %s/download/SGD_features.tabs   %s/download/GO.terms_and_idss    %s/download/gene_association.sgds   %s/download/chr01.fsat   1s   %s/download/chr02.fsat   2s   %s/download/chr03.fsat   3s   %s/download/chr04.fsat   4s   %s/download/chr05.fsat   5s   %s/download/chr06.fsat   6s   %s/download/chr07.fsat   7s   %s/download/chr08.fsat   8s   %s/download/chr09.fsat   9s   %s/download/chr10.fsat   10s   %s/download/chr11.fsat   11s   %s/download/chr12.fsat   12s   %s/download/chr13.fsat   13s   %s/download/chr14.fsat   14s   %s/download/chr15.fsat   15s   %s/download/chr16.fsat   16s   Creating database %ss   Adding gene entriess   Adding gene annotationss   Adding gene ontologys   Loading chromosome %ss   /S_cerevisiae/chr%s.bins   Creating Indicess   Finished creating database %s(   R   R]   R6   RA   R[   R   R_   (   R   t   genePathRH   RG   t   chromosR   (    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/scerevisiae.pyt   buildScerevisiaeDB   sB    


	
=	
(   R   t   cistematic.genomesR    t   osR   t   getR   t   geneDBR   R6   RA   R[   R]   R_   Rr   (    (    (    sY   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/scerevisiae.pyt   <module>   s   
		)		%		