ó
÷VêNc           @   sý   d  d l  Z  d  d l m Z d  d l m Z e j d ƒ rM e j d ƒ Z n d Z d e Z i d d 6d	 d
 6d d 6d d 6d d 6Z d d d d g Z	 d Z
 d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z e d e d „ Z d S(   iÿÿÿÿN(   t   Genome(   t   environt   CISTEMATIC_ROOTs   /proj/genomes   %s/A_thaliana/athaliana.genedbi3]Ðt   1i®,t   2iÕ"ft   3iÒ•t   4ibà›t   5gpxADjZÔ?g}w+KÇ?iA¤c         C   s.  |  d } |  d d } t  |  d ƒ } t  |  d ƒ } |  d } | d k rW d } n d } |  d	 } | j d
 ƒ } i  } x– | D]Ž }	 y |	 j d ƒ \ }
 } Wn
 qƒ n X| | |
 <|
 d k rÔ | j ƒ  } n  |
 d k rƒ d | k r| j d ƒ d } n  | j ƒ  } qƒ qƒ W| | | | | | | f S(   Ni   i    i   i   i   t   +t   Ft   Riÿÿÿÿt   ;t   =t   Namet   Parentt   ,(   t   intt   splitt   strip(   t   colst   fTypet   chromt   startt   stopt   senset   othert	   otherListt	   otherDictt	   otherItemt   namet   valuet   gid(    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/athaliana.pyt
   decodeGFF35   s0    

	

c         C   sð   g  } t  d d |  ƒ} t | d ƒ } | j ƒ  } x! | D] } | j | j ƒ  ƒ q: Wt j | d ƒ } t | ƒ }	 |	 d k  r‰ d GHn  d | GHt t | d ƒ }
 |
 j	 | ƒ |
 j
 ƒ  d } | j | | d	 ƒ | j d
 | | d	 ƒ d  S(   Nt	   athalianat   dbFilet   rt    i   s#   Problems reading sequence from files   writing to file %st   wt   files   chromo%s(   R    t   opent   readlinet   appendR   t   stringt   joint   lent   cisRoott   writet   closet   addChromosomeEntry(   t   dbt   chromIDt	   chromPatht   chromOutt   seqArrayt   atGenomet   inFilet   linet   seqt   seqLent   outFile(    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/athaliana.pyt   loadChromosomeV   s"    	
c      	   C   s   g  } t  d d |  ƒ} t | d ƒ } x¶ | D]® } | d d k s. t | ƒ d k  r\ q. n  | j ƒ  j d ƒ } | d d	 k r‡ q. n  t | ƒ \ } } }	 }
 } } } d | f } d
 } | j | |	 |
 | | d	 | f ƒ q. Wd t | ƒ GH| j | ƒ d  S(   NR!   R"   R#   i    t   #i
   s   	i   t   genei   s   inserting %d gene entries(   R    R'   R,   R   R   R    R)   t   addGeneEntryBatch(   R1   t   gFilet   geneEntriesR6   t   geneFileR8   t   fieldsR   R   R   R   R   R   R   t   geneIDt   version(    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/athaliana.pyt   loadGeneEntrieso   s    "!&c      
   C   s  g  } g  } t  d d |  ƒ} i d d 6d d 6d d 6d d 6d d	 6} t | d
 ƒ } x~ | D]v } | j d ƒ } t | ƒ \ }	 }
 } } } } } |	 d k r] |
 j d ƒ \ } } |
 | k rÓ | j | ƒ qÓ q] q] Wt | d
 ƒ } x	| D]} | d d k sí t | ƒ d k  rqí n  | j d ƒ } t | ƒ \ }	 }
 } } } } } |
 j d ƒ } y | \ } } | j ƒ  } Wn |
 } d } n X|	 | k ržqí n |	 d	 k r¼| | k r¼qí n  d | f } | j | | | | | | | |	 f ƒ qí Wd t | ƒ GH| j | ƒ d  S(   NR!   R"   t   CDSt   3UTRt   three_prime_UTRt   5UTRt   five_prime_UTRt   miRNAt   exonR#   s   	t   ncRNAt   .i    t   ci
   i   s   inserted %d feature entries(   RN   (   R    R'   R   R    R)   R,   R   t   addFeatureEntryBatch(   R1   R@   t   featureEntriest   trackedGenesR6   t   featureTranslationRB   R8   RC   R   R   R   R   R   R   R   t   locust   revt
   locusFieldRD   (    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/athaliana.pyt   loadFeatureEntries„   sJ    

!"!
*c         C   s  g  } t  | d ƒ } | j ƒ  | j ƒ  } | j ƒ  t d d |  ƒ} x | D]• } | j d ƒ } yv | d j ƒ  } d | k r  | j d ƒ \ }	 }
 |	 } n  | d j ƒ  } | j d | f t j	 | d d	 ƒ f ƒ WqN qN XqN Wd
 t
 | ƒ GH| j | ƒ d  S(   NR#   R!   R"   s   	i    RO   i   t   't   ps   Adding %d annotations(   R'   R(   t	   readlinesR/   R    R   R   R)   R*   t   replaceR,   t   addAnnotationBatch(   R1   t	   annotPatht   geneAnnotationst	   annotFilet   linesR6   R8   t   fieldt   orfNameRU   RV   t   description(    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/athaliana.pyt   loadGeneAnnotations³   s&    

	,c         C   s÷   t  d d |  ƒ} t | d ƒ } | j ƒ  } g  } x° | D]¨ } | j d ƒ } | d } | d }	 t j | d d d	 ƒ }
 t j | d
 d d	 ƒ } d } | d } | d } | j d | f |	 d |
 | | | | | d f ƒ q: W| j | ƒ d  S(   NR!   R"   R#   s   	i    i   i   RY   RZ   i   R$   i   i   i	   (   R    R'   R[   R   R*   R\   R)   t   addGoInfoBatch(   R1   t   goPathR6   t   goFilet	   goEntriest   goArrayR8   RC   t   gIDt   GOIDt   objTypet   objNamet   isNott   GOtermt   evidence(    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/athaliana.pyt   loadGeneOntologyÌ   s    



7c         C   s#   t  d d |  ƒ} | j |  ƒ d  S(   NR!   R"   (   R    t   createGeneDB(   R1   R6   (    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/athaliana.pyt   createDBFileß   s    c         C   s    t  d d |  ƒ} | j ƒ  d  S(   NR!   R"   (   R    t   createIndices(   R1   R6   (    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/athaliana.pyt   createDBindicesä   s    s   %s/downloadc         C   s#  d | } d | } d | } i d | d 6d | d 6d | d	 6d
 | d 6d | d 6d | d 6d | d 6} d |  GHt  |  ƒ d GHt |  | ƒ d GHt |  | ƒ d GHt |  | ƒ d GHt |  | ƒ x8 | j ƒ  D]* } d | GHt |  | | | d | ƒ qÙ Wd GHt |  ƒ d |  GHd  S(   Ns#   %s/TAIR9_GFF3_genes_transposons.gffs    %s/TAIR9_functional_descriptionss   %s/ATH_GO_GOSLIM.txts   %s/chr1.fasR   s   %s/chr2.fasR   s   %s/chr3.fasR   s   %s/chr4.fasR   s   %s/chr5.fasR   s   %s/chrC.fast   Cs   %s/chrM.fast   Ms   Creating database %ss   Adding gene entriess   Adding feature entriess   Adding gene annotationss   Adding gene ontologys   Loading chromosome %ss   /A_thaliana/chr%s.bins   Creating Indicess   Finished creating database %s(   Rt   RF   RX   Re   Rr   t   keysR<   Rv   (   R1   t   downloadDirt   genePathR^   Rg   t   chromosR2   (    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/athaliana.pyt   buildArabidopsisDBé   s4    


	
	
(   R*   t   cistematic.genomesR    t   osR   t   getR-   t   geneDBt	   chromSizet
   backgroundt
   genomeSizeR    R<   RF   RX   Re   Rr   Rt   Rv   R}   (    (    (    sW   /woldlab/castor/data00/home/georgi/code/erange-4.0a-BAM/cistematic/genomes/athaliana.pyt   <module>   s,   


	!			/				