ó
ÀênQc           @   s¬   d  Z  d d l Z d d l Z d d l m Z m Z m Z m Z d d l m	 Z	 d d	 d „  ƒ  YZ
 e d k r¨ e
 e e j d ƒ ƒ Z x e j ƒ  D] Z e GHHq• Wn  d S(
   s4   Produce complete genes or contigs from a GFF3 sourceiÿÿÿÿN(   t
   GFF3Recordt   GFF3Genet   GFF3mRNAt   GFF3Exon(   t   defaultdictt   GFF3Iteratorc           B   sG   e  Z d  „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z RS(   c         C   sd   | |  _  i  |  _ i  |  _ g  |  _ t t ƒ |  _ t t ƒ |  _ t t ƒ |  _ t t ƒ |  _	 d S(   s:    source is an iterator over input lines, e.g a file handleN(
   t   _sourcet   _genest   _transcriptst   _idsR   t   listt   _orphan_transcriptst   _orphan_exonst   _orphan_CDSstartst   _orphan_CDSstops(   t   selft   source(    (    sV   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/gff3Iterator.pyt   __init__   s    				c         c   s"  xò |  j  D]ç } t | j ƒ  ƒ d k  r. q
 n  | j d ƒ rƒ x |  j D] } |  j | VqG Wg  |  _ |  j j ƒ  |  j j ƒ  q
 | j d ƒ r˜ q
 q
 t | j d ƒ ƒ d k rÞ t	 | j ƒ  j d ƒ Œ  } |  j
 | ƒ q
 t j d I| IJq
 Wx |  j D] } |  j | Vqÿ Wg  |  _ d S(   sW   Creates GFF3Gene objects from a list of .gff3 format lines and returns them one-by-one i   s   ###t   #s   	i   s   Strange input line:N(   R   t   lent   stript
   startswithR	   R   t   clearR   t   splitR    t
   add_recordt   syst   stderr(   R   t   linet   idt   record(    (    sV   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/gff3Iterator.pyt   genes   s$    	c         c   s}   t  |  j ƒ d k s t ‚ t  |  j ƒ d k s6 t ‚ x |  j D] } |  j | Vq@ Wg  |  _ |  j j ƒ  |  j j ƒ  d S(   sj    Iterates over the GFF3Genes in a GFF3Iterator that has already been loaded with data (e.g. by add_record)i    N(   R   R   t   AssertionErrorR   R	   R   R   R   (   R   R   (    (    sV   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/gff3Iterator.pyt   gene_iterator/   s    	c         c   sE  x|  j  D]û } t | j ƒ  ƒ d k  r. q
 n  | j d ƒ rÈ t |  j ƒ d k sX t ‚ t |  j ƒ d k ss t ‚ x, |  j D]! } |  j |  j	 | ƒ } | Vq} Wg  |  _ |  j	 j
 ƒ  |  j j
 ƒ  q
 | j d ƒ rÝ q
 q
 t | j ƒ  j d ƒ Œ  } |  j | ƒ q
 Wx, |  j D]! } |  j |  j	 | ƒ } | VqWg  |  _ d  S(   Ni   s   ###i    R   s   	(   R   R   R   R   R   R   R   R	   t   gene2contigR   R   R   R    R   R   (   R   R   R   t   contigR   (    (    sV   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/gff3Iterator.pyt   contigs:   s(    			c         C   sR   | j  d ƒ | j ƒ  d } | j  d ƒ x! | j ƒ  D] } | j  d ƒ q7 W| S(   NR"   i    t   match(   t   set_typet   get_transcriptst	   get_exons(   R   t   genet
   transcriptt   exon(    (    sV   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/gff3Iterator.pyR!   Q   s    c         c   s©  x8|  j  D]-} t | j ƒ  ƒ d k  r. q
 n  | j d ƒ rú t |  j ƒ d k sX t ‚ t |  j ƒ d k ss t ‚ x^ |  j D]S } |  j | } x8 | j	 ƒ  D]* } x! | j
 ƒ  D] } | j d ƒ q° Wq W| Vq} Wg  |  _ |  j j ƒ  |  j j ƒ  q
 | j d ƒ rq
 q
 t | j ƒ  j d ƒ Œ  } |  j | ƒ q
 Wx^ |  j D]S } |  j | } x8 | j	 ƒ  D]* } x! | j
 ƒ  D] } | j d ƒ qxWqeW| VqEWg  |  _ d  S(   Ni   s   ###i    R*   R   s   	(   R   R   R   R   R   R   R   R	   R   R&   R'   R%   R   R   R    R   R   (   R   R   R   R(   t   tR*   R   (    (    sV   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/gff3Iterator.pyt   contigs2genesY   s4    			c         C   s§  | j  ƒ  d k r¿ | j ƒ  |  j k r\ t j | ƒ |  j | j ƒ  <|  j j | j ƒ  ƒ n  | j ƒ  |  j k r£x5 |  j | j ƒ  D]  } |  j | j ƒ  j | ƒ q… W|  j | j ƒ  =q£nä| j  ƒ  d k r»t	 j | ƒ |  j
 | j ƒ  <| j  ƒ  d k r@| j ƒ  } t j | ƒ |  j | j ƒ  <|  j j | j ƒ  ƒ n | j ƒ  } | |  j k r|  j | j |  j
 | j ƒ  ƒ n! |  j | j |  j
 | j ƒ  ƒ | j ƒ  |  j k r x5 |  j | j ƒ  D]  } |  j
 | j ƒ  j | ƒ qÉW|  j | j ƒ  =n  | j ƒ  |  j k r\x1 |  j | j ƒ  D] } | |  j
 | j ƒ  _ q)W|  j | j ƒ  =n  | j ƒ  |  j k r£x1 |  j | j ƒ  D] } | |  j
 | j ƒ  _ q…W|  j | j ƒ  =q£nè| j  ƒ  d k r[t j | ƒ } | j | j  ƒ  ƒ | j ƒ  j d ƒ j d ƒ } x“| D]@ } | |  j
 k r@|  j
 | j | ƒ q|  j | j | ƒ qWnH| j  ƒ  d k rÿ| j ƒ  d k r‹| j ƒ  n	 | j ƒ  }	 | j ƒ  j d ƒ j d ƒ } xë | D]< } | |  j
 k rä|	 |  j
 | _ q¼|  j | j |	 ƒ q¼Wn¤ | j  ƒ  d k r£| j ƒ  d k r/| j ƒ  n	 | j ƒ  }
 | j ƒ  j d ƒ j d ƒ } xG | D]< } | |  j
 k rˆ|
 |  j
 | _ q`|  j | j |
 ƒ q`Wn  d S(   si   Add a GFF3Record instance corresponding to a single .gff3 line to the data accumulating in a GFF3IteratorR(   t   mRNAR)   R"   t
   cDNA_matcht   protein_matcht   expressed_sequence_matchR*   t   CDSR$   t
   match_partt   three_prime_UTRt   five_prime_UTRt   ,t   start_codont   -t
   stop_codonN(   s   mRNAs
   transcripts   contigR.   R/   R0   (   s   contigR.   R/   R0   (   s   exons   CDSs   matchR2   R3   R4   (   t   getTypet   getIDR	   R   t
   fromRecordR   t   appendR   t   add_transcriptR   R   t   get_parentsR   t   add_exonR   t	   _CDSstartR   t   _CDSstopR   R%   t   rstripR   t	   getStrandt   get_endt	   get_start(   R   R   R)   t   parentR*   t   CDSstartt   CDSstopt   new_exont   parentst   startt   stop(    (    sV   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/gff3Iterator.pyR   v   sh    $!**(	   t   __name__t
   __module__R   R   R    R#   R!   R,   R   (    (    (    sV   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/gff3Iterator.pyR      s   						t   __main__i   (    (   t   __doc__R   t   ost
   gff3RecordR    R   R   R   t   collectionsR   R   RM   t   opent   argvt   iteratorR   R(   (    (    (    sV   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/gff3Iterator.pyt   <module>   s   "¨