ó
ž(•Qc           @   sS  d  Z  d d l Z d d l Z e j j e ƒ Z e j j e ƒ Z e j j e ƒ d d l	 m
 Z
 d d l Z d d l m Z d „  Z d d d „  ƒ  YZ d e f d	 „  ƒ  YZ d
 „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z e j d „ Z e d k rOe e j ƒ e j e j d Id IJn  d S(   s  
From a counted.origins file and a .gff3 file giving transcript positions, calculate the empirical probability of
fragmentation at each position in each transcript
Created on Apr 15, 2010
@author: ian
Modified to produce Stranded_Read_Initiation_Probability_Vectors 2012-12-07
iÿÿÿÿN(   t   GFF3Iterator(   t	   Tabixfilec           C   s!   d t  j d GHt  j d ƒ d  S(   NsG   Usage: python %s transcripts.gff3  counted.origins.txt.gz  output.shelfi    i   (   t   syst   argvt   exit(    (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   usage   s    t   origin_count_linec           B   s2   e  Z d  „  Z d „  Z d „  Z e d „  ƒ Z RS(   c         C   s:   | |  _  t | ƒ |  _ t | ƒ |  _ t | ƒ |  _ d  S(   N(   t   seq_idt   intt   positiont   floatt
   left_countt   right_count(   t   selfR   R	   R   R   (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   __init__   s    	c         C   s    d |  j  |  j |  j |  j f S(   Ns   %s:%d %f, %f(   R   R	   R   R   (   R   (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   __str__"   s    c         C   s    d |  j  |  j |  j |  j f S(   Ns   %s	%d	%f	%f(   R   R	   R   R   (   R   (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   __repr__%   s    c         C   s`   |  j  ƒ  j d ƒ } t | ƒ d k  r+ d  St | d t | d ƒ t | d ƒ t | d ƒ ƒ S(   Ns   	i   i    i   i   i   (   t   stript   splitt   lent   NoneR   R   R
   (   t   linet   fields(    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   fromLine(   s    (   t   __name__t
   __module__R   R   R   t   staticmethodR   (    (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyR      s   			t+   Stranded_Read_Initiation_Probability_Vectorc           B   sn   e  Z d  Z d „  Z e d „  ƒ Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d „  Z d	 „  Z d
 „  Z RS(   sp   
    Probabilities of read initiation in forward and backward directions at each position in a DNA sequence
    c         C   s#   t  j |  ƒ | |  _ | |  _ d  S(   N(   t   objectR   t   plust   minus(   R   t   _plust   _minus(    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyR   5   s    	c         C   s   |  g  g  ƒ S(   N(    (   t   cls(    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   empty:   s    c         C   s   t  t |  j ƒ t |  j ƒ ƒ S(   N(   t   minR   R   R   (   R   (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   __len__>   s    c         C   s    d t  |  j ƒ t  |  j ƒ f S(   Ns3   Stranded_Read_Initiation_Probability_Vector(%s, %s)(   t   concise_list_reprR   R   (   R   (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyR   A   s    c         C   s
   |  j  ƒ  S(   N(   R   (   R   (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyR   E   s    c         C   s   t  |  j | |  j | ƒ S(   N(   R   R   R   (   R   t   item(    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   __getitem__H   s    c         C   s*   |  j  j | j  ƒ |  j j | j ƒ d  S(   N(   R   t   extendR   (   R   t   other(    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyR(   K   s    c         C   sJ   g  |  j  D] } | | ^ q
 |  _  g  |  j D] } | | ^ q- |  _ d  S(   N(   R   R   (   R   t   factort   p(    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   scaleO   s    #c         C   s   t  d |  j d |  j ƒ S(   NR   R    (   R   R   R   (   R   (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   cloneS   s    c         C   s7   |  j  |  j |  _ |  _  |  j j ƒ  |  j  j ƒ  d  S(   N(   R   R   t   reverse(   R   (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   switch_strandV   s    (   R   R   t   __doc__R   t   classmethodR"   R$   R   R   R'   R(   R,   R-   R/   (    (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyR   0   s   								c         C   s(   d d j  g  |  D] } d | ^ q ƒ S(   Ns   [%s]t   ,s   %.2g(   t   join(   t   alistt   v(    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyR%   \   s    c         C   st   yG g  | j  |  j ƒ  |  j ƒ  |  j ƒ  ƒ D] } t j | ƒ ^ q+ } Wn& t k
 ro } t j | IJg  } n X| S(   s:    exon is a GFF3Exon instance.
    ocl_iter is a Tabixfile.(	   t   fetcht	   get_seqIDt	   get_startt   get_endR   R   t
   ValueErrorR   t   stderr(   t   exont   ocl_iterR   t   countst   ve(    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   get_origin_counts_for_exon`   s    D
c         C   sJ   g  } x= t  |  | ƒ D], } | j | |  j ƒ  7_ | j | ƒ q W| S(   N(   R@   R	   R8   t   append(   R<   t   origin_count_tabixt	   len_probst   exon_countst   ocl(    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   get_exon_origin_counts_listl   s
    c         C   së   d } d } xO t  t |  ƒ d ƒ D]7 } |  | j | k r# |  | j |  | j k r# Pq# q# W|  | j } | d k rç d | } xc t  | d d d ƒ D]H } |  | j d k r˜ d | | |  | j } |  | j | 9_ q˜ q˜ Wn  |  S(   Ni    i   g      ð?i   iÿÿÿÿ(   t   rangeR   R   R   R	   (   R>   t	   avg_countt
   left_flankt   it   slopet   jR*   (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   compensate_left_flank_countst   s    -
c         C   s  | d } t  |  ƒ d } x_ t t  |  ƒ d t  |  ƒ d d ƒ D]7 } |  | j | k rA |  | j |  | j k rA PqA qA W|  | j } | | d k  rd | d | } xb t | t  |  ƒ ƒ D]H } |  | j d k rÁ d | |  | j | } |  | j | 9_ qÁ qÁ Wn  |  S(   Ni   i   iÿÿÿÿg      ð?i    (   R   RG   R   R   R	   (   R>   RH   RC   t   right_flankRJ   RK   RL   R*   (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   compensate_right_flank_counts…   s    
--c         C   s"  t  g  |  D] } | j ^ q
 ƒ t  | j ƒ } t  g  |  D] } | j ^ q9 ƒ t  | j ƒ } | d k r¾ g  | j D] } | | ^ qt | _ x. |  D]# } | j | j c | j | 7<q” Wn  | d k rg  | j D] } | | ^ qÔ | _ x. |  D]# } | j | j c | j | 7<qô Wn  | S(   Ni    (   t   sumR   R   R   R   R	   (   R>   t   probsRE   t   minus_totalt
   plus_totalt   pc(    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   calc_probs_from_counts•   s    //#$#$c         C   sÃ   t  j ƒ  } g  } d } d } x= |  j ƒ  D]/ } | j t | | | ƒ ƒ | t | ƒ 7} q+ W| r  d | } | g | | _ | g | | _ t | | ƒ } n  |  j	 ƒ  d k r¿ | j
 ƒ  n  | S(   si    transcript is a GFF3mRNA instance.
    origin_count_tabix is a tabix-indexed file of origin_count_lines.i    g      ð?t   -(   R   R"   t	   get_exonsR(   RF   R   R   R   RU   t
   get_strandR/   (   t
   transcriptRB   RQ   R>   t   totalt   transcript_lenR<   t   pseudocount(    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   get_probs_for_transcript¥   s    
c         C   s5   g  t  |  ƒ D]$ \ } } | d k r | | f ^ q S(   Ni    (   t	   enumerate(   RQ   RJ   R+   (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   condense_probs¹   s    c         C   s\   | } |  r* t  |  d d d | ƒ } n  d g | } x |  D] \ } } | | | <q> W| S(   Niÿÿÿÿi    i   (   t   max(   t
   tuple_listt   target_lengtht   lengthRQ   RJ   R+   (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   expand_probs½   s    c         C   s]   t  |  ƒ j ƒ  } xD | D]< } x3 | j ƒ  D]% } t | | ƒ } | | | j ƒ  <q, Wq Wd  S(   N(   R    t   genest   get_transcriptsR]   t   get_ID(   t   gff3t   counts_tabixt   prob_vectorsRe   t   geneRY   RQ   (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt+   do_calcTranscriptFragmentationProbabilitiesÇ   s
    c         C   ss   yT t  |  d ƒ } t |  d ƒ } t j  |  d d ƒ } t | | | ƒ | j ƒ  Wn t k
 rn t ƒ  n Xd  S(   Ni   i   i   t   c(   t   openR   t   shelveRl   t   closet
   IndexErrorR   (   R   Rh   Ri   Rj   (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt-   calcTranscriptFragmentationProbabilities_mainÐ   s    t   __main__i    s   done.(    (    R0   R   t   ost   patht   dirnamet   __file__t   this_dirt   srcRA   t   lib.gff3IteratorR    Ro   t   pysamR   R   R   R   R   R%   R@   RF   RM   RO   RU   R]   R_   Rd   Rl   R   Rr   R   R;   (    (    (    sW   /home/ian/PycharmProjects/RNA-Seq-Simulator/calcTranscriptFragmentationProbabilities.pyt   <module>
   s2   	,									
		