ó
ËdTc           @   s±   d  d l  Z  d  d l Z d  d l Z d  d l m Z d  d l m Z d  d l m Z m	 Z	 d  d l
 m Z d  d l m Z d f  d „  ƒ  YZ e d	 k r­ d
 GHd  d l Z n  d S(   iÿÿÿÿN(   t   Wig(   t   importr(   t   rt   FloatVector(   t   deepcopy(   t   timet   Wigsc           B   sË   e  Z d d  e d „ Z d „  Z d d e d „ Z d d d d d e d „ Z d „  Z d „  Z	 e d	 „ Z
 d
 d  d  d d e d „ Z d „  Z d „  Z d d d „ Z d „  Z d „  Z d d d „ Z RS(   i    c         C   s;   i  |  _  | |  _ | d k r7 |  j d | d | ƒ n  d S(   s  
        Parameter:
            file: a pathe to the directory that contain the Wiggle format files
            step: each chrosome will present as an vector, each value in the vector represent a short region of the chrosome, the step value is the size of the short region.
        t   patht   suppressN(   t   datat   stept   Nonet   load(   t   selfR   R
   R   (    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyt   __init__   s    		c   	      C   s  |  j  } i  } xR | D]J } xA | | j  D]2 } | j | ƒ rR | | c d 7<q* d | | <q* Wq W| j ƒ  } t | j ƒ  ƒ } x. | D]& } | | | k  r‰ | j | ƒ q‰ q‰ WxU | D]M } | | j  j ƒ  } x1 | D]) } | j | ƒ sÚ | | j | ƒ qÚ qÚ Wqº W| S(   sê   
        Description:
            make sure each Wig instance contain the same set of chrosomes, remove the chrosomes that are not contained by some instance
        Parameter:
            None
        Value:
            None
        i   (   R	   t   has_keyt   keyst   lent   pop(	   R   t   wigst   chrst   wigt   chrt   crst   wnumt   crt   wg(    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyt   ensureSameChrsByRemove   s$    		   c         C   s•  t  ƒ  } |  j } | j ƒ  } | j ƒ  | d k ri  } x" | D] } | | j ƒ  | | <qA Wt | j ƒ  ƒ t | j ƒ  ƒ }	 x,| D]| } | Gd G| | j ƒ  Gd G| d k rÔ | | j |	 d | | ƒ n! | | j | | d | | ƒ | | j ƒ  GHqˆ Wn¥ t | j ƒ  ƒ t | j ƒ  ƒ }
 x€ | D]x } | Gd G| | j ƒ  Gd G| d k r€| | j |
 d | | ƒ n | | j | | | | ƒ | | j ƒ  GHq4W| r‘d GHi  i  } } x6 | D]. } | | j	 ƒ  | | <| | j
 ƒ  | | <qÏWt | j ƒ  ƒ d t | j ƒ  ƒ } xg | D]\ } | Gd G| | j ƒ  Gd G| | j | | | ƒ | | j ƒ  Gd G| | Gd G| GHq.Wn  d S(	   s£   
        Description:
            Normalize between Wig class instances by fold change
        Parameter:
            None
        Value:
            None
        t   fromt   tog      ð?s8   further correction based on count of non-zero base pairss   based on non0sizes   and genome sizei   N(   R   R	   R   t   sortR   t   sumt   valuesR   t
   foldChanget   gsizet   non0size(   R   t
   scalepairst   sampling_totalt   nonzerot   ssR   t   namest   wsumR   t   asumt   average_totalt   namet   gsizest	   non0sizest   agsize(    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyt   foldNormalize/   sD    			
 "  !"  &*i   i † c         C   s“  |  j  j ƒ  } | d k r7 | d k r7 | d k r7 d Si  } | d k rÁd G| Gd G| Gd GHi  }	 x% | D] }
 |  j  |
 j ƒ  |	 |
 <qk Wt |	 j ƒ  ƒ t |	 j ƒ  ƒ } t |  j  | d ƒ } | j | d |	 | d ƒ xf | d D]Z }
 |  j  |
 j | d |	 |
 ƒ | j |  j  |
 ƒ |  j  |
 j |	 |
 d | ƒ qí W| j d t | ƒ ƒ | j	 d | d | g d	 | d
 | ƒ \ } } | j
 | | ƒ } | d k rÖ| j | ƒ qÖn d G| GHt | ƒ } x. | D]& }
 |  j  |
 j | ƒ j ƒ  | |
 <qÝW| j ƒ  Gd t | j ƒ  d | j ƒ  ƒ d G| j ƒ  Gd GHxF | D]> }
 |
 G| |
 Gd t | |
 d |  j  |
 j ƒ  ƒ d GHqMW| S(   sÀ   
        Description:
            caculate the sum of each wig's values after excluding the low and high percentile
        Parameter:
            None
        Value:
            None
        i    s=   calculate total signal in each sample after excluding the tops
   and bottomsE   percents of genomic regions with extremely high and low signal valuesg      ð?i   t   pid   t   bnumt   nonzero_endsC   calculate total signal in each sample in genomic regions defined byt   (g      Y@s   %) ofs   base pairs calculated:s   % of total)N(   R	   R   R   R   R    R   R   R!   t   addt
   percentilet   regionWithinValueRanget   saveR    t   multiplyt   strR"   (   R   t   region_filet   region_out_filet   exclude_low_percentt   exclude_high_percentR2   R&   R(   R%   t   wsumsR,   t   wavgt   rfwigt   lowcutt   highcutt   rg(    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyt   samplingTotal[   s:    $  " $. 	 $? <c         C   s   |  j  | S(   sÁ   
        Description:
            retrieve Wig class instance by name
        Parameter:
            k: the name of the Wig class instance
        Value:
            Wig class instance
        (   R	   (   R   t   k(    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyt   get…   s    	c         C   s   |  j  j ƒ  S(   s§   
        Description:
            Retrieve the names of all Wig class instances
        Parameter:
            None
        Value:
            a list of names
        (   R	   R   (   R   (    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyR      s    	c      
   C   s'  | } x| j  d ƒ D]	} t j j | ƒ r² xî t j t j j | d ƒ ƒ D]^ } t j j  | ƒ d } | d d k r† | d  } n  |  j | t | d |  j d | ƒƒ qM Wq t j j	 | ƒ r t j j  | ƒ d } | d d k r÷ | d  } n  |  j | t | d |  j d | ƒƒ q q Wd S(	   s   
        Description:
            Load multiple Wig class instances from wiggle format files located in one directory
        Parameter:
            path: a path to the directory that contain the wiggle format files
        Value:
            None
        t   ,s   *.wigiÿÿÿÿiüÿÿÿs   .wigR
   R   N(
   t   splitt   osR   t   isdirt   globt   joint   setR    R
   t   isfile(   R   R   R   t   pathst   infilet   fname(    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyR   ™   s    	% , t   Fc         C   s‹   | d k r< t  |  j j ƒ  ƒ d k  r< | d k r< d GHd S| d k rd |  j d | d | d | ƒ S| d k rt d	 Sd
 t | ƒ d GHd	 S(   s"  
        Description:
            normalize among Wig class instances.
        Parameter:
            nor: the normalization method, can be 'Q': quantile normalization, 'F':fold change scaling, 'S': sampling to same coverage, or 'N':no normalization
        Value:
            None
        t   Ni   s1   less than 2 datasets, no normalization to be donei   RS   R$   R%   R&   i    s   Normalization method s    not applicable nowN(   R   R	   R   R   R0   R:   (   R   t   norR=   R>   R$   R%   R&   (    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyRU   ±   s    	3 c         C   s   |  j  j | ƒ S(   sÇ   
        Description:
            remove Wig class instance by name
        Parameter:
            k: the name of the Wig class instance that is to be removed
        Value:
            None
        (   R	   R   (   R   RF   (    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyR   Ê   s    	c      	   C   sB  t  ƒ  } |  j ƒ  |  j } t d ƒ t d ƒ } i  } xR | D]J } xA | | j D]2 } | j | ƒ r{ | | c d 7<qS d | | <qS Wq? Wt | j ƒ  ƒ } | j ƒ  } | j ƒ  } t | ƒ }	 i  }
 d } xÆ | D]¾ } x§ | D]Ÿ } | | j j | ƒ st j d g ƒ | | j | <n  |
 j | ƒ sI| | j | j	 |
 | <qã |
 | | | j | j	 k  rã | | j | j	 |
 | <qã qã W| |
 | 7} qÖ Wt j d g ƒ } | j
 | |	 d d ƒx± t d |	 ƒ D]  } | | } | | } xƒ t d t | ƒ ƒ D]l } | | } | | j | j
 |
 | d d ƒ| |
 | } | | | c !| | j | |
 |  7+| } qWqÑWt j t | ƒ d | d |	 ƒ} | | ƒ } x‰ t d |	 ƒ D]x } | | } | | } x[ t d t | ƒ ƒ D]D } | | } | |
 | } | | | !| | j | |
 | *| } qâWq²Wd	 Gt  ƒ  | GHd S(
   s    
        Description:
            Normalize between Wig class instances by Quantile
        Parameter:
            None
        Value:
            None
        s   require("preprocessCore")s   normalize.quantilesi   i    g        t   refcheckt   nrowt   ncols	   time cost(   R   R   R	   R   R   R   R   t   numpyt   arrayt   sizet   resizet   ranget   matrixR   (   R   R'   R   t   normqR   R   R   R   R(   t   numt   sizesR[   R,   t   lstt   it   tsizet   jt   ttsizet   mtrt   nmtr(    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyt   quantileNormalizeÔ   sb    		
	
     


"&!


 g»½×Ùß|Û=c         C   sg  |  j  ƒ  } t |  j | d ƒ ƒ } t | ƒ d k re x+ | d D] } | j |  j | ƒ ƒ qB Wn  | j d t | ƒ ƒ | j ƒ  } d d l m } m	 } | d k r® d St
 | ƒ d k rÏ t
 | ƒ } nâ | j d ƒ }	 t |	 ƒ d	 k rt
 |	 d ƒ | t
 |	 d d  ƒ ƒ } n d | t
 |	 d ƒ ƒ } | }
 t | d
 ƒ } t d ƒ } xW d t
 t | | d | | d | ƒ ƒ j ƒ  d ƒ | d ƒ |
 k  r°| d 7} qZWd G| Gd G| GH|  j  ƒ  } x’| j ƒ  D]„} t | j | ƒ } | | 8} | d	 d
 | d	 } | j | | t j	 | d ƒ } | d GHx| D]} |  j | ƒ } | j j | ƒ ssqFn  | j | j ƒ  } d | Gd G| Gd G| j j | ƒ sÈt j d g ƒ | j | <n  | j | j | j k r| j | j | j d d ƒn  | | j | | j | d | j | <| j | j ƒ  Gd Gd | j | j ƒ  d | GHqFWqÛWd S(   s¤  
        Description:
            Adjust clonal reads count, fold change between samples will not be altered in this process.
        Parameter:
            cut: the cutoff used to define clonal reads.
                When it is interger,  a read count larger than cut will be defined as clonal;
                when it is float, a read count that is larger than mean count by a Poisson test P value < cut will be defined as clonal.
            fsz: the extension length of each read that is used to calculate the wiggle file. Extension length means the length from 5' end to 3' end,
                e.g. a read may be 36bp when it is generated by the sequencing machine, but it might have been extended to be 80bp or cutted to be 1 bp,
                so the extension length will then be 80bp or 1bp.
        Value:
            None
        Note:
            all wiggle file in a Wigs object must have the same step size.
        i    i   g      ð?iÿÿÿÿ(   t   log10t   logt   0Nt   -i   g      à?s?   function(q,avg){return(ppois(q,avg,lower.tail=FALSE,log=TRUE))}i
   s   aveage density iss   , use clonal signal cutofft   :s   	s   reduced fromR   g        RV   g0Žä.ÿ++s   , percent removed:id   g      Y@(   R   R   RG   R   R5   R!   t   meant   mathRj   Rk   t   floatRI   t   intR   R:   t   getChrsR	   RY   R   R   RZ   R[   R\   (   R   t   cutt   extendt   kst   mRF   t   avgRj   Rk   t   cot   lgpcutt   ppoisR   t   tchrvt   twgt   temp(    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyt   ajClonal  sR        +L 
"	    'c         C   sp  d d l  m } t ƒ  } |  j } i  } t j d g ƒ } x" | D] } | | j ƒ  | | <qA Wt | j ƒ  ƒ t | j	 ƒ  ƒ } xØ| D]Ð} | | } | j ƒ  }	 |	 | }
 |
 d k  rÇ d |
 }
 n | }
 xˆ| j D]}} | j
 t | j | j ƒ  ƒ d d ƒ| d | j | j d d f \ } } } } x‰ | | k  rºt | | j | | d ƒ } | | j k r†| j
 | d d d ƒn  x$ | | k  r¬| | | <| d 7} q‰W| d 7} q2Wd t |
 | j | ƒ |	 | j ƒ | } } } |	 | k r| j | c d 9<n  xB | | k  rS| d 7} | j | | | d | d ƒ c d 7<qWq× Wqˆ Wd	 Gt ƒ  | GHd S(
   s±   
        Description:
            Normalize between Wig class instances by sampling to same coverage
        Parameter:
            None
        Value:
            None
        iÿÿÿÿ(   t   randintg        i    RV   g      à?iè  i   s	   time cost(   t   randomR€   R   R	   RY   RZ   R   R    R   R   R\   Rr   R[   t   chrSumR
   (   R   R€   R'   R   R)   t   tarrayR   R*   RF   t   oldsumR`   R   t   cszRc   t   tszt   newtszt   tnum(    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyt   samplingNormalizeJ  sD    			"

 &, 
2 
4c         C   s   | |  j  | <d S(   s¸   
        Description:
            add a Wig class instance
        Parameter:
            k: the name of the new Wig class instance
            wig: the new Wig class instance
        N(   R	   (   R   RF   R   (    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyRN   r  s    t   fixedc         C   sÛ   | d k r |  j } n  |  j } x³ | D]« } | d k r t j j | ƒ s\ t j | ƒ n  t j j t | ƒ ƒ } t j j	 | | d ƒ } n | } | d d k r³ | d } n  | | j
 d | d | d | ƒ q( Wd S(	   sŠ  
        Description:
            save all Wig class instances to wiggle format files in a directory.
            
        Parameter:
            path: the path to the directory that will contain the wiggle format files
            step: the stp size of the wiggle format files
            format: the format of the wiggle files, can be 'fixed' or 'var'
        Value:
            None
        t    iÿÿÿÿiüÿÿÿs   .wigt   filet   formatR
   N(   R   R
   R	   RJ   R   RK   t   mkdirRI   R:   RM   R8   (   R   R   R
   R   R   RF   t   dft   tpath(    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyR8   {  s     	  N(   t   __name__t
   __module__R   t   FalseR   R   R0   RE   RG   R   R   RU   R   Ri   R   R‰   RN   R8   (    (    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyR      s   
	,*	
	
	
	?7	(		t   __main__R‹   (   RJ   RL   RY   R   R    t   rpy2.robjects.packagesR   t   rpy2.robjectsR   R   t   copyR   R   R   R‘   t   sys(    (    (    sD   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wigs.pyt   <module>   s   $ÿ †