
dTc           @   s   d  d l  Z  d  d l Z d  d l m Z d  d l m Z m Z d  d l m Z m	 Z	 m
 Z
 d  d l m Z d  d l m Z d f  d     YZ e d	 k r d  d l Z d  d l Z d  d l Z e j e j j   d
 d  e _ n  d S(   iN(   t   deepcopy(   t   rt   FloatVector(   t   log10t   sqrtt   log(   t   Summits(   t   timet   Wigc           B   s  e  Z d  d  d e d  Z d   Z d   Z d   Z d   Z d   Z d d d	  Z	 d
 d  Z
 dF d d d d d d d e d e e d d  Z i  dF d d d d e e d  Z d d d d d e d d d d d dF dF d  Z d d d d d d d d d dF d 
 Z d d d dF d  Z d   Z d  d  Z d   Z d   Z d   Z d   Z d   Z d d   Z d!   Z d"   Z d#   Z d$   Z d% e d&  Z e d'  Z e d(  Z e d)  Z  e d*  Z! d+   Z" d,   Z# d-   Z$ d.   Z% d d/ d d0 d1 g d% e d2  Z& d3   Z' d4   Z( d  d5  Z) dF dF d6  Z* d7   Z+ d8   Z, d9 dF e d:  Z- dF dF d9 d; d<  Z. d=   Z/ d>   Z0 d1 d?  Z1 d@   Z2 dA   Z3 dB   Z4 dC   Z5 dF dD  Z6 dE   Z7 RS(G   t    i    c         C   s   i  |  _  | |  _ | d k r x t |  D] } | j   } t j d g  |  j  | d <| d k r |  j  | d j t | d  | d d d q+ |  j  | d j t | d  d d d q+ Wn  | d k r |  j | d | d | n  d S(	   s   
        Parameter:
            file: a path to a file in Wiggle format
            step: each chrosome will present as an vector, each value in the vector represent a short region of the chrosome, the step value is the size of the short region.
        R	   g        i    i   t   refcheckt   gfilet   suppressN(	   t   datat   stept   opent   splitt   numpyt   arrayt   resizet   intt   load(   t   selft   fileR   R   R   t   linet   col(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   __init__
   s    		 33 c         C   sI   |  j  } d } x3 | D]+ } x" | | D] } | t |  7} q' Wq W| S(   s   
        Description:
            the sum of absolute value at each data point.
        
        Parameter:
            None
            
        Value:
            float value
        g        (   R   t   abs(   R   t   wigt   sumt   chrt   v(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   absSum   s    	 c         C   sd  |  j  | j  k r1 t |  } | j |  j   n  i  } x |  j D] } d | | <qA Wx | j D] } d | | <q_ W| j   } x | D] } |  j j |  s |  j |  n  | j j |  s | j |  n  |  j | j } | j | j } | | k r| j | j | d d n) | | k  rB|  j | j | d d n  |  j | c | j | 7<q Wt	 S(   s/  
        Description:
            add value at each data point between two Wig class instances.
        
        Parameter:
            wig2: a Wig class instance whose value will not change but will be add to the Wig class instance calling this method.
        
        Value:
            None
        i   R
   i    (
   R   R    t
   changeStepR   t   keyst   has_keyt   addChrt   sizeR   t   True(   R   t   wig2t   chrsR   t   lth1t   lth2(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   add)   s,          c         C   s   t  j d g  |  j | <d S(   s   
        Description:
            add a new chrosome to the existance Wig class instance
        
        Parameter:
            chr: the name of the chrosome to be added
            
        Value: none
        g        N(   R   R   R   (   R   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR$   D   s    
c   	      C   s  |  j  } i  } x t |  D] } | j   } t | d  | d } | | | d <| |  j | d j k  r |  j | d |  |  j | d <n  | |  j | d j k r |  j | d j | d d q q W|  j j   } x0 | D]( } | j |  s |  j j	 |  q q Wd  S(   Ni   i    R
   (
   R   R   R   R   R   R%   R   R"   R#   t   pop(	   R   R   R   t   crsR   R   R%   t   tcrst   cr(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt
   ajust_sizeO   s    	 # % c         C   s   |  j  | S(   s   
        Description:
            retrieve a chrosome by name.
            
        Parameter:
            chr: the name of the chrosome to be retrived.
        
        Value:
            a list as an instance of the numpy.array.
        (   R   (   R   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   getChr\   s    i2   c      	   C   s  i  } |  j    } | j    } | |  j :} | |  j :} x'| D]} i  | | <| | k r? t |  j |  | j |   }	 |  j |  |	 k r |  j | |	  n  | j |  |	 k r | j | |	  n  x t | | d  D]w }
 t j t |  j	 |  |	 |  j |
   t | j	 |  |
 |	 |  j !  } t
 t |  j   d  | | |
 <q Wq? q? Wt | d  } | j d  x t | | d  D]~ }
 d } d } |  j   } x- | D]% } | |  j |  | | |
 7} qW| j t |
 |  j  d t | |  d  qWd	 S(
   sV  
        Description:
            Calculate correlation between the two Wig class instances allowing shift distances.
            
        Parameters:
            ofile: the output file used to save the result.
            mind: minimal shift distance
            maxd: maximal shift distance
        
        Value:
            None
        i   it   ws&   Shift_disance	correlation_coefficient
g        i    s   	s   
N(   t   getChrsR   t   maxt   chrSizet	   resizeChrt   rangeR   t   corR   R1   t   floatt   strR   R   t   writet   gsize(   R   t   wg2t   ofilet   mindt   maxdt   dR(   t   chrs2R   t   cst   tdR   t   fot   st   gs(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   correlationh   s2    
!  M/#i  c         C   s}  t  |  } |  j | j k r1 | j |  j  n  i  } x |  j D] } d | | <qA Wx | j D] } d | | <q_ W| j   } xw | D]o } t |  j |  } t | j |  } | | k r | j | j | d d q |  j | j | d d q W| d k r| j d |  n  | j |  j	   | j	    d G|  j	   GH|  j
 |  d G|  j	   GH|  j   d G|  j	   GHt S(   s  
        Description:
            subtract the value of each data point in wig2 from self after smoothing of wig2
        
        Parameter:
            wig2: the Wig class instance used to do the subtraction
            lmd: the bin size used to smooth wig2
        
        Value:
            None
        
        Note:
            a copy of wig2 is smoothed and used to do the subtraction, wig2 will not change.
        i   R
   i    t   lmds   before subtracting:s   after subtracting:s   after removing negtive values:(   R    R   R!   R   R"   t   lenR   t   smootht
   foldChangeR   t   subtractt   rvNegR&   (   R   R'   RI   R(   R   R)   R*   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   bgsub   s0         
i   i   R2   i@B c   %      C   sQ  t  d  } |  j   } | d k r | d k r t | d  } xJ d t t | | |   j   d  t d  | k  r | d 7} qC Wn  d t |  d G| GH|  j } |  j } | | } | d k  r d } n  i  } | s d	 GHn  x | D] } i  | | <| | j	 } | d k rq n  | s,| Gn  d \ } } x | | k  r| d k  rs| | | | k r| } qnF | | | | k  r| | | k r| | | | | | <n  d } n  | d 7} q;W| s t
 | |  GHq q W| d k rJ| sd
 GHn  | j   } | j   x4| D])} | | j   } | sK| Gd Gt
 |  Gn  | j   t
 |  d } d } x | | k  r&| | d | | | | | k r| | | | | | | | d k  r| | | | d | | | | <n  | | j | | d  | | | | d <n  | d 7} qnW| sd Gt
 | |  GHqqWn  | d k rMt | |  } | r| d k r| r| j d  q| j d  qn  | j   } | j   x| D]} | | j   } | j   xf| D]^} d } | | | | | | } } t | | | | ! }  d }! | | | k r1g  }" x t | |  D] } | | | |  k r|" j t | |   n  | | | | k r| | 7} n  |	 r| | | d k r|! | | | 7}! qqP|! | | | 7}! qPW|! | }! d j |"  }# t
 |"  d } | rt t | |  |   j   d  t d  }$ | j | d t |  d t | | |  d |# d t |   d t |!  d t |  d t d |$  d  qB| j | d t |  d t | | |  d |# d t |   d t |!  d t |  d  q| | j |  qWqWn  | S(   s  
        Description:
            This fuction is designed to call broad peaks, such as histone modification peaks.
        
        Parameter:
            ofile: a path to the file used to save the peaks.
            width: minimal width of peaks
            distance: minimal distance between peaks, neighboring peaks with distance shorter than this value will be merged.
            pheight: a P value cutoff used to call peaks.
            height: the occupancy cutoff used to call peaks. valide only when pheight is set to 1.
            calculate_P_value: calculate P value for each peak if set to 1, else set to 0.
            mode: the mode to write result to ofile, could be either 'w' to create new file or 'a' to append to a existing file
            title_line: set to 1 if need a title line in the result file ofile
        
        Value:
            pks[chrosome_name][start_position]=end_position
            
        s?   function(q,avg){return(ppois(q,avg,lower.tail=FALSE,log=TRUE))}i    g      ?ii
   i   s   whole genome aveage value is s   , use calling cutoffs   calling ...t   meringt   fromt   toR2   s_   chr	start	end	summit_pos	summit_value	strand	total_signal	width_above_cutoff	summit_minus_logP
sM   chr	start	end	summit_pos	summit_value	strand	total_signal	width_above_cutoff
t   ,i   s   	s   	+	s   
(   ii    N(   R   t   meanR   R9   R:   R   R   R   R   R%   RJ   R"   t   sortR,   t   NoneR   R;   R4   R7   t   appendt   join(%   R   R>   t   widtht   distancet   pheightt   heightt   calculate_P_valuet   modet
   title_linet   pos_onlyt   foldR   t   fdrt   fdrSampleSizet   ppoist   mt   dicR   t   twidtht   pksR   t   ltht   startt   posR-   t   pst   it   outft   pt   width_above_cutoffRF   t   eR   t   auct   smtst   smtt   pvl(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   callRegions   s    < 		
 	 
    	  
 
$( %  

   
/|jc	         C   s  t  d  }	 |  j   }
 | d k r | d k r t |
 d  } xJ d t t |	 | |
   j   d  t d  | k  r | d 7} qC Wn  d t |
  d G| GHt | d	  } | d k r | j d
  n | j d  |  j	 } |  j
 } d } x| D]} | | j   } | j   x| D]} | | | | | k  rFq"n  | | | | | | d } } | | j | k r| | j | d d d n  d } t | | | | ! } | rt | d  } n  d } | } x | | k  rZ| | | | k r	| | 7} n  | r;| | | d k rM| | | | 7} qMn | | | | 7} | d 7} qW| | } | | 7} | r6t t |	 | |
   j   d  t d  } | j | d t |  d t | | |  d t | | | | d  d t |  d t |  d t |  d t d |  d  q"| j | d t |  d t | | |  d t | | | | d  d t |  d t |  d t |  d  q"Wq Wd G| GH| j   d S(   s&   
        Add description here
        s?   function(q,avg){return(ppois(q,avg,lower.tail=FALSE,log=TRUE))}i    g      ?ii
   i   s   whole genome aveage value is s   , use cutoffR2   sH   chr	start	end	center	width_above_cutoff	total_signal	height	height_logP
s<   chr	start	end	center	width_above_cutoff	total_signal	height
R
   s   	i   s   
s   total_width_above_cutoff:N(   R   RT   R   R9   R:   R   R   R   R;   R   R   R"   RU   R%   R   R4   t   close(   R   t   regionsR   R[   R\   RY   R]   R`   R   Rd   Re   Rn   R   Rf   t   total_width_above_cutoffR   t   startsRj   RF   Rq   Rp   R   Rr   Rm   Ru   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   fillRegions  sZ    <  		
 !    

/	i(   gh㈵>c   %   	   C   s  t  | |
  } |
 d k rs | d k rM |	 r= | j d  qp | j d  qs |	 rc | j d  qs | j d  n  |  } t d  } |  j   } | d k r | d k r t | d  } x@ d t t | | |   j   d	  | k  r | d
 7} q Wn  d t |  d G| GHd GH| j d | d d
 d | d |  } d GH| j	 d | d |  | rd GH| j
 d | d | d |  n  | d k rd GH| j | d | n  | rd GHn d GH| d | j :} | j } | d } | d | j } x| D]|} | j |  | j d
 } | GH| d k r.qn  | | d } | | d } | d k ru| | d } | | d } n  | j } |	 r| t | | d  |  } n  d } x| | k  rr| | }  d" \ }! }" | d k r|  d d |  j d
 |  d d |  j d
 }! }" n|  | j }# |# d
 }$ x&|! d k rP|$ | k rLd
 }! n  |# |$ | d k r| j | |$ t | j | |$ | |$ | d
 ! k r|$ | j d
 }! qC| j | |$ | | k r|$ | j d
 }! qC| j | |$ | k  rC|$ | j d
 }! qCn; | d k rC|$ | j | | d
 k rC|$ | j d
 }! n  |$ d
 8}$ q+W|# d
 }$ x9|" d k r|$ | | k r| | j d
 }" n |$ |# | d k rJ| j | |$ t | j | |$ | |$ | d
 ! k r|$ | j d
 }" q| j | |$ | | k r|$ | j d
 }" q| j | |$ | k  r|$ | j d
 }" qn? | | d
 k  r|$ | j | | d
 k r|$ | j d
 }" n  |$ d
 7}$ q^W|" |! k re| d k r|	 r?| j | d  t |!  d  t |"  d  t |  d
  d  t | |  d  t | |  d  t | |  d  t | |  d!  qb| j | d  t |!  d  t |"  d  t |  d
  d  t | |  d  t | |  d!  qe|	 r| j | d  t |!  d  t |"  d  t |  d
  d  t | |  d  t | |  d!  qe| j | d  t |!  d  t |"  d  t |  d
  d  t | |  d!  n  | d
 7} qWqW| j   | S(#   s  
        Description:
            This fuction is designed to call nucleosome positions
            
        Parameter:
            ofile: a path to the file used to save the positions.
            width: minimal width of positions
            distance: minimal distance between positions, neighboring positions with distance shorter than this value will be merged.
            edge: set to 1 if need to search for position edges, else set to 0
            pcut: a P value cutoff used to call positions.
            height: the occupancy cutoff used to call positions. valide only when pheight is set to 1.
            fill_gap: fill the gap between two neighboring nuclesomes with a new nucleosome if the gap size is reasonable.
            fill_value: the default value to be set to a filled nucleosome
            calculate_P_value: calculate P value for each position if set to 1, else set to 0.
            mode: the mode to write result to ofile, could be either 'w' to create new file or 'a' to append to a existing file
            title_line: set to 1 if need a title line in the result file ofile
            poscal: set to 1 if need to calculate nucleosome positioning score and P value,else set to 0
        
        Value:
            None.
            
        R2   i    sR   chr	start	end	smt_pos	smt_value	smt_log10pval	fuzziness_score	fuzziness_log10pval
s0   chr	start	end	smt_pos	smt_value	fuzziness_score
s.   chr	start	end	smt_pos	smt_value	smt_log10pval
s    chr	start	end	smt_pos	smt_value
sI   function(q,avg){return(ppois(q,avg,lower.tail=FALSE,log.p=TRUE)/log(10))}g      ?ii   s   whole genome aveage value is s   , use calling cutoffs   calling summits ...RY   t   pcutR\   Rx   s   merging summits ...t   wgRZ   s   filling gaps ...s   calculating positioning scoret   rds   searching position edges ...s   saving positions ...i   Ro   R   RF   t   pposiJ   s   	s   
(   i    i    (   R   R;   R   RT   R   R9   R:   R   t   callSummitst   merget   fillgapt   positioningR   R   R5   R%   R   t   minRw   (%   R   R>   RY   RZ   t   edgeR|   R\   t   fill_gapt
   fill_valueR]   R^   R_   t   poscalRx   R~   Rn   t   twigRd   Re   Rs   Rf   t   rhalfdist   halfdisR   Ri   t   posest   valusR   R   t   tlent   pvsRm   Rk   Rj   t   endt   pppRo   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   callPositionsP  s      2 $ 	
 	 
 6
 	7   ' 
 7   + ffT
i   c   "      C   s%  |  } t    } t |  | _ | j |  | d k rR d GH| j | d | n  t | |	  } |	 d k r | d k r | r | j d  q | j d  q | r | j d  q | j d  n  t d	  } |  j   } | d k rH| d k rHt	 | d
  } x@ d t
 t | | |   j   d  | k  rD| d 7} qWn  | rVd GHn d GH| d | j :} | j } | d } | d | j } x| D]|} | j |  | j d } | GH| d k rqn  | | d } | | d } | d k r| | d } | | d } n  | j } | rD| t | | d  |  } n  d } x| | k  r| | } d \ } } | d k r| d d |  j d | d d |  j d } } n| | j }  |  d }! x&| d k r|! | k rd } n  |  |! | d k r| j | |! t | j | |! | |! | d ! k rK|! | j d } q| j | |! | | k rz|! | j d } q| j | |! | k  r|! | j d } qn; | d k r|! | j | | d k r|! | j d } n  |! d 8}! qW|  d }! x9| d k r6|! | | k r.| | j d } n |! |  | d k r| j | |! t | j | |! | |! | d ! k r|! | j d } q)| j | |! | | k r|! | j d } q)| j | |! | k  r)|! | j d } q)n? | | d k  r)|! | j | | d k r)|! | j d } n  |! d 7}! qW| | k r| d k rE| r| j | d t |  d t |  d t | d  d t | |  d t | |  d t | |  d t | |  d  q| j | d t |  d t |  d t | d  d t | |  d t | |  d  q| r| j | d t |  d t |  d t | d  d t | |  d t | |  d  q| j | d t |  d t |  d t | d  d t | |  d  n  | d 7} qMWqW| j   | S(   s  
        Description:
            This fuction is designed to call nucleosome positions
            
        Parameter:
            ofile: a path to the file used to save the positions.
            width: minimal width of positions
            distance: minimal distance between positions, neighboring positions with distance shorter than this value will be merged.
            edge: set to 1 if need to search for position edges, else set to 0
            pcut: a P value cutoff used to call positions.
            height: the occupancy cutoff used to call positions. valide only when pheight is set to 1.
            fill_gap: fill the gap between two neighboring nuclesomes with a new nucleosome if the gap size is reasonable.
            fill_value: the default value to be set to a filled nucleosome
            calculate_P_value: calculate P value for each position if set to 1, else set to 0.
            mode: the mode to write result to ofile, could be either 'w' to create new file or 'a' to append to a existing file
            title_line: set to 1 if need a title line in the result file ofile
            poscal: set to 1 if need to calculate nucleosome positioning score and P value,else set to 0
        
        Value:
            None.
            
        i    s   calculating positioning scoreR~   R2   sR   chr	start	end	smt_pos	smt_value	smt_log10pval	fuzziness_score	fuzziness_log10pval
s0   chr	start	end	smt_pos	smt_value	fuzziness_score
s.   chr	start	end	smt_pos	smt_value	smt_log10pval
s    chr	start	end	smt_pos	smt_value
sI   function(q,avg){return(ppois(q,avg,lower.tail=FALSE,log.p=TRUE)/log(10))}g      ?ii   s   searching position edges ...s   saving positions ...i   Ro   R   RF   R   iJ   s   	s   
(   i    i    (   R   R    R   t   fetchValueFromWigR   R   R;   R   RT   R   R9   R:   R   R   R5   R%   R   R   Rw   ("   R   Rf   R   RY   RZ   R   R|   R\   R]   R^   R_   R   R~   R   Rs   Rn   Rd   Re   R   R   R   Ri   R   R   R   R   R   R   Rm   Rk   Rj   R   R   Ro   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   fillPositions  s    	  2  	
 	 
 6
 	7   ' 
 7   + ffT
c         C   sV  | d k rU t  d  } |  j   } t t | | |   j   d  } d G| GHn  t   } | d :} | d k r i  } xA |  j D]3 } i  | | <|  j | j |  j	 d | | | <q Wn  x| D]} | GHt
 j d g  }	 t
 j d g  }
 i  } |  j | } |  j	 } | | } | d } | | j   } | j   d } x| D]} | | | | | | } } x| | k  r | | | | | !j   } | | k  r| | 7} qq| | | k r| |	 j d k r|	 j | d d	 d |
 j | d d	 d n  | | | | | f \ } } } } } xD | | k ro| d 8} | d k r[| | } q,| d d } } q,WxH | | k r| d 7} | | k  r| | } qs| d | d } } qsW| | d } | } | | |	 | <| |
 | <| d 7} | | 7} | | k  r| } qqq| d 7} qqWqKW|	 |  | d
 <|
 |  | d <| | j | <q W| S(   s  
        Description:
            call occupancy summits using a sliding window.
        
        Parameter:
            width: the width of the sliding window used to call summits.
            pcut:  a P value cutoff used to call positions.
            height: the occupancy cutoff used to call positions. valide only when pheight is set to 1.
            regions: A set of regions in which the summits are to be defined, regions[chromatin_name][start_position]=end_position
        i   sD   function(p,avg){return(qpois(log(p),avg,lower.tail=FALSE,log=TRUE))}is   set summit calling cutoff toi   i    g        i  R
   Ro   R   N(   R   RT   R   R:   R   R   RV   R   R%   R   R   R   R"   RU   R4   R   (   R   RY   R|   R\   Rx   t   qpRe   Rs   R/   R   R   Rf   t   lstR   t	   backwidtht   region_posest   numRo   Rm   t   dlthR   t   tit   tstartt   tendt   v1t   v2(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR   <  st    %	

*	


$
 
 


 c   	      C   s}  | |  j  k r t Sd G|  j  Gd G| GH| |  j  k rH| |  j  d k re d G| Gd G|  j  Gd GHt S| |  j  } x |  j D] } |  j | j } | | } t j d g  } | j | d d xX t d |  D]G } x> t | | | d	 |  D]! } | | c |  j | | 7<q Wq W| |  j | <|  j | | |  j | <q| Wn  | |  j  k  rp|  j  | d k rd G| Gd G|  j  Gd Gt	 |  j   | GHt S|  j  | } x |  j D] } |  j | j } | | } t j d g  } | j | d d xR t d |  D]A } x8 t | | | d	 |  D] } |  j | | | | <q%WqW| |  j | <|  j | |  j | <qWn  | |  _  t S(
   s#  
        Description:
            change the step size.
            Note: The new value for each step is determined by sampling an old value within the step!
        
        Parameter:
            step: the new step that is to be setted as.
        
        Value:
            None
        s   change wiggle step fromRR   i    s'   Wrong: the fold change between new steps   and old steps   is not integer!g        R
   i   (
   R   R&   t   FalseR   R%   R   R   R   R7   R   (	   R   R   t   fdR   Ri   t   nlthR   t   nposRk   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR!     sF     
" #)
" 	c      	   C   sy  t  j d d  t |   } t |  } | j | j k rM | j | j  n  t d | j  } | j   } | j   } d } x( | j D] } | t | j |  7} q Wd }	 t	   }
 x| j D]} | d k r | | k r q n  | j j
 |  r | j | c d 7<| j | c d 7<t j d  | j | <t | j |  } t | j |  } t | |  } | | k r| j | j | d d n) | | k r| j | j | d d n  | j | j | d d xt d |  D]r} t  j | j | | | j | | | | j | | | | j | |  } | j | | | j | | k rld | j | | <n t  d	 t  j | d
 d  } t t | d  j   d  } | d k  rd } n  | j | | | j | | k rt |  | j | | <n t |  | j | | <|	 d 7}	 t	   |
 d k rt	   }
 |	 d | Gd G| Gd GHqqWq q W| j   | j   | S(   s  
        Description:
            do Chi-square test to calculate differential signial for each data point between two Wig class instances.
        
        Parameter:
            wig2: a Wig class instance to be compared to
            tchr: specify a chrosome that is to be compared, leave it to '' if want to do for all chrosomes.
            
        Value:
            A Wig class instance that cantain data for the differential signial
        
        t   warniR   i    R	   i   g        R
   s
   chisq.testt   nrowi   ig       i
   g      Y@s
   percent oft   done(   R   t   optionsR    R   R!   R   R   R   RJ   R   R#   R   R   R4   R   R7   t   ct   matrixR9   R:   R   R   t   clearEmptyEnd(   R   R'   t   tchrt   wig1t   outt   sum1t   sum2R   R   t   donenumt   ctimeR)   R*   Ri   Ro   t   vect   testRu   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt	   chisqTest  s\      	   L"   	" 
	'

c         C   s/   |  j  j |  r' |  j  | j |  j Sd Sd S(   s  
        Description:
            retrive chrosome size by name
            
        Parameter:
            chr: the name of chrosome whose size is to be retrived
        Value:
            Interger value (step size has been multiplied)
            
        i    N(   R   R#   R%   R   (   R   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR5     s     c         C   s   |  j  | j   |  j S(   s   
        Description:
            Retrieve the sum of occupancy by chrosome
        Parameter:
            chr: the name of chrosome whose occupancy sum is to be retrieved
        Value:
            Float value
        (   R   R   R   (   R   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   chrSum  s    	c         C   s}   xv |  j  D]k } t |  j  |  } x5 |  j  | | d d k rZ | d k rZ | d 8} q& W|  j  | j | d d q
 Wd S(   s   
        Description:
            Clear the 0 values at the end of each chrosome
        Parameter:
            None
        Value:
            None
        i   i    R
   N(   R   RJ   R   (   R   R   R%   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR     s
    	* c         C   s  t  |   } t  |  } | j | j k r= | j | j  n  i  } x | j D] } d | | <qM Wx | j D] } d | | <qk W| j   } x | D] } t | j |  } t | j |  } | | k r | j | j | d d n | j | j | d d t | |  } | j | c d 7<| j | c d 7<| j | c | j | :<t j	 t
 | j |   d | !| j | d | +q W| S(   s  
        Description:
            divid by wig2 at each data point and then transform the resultant value by log2
        
        Parameter:
            wig2: the Wig class instance that will be used to devide
            
        Value:
            A Wig class instance
        i   R
   i    (   R    R   R!   R   R"   RJ   R   R4   R   t   log2R   (   R   R'   R   R(   R   R)   R*   Ri   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   divideAndLog2  s,       5c         C   sE  t  |   } t  |  } | j | j k r= | j | j  n  i  } x | j D] } d | | <qM Wx | j D] } d | | <qk W| j   } x | D] } t | j |  } t | j |  } | | k r | j | j | d d n | j | j | d d t | |  } | j | c d 7<| j | c | j | :<q W| S(   s  
        Description:
            divid by wig2 at each data point and then transform the resultant value by log2
        
        Parameter:
            wig2: the Wig class instance that will be used to devide
            
        Value:
            A Wig class instance
        i   R
   i    (   R    R   R!   R   R"   RJ   R   R4   (   R   R'   R   R(   R   R)   R*   Ri   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   divide*  s(       t   Cc         C   s   | d k r$ d GH|  j  |  } | S| d k rC |  j |  } | S| d k rg d GH|  j |  } | S| d k r d GHt |   } | j |  | S| d k r d	 GHt d
 |  j  Sd t |  d GHt d
 |  j  Sd S(   sL  
        Description:
            Do differential test wit cwig
        Parameter:
            cwig: the Wig class instance which is to be tesed to.
            test: the statistical method that will be used to do the differential test
        Value:
            a Wig class instance containing the differential signal data
        R   s   Chi-square testt   Ft   Ps   Poisson testt   St   subtractiont   Ns=   No test method appointed, will not do any differential test.
R   s   Normalization method s    not applicable nowN(   R   R   Rd   R    RM   R   R   R:   (   R   t   cwigR   t   pwig(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   dfTestH  s*    
c      
   C   s4  t  j d d  |  j | j k rA t |  } | j |  j  n  t d t j  } t |  j    } t | j    } d } x( |  j	 D] } | t
 |  j	 |  7} q Wd } xk|  j	 D]`} | j	 j |  r t j d  | j	 | <t
 |  j	 |  } t
 | j	 |  }	 t | |	  }
 |
 |	 k rD| j	 | j | d d n) |
 | k rm|  j	 | j |	 d d n  | j	 | j |
 d d xt d |
  D]z} t  j t |  j	 | |  t | j	 | |  | t |  j	 | |  | t | j	 | |   } |  j	 | | | j	 | | k r7d | j	 | | <n t  d t  j | d	 d
  } t t |  j   d  } | d k  rd } n  |  j	 | | | j	 | | k rt |  | j	 | | <n t |  | j	 | | <| d 7} | d d k r| d | Gd G| Gd GHqqWq q W|  j   | j   | S(   s  
        Description:
            Do Fisher's exact test to wig2.
            
        Parameter:
            wig2: the Wig class instance which is to be tesed to.
            
        Value:
            a Wig class instance containing the differential signal data
        R   iR   i    g        R
   i   s   fisher.testR   i   i   g       i  g      Y@s
   percent ofR   (   R   R   R   R    R!   R   R   R   R   R   RJ   R#   R   R   R4   R   R7   R   R   R9   R:   R   R   R   (   R   R'   R   R   R   R   R   R   R)   R*   Ri   Ro   R   R   Ru   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt
   fisherTesti  sN       d"  	" 
 '

c         C   s+   x$ |  j  D] } |  j  | c | 9<q
 Wd S(   s   
        Description:
            Do fold change at each data point.
            
        Parameter:
            fold: the value that will be multiplied by each data point
            
        Value:
            None
        N(   R   (   R   Ra   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyRL     s     c         C   s   |  j  j   S(   s   
        Description:
            Retrive all chrosome names
        Parameter:
            None
        Value:
            a list of chrosome names
        (   R   R"   (   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR3     s    	c         C   s1   d } x$ |  j  D] } | |  j |  7} q W| S(   s   
        Description:
            Calculate genome size
        Parameter:
            None
        Value:
            Interger value
        i    (   R   R5   (   R   Ri   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR<     s    	i c      	   C   s   |  j  d |  \ } } t j g  d | d | | f \ } } xA |  j D]6 } | t j |  j | d | d | | f d 7} qI W| | g S(   Nt   nonzerot   binsR7   i    (   t   maxminR   t	   histogramR   (   R   t   bnumt   nonzero_endt   mat   mit   countsR   R/   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR     s
    ' 4c         C   s   | d d k r t  |  } n  | d d k rM d d l } | j  |  } n  x | D] } | d d k rp qT qT | d d	 !d
 k r |  j | d | d | d S| d d !d k r |  j | d | d | d Sd G| GHqT Wd S(   s   
        Description:
            Load Wig class instance from Wiggle format file
        Parameter:
            file: a path to the file containing the data
        Value:
            None
        iR   is   wig.gziNi    t   ti	   t	   fixedStepR   R   i   t   vars%   Load failure: format not recoganized!(   R   t   gzipt	   loadFixedt   loadVar(   R   R   R   R   t   fiR   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR     s    	  c         C   s  t    } |  j d k  r t |  } | s2 d GHn  x |  j d k  r | j   } | d k rb q5 n  | d d k r5 | j   } xJ | d D]; } | j d  }	 |	 d d k r t |	 d  |  _ q q Wq5 q5 Wn  | s d G| GHn  d	 }
 d
 g } d } d } xt |  D]} | d k r$qn  | d d k r:qq| d d k r| j   } x | d D]x } | j d  }	 |	 d d k r|	 d } qa|	 d d k rt |	 d  } qa|	 d d k rat |	 d  } qaqaW| s| Gd G| GHn  |
 d	 k r|  j j |
  s-t j	 d g  |  j |
 <n  t j	 |  } | |  j k r@t
 |  | d |  j } t j	 d g  } | j t |  d d x t t
 |   D] } | |  j k  r| | | |  j c | | 7<qx[ t | | |  j | d | |  j  D]0 } | | |  j c | | d |  j | 7<qWqWn  |  j |
 j | |  j t
 |  d d | |  j |
 | |  j )n  g  } | }
 | d } qy! | j t | j   d   Wq| d  d k r| j d  qd G| d  GHqqXqW|  j j |
  s,t j	 d g  |  j |
 <n  t j	 |  } | |  j k r?t
 |  | d |  j } t j	 d g  } | j t |  d d x t t
 |   D] } | |  j k  r| | | |  j c | | 7<qx[ t | | |  j | d | |  j  D]0 } | | |  j c | | d |  j | 7<qWqWn  |  j |
 j | |  j t
 |  d d | |  j |
 | |  j )d S(   s   
        Description:
            load data from Fixed wiggle format file
        Parameter:
            file:a path to the file containging the data
            suppress: suppress waring message? True or False
        Value:
            None
        i   s   detecting step size ...s   
i    t   ft   =R   s   parsing fromR	   t   chr1iR   t   chromRj   s   start from position g        g      ?R
   i   t   nans   wrong line:N(   R   R   R   t   readlineR   R   R   R#   R   R   RJ   R   R7   RW   R9   (   R   R   R   R   t   ssst   tempfR   R   t   termt   kvR   R(   t   pnRk   t   newchrt   newstartt   instepR   t   nplstRi   t   tposRj   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR     s    
	   $ 	       "0 5+!   "0 5+c         C   sm  |  j  d k  r  d GHd |  _  n  d G| GH|  j  } t   } d \ } } | d d k rf t |  } n  | d	 d
 k r d d l }	 |	 j |  } n  x| D]}
 |
 d d k r q q |
 d d k r| s |
 d  GHn  |
 j   } d } x | d D] } | j d  } | d d k rv| d 7} | d } |  j j |  s]t j d g  |  j | <n  |  j | } | j	 } q | d d k r t
 | d  } | d 7} q q W| d k  r[d G|
 Gd GHq[q |
 j   } t
 | d  t | d  } } | | | } | | } | | k r7| d } | j | d d n  x! t | |  D] } | | | <qGWq W|  j   d S(   s   
        Description:
            load data from Fixed wiggle format file
        Parameter:
            file:a path to the file containging the data
        Value:
            None
        i   s   set step size to 10i
   s   parsing fromR	   i    iR   is   wig.gziNR   R   R   R   g        t   spani   s   wrong format:s   chrom and span must be providedi  R
   (   R	   i    (   R   R   R   R   R   R   R#   R   R   R%   R   R9   R   R7   R   (   R   R   R   R   R   t	   starttimeR   R%   R   R   R   R   t   rightR   R   R   t   inspanR   t   valueR   Rk   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR   -  sX    			   

!

 c         C   s   d \ } } x |  j  D] } |  j  | j   |  j  | j   } } | r |  j  | j   } |  j  | | j   |  j  | | j   } } | d k r | } n  | d k r | } q n  | | k r | } n  | | k  r | } q q W| | g S(   Ni    (   i    i    (   R   R4   R   R   (   R   R   R   R   R/   t   tmat   tmiR   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR   j  s    '/ 	  	 c         C   sS   d \ } } x< |  j  D]1 } | |  j  | j   7} | |  j  | j 7} q W| | S(   s   
        Description:
            Calculate the mean occupancy value
        Parameter:
            None
        Value:
            float value
        i    (   i    i    (   R   R   R%   (   R   R%   R   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyRT   w  s
    
c         C   s{  |  j  | j  k r1 t |  } | j |  j   n  t d |  j   } i  } x |  j D] } d | | <qS Wx | j D] } d | | <qq W| j   } x | D] } |  j j |  s |  j |  n  | j j |  s | j |  n  |  j | j } | j | j } | | k r+| j | j	 | d d n) | | k  rT|  j | j	 | d d n  |  j | | j | | j | <q W| S(   s   
        Description:
            multiply by wig2 at each data point
        Parameter:
            wig2: the Wiggle class instance to multiply by
        Value:
            an Wiggle class instance
        R   i   R
   i    (
   R   R    R!   R   R   R"   R#   R$   R%   R   (   R   R'   R}   R(   R   R)   R*   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   multiply  s.    	      #c         C   s5   x. |  j  D]# } |  j  | } d | | j   <q
 Wd S(   s}   
        change all non-zero value to value 1
        #can be further improved by using the numpy.nonzero() function
        i   N(   R   R   (   R   R/   t   a(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   non0to1  s    c         C   sC   d } x/ |  j  D]$ } | |  j  | j   d j 7} q W| |  j S(   sA   
        the number of data point with a non-zero value.
        i    (   R   R   R%   R   (   R   R%   R/   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   non0size  s    "i   iK   id   c   
      C   s.  |  j  d | d |  } | d j   } t |  } | j   t |  } d \ } } d }	 x | | k  r|	 | d | 7}	 x |	 d | | | k r| | k  r| d | d | | d | d | d | | d | | <| d 7} | | k r | j d  | } q q W| d 7} q_ W| |  } | S(	   si   
        bnum: number of histogram bins to be calculated between maximal value and minimal value
        R   R   i    g      Y@i   id   in   (   i    i    (   R   R   R    RU   RJ   RW   (
   R   Ro   R   R   t   hst   totalt   plthRm   t   jt   count(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt
   percentile  s$    
'>

c         C   s   |  j  j |  S(   s   
        Description:
            remove a chrosome 
        Parameter:
            k: the name of the chrosome that is to be removed
        Value:
            None
        (   R   R,   (   R   t   k(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR,     s    	c         C   s]   i  } x |  j  D] } d | | <q W| j   } x& | D] } |  j  | | |  j  | <q7 Wt S(   s   
        Description:
            Self multiply by p times
        Parameter:
            p: the times to do self multiply
        Value:
            None
        i   (   R   R"   R&   (   R   Ro   R(   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   power  s    	 c         C   s;  t  d  } |  } | j | j k r7 | j | j  n  t d | j  } d } x| j D]} | GH| d k r | | k r qY n  | j j |  rY t j d  | j | <| j | j } | j | j }	 t	 | |	  }
 |
 |	 k r| j | j
 |
 d d n) |
 | k r+| j | j
 |
 d d n  | j | j
 |
 d d t j d  } t j d  } | j
 |
 d d | j
 |
 d d d } x#| |
 k  r| j | | | j | | k r5| j | | d k  rd | | <n | j | | | | <| j | | d k  rd | | <q| j | | | | <nr | j | | d k  rYd | | <n | j | | | | <| j | | d k  rd | | <n | j | | | | <| d 7} qWd } i  } x( t | |
 |  D] } | | | | <qW|
 | |
 |
 | <| j   } | j   x| D]} | | } d	 G| G| GH| t | | | ! t | | | !  } | } x | | k  r+| | | | k rd | j | | <n | j | | | j | | k rd | | | | j | | <n> | j | | | j | | k  r| | | | j | | <n  | d 7} qjWqWqY qY W| S(
   s  
        Description:
            do Poisson test to calculate differential signial for each data point between two Wig class instances.
        
        Parameter:
            wig2: a Wig class instance to be compared to
            tchr: specify a chrosome that is to be compared, leave it to '' if want to do for all chrosomes.
            
        Value:
            A Wig class instance that cantain data for the differential signial
        
        sG   function(q,avg){return(ppois(q,avg,lower.tail=FALSE,log=TRUE)/log(10))}R   i    R	   g        R
   i   i@B s   region:(   R   R   R!   R   R   R#   R   R   R%   R4   R   R7   R"   RU   R   (   R   R'   R   t   ppR   R   R   R   R)   R*   Ri   t   tstr1t   tstr2Ro   t   frglent   fragsR   Rz   Rj   t   result(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyRd     sz        "     

) "  " c         C   s  t  |   } | d k r5 | d k r5 | j d  | S| j   \ } } | d k r` | d } n  | d k ry | d } n  x | j D] } d | j | t j |  j | | k  |  j | | k Bg  d <d | j | t j |  j | | k |  j | | k @g  d <q W| S(   s   
        Each value between lowValue and highValue will be set to 1, other value will be set to 0
        #may be further improved by using the numpy.where() or numpy.nonzero() function
        i    i   N(   R    RV   RL   R   R   R   t   where(   R   t   lowValuet	   highValueR   R   R   R/   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   regionWithinValueRange$  s      ?Cc         C   s%   |  j  | j | |  j d d d S(   s   
        Description:
            change chrosome size
        Parameter:
            chr: the name of the chrosome whose size is going to be changed
            size: the new size to be setted for the chrosome
        Value:
            None
        R
   i    N(   R   R   R   (   R   R   R%   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR6   7  s    
c         C   sX   xQ |  j  D]F } |  j |  |  j } |  j  | } | d d | d |  j  | <q
 Wd S(   s   
        Description:
            set negative value at each data point to 0
        Parameter:
            None
        Value:
            None
        i   g      ?N(   R   R5   R   (   R   R   R   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyRN   B  s    	t   fixedc         C   s0  | d k r |  j } n  | s* d | GHn  t | d  } | d k rd d l m } |  j j   } t |  d } d \ }	 }
 xh t |
  D]Z } | | d |  } |  j | | d t	 j | j
 d  } | d k r |	 d 7}	 q q W|	 d |
 d	 k rd
 } n d } d | d GHn  | d k rqd } x% |  j D] } | |  j | j
 7} q:Wd d l m } m } n  d } x|  j D]} | s| GHn  | d
 k r3| j d | d t |  d t |  d  t |  j |  } g  } x( |  j | D] } | j t |   qW| j d j |  d  q| d k rt |  j |  } | j d | d t |  d  d } x| | k  r|  j | | d k r| j t | |  d t |  j | |  d  n  | d 7} q~Wq| d k rt |  j |  } d } x| | k  r| d 7} | |  | j t |  j | |  d t | d |   d | d t | |  d  | d 7} qWqd G| j d | d t |  d t |  d  t |  j |  } x9 t d |  D]( } | j t |  j | |  d  qWqW| j   d GHd S(   s6  
        Description:
            Save data to wiggle format file
        Parameter:
            file: a path to the output file
            format: the format of the output wiggle file, could be 'fixed' or 'var'
            step: the step size of the ouput wiggle file
        Value:
            None
        s   saving  to R2   i(   t   randinti   i    i  g      ?g      ?R   R   s   Will save in .s    formatt   wiq(   t   seedR  s   fixedStep chrom=s    start=1  step=s    span=s   
s   variableStep chrom=s   	s7   ,format not recogonized, will be saved in fixed format,t	   completedN(   i    i  (   RV   R   R   t   randomR  R   R"   RJ   R7   t   sefR%   t   numpy.randomR  R;   R:   RW   RX   Rw   (   R   R   t   formatR   R   Rn   R  R(   Ri   R   R   Rm   R   t   valR   R/   R  t   acculent   otR   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   saveQ  st      (  	  1 # 7

S1 *
i
   c   	      C   s  | } d G| Gt  | d  } x |  j D] } | | k r? q' n  | d k r' | j d | d t |  d t |  d  | d
 k r t |  j |  |  j } n  xF t d | |  D]/ } | j t |  j | | |  j  d  q Wq' q' W| j   d	 GHd
 S(   s  
        Description:
            Save data to wiggle format file by chrosome name
        Parameter:
            file: a path to the output file
            format: the format of the output wiggle file, could be 'fixed' or 'var'
            step: the step size of the ouput wiggle file
            chr: the name of the chrosome that is going to be saved
            lth: the length of the chrosome to be saved, start from 1 to lth
        Value:
            None
        s   saving wig toR2   R   s   fixedStep chrom=s    start=1  step=s    span=s   
i    R  N(	   R   R   R;   R:   RV   RJ   R   R7   Rw   (	   R   R   R   Ri   R  R   R   Rn   Rm   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   saveChr  s     1  4
c         C   sx   |  j    } |  j   | } d } x? |  j D]4 } x+ |  j | D] } | | | | | 7} q@ Wq, W| | :} t |  S(   s   
        Description:
            Calculate standard deviation of occupancy
        Parameter:
            None
        Value:
            None
        i    (   R%   R   R   R   (   R   t   szt   avgt   sqmR   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   sd  s    	 
c         C   s   i  } x8 t  |  D]* } | j   } t | d  | | d <q Wxu |  j D]j } | j |  ss |  j j |  qK |  j | j G|  j | j | | |  j d d |  j | j GHqK Wd S(   s{  
        Description:
            Adjust the size of each chrosome.
        
        Parameter:
            gfile: path to the file containing the size of each chrosome, each line in the file would be in the format "chrosome_name size", in which size is an integer value, and chrosome_name should contain no empty space
        
        value:
            None.
        
        i   i    R
   N(	   R   R   R   R   R#   R,   R%   R   R   (   R   R   t   sizesR   R   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt
   sizeAdjust  s     %c   
   
   C   s  d G| GHt    } t | |  j  } | d k r5 t S| d } | | } t |  j  } |  j d  |  j } xJ| D]B} | | j } | d | k r x t d | |  D]8 }	 | | | | | c !| | | |	 | | |	 !7+q Wn xY t d | d |  D]@ }	 | | | d | | c !| | | d |	 | | |	 !7+qWxN d | | g D]< }	 | | | | | c !| | | |	 | | |	 !d 7+qXW| | | | | c !| d :+qu W| |  _ t S(   s   
        Description:
            Smooth occupancy by a sliding window
        Parameter:
            the size of the smooth window
        Value:
            None
        s   smooth width:i    i   g        i   g      ?g      ?(	   R   R   R   R&   R    R   RL   R%   R7   (
   R   RI   t   sst   hlmdt   tlmdR=   t   wg1R   Ri   Ro   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyRK     s,    			 

	 9 > :#	c         C   sb   i  } x |  j  D] } d | | <q W| j   } x+ | D]# } t j |  j  |  |  j  | <q7 Wt S(   s   
        Description:
            translate each data point to its square root value
        Parameter:
            None
        Value:
            None
        i   (   R   R"   R   R   R&   (   R   R(   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR     s    	 !c         C   s!  |  j  | j  k r1 t |  } | j |  j   n  i  } x |  j D] } d | | <qA Wx | j D] } d | | <q_ W| j   } x | D] } t |  j |  } t | j |  } | | k r | j | j | d d n |  j | j | d d |  j | c | j | 8<q W| j   t S(   s   
        Description:
            Subtract wig2 at each data point
        Parameter:
            wig2: the Wig class instance to be subtracted
        Value:
            None
        i   R
   i    (	   R   R    R!   R   R"   RJ   R   R   R&   (   R   R'   R(   R   R)   R*   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyRM     s$    	   
c         C   s   |  j    |  j   S(   s   
        Description:
            return the sum of occupancy across the whole genome
        Parameter:
            None
        Value:
            None
        (   RT   R<   (   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR     s    	c         C   s   |  } d } x | j  D] } | j  | j   } d } x` | | k  r | j  | | | k r | j  | | | k r | | j  | | 7} n  | d 7} q8 Wq W| S(   sP   
        add values between lowValue and highValue, ignore other values
        i    i   (   R   R%   (   R   R   R   R   R   R/   R%   Rm   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   sumWithinValueRange"  s    . c         C   s=  | d  k r |  j } n	 | |  _ t |  } | j   } g  } x | d d k r | j |  | j   } | d d k r | d d t | d d  } } t j d g  |  j	 | <|  j	 | j
 | | d d n  | j   } qB Wt |  } x$ t t |   D] }	 | j   qWx| D]} | d	  j d
  } | d } t j d | d  }
 t j d | d  } t | d  } x t t |   D] }	 | |	 d k r| t |
 |	  } |  j	 | | | | | c !d 7+q| |	 d k r
| t |
 |	  } q| |	 d k r| t |
 |	  } qqWq"Wd  S(   Ni    t   @s   @SQi   i   i   g        R
   is   	s   \d+i   s   \D+t   Mt   DR   (   RV   R   R   R   RW   R   R   R   R   R   R   R7   RJ   t   ret   findall(   R   t   sam_fileR   t   infileR   t   hlinesR   R   t   clenRm   t   t1t   t2Rj   R   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   sam_coverage0  s@     	#! 
%  c         C   s   d } |  j    } d } xq |  j D]f } |  j | j } d } x= | | k  r} |  j | | | } | | | 7} | d 7} qA W| | 7} q" Wt | |  } | S(   Ni    i   (   RT   R   R%   R   (   R   R   Re   RG   R   t   tsRm   RF   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   stdT  s    N(8   t   __name__t
   __module__R   R   R    R+   R$   R0   R1   RH   RO   RV   Rv   R{   R   R   R   R!   R   R5   R   R   R   R   R   R   RL   R3   R<   R   R   R   R   R   RT   R   R   R   R   R,   R   Rd   R   R6   RN   R  R  R  R  RK   R   RM   R   R  R$  R&  (    (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyR   	   sh   					%"0f!;0m'H	-7		
			!	,			V=						!	
	@		B						$t   __main__R2   i    (   R   R  t   copyR    t   rpy2.robjectsR   R   t   mathR   R   R   t   summitsR   R   R   R'  t   syst   ost   fdopent   stdoutt   fileno(    (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wig.pyt   <module>   s         _$