
dTc        .   @   s  d  d l  m  Z  d  d l Z d  d l Z d  d l Z d  d l m Z d  d l m Z d  d l m	 Z	 d  d l
 m
 Z
 d  d l m Z d  d l m Z d  d	 l m Z m Z m Z d  d
 l m Z m Z d  d l Z d  d l Z e e d e e d e d e d d d d d e e d d e d d e e d d e d d d e d d d d d d d d e d d d d d d d - Z d   Z e d  Z d   Z e e e e d d d   Z e e e d! d d"  Z  g  d#  Z! g  d$  Z" d%   Z# d&   Z$ d'   Z% e d( e d d d) d) d d e d* 
 Z& d+   Z' e e d, d-  Z( d. e e e e e e e d, e d d e e e d d/  Z) d0 e e d1  Z* e e d2  Z+ d3   Z, d! e d4  Z- d5   Z. d) d6  Z/ d7   Z0 d) e d8 d9  Z1 d d: d d d d d; d< d d= 	 Z2 d> d d d? d@ e e e d d: d d d@ d d d; d< d d d dA dB  Z3 dC i  d d: d d d@ d d d; d< d dD  Z4 e5 dE k re j6 e j7 j8   dF d  e _7 n  d S(G   i(   t   timeN(   t   Wigs(   t   Wig(   t   Summits(   t   reads(   t   deepcopy(   t   randint(   t   log10t   logt   sqrt(   t   rt   FloatVectors   ./t   Qt   Pi    gh㈵>i   i   i(   i   g?i  i,  t   fixedi
   i2   i   iJ   c-   j   "   C   s#  t    }- | d | } }. t t t j t t d | | |# |#     j   d  d |# d | } d G| Gd G|# Gd G| GHt d	 |   \ }/ }0 t	 |0  d k r t
 St d
 | d |0  \ }1 }2 | d k r t	 |1  d k s t	 |2  d k r t
 St d | d |) d |0  }3 | d k r9t	 |3  d k r9t
 Sx | d d k rY| d  } q<Wt	 |  d k  rud } n | d d k r| d 7} n  t j j |  st j |  n  d }4 d }5 x |0 D] }6 t |0 |6 d |& d |! d t
 d |" d |# d |) d |' d |( d |, d |- 
|0 |6 <|5 t	 |0 |6 j    k  rt	 |0 |6 j    }5 qqW| r<| d k s|5 d k s|* d k s| d k s| d k r<t	 |0  d k s|5 d k r<t j j | d  st j | d  n  d GHx: |0 D]2 }6 |0 |6 j t j j | d  d |" d |# qWd  Gt    |- Gd! GHq<n  | d k rx^ |1 D]V }7 t |1 |7 d d" d |! d t
 d |" d |# d d" d |' d |( d |, d |- 
|1 |7 <qOW| rd# GHx: |1 D]2 }7 |1 |7 j t j j | d  d |" d |# qWd  Gt    |- Gd! GHqn  | d k rod$ GH|4 d% 7}4 t j j t j j | d&   sft j t j j | d&   n  i  }8 x|2 D]}6 |6 GH|2 |6 }7 |7 d' k r|8 j |7  s|1 |7 j   }9 t	 |9  d k r|1 |7 j d( d) d* d d+ d d, d d- d d. t
  n  |1 |7 j |9 d  |8 |7 <t	 |9  d k rxx0 |9 d D]$ }: |8 |7 j |1 |7 j |:   q2W|8 |7 j d/ t	 |9   n  t j j t j j | d& |7 d0   s|8 |7 j t j j | d& |7 d0  d |" d |# qqn  xy |0 |6 j   D]g }; |; d1  d2 }< |0 |6 j |;  }= |7 d' k r:|= j |8 |7 d3 |  n  |0 |6 j |< |=  d4 }= qWqsWd  Gt    |- Gd! GH| r`|5 d k s|* d k s| d k s| d k r`t	 |0  d k s|5 d k r`d5 GHt j j t j j | d6   s	t j t j j | d6   n  x: |0 D]2 }6 |0 |6 j t j j | d6  d |" d |# qWd  Gt    |- Gd! GHq`n  d4 }8 d4 }1 n  i d7 d8 6d9 d: 6d; d) 6d4 d 6}> t	 |0  d< k  r|0 j   d }6 t	 |0 |6 j    d< k  r| d k rd \ } } qn  | d k r\
t	 |0  d k s#|5 d k s#| d k r\
d= GH|4 |> | 7}4 t d |#  }? x |0 D] }6 x |0 |6 j   D]w }; |; d1  |> | d> }@ |0 |6 j |@ |0 |6 j |;   |? j |@ |0 |6 j |@   | d k rc|3 |6 |3 |@ <qcqcWqLW| d k rd }3 n  |? j  d? | d@ t j j | dA  d* |$ d+ |%  }A | d k rZ	|A d k rZ	t j j | dA  } n  |? j d( | d, |3 d- |A d. |  }B |B d k r	|B Sd  Gt    |- Gd! GH| rS
|5 d k s	|* d k s	| d k rS
t j j t j j | d(   s
t j t j j | d(   n  dB GH|? j t j j | d(  d |" d |# d  Gt    |- Gd! GHqS
n  d4 }? n  |* d k r|4 dC 7}4 | rdD GHxl |0 D]d }6 x[ |0 |6 j   D]I }; |0 |6 j |;  j! |*  |0 |6 j |; d1  dE |0 |6 j |;   q
Wq
Wd  Gt    |- Gd! GHqn  i  }C t" dF |   }D x|0 D]}6 dG G|6 GdH GHt j j t j j | d&   sut j t j j | d&   n  |0 |6 j   }E |0 |6 j |E d  |C |6 <t	 |E  d k rx0 |E d D]$ }; |C |6 j |0 |6 j |;   qW|C |6 j d/ t	 |E   n  | r*|* d k r*|C |6 j! |*  n  t	 |E  d k s|* d k s|! dI k s| d k s| d k s| d k s|D r|C |6 j t j j | d& |6 |4 d>  d |" d |# n  d  Gt    |- Gd! GHq"Wd4 }0 i dJ dK 6dL d) 6dM dN 6dO d: 6dP dQ 6}F i  }G xgt t	 |/   D]S}H t	 |/ |H  d< k  r;qn  |/ |H d d' k s|/ |H d d' k riqn  dR j |/ |H  }I dS G|I GdH GH|C |/ |H d j# dT | dU |C |/ |H d  |G |I <t j j t j j | dV   st j t j j | dV   n  xX |G D]P }I |G |I j t j j | dV t$ j% dR dW |I |F | d>   d |" d |# qWd  Gt    |- Gd! GHqWi  }J | d k s| d k r| d k rdX }K n dY }K i  i  i  }L }M }N x} |C D]u }6 |K GdZ G|6 GdH GH|C |6 j& d[ d d\ d d] d d^ |
 d_ | d` d da db dc d dd t
  	|L |6 <d  Gt    |- Gd! GHqWt	 |G  d k r| d k rx} |G D]u }I |K GdZ G|I Gde GH|G |I j& d[ d d\ d d] d d^ d d_ | d` d da db dc d dd t'  	|M |I <d  Gt    |- Gd! GHqbWx |G D] }I |K GdZ G|I Gdf GH|G |I j d  |G |I j& d[ d d\ d d] d d^ d d_ | d` d da db dc d dd t'  	|N |I <|G |I j d  d  Gt    |- Gd! GHqWn  t	 |C  d k r| d k rdg GHqdh GHn  |L g }O t	 |G  d k r| d k r|O |M |N g 7}O n  x |O D] }P x |P D] }Q |P |Q }R x |R D] }S |J j |S  s5i  |J |S <n  xs |R |S D]g }T |J |S j |T  sr|R |S |T |J |S |T <q@|J |S |T |R |S |T k  r@|R |S |T |J |S |T <q@q@WqWqWqWn  | d k r{| d k rdi G| GHt( |  }J n/ t	 |C  d k rdj GHt) dk |J d] |  }J n  dl GHx |C D] }6 |6 GHt) dk |L |6 d] |  }R |C |6 j* dm |R dn t j j | d& |6 |4 do  d^ |
 d_ | d\ | d` d dd t
  t	 |C  d k r%|C |6 j* dm |J dn t j j | d& |6 |4 dp  d^ |
 d_ | d\ | d` d dd t
  q%q%Wt	 |G  d k rUx:|G D]/}I |I GH| d k rRt) dk |M |I d] |  }R n  | d k r|G |I j* dm |R dn t$ j% dR dW t j j | dV |I |F | dq   d^ |
 d_ | d\ | d` d dd t'  n  |G |I j* dm |J dn t$ j% dR dW t j j | dV |I |F | dr   d^ |
 d_ | d\ | d` d dd t'  |G |I j d  | d k rdt) dk |N |I d] |  }R n  | d k r|G |I j* dm |R dn t$ j% dR dW t j j | dV |I |F | ds   d^ |
 d_ | d\ | d` d dd t'  n  |G |I j* dm |J dn t$ j% dR dW t j j | dV |I |F | dt   d^ |
 d_ | d\ | d` d dd t'  |G |I j d  qWn  | d k r{d  Gt    |- Gd! GHq{n  | d k r9| d k rdu G| GHt( |  }U n/ t	 |C  d k rdv GHt) dk |J d] |  }U n  dw GHx |C D] }6 |6 GHt) dk |L |6 d] |  }R |C |6 j* dm |R dn t j j | d& |6 |4 dx  d^ |
 d_ | d\ | d` d dd t
  | d k szt	 |C  d k r|C |6 j* dm |U dn t j j | d& |6 |4 dy  d^ |
 d_ | d\ | d` d dd t
  qqWt	 |G  d k r"x:|G D]/}I |I GH| d k rt) dk |M |I d] |  }R n  | d k r|G |I j* dm |R dn t$ j% dR dW t j j | dV |I |F | dz   d^ |
 d_ | d\ | d` d dd t'  n  |G |I j* dm |U dn t$ j% dR dW t j j | dV |I |F | d{   d^ |
 d_ | d\ | d` d dd t'  |G |I j d  | d k r1t) dk |N |I d] |  }R n  | d k r|G |I j* dm |U dn t$ j% dR dW t j j | dV |I |F | d|   d^ |
 d_ | d\ | d` d dd t'  n  |G |I j* dm |U dn t$ j% dR dW t j j | dV |I |F | d}   d^ |
 d_ | d\ | d` d dd t'  |G |I j d  qWn  d  Gt    |- Gd! GHn  | d k r| d k rf| d k rfd }J n  i  }V g  }W x |C D] }6 d~ G|6 GdH GH|W j+ t j j | d& |6 |4 d   |C |6 j, t j j | d& |6 |4 d  d\ | d] | d d d | d | d |
 d_ | d` d d d dm |J d | |V |6 <d  Gt    |- Gd! GHqyW| d k rbd G| GHt- |  }X n t	 |C  d k r|d GHn  t. d |W d] |  }X t	 |C  d k rEt/ t j j | d  db  }Y |Y j0 d  xt |X D]i }S d t	 |X |S d  }H }Z xE |H |Z k  r=|Y j0 |S d t |X |S d |H  d  |H d 7}H qWqWn  x |C D] }6 t	 |C  d< k  rjqLn  d G|6 Gd GHt1 d |X d t j j | d& |6 |4 d  d d d |C |6 d] | d | d |.  }[ |C |6 j2 d |[ dn t j j | d& |6 |4 d  d\ | d] | d | d |
 d_ | d` d da db dc d d d d |  qLWt	 |G  d k ri  i  }\ }] | d k rx |G D] }I d~ G|I Gde GH|G |I j, t$ j% dR dW t j j | dV |I |F | d   d\ | d] | d | d d d | d d d_ | d` d d d dm |J 
|\ |I <d  Gt    |- Gd! GHqnWx |G D] }I d~ G|I Gdf GH|G |I j d  |G |I j, t$ j% dR dW t j j | dV |I |F | d   d\ | d] | d | d d d | d d d_ | d` d d d dm |J 
|] |I <|G |I j d  d  Gt    |- Gd! GHq Wqqn  t	 |G  d k r#x|G D]~}I |I j dR  }^ d }_ x- |G |I j3 D] }` |_ |G |I j3 |` j4 7}_ q8Wt5 d t6 d |_ d   }a | d k ra d G|I GdH GHd GHt7 d |G |I d |a dm |J  }b t8 dU |C |^ d d |C |^ d d |a d | dm |J  }c d GH| d k ret$ j% dR dW t j j | dV |I |F | d   t$ j% dR dW t j j | dV |I |F | d   }d }e n d \ }d }e t9 d t j j | d& |^ d |4 d  d t j j | d& |^ d |4 d  d |d d |e d[ t$ j% dR dW t j j | |I d   dU |C |^ d d |C |^ d d |G |I d | d | dT | d d d |J d |b d |c d |	  d  Gt    |- Gd! GHn  | d k r!d G|I GdH GHd GHt: d t j j | d& |^ d |4 dp  d |C |^ d d |C |^ d d |a d |  }f d GHt; d t j j | d& |^ d |4 dp  d t j j | d& |^ d |4 dp  d t$ j% dR dW t j j | dV |I |F | dr   d t$ j% dR dW t j j | dV |I |F | dt   d[ t$ j% dR dW t j j | |I d   d |f d d |f d d |f d< d |# d |	  
d  Gt    |- Gd! GHn  | d k rd G|I GdH GHd GHt: d t j j | d& |^ d |4 dy  d |C |^ d d |C |^ d d |a d |  }f d GHt; d t j j | d& |^ d |4 dy  d t j j | d& |^ d |4 dy  d t$ j% dR dW t j j | dV |I |F | d{   d t$ j% dR dW t j j | dV |I |F | d}   d[ t$ j% dR dW t j j | |I d   d |f d d |f d d |f d< d |# d |	  
d  Gt    |- Gd! GHqqWn  t< t    |-  }g |g d }h |g |h d d }i |g |h d |i d }g d G|h Gd G|i Gd G|g Gd GHd S(   s  
    Description:
        This is the main function that accepts all input parameters and call other functions in the danpos package to complete the work.
    
    Parameters:
        please see the help messages for DANPOS by command lines "python danpos.py -h"
        
    i   i    igffffff?g       @t   rds   , steps    fcut,t   tpatht   tbgt   groupst   amountt   extendt   /t   resultt   .s   /resultt   fst   cutt   savet   wgfmtt   stept   mifrszt   mafrszt   pairedt	   starttimet   Ns   /raws   
saving raw datat   rawt   formats   time elapsed:t   secondsid   s   
saving raw background datas   
subtracting input effects ... s   bgsub.t   pooledt   Nonet   nort   Ft   exclude_low_percentt   exclude_high_percentt
   scalepairst   sampling_totalt   nonzerog      ?s   .wigis	   bgsub.wigt   lmdt    s"   
saving background subtracted datat   bgsubs   Qnor.R   s   Snor.t   Ss   Fnor.i   s   
normalizing wigs ...t   wigt   region_filet   region_out_files   normalize_region.wigs   
saving normalized wigs ...s   smooth.s   
smoothing ...s
   smooth.wigt   paths   
pooling groups   ...t   0s   .chisq_diff.t   Cs   .fold_diff.s   .pois_diff.R   s
   .sub_diff.s   .lsub_diff.t   Lt   :s   
differential test fort   testt   cwigt   difft   -s   
peaks   
regions   calling fort   ofilet   widtht   distancet   pheightt   heightt   calculate_P_valuet   modet   wt
   title_linet   pos_onlys   gaining ...s   loss ...s"   
merging peaks from all groups ...s$   
merging regions from all groups ...s   reading reference peaks fromsB   define reference peaks by pooling peaks defined in all samples ...t   peakss)   
retriving peak values for each group ...t   regionst   files	   peaks.xlss   refpeaks.xlss   local_gain.peaks.xlss   local_gain.refpeaks.xlss   local_loss.peaks.xlss   local_loss.refpeaks.xlss   reading reference regions fromsE   define reference regions by pooling regions defined in all sample ...s+   
retriving region values for each group ...s   regions.xlss   refregions.xlss   local_gain.regions.xlss   local_gain.refregions.xlss   local_loss.regions.xlss   local_loss.refregions.xlss   
position calling fors   positions.xlst   edget   fill_gapt
   fill_valuet   pcutt   poscals!   
reading reference positions fromsU   
defining a set of reference positions by pooling positions defined in all samples...t   positionFiless   reference_positions.xlss   chr	pos
t   ps   	s   
s   
fine-tuning positions fors%   by comparing to the reference map ...t   dict   infilet   outfilet   wgt   fcutt   hdiffs   positions.ref_adjust.xlss   gain.positions.xlss   loss.positions.xlsi i'  i  s(   
position level integrative analysis fors   FDR simulation ...t   dwigt   simut   twigs   analyzing ...t   controlPositionFilet   treatPositionFilet   gainPositionFilet   lossPositionFiles   .positions.integrative.xlst   dist   fdrsimut
   fdrRegionst
   occFDRlistt
   fuzFDRlistt   fdrs$   
peak level integrative analysis fort	   peakFile1t   wg1t   wg2t   file1t   file2t   gainFilet   lossFiles   .peaks.integrative.xlst   widthFDRlistt
   smtFDRlistt
   aucFDRlists&   
region level integrative analysis fors   .regions.integrative.xlsi  i<   s   
total time elapsed:t   hourst   minutess   seconds

job done, cheers!

N(   i    R!   (   NN(=   R    t   floatt   strR
   t   sdR   t   ranget   splitt
   pathParsert   lent   Falset   bgPathParserR&   t   scaleParsert   osR5   t   isdirt   mkdirt	   loadinputt   keysR   t   joint   has_keyR'   t   popt   addt
   foldChanget   isfileR0   t   setR   t   gett   samplingTotalt   smootht   all_wig_formatt   dfTestt   ret   subt   callRegionst   Truet	   readPeakst!   merge_peaks_by_head_tail_distancet   fillRegionst   appendt   callPositionst   readPositionst   refPositionst   opent   writet   positionAdjustt   fillPositionst   datat   sizet   mint   maxt   occFDRt   fuzFDRt   allPositionsInOneFilet   peakFDRt   region_differentialt   int(j   R   R   t   opathR-   R   R'   R3   R:   R   Rd   RA   RB   t   logpt   call_positiont   ref_positiont   bothR?   R@   RL   RM   RK   RV   R   t   ratiot	   call_peakt   ref_peakt   peak_distancet
   peak_widtht   call_regiont
   ref_regiont   region_distancet   region_widthR.   R   R   R   R)   R*   R   R   R   R   t   smooth_widtht   pcferR   R    RW   t   pairsR   t   bggroupst   subpairsR+   t   addnamet   maxgroupsizet	   groupnamet   bggroupnamet   pooledbggroupst   bgfilenamest
   bgfilenamet   filenamet   newfilenamet   tempt   nornamet   wigst   newnameR,   t   sucnort   pooledgroupst   all_wigt	   filenamest   testnamet   dfgroupst   it   dfnameRH   t	   printheadt
   peakgroupst   dfpeakgroupst   dfpeakgroups2t   peak_group_listt   temp_peak_groupt   namet
   temp_peakst   chrt   posRI   t	   smtgroupsRP   t   refdict   fot   ltht   tdict
   dsmtgroupst   dsmtgroups2t
   groupnamest   tgst   crR`   Rb   Rc   R]   R^   t   fdrlistR$   Ro   Rp   (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyt   danpos   s   	O 0    	  %+ Z  0%/ 0
! 
 5 "( < N! 0	"( 6!  	3 $ $! (	
-!  " U:) ( 4!  N 	JJJ 	 
   ,	R Y  he  he 	R Y  he  he 	's	 + Qt 7 j*$(&1*+*$332!*+*$332!
c         C   sB  i  } |  j  d  } |  d  k r0 d GHi  i  g Sxt t |   D]} | | j  d  | | <t | |  d k r d GHi  i  g Sxt t | |   D]} | | | d k r q n  t j j | | |  rt j j | | |  rd G| | | Gd GHi  i  g S| | | } x | d	 d
 k r?| d	  } q"Wt j	 d d |  } t j	 d d |  } x | d d k r| d } qmWt j j |  r| d d k r| d  } q| d  } n  | | | | <| j
 |  r"| | | k r,d G| | Gd G| Gd G| Gd GHi  i  g Sq | | | <q WqC W| | g Sd  S(   Nt   ,s   No input files detected
R9   i   s   Wrong: each pair can not contain more than 2 groups, please also make sure that each group name does not contain the symbol "-"R&   s   Wrong: file or directorys   does not existsiR   s   /+t   _s   .gz$R/   i    R   i   it   bowtieiis   Wrong: different group (t   ands"   ) were found for same group name (t   )(   R   R   (   Ru   R&   Rt   Rw   R{   R5   R   R|   R   R   R   (   R   R   R   R   t   jt   groupR   (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyRv     sD    

 6
   !c   
      C   sv  i  i  } } |  d  k rl|  j d  d } } xt t |   D] } | | j d  | | <t | |  d k r d GHi  i  g Sxt t | |   D]} | | | } | d k r q n  t j j | | |  rt j j | | |  rd G| | | Gd GHi  i  g Sx | d	 d
 k r6| d	  } qWt j	 d d |  } t j	 d d |  } x | d d k r| d } qdWt j j |  r| d d k r| d  } q| d  } n  | | | | <| d k r | d k r | j
 |  r1| | | k r;d G| | Gd G| Gd G| Gd GHi  i  g Sq>| | | <q q WqB Wt |  d k rt | d  d k r| d  k ri| | | d d <x# | D] }	 | d d | |	 <qWqiqlx t t |   D] } | j
 | | d  s| | d | | | d <q| | | d | | d k rd G| | | d Gd G| | d Gd G| | d Gd GHi  i  g SqWn  | | g S(   NR   R/   R9   i   s   Wrong: each pair can not contain more than 2 groups, please also make sure that each group name does not contain the symbol "-"R&   s   Wrong: file or directorys   does not existsiR   s   /+R   s   .gz$i    R   i   iR   iis   Wrong: different group (R   s"   ) were found for same group name (R   s,   Wrong: different genomic background groups (s'   ) were found for the same input group (s   )
(   R   R   (   R&   Ru   Rt   Rw   R{   R5   R   R|   R   R   R   (
   R   R   R   R   t   bgpairst   bggroupR   R   R   R   (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyRy     sX    
 6
   !(   9c   
      C   s$  i  } |  d  k r |  j d  } t |  d k r | d j d  } t |  d k r | | d <t | d d  | | d d <x  | D] } | d d | | <q W| Sn  xMt t |   D]9} | | GH| | j d  | | <t | |  d k rd GHi  Sy  t | | d  | | d <Wn d G| | d Gd GHd  SXt j j | | d  rt j j	 | | d  rd	 G| | d Gd
 GHi  S| | d } x | d d k r| d  } qWt
 j d d |  }	 x |	 d d k r|	 d }	 qWt j j |  rQ|	 d d k r'|	 d  }	 n  |	 d d k rD|	 d  }	 qQ|	 d  }	 n  |	 | | d <| j | | d  s| | d | | | | d <q | | | d | | d | k r d G| | | d Gd G| | d Gd G| | d Gd GHi  Sq Wt |  t |  k r d GHi  Sn  | S(   NR   i   i    R9   i   s\   
!!!!! Wrong: format error, please make sure that name does not contain the symbol "-"!!!!!
s(   
!!!!! Wrong: amount is not readable fors   !!!!!
s   Wrong: file or directorys   does not existsiR   s   /+R   R   it   gziiR   iis)   Wrong: different expectation of amounts (R   s!   ) were found for the same group (s   )
sD   count of groups for scaling does not equal to count of input groups
(   R   R   (   R&   Ru   Rw   R   Rt   Rq   R{   R5   R   R|   R   R   R   (
   R   R   R   R+   t   mpairsR   R   R   R   R   (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyRz   !  s^    
 	  6     !$9c
   *      C   s/  t  d  }
 t |   j   t |  j   t |  j   t |  j   f \ } } } } | t k r t | d  } | j d  n  d t |  f \ } } t |  t |  t |  f \ } } } xa| | k  r*| | j   | | j   | | j   | | j   f \ } } } } | d  | d  k rHd G| d  G| d  GHn  t | d  t | d  f \ } } t | d  t | d  } t | d	  | | t | d	  | | f \ } } t | d
  t | d
  f \ } } | | k rd }  n t	 | | | | g  }  | | k r*d }! n t	 | | | | g  }! | | k rWd }" n? t t
 |
 t | |  d t | |  d   j   d  }" t | d
  t | d
  f \ }# }$ |	 d k rPt | d |   d | t | d |!  d | t | d |"  d | t | |#  d | t | |$  d | f \ }% }& }' }( }) n$ d d d d d f \ }% }& }' }( }) | t k ryw| j d j | d  t
 |  t
 |  t
 t t | d  t | d   t d   t
 |"  t
 |'  t
 |  t
 |  t
 t t | d d  t | d   t d   t
 |   t
 |%  t
 |  t
 |  t
 t t | d  t | d   t d   t
 |!  t
 |&  t
 d |#  t
 |(  t
 d |$  t
 |)  t
 d t |# |$   t
 t |( |)   g  d  Wn  | G| G| G| G| G| GHn Xn  | d 7} q Wd  S(   Ns?   function(q,r){ppois(q,r,lower.tail=FALSE,log.p = TRUE)/log(10)}RE   s  chr	start	end	center	control_height	treat_height	height_log2FC	height_diff_log10Pval	height_diff_FDR	control_width	treat_width	width_log2FC	width_diff_log10Pval	width_diff_FDR	control_total_signal	treat_total_signal	total_signal_log2FC	total_signal_diff_log10Pval	total_signal_diff_FDR	local_gain_log10Pval	local_gain_FDR	local_loss_log10Pval	local_loss_FDR	local_change_log10Pval	local_change_FDR
i   i   s   Wrong:i   i   i   i   i    ig      ?R=   s   	s   
(   R
   R   t	   readlinesR&   R   Rw   Ru   R   Rq   t   log10PropTestRr   R   R   t   findRankR   R   (*   Rh   Ri   Rj   Rk   R>   Rl   Rm   Rn   R   Rd   t
   log10ppoist   f1t   f2t   gft   lfR   R   R   t   lwt   lst   ltt   col1t   col2t   gcolt   lcolt   w1t   w2RE   t   t1t   t2t   s1t   s2t   wdifft   tdifft   sdifft   gdifft   ldifft   wfdrt   tfdrt   sfdrt   gfdrt   lfdr(    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyR   M  sF    N*F &6& 	 	 	?& $  x  i@B c   !      C   sk  t  d  } t |   j   } | d  k rG | t |  j   d 7} n  d t |  } }	 | j }
 xi | |	 k  r | | j   } t | d  |
 t | d  |
 | d <| d <| d  | | <| d 7} qf Wi  } xQ | j D]F } | j j	 |  r t
 | j | j | j | j  d | | <q q Wd |	 d } }	 t j t j d g  |  t j t j d g  |  t j t j d g  |  } } } x| | k  r3| d d k r| Gd GHn  t d |	  } | | d | | d | | d | | d | | d f \ } } } } t | |  } | | } | | | | k rc| | | | | } } n  d d | j | | | !j   | j | | | !j   | j | | | !j   |
 | | j | | | !j   |
 | f \ } } } } } } | } xb | | k  rW| j | | | k r&| |
 7} n  | j | | | k rJ| |
 7} n  | d 7} qW| | k rmd } n t | | | | g  } | | k rd } n t | | | | g  } | | k rd }  n? t t | t | |  d t
 | |  d   j   d	  }  | |  | | | <| | <| | <| d 7} qW| j   | j   | j   d | d | d | g S(
   Ns?   function(q,r){ppois(q,r,lower.tail=FALSE,log.p = TRUE)/log(10)}i   i   i   i    g        i  t	   simulatedi(   R
   R   R   R&   Rw   R   Ru   R   R   R   R   R   t   numpyt   resizet   arrayR   R   t   sumR   Rq   Rr   t   sort(!   Re   t	   peakFile2Rf   Rg   R`   R   t   logppoist   pkR   R   R   t   colt   chrsR   RE   t   st   tR   t   rstartt   rendR?   t   startt   endR   R   R  R  R   R   t   kR  R  R  (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyR   q  sb     	1 2\ J
    	 	 	? 


c         C   s2   t  d  } t t | t |     j   d  S(   s   
    Parameter:
        list: a list contain four number [count_in_A, count_not_in_A, count_in_B, count_not_in_B]
    Return:
        log scaled P value
    s  function (x, correct = TRUE) 
    {
        x=matrix(x,nrow=2)
        l <- nrow(x)
        n <- rowSums(x)
        x <- x[, 1L]
        k <- length(x)
        ESTIMATE <- x/n
        correct <- as.logical(correct)
        YATES <- ifelse(correct && (k <= 2), 0.5, 0)
        DELTA <- ESTIMATE[1L] - ESTIMATE[2L]
        YATES <- min(YATES, abs(DELTA)/sum(1/n))
        p <- sum(x)/sum(n)
        PARAMETER <- k - 1
        x <- cbind(x, n - x)
        E <- cbind(n * p, n * (1 - p))
        STATISTIC <- sum((abs(x - E) - YATES)^2/E)
        names(STATISTIC) <- "X-squared"
        PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE,log.p = TRUE)
        return(PVAL/log(10))
    }
    i(   R
   Rq   Rr   R   Ru   (   t   listR   (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyR     s    	c         C   s2   t  d  } t t | t |     j   d  S(   s   
    Parameter:
        list: a list contain four number [count_in_A, count_not_in_B, overlap, total]
    Return:
        log scaled P value
    s   function (x) 
    {        
        return( phyper(x[3] - 1, x[1], x[4]-x[1], x[2], lower.tail = FALSE,log.p=TRUE)/log(10) )
    }

    i(   R
   Rq   Rr   R   Ru   (   R  R   (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyt   log10FisherTest  s    	c         C   s  |  } | } x | D] } | | j    } | j   t |  d } d } x | | k  r| | d | | | | | k r | | | | | | | | d k  r | | | | d | | | | <n  | | j | | d  | | | | d <n  | d 7} qL Wq W| S(   Ni   i    (   R   R  Rw   R   (   RH   R@   RI   R   R   t   psR   R   (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyR     s    
$( %c         C   s   i  } t  |   j   } xt | d D]h } | j   } | d t | d  t | d  } } } | j |  s} i  | | <n  | | | | <q# W| S(   Ni   i    i   (   R   R   Ru   R   R   (   RJ   RI   t   linest   lineR  R   R  R  (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyR     s    , c         C   sX  |  j  d  } xB| D]:} x1| j  d  D] }  t j j |   rx' t j t j j |  d   D] } t SWx' t j t j j |  d   D] } t SWx' t j t j j |  d   D] } t SWx' t j t j j |  d   D] } t SWx' t j t j j |  d   D] } t SWx' t j t j j |  d   D] } t SWx' t j t j j |  d	   D] } t SWx t j t j j |  d
   D] } t SWq, t j j |   r, |  d d k rt S|  d d k rt S|  d d k rt S|  d d k rt S|  d d k rt S|  d d k r!t S|  d d k r5t S|  d d k rLt Sq, q, Wq Wt S(   s   
    Description:
        whether all files are in wiggle format? return True is yes, else return False
    Parameters:
        path: a path to the directory of file(s) of sequence reads or occupancy data.

    R   R9   s   *.beds   *.bowties   *.bams   *.sams   *.bed.gzs   *.bowtie.gzs   *.bam.gzs   *.sam.gzit   bediR   t   bamt   sams   bed.gzs	   bowtie.gzs   bam.gzs   sam.gz(	   Ru   R{   R5   R|   t   globR   Rx   R   R   (   R5   t   tpathsR   RS   (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyR     sL    	% % % % % % % %         g|=id   c         C   sn  i  } t  j j |   rrxyt j t  j j |  d   t j t  j j |  d   D]=} t | d | d |	 d | d d } t j d d	 |  } t j d
 d |  } x | d d( k r | d } q W|	 r| j d d d | d | d |  | | d  d <n0 | j d | d | d | d |  | | d  d <d } | rm| | d  d j	 | d  d d | d | n  |
 d) k rS d Gt   |
 Gd GHqS qS Wxyt j t  j j |  d   t j t  j j |  d   D]=} t | d | d |	 d | d d } t j d d	 |  } t j d
 d |  } x | d d* k rC| d } q&W|	 r}| j d d d | d | d |  | | d  d <n0 | j d | d | d | d |  | | d  d <d } | r| | d  d j	 | d  d d | d | n  |
 d) k rd Gt   |
 Gd GHqqWxHt j t  j j |  d   D](} t | d | d |	 d | d d } t j d d	 |  } x | d d+ k r| d } qqW|	 r| j d d d | d | d |  | | d  d <n0 | j d | d | d | d |  | | d  d <d } | r4| | d  d j	 | d  d d | d | n  |
 d) k r/d Gt   |
 Gd GHq/q/WxHt j t  j j |  d   D](} t | d | d |	 d | d d } t j d d	 |  } x | d d, k r| d } qW|	 r| j d d d | d | d |  | | d  d <n0 | j d | d | d | d |  | | d  d <d } | r| | d  d j	 | d  d d | d | n  |
 d) k rzd Gt   |
 Gd GHqzqzWxt j t  j j |  d   t j t  j j |  d    D] } t j d d	 |  } t j d
 d |  } x | d d- k r1| d } qWt | d | | | <|
 d) k rd Gt   |
 Gd GHqqWnt  j j |   rL|  d! d k s|  d" d# k rt |  d | d |	 d | d d } t j d d	 |   } t j d
 d |  } x | d d. k r| d } qW|	 rL| j d d d | d | d |  | | d  d <n0 | j d | d | d | d |  | | d  d <| r| | d  d j	 | d  d d | d | n  d } |
 d) k rd Gt   |
 Gd GHqn  |  d" d k s|  d$ d% k r8	t |  d | d |	 d | d d } t j d d	 |   } t j d
 d |  } x | d d/ k rl| d } qOW|	 r| j d d d | d | d |  | | d  d <n0 | j d | d | d | d |  | | d  d <| r	| | d  d j	 | d  d d | d | n  d } |
 d) k rId Gt   |
 Gd GHqIqL|  d! d k rm
t |  d | d |	 d | d d } t j d d	 |   } x | d d0 k r	| d } q	W|	 r	| j d d d | d | d |  | | d  d <n0 | j d | d | d | d |  | | d  d <| rA
| | d  d j	 | d  d d | d | n  d } |
 d) k rId Gt   |
 Gd GHqIqL|  d! d k rt |  d | d |	 d | d d } t j d d	 |   } x | d d1 k r
| d } q
W|	 r| j d d d | d | d |  | | d  d <n0 | j d | d | d | d |  | | d  d <| rv| | d  d j	 | d  d d | d | n  d } |
 d) k rId Gt   |
 Gd GHqIqL|  d! d& k s|  d" d' k rLt j d d	 |   } t j d
 d |  } x | d d2 k r| d } qWt |  d | | | <|
 d) k rId Gt   |
 Gd GHqIqLn  t   } | | _ | } d } | S(3   sX  
    Description:
        load occupancy data in '.wig' format file, or calculate occupancy from sequencing reads, use sequencing reads in '.bed','.sam', and '.bam' format as input, generate occupancy data in wiggle format.
    
    Parameters:
        path: a path to the directory of file(s) of sequence reads or occupancy data.
        cut: the cutoff for removing clonal reads, could be P value larger than 0 and small than 1, or count as a positive integer.
        save: set to 'False' if don't need to save the occupancy data in wiggle files.
        nor: the method to normalize the occupancy if multiple replicates are provided in the input directory, 'F': fold change, 'Q':quantile normalization, 'S': resampling, 'N': no normalization to be done.
        wgfmt: the format of the ouput wiggle files, currently support 'fixed' only.
        step: the step size of the occupancy data
        fs: average size of fragments that are subject to sequencing and generate the reads, only for signgle-end reads. When this value is not given, a fs value will be infered by the program.
        extend: a interger value, each read will be extend to this length.
        mifrsz: the minimal estimated average fragment size 
        mafrsz: the maximal estimated average fragment size
        paired: is the reads paired-end (set to 1) or single-end (set to 0)
    
    s   *.beds   *.bed.gzR   R   R   R#   R$  s   /+R   s   .gz$R/   i    R   i   R   R   R   R   is   .wigs   time elapsed:R$   s   *.bowties   *.bowtie.gzR   is   *.bamR%  s   *.samR&  s   *.wigs   *.wig.gziis   bed.gzis	   bowtie.gzR2   s   wig.gz(   R   R   N(   R   R   (   R   R   (   R   R   (   R   R   (   R   R   (   R   R   (   R   R   (   R   R   (   R   R   (   R{   R5   R|   R'  R   R   R   R   t   toWigR   R&   R    R   R   R   R   (   R5   R   R   R   R   R   R   R   R   R   R    R   RS   R   t   fnamet   out(    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyR~     s   A$  30 0 A$  30 0 %$  30 0 %$  30 0 A   $  30 0  $  30 0 $  30 0 $  30 0    		c         C   sM   xF t  |  D]8 } | j |  |  r* | S| j |  |  r d | Sq W| S(   Ni    (   Rt   R   (   R   RR   R_   t   d(    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyt   neighborPosition  s      iK   c         C   s\  i  } d } xt t  |   j   d D]\ } | j   } | j | d  sY i  | | d <n  d | | d t | d  <| d 7} q# Wi  }	 d }
 xt t  |  j   d D]\ } | j   } |	 j | d  s i  |	 | d <n  d |	 | d t | d  <|
 d 7}
 q Wi  } i  } x| d k s-|
 d k rt |  } t |	  } x| | D]t } | j |  sni  | | <n  xO | | D]C } t | | | |  } t |  | k  ry| | | | | <qyqyWqLWx| | D]t } | j |  si  | | <n  xO | | D]C } t | | | |  } t |  | k  r| | | | | <qqWqWx | D] } | j |  sli  | | <n  x | | D] } | | | } | | j |  r| | | | k r
| | | | <| | j |  | d 8} q
qw| | | | <| | j |  | d 8} qwWqJWx | D] } | j |  s;i  | | <n  x | | D] } | | | } | | j |  r| | | | k r| | | | <|	 | j |  |
 d 8}
 qqF| | | | <|	 | j |  |
 d 8}
 qFWqWqWi  } xZ | D]R } i  | | <x? | | D]3 } | | | } | | | | <| g | | | <qWqWx| | g D] } | d  k rmqUn  x t  |  j   d D] } | j   } | d t | d  } } t | | | |  } t | | | |  } t |  | k  st |  | k  rt |  t |  k  r.| | | | } n
 | | } | | | j
 |  qqWqUW| S(   Ni    i   ii   (   R   R   Ru   R   R   R   R-  t   absR   R&   R   (   R[   R\   R]   R^   R_   t   cpdt   ncR#  R  t   tpdt   ntt   c2tt   t2ct   tcpdt   ttpdR   R   R,  t   cpost   tposRJ   t   cdist   tdis(    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyt   combinePositions  s            
 $ 
s
   result.xlsc   :      C   s  | j  | j  k r d GHd S| j  } d GHt | | | | |  } i  i  i  } } } x t |  j   d D]t } | j   } | j | d  s i  i  i  | | d <| | d <| | d <n  d | | d t | d  | <ql Wx t |  j   d D]t } | j   } | j | d  sOi  i  i  | | d <| | d <| | d <n  d | | d t | d  | <q Wx | | g D] } | d  k rqn  x t |  j   d D]t } | j   } | j | d  si  i  i  | | d <| | d <| | d <n  d | | d t | d  | <qWqWd GHt d  } | j	   } | j	   } t |  d	  } | j
 d
 j d d d d d d d d d d d d d d d d d d d d d d  d! g   d g  g  g  g  f \ } } } } }  x| D]}! | j j |!  s| j j |!  r| j j |!  sFt | j |!  d | j |! <n  | j j |!  syt | j |!  d | j |! <n  | j j |!  st | j |!  d | j |! <qn q|! GH| |! j   }" |" j   x|" D]}# |# | |! |# d }$ }% |$ | | j |! j k rqn  |% | | j |! j k r7qn  t d" |$ d# |% d$ |! d% | d& | d' |	  }& |& \ }' }( }) | d 7} |
 d( k r9| j |! |$ | | j |! |% | k rt | | j |! |$ | t | j |! |% | d   d  }* q9t | | j |! |% | t | j |! |$ | d   d  }* n  t | |! |#  d) k  r0t |$ |% | j |! j | | | j |! j | | | j |! j | |  t |$ |%  d }+ }, }- |+ |, d) }. t d |. |  t |. | | j |! j | | | j |! j | | | j |! j | |  }+ }, |+ |, d) | }/ |+ |, d) | }0 |, |+ d) | }1 x |1 d k rt |-  t | j |! |0 |1  k r| j |! |0 |1 }- |0 |1 }/ n  t |-  t | j |! |0 |1  k r | j |! |0 |1 }- |0 |1 }/ n  |1 d 8}1 qbW| |! |# j t |/ |   n  | |! |# d }/ t | j |! |/ |  }2 t | |! |# d  d) k rxS | |! |# d D]< }0 t | j |! |0 |  }3 |3 |2 k r|0 }/ |3 }2 qqWn  | j |2  | j |*  |  j |'  t |/  }4 | j |!  sd* }5 n, | |! j |$ |  s<d* }5 n t |$  }5 | j |!  s`d* }6 n, | |! j |% |  sd* }6 n t |%  }6 |5 d* k r|6 d* k r|4 |4 }5 }6 n. |5 d* k r|5 |6 k n |6 d* k r|5 }6 n  |$ |% d) }7 | j |! t |7 |  t |7 |  t |7  t |5  t |6  t |4  t t |% |$   t | j |! |$ |  t | j |! |% |  t t t | j |! |% | d  t | j |! |% | d   t d)   t d |*  d* t | j |! |/ |  t | j |! |/ |  t t t d | j |! |/ |  t | j |! |/ | d   t d)   t d |2  d* t |(  t |)  t t t d |)  t |( d   t d)   t |'  d* | g  qWqW| d k rM| d k r(d }8 x% | j D] }! |8 | j |! j 7}8 q
Wt d+ t d, |8 d-   } n  d. GH| d  k rWt d/ | d0 | d1 |  } n  x9 t t |   D]% }9 t | | |9  d2 | | |9 <qjWx9 t t |   D]% }9 t | | |9  d2 | | |9 <qWd3 GH| d  k r
t d% | d& | d0 | d' |	 d1 |  } n  x@ t t |    D]) }9 t | d |  |9  d2 | |  |9 <qWn  x | D]} } | d k rt | | d  t | | d  t |  | d  | d4 <| d5 <| d6 <n  | j
 d
 j | d   d7  qTW| j   d  S(8   NsX   step values are different in the control and treatment wiggle data, work can not be donei    s3   combining nucleosome and differential positions ...i   ii   s0   calculating differential values for positions...sI   function(q,avg){return(0-ppois(q,avg,lower.tail=FALSE,log=TRUE)/log(10))}RE   s   	R   R  R  t   centert   control_smt_locat   treat_smt_locat   diff_smt_locat   treat2control_dist   control_smt_valt   treat_smt_valt
   smt_log2FCt   smt_diff_log10pvalt   smt_diff_FDRt   control_point_valt   treat_point_valt   point_log2FCt   point_diff_log10Pvalt   point_diff_FDRt   control_fuzziness_scoret   treat_fuzziness_scoret   fuzziness_log2FCt   fuzziness_diff_log10pvals   fuzziness_diff_FDR
t   pct   ptR   R;   RZ   R   R   i   R=   i i'  i  s*   calculating occupancy differential FDR ...RX   RY   RI   g      ?s*   calculating fuzziness differential FDR ...i   i   is   
(   R   R;  R   R   Ru   R   R   R&   R
   R  R   R   R   R   R   R  R   t   log10fuztestRq   R   Rw   R   R.  R   Rr   R   R   Rt   R   R   t   close(:   R>   R[   R\   R]   R^   R;   RZ   RX   R_   R   R:   R`   Ra   Rb   Rc   Rd   R   t   c2tDicR/  R1  t   dpdR#  R  RJ   t   ppoist   sumct   sumtt   outft   nuct   olinest   dr1t   dr2t   drR   t   cposesR7  t   p1t   p2R   t   dpt   sd1t   sd2t   dp1t   minpt   maxpt   maxvt   midpRQ   t   tpR:  t   dp2t   tdp2t   strpt   strp1t   strp2t   middleR   R   (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyR     s   	 /$ /$  /(3($$ ! ! $
  -
* CCec''" 	 	 	 	   	M@   # # * * M"i'  c         C   s  d } | d  k rn i  } x |  j D]E } i  | | <|  j | j d |  j | | d <| | | d 7} q" Wn< x9 | D]1 } x( | | D] } | | | | | 7} q Wqu Wt j d g  } | j | d d d }	 x| D]} x| | D]} | | | | | | }
 | d | | | d } } | | |  j k  rM| |  j } n  |  j j |  seq n  | j j |  s}q n  | |  j | j |  j | |  j k r|  j | j |  j | |  j } n  | | j | j | j | | j k r| j | j | j | | j } n  | | k r)q n  d } x | |
 k  rt | |  } t	 d | d | d | d	 |  d
 | d |  } | d | |	 <| d 7} |	 d 7}	 |	 d d k r2|	 Gd GHq2q2Wq Wq W|	 Gd GH| j
   d | S(   Ni    i   g        t   refchecki2   RO  RP  R   R;   RZ   R   i  s   simulated ...s
   simulated.(   R&   R   R   R   R  R  R  R   R   RQ  R  (   R;   RZ   RY   R   RI   t   gsR   R  t   vect   idt   countR  R  R   RQ   t   tempv(    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyR   i  sR    
#   ( %( % -

 	
c         C   s  t  d  } | j } d | d | } }	 x- t | | | |  D] }
 | |
 |
 7} qB W| |	 } | d  k rN|  | j | j d | k r d t |  g S|  | k  r d t |  g St d |  d | d | d | d	 | d
 | d |	  \ } } | | k rd t |  g St t	 | | | | |	   j
   d  } | t |  g SnF| j j |  st d |  d | d | d | d d  d	 |  S| j j |  st d |  d | d | d | d d  d	 |  } t | d | d | d f n  |  | j | j d | k r!d t |  t |  g S| | j | j d | k rXd t |  t |  g S|  | k  r}d t |  t |  g S| | k  rd t |  t |  g St d |  d | d | d | d	 | d
 | d |	  \ } } t d | d | d | d | d	 | d
 | d |	  \ } } | | k  rOt t	 | | | | |   j
   d  } n, t t	 | | | | |   j
   d  } | t |  t |  g Sd  S(   NsA   function(s,df1,df2){return(pf(s, df1, df2,log.p = TRUE)/log(10))}i    i   i   RQ   R   R2   R   R   t   bvt   bciRO  RP  R;   RZ   (   R
   R   Rt   R&   R   R   R	   t   varRq   Rr   Ru   R   t   fuztestt   reurn(   RO  RP  R   R;   RZ   R   t   pfR   Rv  Rw  R,  t   bvct   vt   cRQ   R   t   vct   cct   vtt   ct(    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyRQ    sF    	 
  9 , +-     99 /,c         C   s   d | | | } } |  | | | }	 }
 } d } x t  | | |  D]{ } | | } yJ |
 | | j | |	 | | | 7}
 | | j | |	 | | | 7} WqF |  G| G| j | j GHqF XqF W|
 | | g S(   Ni    (   Rt   R   R   (   RQ   R   R2   R   R   Rv  Rw  R  R  t   tp1t   v1t   c1t   miR,  t   vd(    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyRx    s    
%%c         C   s^  d } | d  k rn i  } x |  j D]E } i  | | <|  j | j d |  j | | d <| | | d 7} q" Wn< x9 | D]1 } x( | | D] } | | | | | 7} q Wqu Wt j d g  } | j | d d d } xt| D]l} xc| | D]W} | | | | | | } | | | | d }	 | |	 | | | |	 }
 } |
 |  j k  r[|  j }
 n  | |  j | j |  j |  j k r|  j | j |  j |  j } n  | |
 k rq n  d } x | | k  rCt |
 |  } y* d t |  j | | |  j  | | <Wn) d G| G| j G| G|  j | j GHn X| d 7} | d 7} qWq Wq W| j	   d | S(   Ni    i   g        Rp  i   t   wrong(
   R&   R   R   R   R  R  R  R   R.  R  (   RX   RY   RI   Rq  R   R  Rr  Rs  Rt  t   rlthR  R  R   RQ   (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyR     sF    
# $ !  * &

c         C   s   t  |  } t |   d } d | d } } t } x | r | | d } | | k rZ | S| d k rj d S|  | d | k r |  | | k r | S|  | | k r | } q6 |  | | k  r6 | } q6 q6 Wd S(   sf   
    find the rank of v in a list vec
    Note: the vec must have been ranked in decreasing order
    i   i    i   N(   Rq   Rw   R   (   Rr  R}  t   terminalR  R  t   findRQ   (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyR     s     	  $ 	 c         C   s  i  } x |  D] } d G| Gd GHt  |  } | j   x | D] } | j   } | d t | d  t | d  } } }	 | j |  s i  | | <n  | | j |  s |	 | | | <q= | | | |	 k  r= |	 | | | <q= q= W| j   q Wx | D] } | | j   }
 |
 j   t	 j
 d g  } | j t |
  d d d } x3 | | j k  r| | |
 | | | <| d 7} qMWt	 j
 |
  }
 i  | | <|
 | | d	 <| | | d
 <q Wd GHt   } d } d } x| D]} | Gd G| | d	 } | | d
 } | | j 7} | j Gd GHd } x*| d k rOd } t	 j
 d g  } t	 j
 d g  } | j d } | j | d d d | j | d d d | d k  rq&n  d } d } xd| | k  r| | d | | } | | k r| | | | | | <| | <| d 7} n | d 7} | | d | | d } | | k  rj| | | | | | <| | <| d 7} n | | | | d k  r| | | | d <| | | | d <ne | | | | d k r| | | | d d } | | | d <| | | | d d | | d <n  | d 7} qW| d | d | k r| d | d | d | d f \ | | <| | d <| | <| | d <| d 7} n | d | d k  r| d | d | | <| | <ni | d | d k r| d | d d | d | d d | | <| | <n | d | d | | <| | <| d 7} | d 7} | |  } | |  } q&W| j Gd GH| | | d	 <| | | d
 <qW| S(   Ns   reading froms   ...i    i   i   g        Rp  i   RQ   R}  s
   mering ...R9   s   summits, merging ...i   iit   left(   R   t   readlineRu   R   Rq   R   RR  R   R  R  R  R  Rw   R   R    (   RP   R@   RR   RJ   t   fiR#  R  R   R   t   fuzt   kst   vsR   t   ctimet   tnumt   onumR!  t   merget   npst   nvsR   t   nit   tdt   td2(    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyR     s    
,   

	 
%F 8


c         C   s   g  } t  |   } | j   xg | D]_ } | j   } | d | d } } | j |  sm i g  d 6| | <n  | | d j |  q# Wx- | D]% } t j | | d  | | d <q W| S(   Ni    i   RQ   (   R   R  Ru   R   R   R  R  (   RJ   RR   R  R#  R  R   R   (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyR   T  s    
 #g?c      	   C   sA  t  |  j   } | d  k r0 t  | d  } n  | d  k rP | j | d  n  i  }	 | | j | | j d | j }
 } } xM| d D]A} | j   } | d t | d  t | d  } } } |	 j |  s i  |	 | <n  | | k r| |	 | | <q | | } t	 | j
 | | d  | j
 | t	 | |
 d  | !j   | j
 | | t | | | j
 | j  !j   } } } t	 | |  } | | | | k r | | | | d k r | |	 | | <q q Wx |  D] } |	 j |  si  |	 | <n  x |  | d D] } |	 | j |  st d | d	 |	 | d
 |  } t |  | k rd j | d d t |  d d g  d |	 | | <qqqWqWx |	 D] } |	 | j   } | j   | d  k rx7 | D], } x# |	 | | D] } | j |  qWqWn  i  |	 | <t j |  |	 | d <qW| d  k r=| j   n  |	 S(   NRE   i    i   i   i   g      ?RQ   R   RR   R_   s   	R=   s   
(   R   R   R&   R   R   Ru   R   Rq   R   R   R   R   R   R-  R.  R   Rr   R   R  R  R  RR  (   RR   RS   RT   RU   R@   RV   RW   R"  R   R   R  t   btdR   R#  R  R   R   R  Ri  t   ht   mi1t   mi2R  R,  R!  RQ   (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyR   b  sP      ),  
t,   A
 
 i   i  i  c
         C   s  i  }
 x |  D] } t  |  |
 | <q Wi  } xd |
 D]\ } |
 | j d d  d | d | d | d | d d d d	 d
 d d t d d d t  | | <q4 Wi  } x | D] } | | } x | D] } | j |  s i  | | <n  xs | | D]g } | | j |  s| | | | | | <q | | | | | | k  r | | | | | | <q q Wq Wq WxN |
 D]F } |
 | j d | d | d  d d | d | d | d d d t  q_Wd  S(   NR>   R?   R@   RA   RB   RC   i   RD   RE   RF   RG   t   foldi    t   suppressRI   RJ   is	   peaks.xls(   R   R   R&   Rx   R   R   R   (   t   wfsR?   R@   RA   RB   t   linkfoldt   linkLogPt   binSizet   wsizet   wstept   wdRJ   t   pkgR   t   pkst   tpksR   R  (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyt   scnMerge  s&      Z
   %R   i   i   i c   4   $   C   s  d d l  m } d d l  m } t j j |  sB t j |  n  t |   \ } } t |  } i  i d d 6d d 6d d 6d d 6d d	 6d d
 6d d 6d d 6} } x(| D] } d G| GHt j j	 | |  | | } } t
 | d d  j   i  | | <t j j |  ru| | d | d | d | d d d d | d | d | d | d | d | d | | \ | | <| | <n t d |  t d |  | | <| | <x t j t j j	 | d   D] }  d |  GH| |  d | d | d | d d d d | d | d | d | d | d | d | | }! | | j |! d  | | j |! d  qWd G| GHx: d d d d d d d	 d
 d g	 D] }" |" G| | |" GHqyW| r d  GH| | j | d!  | | j | d"  q q Wt d |  }# | |# _ |# j d# | d$ | d% |  }$ d }% x | D] } |% |$ | 7}% qW|% d& t |$ j    }& x | D] } |# j | j   }% |# j | j |& |$ |  d' G| Gd( G|% Gd) G|# j | j   GH| rL|# j | j t j j	 | |  d*  qLqLWx | D] } d+ G| Gd, GH| | j   }% | | j |& |$ |  d- G|% Gd) G| | j   GH| rf| | j t j j	 | |  d.  n  | | j d/ t j j	 | |  d0 d1 |	 d2 |
 d3 | d4 | d5 d d6 d d7 d d8 t d9 d d: t  | | <qWd; GHi  }' x | D] }( x | |( D] }) |' j |)  si  |' |) <n  x | |( |) D]s }* |' |) j |*  s]| |( |) |* |' |) |* <q'|' |) |* | |( |) |* k  r'| |( |) |* |' |) |* <q'q'WqWqWt |' d2 |
 }' x~ | D]v } d< G| Gd, GH| d= |' d> t j j	 | | d  d? t j j	 | | d@  d | d9 d dA d d | dB | dC |  	qWt |  d k rx| D]x}+ dD G|+ Gd, GH|# j |+ d j |# j |+ d  }, |, j t j j	 | |+ d dE |+ d dF   |, j d/ t j j	 | |+ d dE |+ d dG  d1 |	 d2 |
 d3 | d4 | d5 d d6 d d7 d d8 t d9 d d: t  |, j d  |, j d/ t j j	 | |+ d dE |+ d dH  d1 |	 d2 |
 d3 | d4 | d5 d d6 d d7 d d8 t d9 d d: t  |$ |+ d |$ |+ d t j j	 | |+ d dI  t j j	 | |+ d dI  t j j	 | |+ d d@  t j j	 | |+ d d@  t j j	 | dE j	 |+  d@  f \ }- }. }/ }0 }1 }2 }3 t dJ |- dK |. dL |/ dM |0 dN |1 dO |2 dP |3 d | d1 |	 d2 |
 d3 | d4 | dQ | dR | d | dB | dC |  qRWn  dS GHdT S(U   sB   
    parameter:
    norto: may be 'mappable','unique','trans'
    i(   t   translocationReads(   t   translocationLinksi    t
   unmappablet
   non_uniquet   unique_intert   unique_intrat   unique_othert   uniquet   mappablet   transs   
s   .samRE   t   bindicR  t   outReadsFilet   outmodet   at   pdisR   t   mapqt   clipSizet   intert   intrat
   readsCounts   *sami   s   
reads in groupt   allR/   s
   .trans.wigs   .all.wigR)   R*   t   bnumg      ?s
   
normalizes!   wiggle data of all all reads fromt   tos   .all.nor.wigs   
calling fors   ...s   normalize froms   .trans.nor.wigR>   s   .trans.sites.xlsR?   R@   RA   RB   RC   RD   RF   RG   R  R  s(   
merging peaks by head-tail distance ...s   
linkingRH   t   samFilet   linkfiles   .trans.links.xlst   logPR  R  s
   
comparingR=   s   .all.cnv.wigs   .all.cnv.sites.gain.xlss   .all.cnv.sites.loss.xlss
   .trans.samt   tn1t   tn2t   sf1t   sf2t   lf1t   lf2R   R  R  s   
all job done, cheers!

N(   t   libR  R  R{   R5   t   existsR}   Rv   R   R   R   RR  R   R   R'  R   R   R   R   R   Rw   R   R  R   R   Rx   R   R   R   RU  t
   scnCompare(4   R   R   R  R   R  R  R  R  t   saveWigR?   R@   RA   RB   t   zscoreR  R  R  R  R  R)   R*   R  R  R  R   R   t   allWigsR  R  R   t   groupfilenameR5   RJ   R   R  t   uwigsR,   t   tsumt   taverageR  R  R   R  t   pairt   diffwigR  R  R  R  R  R  R   (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyt   scn  s     E	 
 g'%	V	(	 & . 'm    )g%0ootgMbP?c   +      C   s;  t  |  t  d  } |  | d } d G|  Gd G| Gd G| GH| |  | | } } t |  t |  t | d  } } } | j   j   } | j   j   } | j d j | d  | d	 d
 !d d d d d d d d d d g
  d  xJ| D]B} | j   } | j   | j   } } t } xI d d d d d d	 d d d d g
 D]# }  | |  | |  k rGt } qGqGW| rd Gt d  GHq n  | d d  k s | d	 d  k rq n  t	 | d!  t	 | d!  t
 | d"  t
 | d"  t
 | d#  t
 | d#  f \ }! }" }# }$ }% }& |! |" d k r)q n  t d$ |! |  |" | g  }' t	 |! | d%  t	 |" | d%  }( }) t  t d |(  d& t d |)   t  d  }* |' | k  r | j d j | d  | d	 d
 !t |!  t |"  t |#  t |$  t |%  t |&  t |(  t |)  t |*  t |'  g
  d  q q Wd  S('   Ni
   g       @s   normalize reads count fromR   R  RE   s   	i   i   i   t   observationAt   observationBt	   expectedAt	   expectedBt   log10PAt   log10PBt   normalized_observationAt   normalized_observationBt   log2FCt
   log10Pdiffs   
i    i   i   i   i   i   i   i	   s   wrong line:iR=   i   i   i   R  g      ?g      ?(   R   R   R  Ru   R   R   Rx   R   R#  R   Rq   R   R   Rr   (+   R  R  R  R  R  R  R   t   pvalueR  R?   R@   RA   RB   R  R  R  R  R  R  t   tnt   nf1t   nf2R   R   t   fR   R   t   l1t   l2R   R   R  R   t   o1t   o2t   e1t   e2R_  R`  t   pvt   no1t   no2t   logFC(    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyR    s8    )K+   f )0 t   __main__RE   (9   R    R  R'  R{   R   R   R2   R   t   summitsR   R   t   copyR   t   randomR   t   mathR   R   R	   t   rpy2.robjectsR
   R   R   t   sysR&   Rx   R   R   Rv   Ry   Rz   R   R   R   R   R   R   R   R~   R-  R;  R   R   RQ  Rx  R   R   R   R   R   R  R  R  t   __name__t   fdopent   stdoutt   fileno(    (    (    sI   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/functions.pyt   <module>   s^   $		! 	 .	,$-"		
	!'z	U*+	"	_	2$HQ-