ó
ÚßZc           @   sŸ  d  d l  Z  d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l	 j
 Z
 d  d l m Z d  d l m Z d  d l m Z d  d l m Z d  d l m Z d  d l m Z d  d l m Z d  d	 l m Z d  d l Z d  d
 l Td Z d Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z  d „  Z! d „  Z" d „  Z# d „  Z$ d „  Z% d „  Z& d „  Z' d d „ Z( d „  Z) d S(   iÿÿÿÿN(   t   importr(   t   resource_string(   t   mannwhitneyu(   t	   ttest_ind(   t   ks_2samp(   t   combine_pvalues(   t   defaultdict(   t   deque(   t   *i    c      	   C   sƒ  |  d } |  d t  k r& d G| GHn  t j j | ƒ sX |  d t k rT d G| GHn  d  St } t j | d ƒ } t } y | j t	 j
 ƒ s— t } n  Wn2 |  d t k rÃ d t	 j
 | f GHn  t } n X| r]t	 j | ƒ \ } } } t	 j | ƒ } |  j d ƒ r#|  d | k r#t } n  | r3|  j d ƒ r3|  j d	 ƒ r3|  j d
 ƒ r÷|  |  d d j |  d | f ƒ r÷|  |  d d |  d | f j ƒ  }	 t |	 ƒ d k r÷t |  |  d d |  d | f |	 d ƒ |  d
 k r÷t } q÷n  | r3| |  d k s'| t | ƒ |  d	 k  r3t } q3n  | ryxt t | ƒ ƒ D]}
 | d k rk|
 | } n | t | ƒ d |
 } |  j d ƒ rÊ|  j d	 ƒ rÊ| |  d k  sL| |  d	 k rÊqLqÊn  | d |
 |  |  d d | | f | <|  |  d d | | f | | d |
 c d 7<|  |  d d | | f | j | d |
 ƒ qLWqyn |  d t k ryd G| GHn  Wd  QXd  S(   Nt   fast5filenamet   outLevels   Read:s   Error!! no such filet   rs   Cannot find %s in %st   Chrt	   start_post   end_post   checkNt   cur_wrkBaset	   norm_meani    t   +i   t   baset   basedicts   INFO: no alignment info for(   t   OUTPUT_DEBUGt   ost   patht   isfilet   OUTPUT_ERRORt   Truet   h5pyt   Filet   __contains__t   myFast5t   rawAlignment_fullt   Falset   ReadMapInfoInReft   ReadNanoraw_eventst   has_keyt   keyst   lent   ranget   appendt   OUTPUT_INFO(   t   moptionst   fnt   tocont   mf5t   noerrort   mapped_chromt   mapped_startt   mapped_strandt   nanoraw_eventst   mposkeyst   it   curpos(    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyt   mReadSignalBase!   sT    
    
 	$4&J0 	(29 c   )      C   sð  g  } g  } g  } | d d } | d d } | d d } |  d d k r² |  d d k r² d |  d d |  d d | d d	 d | d d d | d | | | d f }	 nC d
 |  d d |  d d | d d d | d | | | d f }	 |  |  d d }
 |  |  d d } |
 | g } | | f } t  } i  } |  d j | ƒ } | G| G| GH|  d } |  d d k rŠt | d ƒ } n  x+t | | | | d ƒ D]} t |  d | | ƒ r	|  d | d d } |  d | d } | | | |
 d | | f <n t } | rPn  x˜t t | ƒ ƒ D]„} | | d | | } xe| | d | | D]M} | j d | d ƒ |  d d k r%|  d d k r%t d k rñ| j d | d | | d d | d d | d d | d	 d f ƒ q–| j d | d | | d d | d	 d f ƒ nq t d k rp| j d | d | | d d | d d | d d f ƒ n& | j d | d | | d d f ƒ | j t	 | d	 ƒ ƒ q_Wq,Wq¦W| sì|  d d } |  d d k rþ|  d } | d k  rþd } qþn  | d d d k r’d | | d d k  o9d | k  n r’d G| d G|  d G|  d G| d d G| d d G| d d d G| d d	 GHn  g  } g  g  g  g  g } | j
 ƒ  } | j ƒ  xM| D]E} |  d d k rŒ|  d d k rŒd | d d | d f Gd | | d d | | d d | | d d | | d d | | d d | | d d | | d	 d | | d	 d f GHn} d | d d | d f Gd | | d d | | d d | | d d | | d d | | d d | | d d f GH| j d | d d | d f ƒ | d j t	 t j | | d d ƒ d	 ƒ ƒ | d j t	 t j | | d d ƒ d	 ƒ ƒ | d j t	 t j | | d d ƒ d	 ƒ ƒ |  d d k rÇ|  d d k rÇ| d	 j t	 t j | | d	 d ƒ d	 ƒ ƒ qÇqÇWd GHi t j | ƒ d 6t j | d ƒ d 6} t j | ƒ } i t j | ƒ d 6t j | d ƒ d 6} t j | ƒ }  i t j | ƒ d 6t j | d ƒ d 6}! t j |! ƒ }" |  d d k r|  d d k ri t j | ƒ d 6t j | d	 ƒ d 6}# n* i t j g  ƒ d 6t j | d	 ƒ d 6}# t j |# ƒ }$ i t j | ƒ d 6t j | ƒ d 6t j t j | ƒ ƒ d  6}% t j |% ƒ }& t j |	 g ƒ }' t j t g ƒ }( t j j ƒ  t j d! |& | |  |" |$ |' |( ƒ n  | S("   Ni    i   i   t   neighborPvaluest
   testMethodt   kssT   1=%s VS
 2=%s:
 p-value=%.1E (ks test p=%.1E) at pos %d of %s strand in %s. Rank %d t   ds2i   sD   1=%s VS
 2=%s:
 p-value=%.1E at pos %d of %s strand in %s. Rank %d  t	   sign_testt   windowt   RegionRankbySTR   R   s   %ds   %d/%s
%.1E
%.1E
%.1E
%.1Es   %d/%s
%.1E
%.1Es   %d/%s
%.1E
%.1E
%.1Es
   %d/%s
%.1Et   -i   t   Rankt   FileIDs   %d/%ss6   u=%.3E(%.3E) t=%.3E(%.3E) ks=%.3E(%.3E) pv5=%.3E(%.3E)s'   u=%.3E(%.3E) t=%.3E(%.3E) ks=%.3E(%.3E)t    t   Positiont   Pvaluet   Signalt   DSt   Base_Most_Significant_Plot(   R    t   indext   intR&   t	   pos_checkR   R%   R'   t   has_utt   roundR$   t   sortt   matht   log10t   robjectst	   StrVectort   FloatVectort	   DataFramet   FactorVectort	   IntVectort   has_boxplott   syst   stdoutt   flusht	   globalenv()   R)   t   significant_post   curnt   m_signalt   m_post   m_dst   curchrt	   curstrandR4   t   mtitlet   ds0t   ds1R9   t   skt   noenought   pv3t   cur_indt   nearybysizet   mindt   pkt   pvt   mds_indt   mnat   sgt	   closesizet   poskeyst   pvsp3t   pv3keyst   pv3kt   stut   strut   sttt   strtt   stkst   strkst   stcbt   strcbt   pydft   plotDatt   mrtitlet   mhasbox(    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyt   plot1j   s     !UC
 " !J4?&"
 @T  
!ž}#///!6* * * !-*?&c         C   s,  |  d } |  d } t  d ƒ } t  d ƒ t  d ƒ t j t t d ƒ ƒ d } |  d d	 k ri d
 } n  t d	 k r© t j d | d | d d |  d | d ƒ n1 t j d | d | d d |  d | d ƒ |  d d } |  d d	 k r|  d } | d	 k  rd	 } qn  d } g  } xò |  d D]æ } t }	 xj | D]b }
 |
 d | d d k rD|
 d	 | d d	 k rDt |
 d | d d ƒ | k  rDt }	 PqDqDW|	 r¶q1n  t	 |  | | ƒ s| j
 | d d | d d	 | d d f ƒ | d	 } n  | |  d k r1Pq1q1Wt j d ƒ d  S(   NR?   t	   outFoldert   ggplot2t	   gridExtrat   scaless$   Rscript/Base_Most_Significant_Plot.Rg333333û?R<   i   g      @s   pdf("s   /rplot_s   .pdf", width=s   %.0fR;   s   , height=10, onefile = TRUE)s   , height=4.5, onefile = TRUE)R6   i   i    t   sorted_sign_testt   topNs	   dev.off()(   R    RN   R   R   t   __name__RT   R    t   absR   R   R'   (   R)   t   fignamet
   mresfoldert   ggplott	   wdenlargeRn   RZ   t
   output_post   mostpt   too_close_to_previoust   pre_pos(    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyt   mboxploté   sD    



 	41
 0"  . c         C   s  x|  d D]} |  | d } | j  ƒ  } | j ƒ  xÕ | D]Í } |  d t k rp d G| G| Gt | | ƒ GHn  | | j  ƒ  } | j ƒ  xM | D]E } t | | | ƒ |  d k  r‘ | | | =|  | d | | =q‘ q‘ Wt | | ƒ d k r< | | =|  | d | =q< q< Wq Wd  S(   NR9   R   R
   s   Info: t   MinCoverageR   i    (   R$   RK   R   R%   (   R)   t   dsnt   curdst
   strandkeysRc   Ro   Ri   (    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyt   mfilter_coverage  s      
  
c         C   s$   |  t  j j k  r t  j j S|  Sd  S(   N(   RU   t
   float_infot   min(   t   fv(    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyt   m_min_float  s    
c         C   s$   |  t  j j k r t  j j S|  Sd  S(   N(   RU   R–   t   max(   R˜   (    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyt   m_max_float$  s    
c   	      C   s¬   t  |  | ƒ \ } } t | ƒ } t | ƒ } t |  | d t ƒ\ } } t | ƒ } t | ƒ } t |  | ƒ \ } } t | ƒ } t | ƒ } | | f | | f | | f g S(   Nt	   equal_var(   R   R™   R›   R   R    R   (	   t   at   bt   stt   puRs   t   ptRu   t   pksRw   (    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyt   getUtest)  s    c         C   s²   | d k  s" | t  |  ƒ d k r& t S| | k s¦ |  | d d |  | d d k rª |  | d d |  | d d k rª | | |  | d d |  | d d k rª t St Sd  S(   Ni    i   i   (   R%   R    R   (   t   mlistR3   t   j(    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyRH   9  s    " tc         C   s\   xU t  t |  d ƒ ƒ D]= } t |  | ƒ } | d  k s |  d | d j | ƒ q q Wd  S(   NR:   i   (   R&   R%   t   get_combin_pvaluet   NoneR'   (   R)   R3   t   comb_pv(    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyt   combin_pvalues@  s    c   	      C   sË  |  d d k rŸt  |  d ƒ d k rŸg  } x™ t | |  d | |  d d ƒ D]t } | d k  s“ | t  |  d ƒ d k s“ t |  d | | ƒ r£ | j d ƒ qP | j |  d | d d d ƒ qP W|  d d k rí t | ƒ \ } } n  |  d d	 k r}d
 } | g } xM t |  d ƒ D]; } | j d | d |  d ƒ | j | d |  d ƒ qWt | d d	 d | ƒ\ } } n  t | ƒ } t | ƒ } | | f S|  d d k rÃ|  d | d d Sd  Sd  S(   NR6   i    R:   i   g      ð?i   R7   t   fishert   stoufferid   t
   WeightsDifiÿÿÿÿt   methodt   weights(	   R%   R&   RH   R'   R   t   insertR™   R›   R§   (	   R)   R3   t   pvalue_neighborsR¥   t	   comb_p_stt   comb_p_pt	   midweightt   mweightst   k(    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyR¦   F  s*    &*=%	!
 c            s¾  ˆ  ˆ  d d } ˆ  ˆ  d d } | d j  ƒ  } | j ƒ  g  ˆ  d <t j ƒ  } xX| D]P} | d j | ƒ r[ | d | j  ƒ  } | j ƒ  x| D]} | d | j | ƒ r™ | d | | | d | | k sIˆ  d t k rId G| G| G| d | | G| d | | G| d	 | | j ƒ  G| d	 | | j ƒ  GHqIn  ˆ  d j | d | d | | d | | f t | d | | | d | | ƒ f ƒ q™ q™ Wq[ q[ Wt j ƒ  } ˆ  d t k rÛd
 | | GHn  t j ƒ  } ˆ  d d k st	 ˆ  ƒ n  t j ƒ  } ˆ  d t
 k r0d | | GHn  ˆ  d d k rOd }	 d ‰ n d }	 d ‰ d ‰ ˆ  d d k szd ‰ n  t j ƒ  } t ˆ  ƒ ˆ  d d k rót ˆ  d d ‡ ‡ f d †  ƒˆ  d <|	 d k rhˆ  d d  d  d … ˆ  d <qhnug  }
 ˆ  d d ˆ  d <t ˆ  d ˆ  d d ƒ } ˆ  d t k rYd G| Gt ˆ  d ƒ Gˆ  d GHn  ˆ  d } ˆ  d d k r|d } n  t d „  ƒ } t t ƒ } xx ˆ  d D]l } | d d | d f | | d d | d d f | d d <| d d | | d d | d d f <q¢W| j  ƒ  } | j ƒ  xm | D]e } ˆ  d t k r/| G| | Gt | | ƒ GH| | d d G| | d d G| | d d GHq/q/Wxi| D]a} xXt d | | | ƒ D]@} g  } t } x¾ | D]¶ } | | } | d k  s| | | k s| | j | ƒ rt } Pn  t } ˆ  j d  ƒ sgt ˆ  d  ƒ d k sgˆ  d  | | | d k rÕ| j | | | d ˆ ˆ ƒ qÕqÕW| s¼t j | ƒ } | j ƒ  t | ƒ d! k rü|
 j | d | d | | | | d f | | f ƒ qüq¼q¼WqŸWg  ˆ  d <t |
 d ‡  f d" †  ƒ} t rxä t d# ƒ D]Ó } | | d G| | d t ˆ  d$ t | | d ƒ d d% ƒ Gˆ  d$ Gˆ  d$ t | | d ƒ d d% Gt | | d ƒ Gt ˆ  d | | d j | | d d ƒ ƒ G| | d j | | d d ƒ GHq<Wn  ˆ  d d k r/g  } n  x6t t | ƒ ƒ D]"} ˆ  d d k rt } x’ | D]Š } | | d d | | d d k re| | d d | | d d k ret | | d d | | d d ƒ ˆ  d k  ret } PqeqeW| rÿqBn  | j | ƒ n  ˆ  d j | | d | | | d d | | d d f | | d d d f ƒ qBWt j ƒ  } ˆ  d t
 k r”d& | | GHn  ˆ  d t
 k rºd' Gt ˆ  d ƒ GHn  d  S((   NR9   i    i   R   R:   R   R
   s   Error not equalR   s$   Producing pvalues: consuming time %dR7   R8   s$   Combining pvalues: consuming time %dt   rankUseRj   t   pvalueRŸ   i   i   R<   t   keyc            s1   |  d ˆ  ˆ |  d d ˆ |  d d ˆ f S(   Ni   i   i    (    (   t   mpv(   t
   sorted_indt   use_pind(    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyt   <lambda>“  s    R„   iÿÿÿÿR;   t   windlistt   WindOvlpc           S   s
   t  t ƒ S(   N(   R   t   set(    (    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyR¼   ž  s    i  i  i  t   NAi   c            sU   |  d t  ˆ  d t |  d ƒ d d ƒ t ˆ  d |  d j |  d d ƒ ƒ f S(   Ni   t
   percentileg      à?R;   i   i    (   RG   R%   R‡   RF   (   R¹   (   R)   (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyR¼   ¸  s    i
   RÁ   g      à?s/   Sorting according to pvalues: consuming time %ds   Info in sign_test(   R$   RK   t   timeR#   R   t   itemsR'   R£   R(   R©   R   t	   save_testt   sortedR&   R%   R   RG   R   R    t   copyt   deepcopyR‡   RF   (   R)   Ra   Rb   R”   t
   start_timeRc   Ro   Ri   t   end_timet   rank_use_p_or_stt   windsegR½   t   movesizet   strand_specifict   strans_specific_maxt   mpt   strandskeyst   pvlistt   not50t   windt   curposkt   opvlistt   windseg_sortt   r1t   ovlap_higher_rankt   rmp_indt   closeneighbort   pre_ind(    (   R)   Rº   R»   sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyt   mtest2k  sÄ     

 
$_f    	 	
&! "
 	>, 
8 
0 B(
D
Ô 	z  Y  c         C   si  |  d d k r d  S|  d d |  d d } |  d t  k rJ d G| GHn  t | d	 ƒ } x	|  d
 D]ý } | j d | d d | d d | d d | d d | d d d | d d d | d d d | d d d | d d d | d d d f
 ƒ |  d d k rT|  d d k rT| j d | d d d | d d d f ƒ qd | j d ƒ qd Wd  S(   Nt   SaveTesti    R€   t   /R?   s   _sign_test.txtR
   s   Test data is saved int   wR:   s)   %s %s %d %s %.3f %.3E %.3f %.3E %.3f %.3Ei   i   i   R6   R7   R8   s    %.3f %.3E
s   
(   R   t   opent   write(   R)   t   txtfilet	   txtwriterR   (    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyRÄ   Î  s      —!2c      	   C   sÝ  |  j  d ƒ r„ |  d } |  d } | | } | d k  rB d } n  | | } | |  d <| |  d <|  d G|  d G|  d G|  d GHn  |  d } |  d |  d g |  d	 <x0t t |  d	 ƒ ƒ D]} t j ƒ  } |  d	 | } | |  d
 <i  |  |  d
 <t d „  ƒ |  |  d
 d <t d „  ƒ |  |  d
 d <t d „  ƒ |  |  d
 d <d }	 | g }
 x} t |
 ƒ d k rÔg  } |  d t k rŒd G|
 GHn  x< |
 D]4 } t | |  |	 | | | ƒ \ } }	 | j | ƒ q“W| }
 qXWq½ Wd  S(   Nt   PosR;   i    R   R   s   .fast5t   wrkBase1t   wrkBase2R9   R   c           S   s
   t  t ƒ S(   N(   R   t   str(    (    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyR¼   ô  s    R   c           S   s
   t  t ƒ S(   N(   R   t   list(    (    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyR¼   õ  s    R   c           S   s   t  d „  ƒ S(   Nc           S   s
   t  t ƒ S(   N(   R   RG   (    (    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyR¼   ö  s    (   R   (    (    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyR¼   ö  s    R   R
   s   .sub_fast5_folder(   R#   R&   R%   RÂ   R   t   OUTPUT_WARNINGt   readsubfoldert   extend(   R)   t	   neighborst   pos_of_interestR   R   t   f5suft   cur_wrkBase_indRÈ   R   t   f5numt   f5subt   f5subnewt   cursubt   f5subadd(    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyt   ReadAllFast5Ý  s<    


 	


$

	 !s   .fast5c         C   sÏ  g  } t  j |  ƒ } x†t t | ƒ ƒ D]r} | | }	 | j d ƒ rx| | d d j | d d f ƒ rx| | d d j | d d f ƒ rx| | d d | d d f j ƒ  }
 | | d d | d d f j ƒ  } t |
 ƒ d k rxt | | d d | d d f |
 d ƒ | d k rxt | ƒ d k rxt | | d d | d d f | d ƒ | d k rxPqxn  |	 t | ƒ | k rY|  d |	 | d	 <| d k rB| d
 d k rB| d t k rB| | d d j ƒ  } | j ƒ  d G| GH| | d d | d j ƒ  }
 | | d d | d j ƒ  } | G| G|	 d Gt |
 ƒ d k r| d Gt |
 ƒ G|
 d Gt | | d d | d |
 d ƒ Gn d Gt | ƒ d k rö| d Gt | ƒ G| d Gt | | d d | d | d ƒ Gn d Gt j ƒ  } d | | G| j d ƒ r-| d GHn d GHt	 j
 j ƒ  n  t | ƒ | d } q( t  j j |  d |	 ƒ r( |	 d k sš| j |  d |	 ƒ qšq( q( Wt | ƒ d k rÅd G|  GHd G| GHn  | | g S(   NR   R   R   R   R=   R   i    RÞ   R	   iè  R
   t   chrkeysi   iÎÿÿÿs   consuming time=%dR@   t   mallt   Unders   	 find(   R   t   listdirR&   R%   R#   R$   Ré   RK   RÂ   RU   RV   RW   R5   R   t   isdirR'   (   Ró   R)   Rð   Rï   RÈ   Rî   Rñ   t   f5listt   f5_indt   f5t	   mposkeys1t	   mposkeys2Rö   RÉ   (    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyRê     sN    
Y&&”, 
	  CC 
	c         C   s4   t  j GHt |  ƒ t |  ƒ t |  ƒ t |  ƒ d  S(   N(   R   R   Rõ   R•   RÜ   R   (   R)   (    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyt   mDetect/  s
    


(*   R   RU   RÆ   t   stringRL   R   t   numpyt   npRÂ   t   rpy2.robjectsRN   t   rpy2.robjects.packagesR    t   pkg_resourcesR   t   scipy.statsR   R   R   R   t   collectionsR   R   R   t   myComRT   RI   R5   R   R   R•   R™   R›   R£   RH   R©   R¦   RÜ   RÄ   Rõ   Rê   R   (    (    (    sO   /oak/stanford/groups/akundaje/marinovg/programs/NanoMod/bin/scripts/myDetect.pyt   <module>   sF   
	I		&							%	c		'+