ó
ËdTc           @   s   d  d l  Z  d  d l Z d  d l Z d  d l Z d  d l m Z d  d l m Z d  d l m Z d e d d „ Z
 d „  Z d e d d „ Z d	 d d
 d
 e e d e d e d „
 Z d d „ Z d d „ Z e d k rü e  j e j j ƒ  d d ƒ e _ n  d S(   iÿÿÿÿN(   t   Wig(   t   glob(   t   timei
   c      	   C   s¿  t  ƒ  } | d k rŸ d G|  Gd GH| d  d } t d |  d | d | d	 | ƒ }	 |	 j d | ƒ |	 j d | d
 d d | d	 | ƒ d Gt  ƒ  | GHt  ƒ  } n |  } d G| Gd GH|  d  d }
 x  t j j |
 ƒ râ |
 d }
 qÃ Wt j |
 ƒ | d  k r-d | d t	 | ƒ d t	 |
 ƒ d | } n  d | d t	 |
 ƒ d | } t j
 | ƒ | d k r‡d G| Gd GHt j
 d | ƒ n  d G|
 GHt j
 d t	 |
 ƒ d ƒ d Gt  ƒ  | GHd  S(   Nt   wigs   
convertings   ...iýÿÿÿs   raw.wiqt   filet   gfilet   stept   suppresst   formatt   wiqs
   time cost:s   
sortingt   temps   .temps   sort -r -n -s -k1 -k2 -o s    --buffer-size s    --temporary-directory t    s	   Removing s   rm t   removings    -r(   R   R    t
   ajust_sizet   savet   ost   patht   isdirt   mkdirt   Nonet   strt   system(   t   ifilet
   sort_ofileR   R   R   R   t   buffert   tmt	   raw_ofilet   wgR
   t   cmd(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wiq.pyt   rawsort   s2    	!"  1 	c         C   s  d GHt  ƒ  } |  j d ƒ } i  } t | d ƒ } x | D] } t | ƒ | | <q9 Wt | ƒ } xŒ | | d D]| }	 |	 j ƒ  }
 t |
 d ƒ } x< | d D]0 } | | j ƒ  } | t | j ƒ  d ƒ 7} q› W| j t | | ƒ d ƒ qn W| j ƒ  d Gt  ƒ  | GHd  S(   Ns   
Preparing reference ...t   :t   wi    i   s   	-	-	-
s
   time cost:(	   R   t   splitt   opent   lent   floatt   readlinet   writeR   t   close(   t   pathst   ofileR   R   t   filest   fit   foR   t   nfilet   linet   colt   vt   add_line(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wiq.pyt   refquantile    s"    	 
c         C   sZ  d d l  m } |  | k r, d G|  Gd GHn d GHt ƒ  } t |  ƒ t | ƒ }	 }
 t d | d | ƒ } x‹ |	 D]ƒ } | j ƒ  } |
 j ƒ  j ƒ  } t | ƒ d k r± d	 g } n  | d
 t | d ƒ | t	 | d ƒ } } } | | j
 | | <qo Wd } x |
 D] } | d 7} qW| d k r3d G| Gd GHn  | j | d | ƒd Gt ƒ  | GHd  S(   Niÿÿÿÿ(   t   randints   
normalizings   ...s   
saving reference ...R   R   i    g        i   i   i   sK   Warning: the input genome size is smaller than the reference genome size bys   wiggle steps!R   s
   time cost:(   t   randomR2   R   R!   R    R    R$   R"   t   intR#   t   dataR   (   R   t   refR(   R   R   R   R   R2   R   R*   t   frR   R-   R.   t   rcolt   crt   post   vlt   n(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wiq.pyt   changevalue2   s*     	 0  t
   wiq_resultR   i@B c         C   sÐ  t  |
 d ƒ }
 | d k r% t } n t } | d k r@ t } n t } t ƒ  } t j j | ƒ sq t j | ƒ n  i  i  } } xÉ |  j d ƒ D]¸ } t j j	 | ƒ r³ d | | <qŽ t j j | ƒ rŽ | d k rx0 t
 t j j | d ƒ ƒ D] } d | | <qí Wn  | d k rFx0 t
 t j j | d ƒ ƒ D] } d | | <q,WqFqŽ qŽ Wxù | D]ñ } t j d d	 | ƒ } x> | d
 d k sŸ| d
 d	 k sŸ| d
 d k r¬| d } qoWt j j | | ƒ } | d k sÚ| t k r0| d  d | | d  d <t d | d | d  d d | d | d | d |	 d |
 ƒ qQ| d  d | | <qQW| rdd Gt ƒ  | Gd GHd  S| d  k råd
 } x |  D] } | t | ƒ 7} q}Wt j j | d t | ƒ d ƒ } t d d j | j ƒ  ƒ d | d | ƒ n“ t j j | ƒ } t j j | | d ƒ } | d k s(| t k rr| d  d } t d | d | d  d d | d | d | d |	 d |
 ƒ n | } x= | D]5 } t d | d | d | | d | d | d |	 ƒ qWd Gt ƒ  | Gd GHd  S(   Ni@B i   R   R   s   *wigR	   s   *wiqt   /t   _i    t   .iýÿÿÿs   qnor.wigs   sort.wiqR   R   R   R   R   R   R   s   job done, total time cost:s   
s
   reference.s	   .sort.wiqR'   R(   iÿÿÿÿR6   (   R4   t   Truet   FalseR   R   R   R   R   R    t   isfileR   t   joint   ret   subR   R   t   ordR   R1   t   keysR=   (   R'   R   t   out_dirR6   t   iformatt   rformatt   isortedt   rsortedR   R   R   t   format_onlyt   alltimeR)   t   sfilesR   R   R   t   opatht   tnbt   ct   refpatht   elemst   sfile(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wiq.pyt   qnorwigE   s`     	 		  " " 3 < #+<3R	   c         C   sù  t  t j ƒ d k  r< d t j k r< d t j k r< d GHd St j d t j d d d	 d
 d d ƒ } | j d d d- d d ƒ| j d d d- d d ƒ| j d d d- d d ƒ| j d d d d d
 d d d d ƒ| j d d d d d
 d d d t d d ƒ| j d  d d! d d
 d d d t	 d d" ƒd t j k sGd t j k r_d# GH| j
 ƒ  d# GHd St  t j ƒ d$ k r—y | j ƒ  } Wq d% GHd SXn	 d& GHd Sd' Gd( j t j ƒ GHt d) | j d* | j d | j d | j d+ t d! | j d, t ƒ d- S(.   sÍ   
    Description:
        This function parses input parameters, calls and passes all parameters values to the function qnorwig, or print some help messages if required.
    parameters:
        none  
    i   s   -hs   --helps‚   
usage:
python danpos.py wig2wiq <chr_file> <file_path> [optional arguments]

for more help, please try: python danpos wig2wiq -h
i    t   formatter_classt   usagesH   
python danpos.py <command> <chr_file> <file_path>[optional arguments]

t   descriptiont    t   epilogsÍ   Kaifu Chen, et al. chenkaifu@gmail.com,                                       Li lab, Biostatistics department, Dan L. Duncan cancer center,                                      Baylor College of Medicine.t   commandt   defaultt   helps8   set as 'wig2wiq' to convert wiggle format to wiq format.t   chr_filesJ   a text file with each line describing a chrosome length, e.g. 'chr1 10043't   input_pathssÌ   paths to the wiggle format input files.                        Each path could point to a file or a directory that contains the files. use ':'                        to seperate paths, e.g. 'Path_A:path_Bs	   --out_dirt   destRJ   t   metavart   wig2wiqs   a path to the output directorys   --stepR   i
   t   types$   the step size in wiggle format data.s   --buffer_sizeR   sU   maximal memory size that can be used to sort file, e.g. set to 1 when need 1G memory.s   
i   s5   
for more help, please try: python danpos wig2wiq -h
s0   
for help, please try: python danpos wig2wiq -h
s   
command:
pythonR   R'   R   R   RO   N(   R"   t   syst   argvt   argparset   ArgumentParsert   ArgumentDefaultsHelpFormattert   add_argumentR   R4   R#   t
   print_helpt
   parse_argsRE   RX   Rb   Ra   RJ   R   RC   R   RB   (   R^   t   parsert   args(    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wiq.pyRe   r   sP    3	$$
				c         C   så  t  t j ƒ d k  r< d t j k r< d t j k r< d GHd St j d t j d d d	 d
 d d ƒ } | j d d d< d d ƒ| j d d d< d d ƒ| j d d d< d d ƒ| j d d d d d
 d d d d ƒ| j d d d d d
 d d< d d ƒ| j d d d d d
 d d  d d! ƒ| j d" d d# d d
 d d  d d$ ƒ| j d% d d& d d
 d d d' t d d( ƒ| j d) d d* d d
 d d d' t d d+ ƒ| j d, d d- d d
 d d. d' t d d/ ƒ| j d0 d d1 d d
 d d. d' t	 d d2 ƒd t j k sd t j k r$d3 GH| j
 ƒ  d3 GHd St  t j ƒ d4 k r\y | j ƒ  } Wqed5 GHd SXn	 d6 GHd Sd7 Gd8 j t j ƒ GHt d9 | j d: | j d | j d | j d | j d# | j d& | j d* | j d- | j d; t d1 | j ƒ d< S(=   sÍ   
    Description:
        This function parses input parameters, calls and passes all parameters values to the function qnorwig, or print some help messages if required.
    parameters:
        none  
    i   s   -hs   --helpsz   
usage:
python danpos.py wiq <chr_file> <file_path> [optional arguments]

for more help, please try: python danpos wiq -h
i    RY   RZ   sI   
python danpos.py <command>  <chr_file> <file_path>[optional arguments]

R[   R\   R]   sÍ   Kaifu Chen, et al. chenkaifu@gmail.com,                                       Li lab, Biostatistics department, Dan L. Duncan cancer center,                                      Baylor College of Medicine.R^   R_   R`   sA   set as 'wiq' to do quantile normalization for wiggle format data.Ra   sJ   a text file with each line describing a chrosome length, e.g. 'chr1 10043'Rb   sî   paths to the input files, each file shoule be in '.wig' or '.wiq' format.                        Each path could point to a file or a directory that contains the files. use ':'                        to seperate paths, e.g. 'Path_A:path_Bs	   --out_dirRc   RJ   Rd   R>   s   a path to the output directorys   --referenceR6   s¿   a path to the file containing reference data. When reference data is available,                         data in each input file will be normalized to have the same quantiles in reference datas	   --iformatRK   R   s5   the data format in input files, can be 'wig' or 'wiq's	   --rformatRL   s8   the data format in reference file, can be 'wig' or 'wiq's	   --isortedRM   Rf   s4   set to 1 if the input file in wiq format and sorted.s	   --rsortedRN   s;   set to 1 if the reference file is in wiq format and sorted.s   --stepR   i
   s$   the step size in wiggle format data.s   --buffer_sizeR   sU   maximal memory size that can be used to sort file, e.g. set to 1 when need 1G memory.s   
i   s1   
for more help, please try: python danpos wiq -h
s,   
for help, please try: python danpos wiq -h
s   
command:
pythonR   R'   R   R   N(   R"   Rg   Rh   Ri   Rj   Rk   Rl   R   R4   R#   Rm   Rn   RE   RX   Rb   Ra   RJ   R6   RK   RL   RM   RN   R   RC   R   (   R^   Ro   Rp   (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wiq.pyR	   ¹   sl    3	$$$$
								t   __main__R   i    (   R   Rg   RF   Ri   R   R    R   R   RC   R   R   R1   R=   RX   Re   R	   t   __name__t   fdopent   stdoutt   fileno(    (    (    sC   /oak/stanford/groups/akundaje/marinovg/programs/danpos-2.2.2/wiq.pyt   <module>   s   0	'-GR