ó
xÈYc           @   sÔ   d  Z  d d l Z d d l Z d d l Z d d l m Z d d l Z d d l m Z d d l	 Td d l
 m Z d d l m Z m Z d d l m Z d d l Td	 „  Z d
 „  Z d d „ Z d „  Z d „  Z d S(   s³  Description: MACS 2 main executable

Copyright (c) 2008,2009 Yong Zhang, Tao Liu <taoliu@jimmy.harvard.edu>
Copyright (c) 2010,2011 Tao Liu <taoliu@jimmy.harvard.edu>

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD License (see the file COPYING included
with the distribution).

@status: release candidate
@version: $Id$
@author:  Yong Zhang, Tao Liu
@contact: taoliu@jimmy.harvard.edu
iÿÿÿÿN(   t   strftime(   t   opt_validate(   t   *(   t   binomial_cdf_inv(   t	   PeakModelt   NotEnoughPairsException(   t
   PeakDetectc         C   sš   t  |  j ƒ  ƒ } t  | j ƒ  ƒ } | j | ƒ } t | ƒ d k r– | d ƒ | d d j t | ƒ ƒ ƒ | d d j t | ƒ ƒ ƒ t j ƒ  n  d S(   s   check common chromosome namesi    sm   No common chromosome names can be found from treatment and control! Check your input files! MACS will quit...s!   Chromosome names in treatment: %st   ,s   Chromosome names in control: %sN(   t   sett   get_chr_namest   intersectiont   lent   joint   sortedt   syst   exit(   t   treatt   controlt   error_streamt	   tchrnamest	   cchrnamest   commonnames(    (    sP   /oak/stanford/groups/akundaje/marinovg/programs/MACS-2.1.0/MACS2/callpeak_cmd.pyt   check_names(   s    
c         C   s  t  |  ƒ } | j } | j } | j } | j } | d | j ƒ | j da k | _ | j re d } n d } | j t	 _ | d | ƒ | j r¢ t
 | ƒ \ } } n t | ƒ \ } } | d` k	 rÓ t | | | ƒ n  | d | | j ƒ d | | j f }	 | j }
 |	 d	 | |
 f 7}	 | d
 | |
 ƒ | j d k rR| j d k rt| d | ƒ t | j |
 ƒ } | d | ƒ n | d | ƒ t | j ƒ } | j r©| d | ƒ n | d | ƒ | j | ƒ | j } | d | | ƒ |	 d | | f 7}	 | j r
|	 d | 7}	 n |	 d | 7}	 | d t |
 | ƒ |
 ƒ |	 d t |
 | ƒ |
 7}	 n |
 } | d` k	 rÇ| j } |	 d | | f 7}	 | d | | ƒ | j d k r¾| j d k rß| d | ƒ t | j | ƒ } | d | ƒ n | d | ƒ t | j ƒ } | j r| d | ƒ n | d | ƒ | j | ƒ | j } | d | | ƒ |	 d | | f 7}	 | j ru|	 d | 7}	 n |	 d | 7}	 | d t | | ƒ | ƒ |	 d  t | | ƒ | 7}	 qÇ| } n  | d! ƒ | d" ƒ | j r€| d# ƒ | j r| j | _ n | j | _ | j d$ k r5| d% | j ƒ n' | j d$ k  r\| d& | j d' ƒ n  | d( | j ƒ d) | j | _ nÑymt d* | d+ t d, | ƒ } | d- ƒ | d. ƒ | d/ | j ƒ | d0 | j ƒ | d1 | j ƒ | d2 | j ƒ | d3 d4 j t  t! | j" ƒ ƒ ƒ | d5 | j# ƒ t$ | | j# | j% ƒ | j | _ d) | j | _ | j d) | j k rì| d6 | j ƒ | j& r¼| j | _ d) | j | _ | d7 | j ƒ qì| d8 d4 j t  t! | j" ƒ ƒ ƒ | d9 ƒ n  Wna t' k
 rP| j& st( j) d: ƒ n  | d# ƒ | j | _ d) | j | _ | d; | j ƒ n X| d< ƒ | j* rq| d= ƒ n  | r| j r| d) } n  | rÞ| j+ r| d> ƒ | | k r| d? | ƒ | j, d$ k  rÛ| d@ ƒ n | dA | j, ƒ | j- | | j, ƒ | dB | j ƒ no | | k r| dC | ƒ | j, d$ k  rG| d@ ƒ n | dA | j, ƒ | j- | | j, ƒ | dD | j | ƒ n  t. | _/ qÞ| j0 rº| | k r®t. | _/ qÛt1 | _/ qÞ| | k rÒt1 | _/ qÞt. | _/ n  t2 dE | dF | d, | ƒ } | j3 ƒ  | j4 j5 dG | j6 ƒ | dH | j7 ƒ t8 | j7 dI ƒ } | j9 dJ t: ƒ | j9 | j d ƒ | j9 |	 ƒ | j d$ k r”| j9 dK | j ƒ n* | j d$ k  r¾| j9 dL | j d' ƒ n  | j9 dM | j ƒ y* | j9 dN d4 j t  t! | j" ƒ ƒ ƒ Wn n X| j* r	| j9 dO ƒ n  | j4 j; | dP | j% ƒ| j< ƒ  | j= d` k rZ	dQ } n | j> d` k rr	dR } n  | j? sH
| dS | j@ ƒ t8 | j@ dI ƒ } | j4 jA | dT dU dP | j% dV | dW | jB ƒ| j< ƒ  | dX | jC ƒ t8 | jC dI ƒ } | j4 jD | dT dU dP | j% dY dZ tE d[ ƒ d\ dV | dW | jB ƒ| j< ƒ  n¼ | d] | jF ƒ t8 | jF dI ƒ } | j4 jG | dT dU dP | j% dY | j% dW | jB ƒ| j< ƒ  | d^ | jH ƒ t8 | jH dI ƒ } | j4 jI | dT dU dP | j% dY | j% dW | jB ƒ| j< ƒ  | d_ ƒ d` S(b   s.   The Main function/pipeline for MACS.
    
    s   
t   BAMPEt   BEDPEt   fragmentt   tags   #1 read %s files...s   #1 %s size = %ds"   # %s size is determined as %d bps
s   # total %ss in treatment: %d
s   #1  total %ss in treatment: %dt   allt   autosS   #1 calculate max duplicate %ss in single position based on binomial distribution...s'   #1  max_dup_tags based on binomial = %ds"   #1 user defined the maximum %ss...sN   #1 filter out redundant fragments by allowing at most %d identical fragment(s)sc   #1 filter out redundant tags at the same location and the same strand by allowing at most %d tag(s)s(   #1  %ss after filtering in treatment: %ds'   # %ss after filtering in treatment: %d
s0   # maximum duplicate fragments in treatment = %d
s@   # maximum duplicate tags at the same position in treatment = %d
s%   #1  Redundant rate of treatment: %.2fs$   # Redundant rate in treatment: %.2f
s   # total %ss in control: %d
s   #1  total %ss in control: %dsa   #1  for control, calculate max duplicate %ss in single position based on binomial distribution...s&   #1  %ss after filtering in control: %ds%   # %ss after filtering in control: %d
s.   # maximum duplicate fragments in control = %d
s>   # maximum duplicate tags at the same position in control = %d
s#   #1  Redundant rate of control: %.2fs"   # Redundant rate in control: %.2f
s   #1 finished!s   #2 Build Peak Model...s   #2 Skipped...i    s9   #2 Sequencing ends will be shifted towards 3' by %d bp(s)s9   #2 Sequencing ends will be shifted towards 5' by %d bp(s)iÿÿÿÿs   #2 Use %d as fragment lengthi   t	   treatmentt   max_pairnumt   opts   #2 finished!s   #2  Summary Model:s   #2   min_tags: %ds
   #2   d: %ds   #2   scan_window: %ds&   #2 predicted fragment length is %d bpss/   #2 alternative fragment length(s) may be %s bpsR   s%   #2.2 Generate R script for model : %ss…   #2 Since the d (%.0f) calculated from paired-peaks are smaller than 2*tag length, it may be influenced by unknown sequencing problem!sˆ   #2 MACS will use %d as EXTSIZE/fragment length d. NOTE: if the d calculated is still acceptable, please do not use --fix-bimodal option!sA   #2 You may need to consider one of the other alternative d(s): %ss   #2 You can restart the process with --nomodel --extsize XXX with your choice or an arbitrary number. Nontheless, MACS will continute computing.i   sB   #2 Since --fix-bimodal is set, MACS will use %d as fragment lengths   #3 Call peaks...s   # local lambda is disabled!sA   #3 User prefers to use random sampling instead of linear scaling.s+   #3 MACS is random sampling treatment %ss...sC   #3 Your results may not be reproducible due to the random sampling!s   #3 Random seed (%d) is used.s"   #3 %d Tags from treatment are kepts)   #3 MACS is random sampling control %ss...s   #3 %d %ss from control are keptR   R   t   fc_lows   #4 Write output xls file... %st   ws,   # This file is generated by MACS version %s
s9   # Sequencing ends will be shifted towards 3' by %d bp(s)
s9   # Sequencing ends will be shifted towards 5' by %d bp(s)
s	   # d = %d
s/   # alternative fragment length(s) may be %s bps
s   # local lambda is disabled!
t   namet   pscoret   qscores-   #4 Write peak in narrowPeak format file... %st   name_prefixs   %s_peak_t   score_columnt	   tracklines   #4 Write summits bed file... %st   descriptions#   Summits for %s (Made with MACS v2, s   %xt   )s2   #4 Write broad peak in broadPeak format file... %ss9   #4 Write broad peak in bed12/gappedPeak format file... %ss   Done!N(   R   R   (J   R   t   infot   warnt   debugt   errort   argtxtt   formatt   PE_MODEt   tempdirt   tempfilet   load_frag_files_optionst   load_tag_files_optionst   NoneR   t   tsizet   totalt   keepduplicatest   cal_max_dup_tagst   gsizet   intt   separate_dupst   floatt   nomodelt   dt   extsizet   shiftt
   scanwindowR   t   MAX_PAIRNUMt   min_tagst   scan_windowR   t   mapt   strt   alternative_dt   modelRt   model2r_scriptR"   t   onautoR   R   R   t   nolambdat
   downsamplet   seedt
   sample_numt   Falset	   tocontrolt   tolarget   TrueR   t
   call_peakst   peakst	   filter_fct   fecutofft   peakxlst   opent   writet   MACS_VERSIONt   write_to_xlst   closet
   log_pvaluet
   log_qvaluet   broadt   peakNarrowPeakt   write_to_narrowPeakR'   t	   summitbedt   write_to_summit_bedR    t   peakBroadPeakt   write_to_broadPeakt   peakGappedPeakt   write_to_gappedPeak(   t   argst   optionsR*   R+   R,   R-   R   R   R   t   tagsinfot   t0t   treatment_max_dup_tagst   t1t   c0t   control_max_dup_tagst   c1t	   peakmodelt
   peakdetectt   ofhd_xlsR&   t   ofhd_bedt   ofhd_summits(    (    sP   /oak/stanford/groups/akundaje/marinovg/programs/MACS-2.1.0/MACS2/callpeak_cmd.pyt   run3   st   					 		  	
									

	
		

#	#	

		
		
*	
			.
1
1
gñhãˆµøä>c         C   s   t  d | | d |  ƒ S(   sæ   Calculate the maximum duplicated tag number based on genome
    size, total tag number and a p-value based on binomial
    distribution. Brute force algorithm to calculate reverse CDF no
    more than MAX_LAMBDA(100000).
    
    i   g      ð?(   R   (   t   genome_sizet   tags_numbert   p(    (    sP   /oak/stanford/groups/akundaje/marinovg/programs/MACS-2.1.0/MACS2/callpeak_cmd.pyR9   V  s    c         C   s—  |  j  d ƒ |  j |  j d d |  j ƒ} | j ƒ  } t |  j ƒ d k r x? |  j d D]- } |  j | d |  j ƒ} | j | ƒ } q[ Wn  | j ƒ  | j |  _	 |  j
 rS|  j  d ƒ |  j |  j
 d d |  j ƒ} | j ƒ  } | j } t |  j
 ƒ d k rFx? |  j
 d D]- } |  j | d |  j ƒ} | j | ƒ } qWn  | j ƒ  n d } |  j  d |  j	 ƒ | d k	 r|  j  d | ƒ n  | | f S(	   sV   From the options, load treatment fragments and control fragments (if available).

    s   #1 read treatment fragments...i    t   buffer_sizei   s   #1.2 read input fragments...s;   #1 mean fragment size is determined as %d bp from treatments@   #1 note: mean fragment size in control is %d bp -- value ignoredN(   R*   t   parsert   tfileR{   t   build_petrackR   t   append_petrackt   finalizeR?   R6   t   cfileR5   (   Rj   t   tpR   R}   t   cpR   t	   control_dR   (    (    sP   /oak/stanford/groups/akundaje/marinovg/programs/MACS-2.1.0/MACS2/callpeak_cmd.pyR3   _  s0    
		c         C   s}  |  j  d ƒ |  j |  j d d |  j ƒ} |  j sM | j ƒ  } | |  _ n  | j ƒ  } t |  j ƒ d k r° x? |  j d D]- } |  j | d |  j ƒ} | j | ƒ } q| Wn  | j ƒ  |  j	 rY|  j  d ƒ |  j |  j	 d d |  j ƒj ƒ  } t |  j	 ƒ d k rLx? |  j	 d D]- } |  j | d |  j ƒ} | j | ƒ } qWn  | j ƒ  n d } |  j  d |  j ƒ | | f S(   sL   From the options, load treatment tags and control tags (if available).

    s   #1 read treatment tags...i    R{   i   s   #1.2 read input tags...s#   #1 tag size is determined as %d bpsN(   R*   R|   R}   R{   R6   t   build_fwtrackR   t   append_fwtrackR€   R   R5   (   Rj   R‚   t   ttsizeR   R}   R   R   Rƒ   (    (    sP   /oak/stanford/groups/akundaje/marinovg/programs/MACS-2.1.0/MACS2/callpeak_cmd.pyR4   †  s,    	
	%(   t   __doc__t   osR   t   loggingt   timeR    R2   t   MACS2.OptValidatorR   t   MACS2.OutputWritert
   MACS2.ProbR   t   MACS2.PeakModelR   R   t   MACS2.PeakDetectR   t   MACS2.ConstantsR   Rw   R9   R3   R4   (    (    (    sP   /oak/stanford/groups/akundaje/marinovg/programs/MACS-2.1.0/MACS2/callpeak_cmd.pyt   <module>   s"   

		ÿ $		'