Ñò
Ìa,Jc        	   @   sÁ   d  d k  Z  d  d k Z d  d k Z d  d k Z d  d k Td  d k Td  d k l Z e  i d ƒ Z	 e  i d ƒ Z
 d „  Z d „  Z d „  Z d	 „  Z d
 „  Z e d j o e e i ƒ n d S(   iÿÿÿÿN(   t   *(   t   OptionParsers   \+s   \-c         C   sb  t  |  ƒ } d } d } t t | d ƒ ƒ } g  } xÞ | D]Ö } t i d | ƒ p½ | i ƒ  } | i ƒ  } t i | d ƒ o/ t | d ƒ | }	 | i	 |	 ƒ | d 7} qt
 i | d ƒ oJ t | d ƒ d | }	 |	 d j  o
 d }	 n | i	 |	 ƒ | d 7} qq; q; W| i ƒ  | | }
 d t |
 ƒ d	 t | ƒ d
 t | ƒ GH| i ƒ  | S(   s¥  
    *This takes into account the identical tags
    *Tags on different strands are positioned differently
    	-> tag start (atoi(sline[1])) + fragment_size/2
    	<- tag start (atoi(sline[2])) - 1 - fragment_size/2, the extra -1 is because that bed format has open-half, the sline[2] is not included.
    The stored positions are not the midpoint rathan than the start
    
    The interface is no longer the same as that for getBedCoords(file)
    input:  
	file:  the file that has the raw tag data from one chromosome
    	fragment_size: the fragment size after CHIP experiment.
    output: 
    	return: a sorted list of positions which might have redundent entries	
    g        i   t   tracki   i   g      ð?i    s   total tag count is: s    = t   +(   t   opent   intt   roundt   ret   matcht   stript   splitt   plust   atoit   appendt   minust   sortt   strt   close(   t   filet   fragment_sizet   infilet   postive_tag_countst   negative_tag_countst   shiftt   taglistt   linet   slinet   positiont   total_tag_counts(    (    sK   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/make_graph_file.pyt   get_bed_coords6   s2      


+
c         C   ss  h  } t  | d ƒ } t |  ƒ d j o=|  d | | } d } xÅ t d t |  ƒ ƒ D]® } |  | | | }	 |	 | j o | d 7} qV |	 | j oi | | | <| | d }
 | d t | ƒ d t |
 ƒ d t | ƒ d } | i | ƒ |	 } d } qV d GHqV W| | | <| | d }
 | d t | ƒ d t |
 ƒ d t | ƒ d } | i | ƒ n | i ƒ  | S(   s  
    taglist: sorted list of positions that includes every tag on a chromosome
    window_size: the artificial bin size for binning the tags
    bed_vals: a dictionary keyed by the start of tag_containing
    	windows, with value being the tag count in the window.
    
    In this function, the bins are set up using an absolute coordinate
    system.  Namely [0, window_size-1),[window_size,
    2*window_size-1),...
    
    The result writen into the file is guaranteed to be already sorted
    within a chromosome.
    t   wi    i   s   	s   
s   Something is wrong!!!!!!!(   R   t   lent   rangeR   t   writeR   (   R   t   chromt   window_sizeR   t   bed_valst   outfilet   current_window_startt   tag_count_in_current_windowt   it   startt   current_window_endt   outline(    (    sK   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/make_graph_file.pyt   Generate_windows_and_count_tagsf   s0      
4
	
4
c         C   s   t  |  i ƒ  ƒ S(   N(   R   t   keys(   R$   (    (    sK   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/make_graph_file.pyt   Total_number_of_windows–   s    c         C   s(   t  |  | ƒ } t | | | | ƒ } d  S(   N(   R   R,   (   t   tagfileR"   R#   R   R%   t   tag_listR$   (    (    sK   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/make_graph_file.pyt   make_graph_fileš   s    c         C   si  t  ƒ  } | i d d d d d d d d d	 d
 d d ƒ| i d d d d d d d d d	 d d d ƒ| i d d d d d d d d d	 d d d ƒ| i d d d d d d d d d d d	 d ƒ| i d d d d d d d d d	 d d d ƒ| i |  ƒ \ } } t |  ƒ d  j  o | i ƒ  t i d! ƒ n t | i | i	 ƒ } t
 | | i | i | i ƒ } d  S("   Ns   -fs	   --tagfilet   actiont   storet   typet   stringt   destR/   t   helps"   file with tag coords in bed formatt   metavars   <file>s   -cs   --chromR"   s   chromosome name for graphs   <string>s   -ws   --window_sizeR   R#   s   window size to make summarys   <int>s   -is   --fragment_sizeR   s0   average size of a fragment after CHIP experiments   -os	   --outfileR%   s   output file namei
   i   (   R   t
   add_optiont
   parse_argsR   t
   print_helpt   syst   exitR   R/   R   R,   R"   R#   R%   (   t   argvt   parsert   optt   argsR0   R$   (    (    sK   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/make_graph_file.pyt   mainŸ   s.    	
t   __main__(   R   t   osR<   t   shutilt   mathR5   t   optparseR   t   compileR   R   R   R,   R.   R1   RB   t   __name__R>   (    (    (    sK   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/make_graph_file.pyt   <module>   s   0

	0	0			