
uMc           @   s  d  d k  Z  d  d k Z d  d k Z d  d k Z d  d k Z d  d k Z d  d k l Z d  d k l Z d  d k l	 Z	 d  d k
 Te Z e  i Z d e f d     YZ d   Z y e Wn# e j
 o d  d	 k l Z n Xd
 e f d     YZ d e f d     YZ d e f d     YZ d e f d     YZ d e f d     YZ d e f d     YZ d e f d     YZ  e d  Z! e" e d  Z# e" d  Z$ e" e d  Z% d S(   iN(   t   random(   t   izip(   t
   itemgetter(   t   *t
   ParseErrorc           B   s#   e  Z d  Z d d  Z d   Z RS(   s9   Indicates that a file format is not to expectation.

    s   Parsing Errorc         C   s   | |  _  d  S(   N(   t	   parameter(   t   selft   value(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   __init__   s    c         C   s   |  i  S(   N(   R   (   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   __str__   s    (   t   __name__t
   __module__t   __doc__R   R	   (    (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR      s   c         C   s=   d d k  } | i d |   } | | d | d <d i |  S(   s   Add a prefix to a filename. 

    i.e. filename = ./test_data/anns.bed, prefix = complement
    returns ./test_data/complement_anns.bed
    iNt   /(   t   ret   splitt   join(   t   filenamet   prefixR   t   filename_portions(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   prefix_filename   s    (   t   Sett   intervalc           B   s   e  Z d  Z g  Z d   Z e d    Z e d    Z e d    Z d   Z	 d   Z
 d   Z d   Z d	   Z d
   Z d   Z d   Z d   Z d   Z RS(   s  Store a closed interval of ints. 
    
    The interval is closed at both ends. ie, interval(1,3) is {1,2,3}

    This is designed to be used to indicate genomic regions, thus the int. To
    this end, there are several functions that facilitate comparison.

    intersection: returns the intersection of self and another interval
    union: returns the union of self and another interval
    
    overlap: returns the size ( in bp's ) of an interval and a second interval
    does_overlap: returns whether or not an overlap overlaps

    c         C   s   | d j  pA | d j  p4 t  | t t f  t j p t  | t t f  o) t d | | t |  t |  f  n | | j  o t d | | f  n t i t | | f  }  |  S(   Ni    sH   All values must be non-negative (integers|longs), not (%i, %i), (%s, %s)s=   The end value (%d) must be greater than the start value (%d) (   t
   isinstancet   intt   longt
   ValueErrort   typet   tuplet   __new__R   (   R   t   startt   end(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR   ?   s    N)c         C   s   |  d  S(   i    (    (   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   <lambda>G   s    c         C   s   |  d  S(   i   (    (   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR    H   s    c         C   s   |  d  |  d d  S(   i   i    (    (   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR    I   s    c         C   sg   t  | t  p t  t |  i | i  } t |  i | i  } | | j o t | |  f Sd Sd S(   s   Return the intersection of self and otherInt as a tuple of intervals.
           If the intersecton is empty, returns an empty tuple.
        N(    (   R   R   t   AssertionErrort   maxR   t   minR   (   R   t   otherIntervalR   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   intersectionK   s    c         C   s   t  | t  p t  |  i p. | i |  i d j p |  i | i d j o; t |  i | i  } t |  i | i  } t | |  f S|  | f Sd S(   sL   Return the union of self and otherInterval as a tuple of intervals.
        i   N(   R   R   R!   t   does_overlapR   R   R#   R"   (   R   R$   R   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   unionW   s    
c         C   sN   t  | t  p t  t t |  d | d  t |  d | d  d d  S(   Ni   i    (   R   R   R!   R"   R#   (   R   R$   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   overlape   s    c         C   s*   t  | t  p t  |  i |  d j S(   Ni    (   R   R   R!   R(   (   R   t   other(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR&   i   s    c         C   sb   t  | t t t f  o |  i | j  St |  t j o |  i | i j  St d t |   d  S(   Ns4   Can't compare an object of type %s to a slice object(	   R   R   R   t   floatR   R   R   R   t	   TypeError(   R   R)   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   __lt__n   s
    c         C   sb   t  | t t t f  o |  i | j St |  t j o |  i | i j St d t |   d  S(   Ns4   Can't compare an object of type %s to a slice object(	   R   R   R   R*   R   R   R   R   R+   (   R   R)   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   __le__u   s
    c         C   s8   t  | t t t t f  p t S|  | j o
 |  | j S(   N(   R   R   R   R*   R   t   False(   R   R)   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   __eq__|   s     c         C   s   |  | j S(   N(    (   R   R)   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   __ne__   s    c         C   sb   t  | t t t f  o |  i | j St |  t j o |  i | i j St d t |   d  S(   Ns4   Can't compare an object of type %s to a slice object(	   R   R   R   R*   R   R   R   R   R+   (   R   R)   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   __ge__   s
    c         C   sb   t  | t t t f  o |  i | j St |  t j o |  i | i j St d t |   d  S(   Ns4   Can't compare an object of type %s to a slice object(	   R   R   R   R*   R   R   R   R   R+   (   R   R)   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   __gt__   s
    (   R
   R   R   t	   __slots__R   t   propertyR   R   t   sizeR%   R'   R(   R&   R,   R-   R/   R0   R1   R2   (    (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR   ,   s    										t   regionc           B   s  e  Z d d d d d   Z d   Z d   Z e e e  Z d   Z d d  Z	 d   Z
 e e
  Z e d  Z d   Z d	   Z d
   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z g  d  Z d   Z d   Z RS(   c         C   sB   g  |  _  t i |  i  |  t i |  |  | |  _ | |  _ d  S(   N(   t   valuest   listR   t   namet   _length(   R   t   intervals_itert   values_iterR9   t   length(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR      s
    		c         C   sX   t  | t  p t  t |   d j o! | |  d i j o t d  n | |  _ d  S(   Ni    is3   All bp indices must be less than the region length.(   R   R   R!   t   lenR   R   R:   (   R   t
   new_length(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   _set_length   s    'c         C   s   |  i  S(   N(   R:   (   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   _get_length   s    c         C   sp  g  } g  } t  | i |  | i  } t | i |  | d i  } | | d j oF |  | i | i j o/ |  | i | i j o |  i | c | 7<d St i |  t | |   }	 |	 i d t	 d   g  }
 |	 D] } |
 | i q ~
 } g  } |	 D] } | | i q ~ } x t
 t |	  d  D] } yu | i t | | | |   | i |  i | | |  | i t | | d | | d d   | i |  Wq)q)Xq)W| i | d j  o8 | i d t | i | d d   | i d |  nT | d | i j  o? | i d t | d | i d   | i d |  i |  n y | i | d j o2 | i t | d d | i   | i |  nR | d | i j o= | i t | i d | d   | i |  i | d  n Wn d d k } | i   n X|  | | 5|  i | | 5xF t t | |   D]/ \ } } |  i | |  |  i i | |  q9Wd S(   si  Merge overlapping intervals. End is *not* inclusive ( like range )

        start, end refers to the start, end indexes of the overlaping intervals 
        ( in self ). new, new_value are the new interval and it's value, 
        respectively.

        We sum the overlaps. For instance, [0,2] v 2 with [1,3] v 1 will yield
        [0,0]v2, [1,2]v3, [3,3]v2. Then, we delete the old intervals and insert
        the new.

        This is not so tough if we note that regardless of the size of end-start
        we can only have 2 interval splits. Everything else is just
        additions or value changes. 
        i   Nt   keyi    i(   R"   R   R#   R   R7   R8   t   __getitem__t   slicet   sortR   t   xrangeR>   t   appendR   t   insertt   pdbt	   set_tracet   reversedt   zip(   R   R   R   t   newt	   new_valuet   new_intervalst
   new_valuest   start_bpt   end_bpt   old_intervalst   _[1]t   old_intervalt   old_interval_startst   _[2]t   old_interval_endst   indexRI   t   itemR   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   _merge_intervals   sV    ($$ *$$!! 
 i   c         C   s
  |  i  d  j o  | i |  i  j o t d  n t |   d j p$ t |   d j o5 | |  d j o$ t i |  |  |  i i |  n| t i	 |  |  } |  | i
 |  o, t i |  |  } |  i | | | |  n* t i |  | |  t i |  i | |  d  S(   Ns+   All bps must be less than the region lengthi    i(   R=   t   NoneR   R   R>   R8   RG   R7   t   bisectt   bisect_leftR&   t   bisect_rightR[   RH   (   R   t   coverage_regionR   t   l_loct   r_loc(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   add   s    #7c         C   s
   t  |   S(   N(   R>   (   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   _numRegions  s    c            s    d j o  d j p t   | o
    n d   t i |    } t i |    } |  t | |  } t |   d     f d   | i   D d t |  i | | ! d |  i	 d    S(   s1   Return a copy of the genomic subregion. 
        i    R;   c         3   sB   x; |  ]4 } t  t | i     t | i      Vq Wd  S(   N(   R   R"   R   R#   R   (   t   .0RZ   (   t   shift_valuet   stop_bpRQ   (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pys	   <genexpr>  s   	R<   R9   R=   (
   R!   R]   R^   R_   RD   R   t   iter_feature_regionst   iterR7   R9   (   R   RQ   Rg   t   shift_to_zerot   start_indext
   stop_indext	   intervals(    (   Rf   Rg   RQ   sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   get_subregion
  s    ! 
	c      
   C   s   t  i |  | i  } t  i |  | i  } |  t | |  } | | j o
 d } n! |  | d i |  | i d } t |   d d   | i   D d t |  i	 | | ! d |  i
 d |  S(   sH   Return a copy of the genomic subregion that overlaps interval. 
        i    i   R;   c         s   s(   x! |  ] } t  | i | i  Vq Wd  S(   N(   R   R   R   (   Re   RZ   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pys	   <genexpr>0  s   	R<   R9   R=   (   R]   R^   R   R_   R   RD   R   Rh   Ri   R7   R9   (   R   t   interRk   Rl   R=   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   get_overlapping_intervals!  s    
 	c         C   sP   y) d d k  } | i t |  i     SWn  t j
 o t |  i    SXd S(   ss   Return a numpy array of all scores. 
        
        If numpy is not available, return a list of scores. 
        iN(   t   numpyt   arrayR8   t   iter_featurest   ImportError(   R   Rq   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   bp_score_array7  s
    c         C   sE   d } x+ |  i    D] \ } } | | i | 7} q Wt |  |  i S(   s%   Return the mean of all values in selfi    (   t   iter_intervals_and_valuesR5   R*   R=   (   R   t   rvRZ   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   meanC  s
     c         C   s   |  i    } d } d } x@ |  i   D]2 \ } } | | i 7} | | i | | d 7} q% W| |  i | | d 7} t |  |  i d S(   s2   Return the centered variance of all values in selfi    i   i   (   Rx   Rv   R5   R=   R*   (   R   Rx   t   ret_valt
   total_sizeRZ   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   varJ  s     c         C   s<  t  |   d j p t  |  d j o t Gd GHd St d  n d } d } |  i   } | i   } y& d d k } t | | i    } WnS t j
 oG t g  } t | |  D] \ } }	 | t |  |	 q ~  } n X| |  i |  i	   | i	   }
 |
 |  i d t
 i |  i    t
 i | i    }
 |
 S(   s=   Return the pearson correlation of self and other, by basepairi    s/   Can't take the correlation of a 0 length vectorg        iNi   (   R>   R   Ru   Rq   R*   t   sumRt   R   R=   Rx   t   matht   sqrtR{   (   R   t   otherRegiont   overlapProductt   other_indext   d1t   d2Rq   RT   t   i1t   i2t   p_corr(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   regionCorrelationU  s     &	E!7c         c   s7   x0 t  t |    D] } |  | |  i | f Vq Wd  S(   N(   RF   R>   R7   (   R   RY   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyRv     s     c      
   c   s  x! t  |  d d  D] } d Vq Wx t  t |   d  D]n } |  i | } x  t  |  | i  D] } | Vqb Wx5 t  |  | d d |  | d d  D] } d Vq Wq; W|  i d } x  t  |  d i  D] } | Vq Wx( t  |  i |  d d  D] } d Vq Wd S(   s   Iterates through every feature in the interval.
        
        For instance, if the region was of length 10 and had a single
        feature interval [2,8] this would return 0 0 1 1 1 1 1 1 1 0.
        i    i   iN(   RF   R>   R7   R5   R=   (   R   t   loopR   t   iloop(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyRs     s(     	  	)  	 	c         C   s.   t  g  } |  i   D] } | | i q ~  S(   N(   R|   Rh   R5   (   R   RT   t   feature(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   featuresLength  s    c         c   s#   x t  i |   D] } | Vq Wd S(   s   Iterates through every feature region in the interval.
        
        For instance, if the region was of length 10 and had a single
        feature interval [2,8] this would return interval(2, 8).
        N(   R8   t   __iter__(   R   t   feature_interval(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyRh     s     	c         C   s   t  d  d S(   sy   Make sure that we arent using iter incorrectly.

        If you want the list iterator call list.__iter__(self).
        sX   No defined iterator - call the particular fn directly ( ie iter_feature_regions, etc. ).N(   t   NotImplementedError(   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR     s    c         C   s5  t    } t |  d j o |  | |  i <| S|  i d j o
 d } n |  i d } t t i |   } | i d d  | i |  i  xw t	 d t |  d  D]\ } |  i
 | | | | d t  | | d | d <| d | d | | d | d _ q W|  i t d   | i   D  j p t  | S(   s;   Splits this region into a regions object.
        
        i    t    t   _i   s   split_%ic         s   s   x |  ] } | i  Vq Wd  S(   N(   R=   (   Re   RZ   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pys	   <genexpr>  s   	 N(   t   regionsR>   R9   R\   R8   t   copyRH   RG   R=   RF   Rn   t   TrueR|   R7   R!   (   R   t   split_pointsRw   R9   RY   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR     s     	
 1)-c         C   s   |  i  t | | d    S(   N(   RC   RD   R\   (   R   R   t   stop(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   __getslice__  s    c         C   s~   t  |  t j o t i |  |  St  |  t j p t  t  |   t t i |  |   t t i |  i |   |  i  S(   N(	   R   R   R8   RC   RD   R!   Ri   R7   R9   (   R   RB   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyRC     s    c         C   st   t  d d |  i |  i  } d } xL |  i   D]> \ } } | i | | | | | i f  | | | i 7} q. W| S(   sl   Return a copy of this region in cumulative space. 
        
        1,1,1,2,2,2 becomes 1,2,3,5,7,9
        i    (    (    (   t
   cum_regionR9   R=   Rv   Rc   R5   (   R   t
   new_regiont
   curr_valueR   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt	   build_cdf  s     !c         C   s   d d  k  } g  } |  i d |  i  D] } | | q& ~ } | i |  i  g  } | D] } | |  i |  qW ~ } | i | |  x | D] } | i |  q W| i   d  S(   Nii    (   t   pylabt   iter_split_pointsR=   RG   R   t   plott   axvlinet   show(   R   R   R   RT   t   entryt   pointsRW   R7   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR     s    0* c         c   s   t  i |   } | i   } x! | i | j  o | i   } q Wx] | i d | j  oH | i d V| i d j o | i | j  o | i Vn | i   } qB Wd  S(   Ni   (   R8   R   t   nextR   R   R5   (   R   t   minValuet   maxValuet   iteratorR   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR   
  s       c         C   sw   t  | t t f  p t  t i |  |  } | d j o d S|  | d i } | | j o d S|  i | d Sd S(   s2   Return the value at a specific basepair.

        i    i   N(   R   R   R   R!   R]   R_   R   R7   (   R   t   bp_indext   r_indext   prev_interval_start(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR     s    (    (    N(   R
   R   R\   R   R@   RA   R4   R=   R[   Rc   Rd   t
   numRegionsR.   Rn   Rp   Ru   Rx   R{   R   Rv   Rs   R   Rh   R   R   R   RC   R   R   R   R   (    (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR6      s4   			I						/						&				R   c           B   s)   e  Z d  Z d   Z d   Z d   Z RS(   s   Store region data as a frozen cumulative sum.

    The 'value' at a bp is the sum of the values *up to and including* that bp. 
    Also, the object is frozen, making way for future optimization.
    c         G   s   t  d  d  S(   Ns-   'cum_region' object has no attribute 'append'(   t   AttributeError(   R   t   args(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyRG   3  s    c         G   s   t  d  d  S(   Ns4   'cum_region' object does not support item assignment(   R+   (   R   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   __setitem__6  s    c         C   s   t  | t t f  p t  t i |  |  } | d j o d S|  | d i } | | j o |  i | d d S|  i | d d |  i | d d |  | d i } |  i | d d | | |  | d i	 d Sd S(   s2   Return the value at a specific basepair.

        i    i   N(
   R   R   R   R!   R]   R_   R   R7   R5   R   (   R   R   R   R   t   per_bp_factor(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR   9  s    7(   R
   R   R   RG   R   R   (    (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR   ,  s   		t   binary_regionc           B   sw   e  Z d  Z d   Z d d d d d  Z d   Z d   Z d   Z d d  Z	 d   Z
 d	   Z d
   Z d   Z RS(   s<   A region object optimized to work with binary regions.

    c         C   s   t  i |  | d  d  S(   Ni   (   R6   Rc   (   R   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyRc   W  s    c         C   s   t  i |  | d | |  d  S(   N(    (   R6   R   (   R   R;   R9   R=   R<   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR   Z  s    c   
      C   s   | | | } } } t  t |  | i | i  t |  | d i | i   } t o t o/ d | | | f GHd GHd |  | | f GHn xC t | d | d  D]+ }	 t o d t |  |	  GHn |  |	 =q Wt o	 d GHq n | |  | <d S(	   s   Merge overlapping intervals down.

        This tells the region class to correctly merge intervals - rather than 
        add them just merge the intervals down.
        i   sK   WARNING! WARNING! WARNING!
 	Feature interval %s intersects %i other FI(s) s+   	The intersecting intervals will be merged.s   	%s is being changed to %sis"   	The interval %s is being removed.s   WARNING! WARNING! WARNING!
N(	   R   R#   R   R"   R   R.   t   verboseRF   t   str(
   R   R   R   RM   RN   Ra   Rb   t   newItemt   new_intervalRY   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR[   ]  s     9  c         C   s  t  |   d j p t  |  d j o d Sd } g  } |  i   } | i   } x | i   D] } x- | D]% } | i | i j o Pqi | d =qi Wx^ | d  j oP | i | i j o= | i |  y | i   } Wq t j
 o d  } q Xq Wx! | D] } | | i |  7} q Wq\ W| S(   Ni    (	   R>   Rh   R   R   R   R\   RG   t   StopIterationR(   (   R   t   otherAnnTrackt   totalOverlapt   currentMatchest   thisItert	   nextMatcht   ivRZ   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR(   v  s0    &         c         C   s   t  |   d j p t  |  d j o d S|  i | i j p t d |  i | i f  n |  i } |  i |  } |  i   } | i   } | | | | t i | | t i | d   t i | | t i | d   } | S(   sG    Calculate the correlation of this region with another region

        i    sB   Can't calculate correlation of regions of unequal length: (%i, %i)i   (   R>   R=   R   R(   R   R}   R~   t   pow(   R   R   t   nR(   t   bp1t   bp2t   corr(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR     s    & 	Tg        c         C   s  t  |   d j o d Sd } d } d } x t i |   D] } x9 | t  |  j  o% | | i | i j  o | d 7} qC W| t  |  j o Pn | i | |  } | d j oG | | | i j o3 | | | | i | | | i f | d 7} q: q: q: W| S(   s  Calculate the number of regions that overlap self with the given region

        We take self to be the covered region. ie, for 
        self    ===============================
        other   ---   ----   -----------    -         
        the region overlap is 1, whereas for  
        self    ===========  ==================
        other   ---   ----   -----------    -                 
        it is 2.

        The algorithm is nearly identical to they overlap fn - it's a merge join
        i    i   (   R>   R8   R   R   R   R(   R5   (   R   R   t   min_coverage_percentR   t
   self_indexR   R   R(   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   regionOverlap  s(       !
c         C   s?   d } x2 | D]* } x! |  D] } | | i  |  7} q Wq W| S(   Ni    (   R(   (   R   R   R   t   other_ivt   self_iv(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   bruteOverlap  s      c         C   s]   d } xP t  i |   D]? } x6 t  i |  D]% } | i |  o | d 7} Pq, q, Wq W| S(   Ni    i   (   R8   R   R&   (   R   R   R   R   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   brute_regionOverlap  s      
c         c   s0   x) t  t |    D] } |  | d f Vq Wd  S(   Ni   (   RF   R>   (   R   RY   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyRv     s     c      
   c   s  x! t  |  d d  D] } d Vq Wx t  t |   d  D]j } x- t  |  | d |  | d  D] } d Vqb Wx1 t  |  | d d |  | d  D] } d Vq Wq; Wx- t  |  d d |  d d  D] } d Vq Wx( t  |  i |  d d  D] } d Vq Wd S(   s)  Iterates through every feature in the interval.
        
        Overrides the iter_features for region class and applies to binary regions
        
        For instance, if the region was of length 10 and had a single
        feature interval [2,8] this would return 0 0 1 1 1 1 1 1 1 0.
        i    i   iN(   RF   R>   R=   (   R   R   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyRs     s$    
 	 ! 	% ! 	 	(    N(    (   R
   R   R   Rc   R\   R   R[   R(   R   R   R   R   Rv   Rs   (    (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR   S  s   				.				t   genomic_coverage_regionc           B   s/   e  Z d    Z e e  Z d   Z d   Z RS(   c         C   s[   y |  i  SWnF t j
 o: t g  } |  i   D] } | | i q0 ~  |  _  n X|  i  S(   N(   t   _cached_lengthR   R|   Rh   R5   (   R   RT   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR:     s
     8c         C   s.   t  |  d  o
 |  ` n t i |  |  d  S(   NR   (   t   hasattrR   R6   RG   (   R   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyRG     s    
c         C   sN  t  i |  | i  } t  i |  | i  } | | j o d St |   d j p t  |  d i | i j p t  |  d i | i j p t  t g  } t	 i
 |   D] } | | i |  q ~  } | d j o |  d | !i } n d } | i |  | i j o | | i |  | i 7} n | d j p t  t | | | d  S(   s|   Return the intersection of inter and self, *in genomic coordinates*
        
        inter needs to be an interval.
        i    ii   N(   R]   R^   R   R_   R   R\   R>   R!   R|   R8   R   R(   R=   R   (   R   Ro   Ra   Rb   RT   RZ   t
   bp_overlapt	   new_start(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR%     s     9(   R
   R   R:   R4   R=   RG   R%   (    (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR     s   		R   c           B   sG   e  Z d  Z d   Z e e  Z d   Z d   Z d   Z d   Z	 RS(   s   A region container object. 
    
    This is just a dictionary of region's whereby each key is the 
    region's track name. Also contains a method to store an overlap stat.
    c         C   sb   y |  i  SWnP t j
 oD t t g  } |  i   D] } | | i q3 ~   |  _  |  i  SXd S(   s@   Calculate the total length and cache it for later use.

        N(   t   _cached_totalLengthR   R*   R|   R7   R=   (   R   RT   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   _totalLengthK  s
     9c         C   sG   t  g  } |  i   D]) \ } } | | t | i  |  i f q ~  S(   s   Returns a dict of the relative region lengths.

        For instance, if there are two regions of names R1 and R2 and they are both
        the same length then this will return { 'R1': 0.5, 'R2': 0.5 }

        (   t   dictt	   iteritemsR*   R=   t   totalLength(   R   RT   RB   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   regionFractionV  s    c         C   ss   xL | i    D]> } |  i |  o t d |  | | f  n | | |  | <q Wy
 |  ` Wn t j
 o n Xd  S(   Ns>   Can not extend %r with %r: they both contain a region named %s(   t   keyst   has_keyR   R   R   (   R   R)   RB   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   extend`  s     
c         C   s   |  i    } | i   xm | D]e } x> |  | i   D], \ \ } } } | i d | | | f  q4 W| i d | |  | i f  q Wd  S(   Ns	   %s	%i	%i
s   %s	0	%i
(   R   RE   Rv   t   writeR=   (   R   t   bed_ft	   lengths_fR   RB   R   R   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   writeBedFilem  s    
  c         C   s  |  i    } | i   x| D] } x t |  | i    D] \ } \ \ } } } t |  | t  oU | i d i t |  d d t |  t |  d d d d t |  f	  d  q: | i d i t |  d d t |  t |  t |  d d d t |  f	  d  q: Wq Wd  S(   Ns   	t   merged_featuret   .s   ID=s   
(	   R   RE   t	   enumerateRv   R   R   R   R   R   (   R   t   gff3_fR   RB   t   iR   R   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   writeGff3Filew  s    
  U(
   R
   R   R   R   R4   R   R   R   R   R   (    (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR   A  s   			
		
t   lazy_wiggle_filec           B   s;   e  Z d  Z d   Z d   Z d   Z d   Z d   Z RS(   s   A memory safe wiggle parser.
    
    We region subselects that are done on disk to increase access efficiency
    when the entire file is too big to fit in memory.    
    c         C   sf   d d k  } | i   } x1 | i d  } | d j o Pn | i |  q |  i i d  | i   S(   s   Return the hex digest of a file without loading it all into memory
        
        This caches the data structure and checks that it hasnt changed.iNi   R   i    (   t   md5RM   t   readt   updatet   ft   seekt	   hexdigest(   R   t   fhR   t   digestt   buf(    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   md5file  s    c   	         sv  d   }   f d   }   i  i d  d } d } d }   i  i   } x| d j o| i d  oh   i  i   }   i  i   } x@ | i d  o/ | d j o"   i  i   }   i  i   } q WqL n | i d  p | i d  oe d } | d j o | |  n | | |  } | } | d	 d j o | d	 | d
 | d <q4n | d j o | d 7} | d d j oQ t t i	 d |  d  } | d	 d j o | | d	 <n | | d
 | d <n- | d d j p t
  | d c | d 7<|   i j o% | d i | d | f  d } q4n   i  i   }   i  i   } qL W| d j o | |  n d S(   sM   Scan through the file and find and store all the declaration lines.

        c         S   s  t  t i d |    } y t | d  | d <Wn t j
 o d | d <n Xy t | d  | d <Wn t j
 o d | d <n Xy t | d  | d <Wn t j
 o n X|  i d  o d | d	 <n- |  i d
  o d | d	 <n t p t  | | d <g  | d <| S(   Ns   (\S+)=(\S+)R   i    t   spani   t   stept   variableStept   Variablet	   step_typet	   fixedStept   Fixedt   f_post   internal_indexes(   R   R   t   findallR   t   KeyErrort
   startswithR.   R!   (   t   lineR   R7   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   parse_dec_line  s*        

c            sl     i  i |  d  p g    i  |  d <n t i   i  |  d |  d  }   i  |  d i | |   d  S(   Nt   chromR   (   t   declarationsR   R]   R^   RH   (   R7   t   insert_index(   R   (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   add_dec_line  s    !i    R   t   tracks   \R   R   R   R   R   i   R   R   s   \s+R   R   R   N(   R   R   R\   t   readlineR   t   tellt   endswithR   R   R   R!   t	   max_linesRG   (	   R   R   R   R7   R   t	   line_cntrR   t	   line_datat   curr_bp(    (   R   sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   _build_annotation_points  sT    	  
c         C   s  d } g  } |  i | D]3 } | d | j o | d | j o | | q q ~ } t d   | D  } t d d | t d   | D   }	 x| D]} |  i i | d  |  i i   }
 |
 i d  p |
 i d  p t  d } x0 | d D]$ \ } } | | j  o
 | } q Pq W| d j o |  i i |  n | d	 d
 j o t	 | d  | d } n |  i i   }
 x|
 d j o |
 i d  o |
 i d  o |
 i
   }
 | d	 d j o4 t i d |
  \ } } t |  } t	 |  } n6 | d	 d
 j o$ | | d 7} t |
 i
    } n |	 i t t	 |  t	 |  | d d  |  | | j o Pn |  i i   }
 qWq W|	 i | |  S(   s   Get a subregion.
        
        This is a memory efficient way to get a region without having to parse 
        the entire file.
        R   R   c         s   s   x |  ] } | d  Vq Wd S(   R   N(    (   Re   R7   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pys	   <genexpr>  s   	 c         s   s   x |  ] } | d  Vq Wd S(   R   N(    (   Re   R7   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pys	   <genexpr>  s   	 R   R   R   R   R   R   R   R   R   s   \s+R   i   N(    (    (   R\   R   R"   R6   R   R   R   R   R!   R   t   stripR   R   R*   Rc   R   Rn   (   R   t   chr_nameRQ   Rg   R   RT   R7   t
   all_valuest   sub_region_lengtht
   sub_regionR   t
   search_locR   t   f_loct   post   tmp_posR   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyRn     sL    6% ' 
 1c         C   st  d d  k  } d d  k } d |  _ t   |  _ t | t t f  o t |  _	 t
 |  |  _ n t |  _	 | |  _ |  i |  i  |  _ d |  i i } | i i |  oh | i i |  oU t
 | d  } | i |  |  _ |  i |  i i j o t   |  _ n | i   n t |  i  d j oI |  i   |  i |  i _ t
 | d  } | i |  i |  | i   n d  S(   Nii'  s   .cached_annotation_pntst   rbi    t   wb(   t   cPicklet   osR   t   sc_dictR   R   R   t   unicodeR   t   opened_ft   openR   R.   R   t   md5sumR9   t   patht   existst   isfilet   loadt   closeR>   R  t   dump(   R   t   wig_fR  R  t   fnameR   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR   -  s,    				&
c         C   s   t  |  i  S(   N(   R   R   (   R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR	   P  s    (   R
   R   R   R   R  Rn   R   R	   (    (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyR     s   		[	:	#c         C   s  | i  d  t   } x t |  D] \ } } t i d | i    } t |  d j  o* t d | d | i   | i f   n | d d !\ } } }	 t	 t
 |  t
 |	   }
 | i |  p t d |  | | <n | | i |
  q# Wt   } x[ | i   D]M } | | i } | o t d | |  | | <qt d d | |  | | <qWd } d } d } |  i   } xt| d j of| i d  o |  i   } qyn4| i d	  oQ | i   } x/ | i d
  o | d  |  i   i   } qW|  i   } qyn| i d  o t d  n| i d  o | i   } x/ | i d
  o | d  |  i   i   } qFWt t i d |   } | d } y | | d i d } Wn+ t j
 o d } n t j
 o n Xy t
 | d  } Wqt j
 o d } qXn | i   } t t t i d |   \ } } | d j o | o
 d } n | d j o- | | i t	 t
 |  t
 |   |  n2 | | i t	 t
 |  t
 |  | d  |  t
 |  d } n |  i   } qyW| S(   s0   Parse a wiggle file into a regions object.

    i    s   \s+i   s,   Error parsing line #%i "%s" in the file "%s"i   R9   R   t   #R   s   \iR   s3   Only VariableStep wiggle annotations are permitted.R   s   (\S+)=(\S+)R   R   (    (    (    N(   R   R   R   R   R   R  R>   R   R9   R   R   R   R   RG   R   R=   R   R6   R\   R   R   R   R   R   R   R   t
   IndexErrorR   t   mapR*   Rc   (   R  t   lengths_filet   BINARYt   lengthst   line_numR   R7   t   chNameR   R   R   t   chromosomesR=   t   curr_chrt	   curr_spant   last_posR  R   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   parse_wiggle_fileS  s    	 *	   
 

 !
-1c         C   sB  d d  k  } t   } x t |  D] \ } } | i d | i    } t |  d j  o* t d | d | i   | i f   n | d d !\ }	 }
 } t t	 |
  t	 |   } | i
 |	  p t d |	  | |	 <n | |	 i |  q" Wt   } x[ | i   D]M }	 | |	 i } | o t d |	 |  | |	 <qt d d |	 |  | |	 <qWx|  D]} | i   d j o q^n | i d	  o q^n | i d | i    } | d d !\ }	 }
 } t t	 |
  t	 |   } | d  j	 oO y" | d } | | j o w^n Wq:t j
 o t d
 |  i   q:Xn | om | i
 |	  p q^n | |	 i |  } | d  j o q^q*y | |	 i |  Wq*t j
 o q*Xq^t | d  } | i
 |	  p q^n | |	 i |  } | d  j o q^q^y | |	 i | |  Wq^t j
 o q^Xq^q^W| i d  | S(   Nis   \s+i   s,   Error parsing line #%i '%s' in the file '%s'i   i    R9   R   R  s1   Error parsing groups in "%s": no name field foundi   (    (    (    (   R   R   R   R   R  R>   R   R9   R   R   R   R   RG   R   R=   R   R6   R   R\   R  R%   Rc   R   R*   R   (   R   R!  t   groupt	   is_binaryR   R#  R$  R   R7   R%  R   R   R   R&  R=   R9   t   shifted_featureR   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   parse_bed_line_iterator  sv    	 $	    
 	 c            s1     f d   } t  |   | d | d t } | S(   Nc          3   sf   x_   D]W }  |  i  d  o q n d i d   t t i d |  i     D  d } | Vq Wd  S(   NR  s   	c         s   s0   x) |  ]" \ } } | d j o	 | Vq q Wd S(   i    i   i   N(   i    i   i   (    (   Re   R   RZ   (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pys	   <genexpr>	  s   	 	s   \s+s   
(   R   R   R   R   R   R  (   R   t	   this_line(   R   (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   iter_gffslines_as_bedlines  s      "R+  R,  (   R.  R   (   R   R!  R+  R0  t   projected_bed(    (   R   sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   parse_gff3_file  s    
c         C   s   d d k  } |  i d  | i d  | i d |  i   i    } t |  d j  o1 t i d d i |  IJt	 d |  i
   n+ t |  d j p | o
 t } n t } |  i d  t |  | | |  } | S(	   s   Parse a bed and lengths file into a regions object.

       If group is specified, only BED lines whose "name" line matches
       group are added to the regions object
    iNi    s   \s+i   s   Offending values: '%s'
s   	s0   Error parsing "%s": this is not a valid BED file(   R   R   R   R   R  R>   t   syst   stderrR   R   R9   R   R.   R.  (   R   R!  R+  t   force_binaryR   R7   R"  R&  (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   parse_bed_file  s    
(&   R3  R}   t   operatorR]   R   R   R    t	   itertoolsR   R   t   statistical_functionsR.   R   t   stdoutt   outputt	   ExceptionR   R   t   sett	   NameErrort   setsR   R   R   R8   R6   R   R   R   R   R   t   objectR   R*  R\   R.  R2  R6  (    (    (    sO   /woldlab/castor/data00/home/georgi/programs/block_bootstrap-0.8.1/base_types.pyt   <module>   s:   
			 f '6B[W