ó
”iHc           @   s›   d  d l  Z  d  d l Z d  d l Z d  d l Td e f d „  ƒ  YZ d „  Z e d k r— e	 e  j
 ƒ d k  rƒ d GHe  j d	 ƒ n  e e  j
 d
 ƒ n  d S(   iÿÿÿÿN(   t   *t   Bedc           B   sA   e  Z d  Z d „  Z d „  Z d d „ Z d „  Z d d „ Z RS(   s   Bed file object
    1) if the bed file is not very large:
    bed1 = Bed()
    bed1.readbed(bedfilename)
    bed1.bedsort()
    bed1.release_space()
    2) if the bed file is very large:
    bed1 = Bed()
    bed1.bedsort_by_chr(bedfilename)
    bed1.release_space()
    c         C   s:   g  |  _  g  |  _ g  |  _ g  |  _ g  |  _ d  |  _ d  S(   N(   t   chromst   chrt   begint   endt   contentt   Nonet   bedfilename(   t   self(    (    s<   /woldlab/castor/data00/home/georgi/programs/NPS-1.3.2/Bed.pyt   __init__   s    					c         C   sQ  | |  _  x» t | ƒ j ƒ  D]§ } | d  d k r | j ƒ  j ƒ  } |  j j | d ƒ | d |  j k r‚ |  j j | d ƒ n  |  j j | d ƒ |  j	 j | d ƒ |  j
 j | j ƒ  ƒ q q Wt j |  j ƒ |  _ t g  |  j D] } t | ƒ ^ qé t ƒ |  _ t g  |  j	 D] } t | ƒ ^ qt ƒ |  _	 t j |  j
 ƒ |  _
 d S(   s3   readbed(bedfilename)
        read bed file
        i   R   i    i   i   N(   R   t   opent
   xreadlinest   stript   splitR   t   appendR   R   R   R   t   chart   arrayt   intt   int32(   R	   R   t   lineoldt   linet   i(    (    s<   /woldlab/castor/data00/home/georgi/programs/NPS-1.3.2/Bed.pyt   readbed   s    	..t    c         C   s?  |  j  j t ƒ | r | } nu |  j j d ƒ } t | ƒ d k rg d j | d d | d g ƒ } n- d j d j | d d !ƒ d | d g ƒ } t | d ƒ } x• t t |  j  ƒ ƒ D]~ } t	 |  j
 |  j  | k ƒ d } |  j | } |  j | } | j d sq¹ n  | j ƒ  }	 x |	 D] }
 | | |
 IJqWq¹ W| S(   sd   bedsort(newfilename = '')
        The default new file name is self.bedfilename + '.sorted'
        t   .i   i    t   _sortedi   iÿÿÿÿt   w(   R   t   sortt   cmpR   R   t   lent   joinR   t   xranget   whereR   R   R   t   shapet   argsort(   R	   t   newfilet   ofnt   lt
   outputfilet   kt   segmentt   chrbegint
   chrcontentt   indexR   (    (    s<   /woldlab/castor/data00/home/georgi/programs/NPS-1.3.2/Bed.pyt   bedsort/   s$    	$- c         C   s:   g  |  _  g  |  _ g  |  _ g  |  _ g  |  _ d |  _ d S(   s3   release_space()
        release the memory
        N(   R   R   R   R   R   R   R   (   R	   (    (    s<   /woldlab/castor/data00/home/georgi/programs/NPS-1.3.2/Bed.pyt   release_spaceM   s    					c         C   ss  | |  _  | r | } nu |  j  j d ƒ } t | ƒ d k r` d j | d d | d g ƒ } n- d j d j | d d !ƒ d | d g ƒ } t | d ƒ } d } g  |  _ g  |  _ g  |  _ g  |  _ g  |  _	 t
 j d	 It j ƒ  IJi  } i  } xŸt | ƒ j ƒ  D]‹}	 |	 d
  d k r|	 j ƒ  }	 |	 j ƒ  }
 | |
 d k r¤| j |
 d ƒ sid | |
 d <n | |
 d c d 7<|  j j |
 d ƒ |  j	 j |	 ƒ q’| j |
 d ƒ sit j j | ƒ røt j j | ƒ t j j d |
 d d } n d |
 d d } t j | t j ƒ r/t j | ƒ n  t | d ƒ | |
 d <d | |
 d <| |
 d |	 IJq’| |
 d c d 7<| |
 d |	 IJqqWx" | j ƒ  D] } | | j ƒ  q£Wt |  j ƒ d k r`t
 j d It j ƒ  IJt g  |  j D] } t | ƒ ^ qöt ƒ |  _ t j |  j	 ƒ |  _	 |  j j ƒ  } x" | D] } | |  j	 | IJqBWn  | j ƒ  } | j  t! ƒ xãt" t | ƒ ƒ D]Ï} | | } t j j | ƒ rÙt j j | ƒ t j j d | d } n d | d } g  |  _ g  |  _	 t
 j d I| Id It j ƒ  IJx| t | ƒ j ƒ  D]h }	 |	 d
  d k r1|	 j ƒ  }	 |	 j ƒ  }
 | |
 d k r™|  j j |
 d ƒ |  j	 j |	 ƒ q™q1q1Wt |  j ƒ d k rNt
 j d I| Id It j ƒ  IJt g  |  j D] } t | ƒ ^ qät ƒ |  _ t j |  j	 ƒ |  _	 |  j j ƒ  } x" | D] } | |  j	 | IJq0Wn  t j | ƒ qŒW| j ƒ  | | f S(   s{   bedsort_by_chr(bedfilename)
        If the bed file is larger than 0.5G (smaller than 10G), to use chr by chr sort
        R   i   i    R   i   iÿÿÿÿR   t   chr1s   Beginning with chr1 ....i   R   s   .bedt   as   Sorting chr1 ....t   Handlings   ....t   Sorting(#   R   R   R   R   R   R   R   R   R   R   t   syst   stderrt   timet   asctimeR   R   t   has_keyR   t   ost   patht   dirnamet   sept   accesst   F_OKt   unlinkt   keyst   closeR   R   R   R   R#   R   R   R    (   R	   R   R$   R%   R&   R'   t   chrnamet   filelistt
   tagnumbersR   R   t   filetmpR   R,   t	   sortedchrR(   (    (    s<   /woldlab/castor/data00/home/georgi/programs/NPS-1.3.2/Bed.pyt   bedsort_by_chrX   s”    		$-					/.
+		%%.
(   t   __name__t
   __module__t   __doc__R
   R   R-   R.   RF   (    (    (    s<   /woldlab/castor/data00/home/georgi/programs/NPS-1.3.2/Bed.pyR      s   			c         C   s   t  j j |  ƒ } | d k r; t j d IJt j d ƒ nÁ | d k  rµ t j d IJt j d It j ƒ  IJt ƒ  } | j	 |  ƒ | j
 ƒ  } | j ƒ  t j d It j ƒ  IJnG i  } t ƒ  } | j |  ƒ \ } } | j ƒ  t j d It j ƒ  IJ| S(   s  sortBed(bedfilename)
    If the input file is too large (>10G), the program cann't sort the bed file. Users can try other ways to sort the bed file.
    If the size is larger than 0.5G, the program use chr by chr sort instead of sorting all chr regions together.
    I äT   s#   The file is too large to be sorted!i    i eÍs   Sorting bed file ....s   Start time:s	   End time:(   R8   R9   t   getsizeR3   R4   t   exitR5   R6   R   R   R-   R.   RF   (   R   t   filesizet   bedR%   RC   (    (    s<   /woldlab/castor/data00/home/georgi/programs/NPS-1.3.2/Bed.pyt   sortBed»   s$    	
	
t   __main__i   s   Usage: %prog bedfilei    i   (   R3   t   os.pathR8   R5   t   numpyt   objectR   RN   RG   R   t   argvRK   (    (    (    s<   /woldlab/castor/data00/home/georgi/programs/NPS-1.3.2/Bed.pyt   <module>   s   $
´	