ó
z?Qc           @   sˆ  d  Z  d d l Z d d l Z d d l Z e j j e ƒ Z e j j e ƒ Z e j j	 e ƒ d d l
 Z
 d d l m Z d d l m Z m Z m Z m Z d Z d Z d „  Z d „  Z d	 „  Z d
 „  Z e d k r„e ƒ  Z e e j ƒ Z e Z e e j ƒ Z e e j d d ƒZ  e j! ƒ  r9e  e j! ƒ  IJn e j" d e j IJe e e  e ƒ e  j# ƒ  e j" e j$ d Id IJn  d S(   t   ianiÿÿÿÿN(   t   Samfile(   t   SAMTextt   SAMTextFileIteratort   get_cigar_lengtht   get_aligned_lengthsT   Adjust a SAM file of true mappings of simulated reads for 3' trimming by trimmomatics   0.1c       	   C   sµ   t  j d t ƒ }  |  j d d d d d t ƒ|  j d d d d d	 d
 d d ƒ|  j d d d d d t d d ƒ|  j d d d t d d ƒ|  j d d d t d d ƒ|  j ƒ  S(   Nt   descriptions	   --versiont   actiont   versions   %(prog)ss	   --verboses   -vt   countt   defaulti    t   helpsd   Omit to see only fatal error messages; -v to see warnings; -vv to see warnings and progress messagess   --ins   -it   destt   inputt   requireds    Path to the input file; requireds   --outs   -os!   Path to the output file; requireds   --lengths   -lsB   Path to the trimmomatic log file showing trimmed lengths; required(   t   argparset   ArgumentParsert   DESCRIPTIONt   add_argumentt   VERSIONt   Truet
   parse_args(   t	   argparser(    (    s<   /home/ian/PycharmProjects/RNA-Seq-Simulator/trimSAMbyRead.pyt   get_args   s    "c         C   sF  t  |  ƒ | } | d k  r; t d | t  |  ƒ f ƒ ‚ n  | d k rK |  S|  j ƒ  rÞ|  j | |  _ |  j | |  _ |  j |  j } t j d |  j	 ƒ d  } | } | } g  } xÿ t
 t  | ƒ d d d ƒ D]á } t | | ƒ }	 | | d }
 |	 | k  s|
 d k rr|
 d	 k r3| j d
 |	 |
 f ƒ n  |
 d k rV| |	 8} | |	 8} q²|
 d k r²| |	 8} q²qÑ |
 d	 k r˜| j d
 | |
 f ƒ n  |
 d k r±| | 8} n  PqÑ W| j ƒ  d j | ƒ |  _	 | |  _ n|  j |  |  _ |  j |  |  _ t j d |  j	 ƒ d  } | } g  } x¿ t
 d t  | ƒ d ƒ D]¥ } t | | ƒ }	 | | d }
 |	 | k  sx|
 d k rº|
 d	 k rž| j d
 |	 |
 f ƒ n  |
 d k rá| |	 8} qáq<|
 d	 k rà| j d
 | |
 f ƒ n  Pq<Wd j | ƒ |  _	 t |  j	 ƒ |  _ d } g  } x–|  j D]‹} d | k rW| d } t j d | ƒ } g  } | } t  | ƒ d k r¹|  j ƒ  rx<t
 t  | ƒ d d d ƒ D]} } t | | ƒ }	 |	 | k  r÷| j | | ƒ | j | | d ƒ | d 7} | |	 d 8} q”| j t | ƒ ƒ d } Pq”Wq¹xž t
 d t  | ƒ d d ƒ D]} } t | | ƒ }	 | | d } |	 | k  r˜| j | | | d !ƒ | d 7} | |	 d 8} q5| j t | ƒ ƒ d } Pq5Wn  | d k rÛ| j t | ƒ ƒ n  |  j ƒ  rô| j ƒ  n  | d j ƒ  s| j d d ƒ n  | d j ƒ  s7| j d ƒ n  | j d j d g | ƒ ƒ q| j d ƒ rlqq| j d ƒ rd |  j	 k rª| j | ƒ qªq| j | ƒ qW| j d | ƒ | |  _ t  |  ƒ | k rÿt d |  j t  |  ƒ | f ƒ ‚ n  t |  j	 ƒ | k rBt d t |  j	 ƒ |  j	 |  j | f ƒ ‚ n  |  S(   Ni    s8   Specified truncated end %d is longer than current end %ds   ([DIMNSHP=X])iÿÿÿÿi   iþÿÿÿi   t   Nt   MIDNs   %d%st   Mt    s   MD:Z:i   s   ([ACGTacgt])t   0s   NM:i:s   XS:As   NM:i:%dsO   Length of read %s after right trimming, %d, is not equal to requested length %dsL   Length %d of trimmed cigar %s in read %s is not equal to requested length %d(   t   lent
   ValueErrort   is_reversedt   seqt   qualt   post   alent   ret   splitt   cigart   ranget   intt   appendt   reverset   joinR   t   tagst   strt   extendt   isdigitt   insertt
   startswitht   qnameR   (   t   readt   new_endt   losst   originR&   t   residuet   new_post	   new_cigart   iR	   t   opcodet   nmt   new_tagst   tagt   mdt   fieldst   new_mdt   base(    (    s<   /home/ian/PycharmProjects/RNA-Seq-Simulator/trimSAMbyRead.pyt   right_trim_read   sÌ    #


#
#
 	%.c         C   s"  | d k r |  S| d k  r/ t  d | ƒ ‚ n  | t |  ƒ k r` t  d | t |  ƒ f ƒ ‚ n  t |  ƒ } |  j ƒ  rÙ|  j |  |  _ |  j |  |  _ t j d |  j ƒ d  } | | } g  } xþ t d t | ƒ d d ƒ D]à } | d k rö Pn  t	 | | ƒ } | | d } | | k s,| d k r| d k rR| j
 d	 | | f ƒ n  | d
 k rÀ| j
 d	 | | f ƒ d } qÀqà | d k r§| j
 d	 | | f ƒ n  | d k rà | | 8} qà qà Wd j | ƒ |  _ nr|  j | |  _ |  j | |  _ t j d |  j ƒ d  } | | } g  } |  j |  j }	 xû t t | ƒ d d d ƒ D]Ý } t	 | | ƒ } | | d } | | k  s‹| d k ré| d k r´| j d d	 | | f ƒ n  | d
 k rÍ| | 8} n  | d k r,|	 | 8}	 q,qO| d k r| j d d	 | | f ƒ n  | d
 k r+|	 | 8}	 n  PqOWd j | ƒ |  _ |	 |  _ t |  j ƒ |  _ d }
 g  } x˜|  j D]} d | k r­| d } t j d | ƒ } g  } | | } t | ƒ d k r(|  j ƒ  rpxSt d t | ƒ d d ƒ D]} } t	 | | ƒ } | | k  rO| j
 | | ƒ | j
 | | d ƒ |
 d 7}
 | | d 8} qì| j
 t | ƒ ƒ d } PqìWq(xµ t t | ƒ d d d ƒ D]” } t	 | | d ƒ } | | } | | k  r| j d | | d ƒ | j d | | ƒ |
 d 7}
 | | d 8} q| j d t | ƒ ƒ d } PqWn  | d k rJ| j
 t | ƒ ƒ n  | d j ƒ  sm| j d d ƒ n  | d j ƒ  s| j
 d ƒ n  | j
 d j d g | ƒ ƒ qs| j d ƒ rÂqsqs| j d ƒ ród |  j k r | j
 | ƒ q qs| j
 | ƒ qsW| j
 d |
 ƒ | |  _ |  S(   Ni    s   Specified start %d is negatives(   Specified start %d is beyond read end %ds   ([DIMNSHP=X])iÿÿÿÿi   i   R   s   %d%sR   R   t   MNR   iþÿÿÿs   MD:Z:i   s   ([ACGTacgt])R   s   NM:i:s   XS:As   NM:i:%d(   R   R   R   R    R!   R$   R%   R&   R'   R(   R)   R+   R"   R#   R0   R   R,   R-   R/   R1   (   R3   t	   new_startt   original_lenR&   R7   R9   R:   R	   R;   R8   R<   R=   R>   R?   R@   RA   RB   (    (    s<   /home/ian/PycharmProjects/RNA-Seq-Simulator/trimSAMbyRead.pyt   left_trim_read—   sÈ    
#	
#	

#
#

 	c   
      C   sú   xó |  D]ë } yÂ | j  ƒ  } | j ƒ  j ƒ  } | d } | | j k r´ t | d ƒ } t | d ƒ } | d k r… t | | ƒ } n  | d k rÎ t | | | ƒ } | | IJqÎ n t j d | j | f IJWq t	 k
 rñ }	 t j |	 IJq Xq Wd  S(   Ni    i   i   s'   Read %s is out of sync with trim log %s(
   t   readlinet   stripR%   R2   R(   RG   RC   t   syst   stderrR   (
   R   t   outputt   lengthsR3   t   length_lineR@   t   namet   startt   endt   ve(    (    s<   /home/ian/PycharmProjects/RNA-Seq-Simulator/trimSAMbyRead.pyt   do_trimSAMreads  s     
t   __main__t   modet   ws(   Warning: input file %s has no SAM headeri    s   done.(%   t
   __author__RJ   t   osR$   t   patht   dirnamet   __file__t   this_dirt   srcR)   R   t   pysamR   t   lib.samTextR   R   R   R   R   R   R   RC   RG   RS   t   __name__t   argsR   t   infilet   opent   lengthRM   t   outRL   t   headerRK   t   closet   argv(    (    (    s<   /home/ian/PycharmProjects/RNA-Seq-Simulator/trimSAMbyRead.pyt   <module>   s2   $"		z			
