ó
ÀênQc           @   s×  d  Z  d d l Z d d l Z d d l Z d d l Z d d l m Z d „  Z d „  Z d „  Z	 d „  Z
 d „  Z d	 „  Z d
 „  Z e d k rÓy¦ e e j d ƒ Z e e j d ƒ Z e j j e j d ƒ d Z e e d d ƒ Z g  Z x6 e d ƒ D]( Z e j e e d e d d ƒ ƒ qý We j d Z Wn e k
 rQe ƒ  n Xe d Z e j d e d e j  d d ƒ e e e e e e ƒ e j! ƒ  x" e d ƒ D] Z e e j! ƒ  q¨We j d Gd GHn  d S(   s\  Process a master simulation file into separate read1, read2, and sam files with random labels

Input file should be a sequence of stanzas with the format:
readset name
read1 qual1
sam1
[ read2 qual2
sam 2]
< blank line>

If the read2, sam2 lines are omitted, the empty ..._2.fastq file produced can be deleted.

Created on Jun 1, 2010
@author: ian
iÿÿÿÿN(   t   SeqIOc           C   s!   d t  j d GHt  j d ƒ d  S(   Ns0   Usage: python %s input.txt sample_num genome.faii    i   (   t   syst   argvt   exit(    (    (    s]   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/repackageSimulation.pyt   usage   s    c         C   s}   g  } t  |  ƒ } xQ | D]I } | j ƒ  j ƒ  } | d } | d } d | | f } | j | ƒ q W| j ƒ  d j | ƒ S(   Ni    i   s   @SQ	SN:%s	LN:%ss   
(   t   opent   stript   splitt   appendt   closet   join(   t   fai_filenamet   header_linest   fait   linet   fieldst   ref_namet
   ref_lengtht   hdr_line(    (    s]   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/repackageSimulation.pyt   generate_SAM_header_from_fai   s    


c         C   so   g  } t  j |  d ƒ } xG t | j ƒ  ƒ D]3 } t | | ƒ } d | | f } | j | ƒ q+ Wd j | ƒ S(   Nt   fastas   @SQ	SN:%s	LN:%ss   
(   R    t   indext   sortedt   keyst   lenR   R
   (   t   fasta_filenameR   t   genome_indexR   R   R   (    (    s]   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/repackageSimulation.pyt   generate_SAM_header_from_fasta)   s    c         C   sh   y* |  j  d ƒ \ } } d | | | f SWn7 t k
 rc } t d j | |  t | ƒ g ƒ ƒ ‚ n Xd  S(   Ns   	s   @%s
%s
+
%st   |(   R   t
   ValueErrorR
   t   str(   R   t   labelt   readt   qualt   ve(    (    s]   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/repackageSimulation.pyt	   format_fq2   s
    c         C   s&   |  j  d ƒ } | | d <d j | ƒ S(   Ns   	i    (   R   R
   (   R   R   R   (    (    s]   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/repackageSimulation.pyt   relabel_sam9   s    
c         C   sü   t  |  ƒ d k  r d  Sy¿ t  |  ƒ d k r¢ | d t |  d | d ƒ IJ| t |  d | d ƒ IJ| d t |  d | d ƒ IJ| t |  d | d ƒ IJn2 | d t |  d | ƒ IJ| t |  d | ƒ IJWn  t k
 r÷ } t j | ƒ n Xd  S(   Ni   i   i    i   s   /1i   s   /2(   R   R#   R$   t	   Exceptiont   loggingt   error(   t   groupt   fq_listt   samoutR   t   e(    (    s]   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/repackageSimulation.pyt   output_group>   s    c         C   sü   t  j j | ƒ r9 t  j j | ƒ d k r9 t | ƒ } n | d  } t | ƒ } | | IJg  t d | d ƒ D] } t | ƒ ^ qm } t j	 | ƒ g  }	 d } xW |  D]O }
 |
 d k rá t
 |	 | | | | ƒ g  }	 | d 7} q¥ |	 j |
 j ƒ  ƒ q¥ Wd  S(   Ni    iüÿÿÿi   s   
(   t   ost   patht   isfilet   getsizeR   R   t   xrangeR   t   randomt   shuffleR,   R   R   (   t   inputt
   sample_numR*   t   fqst   fai_patht   headert
   fasta_patht   it   codesR(   R   (    (    s]   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/repackageSimulation.pyt   doRepackageSimulationN   s    *

,t   __main__i   i   i    s   .true_mappings.samt   ws	   _%d.fastqi   s   .repackage.logt   filenamet   levelt   filemodes   done. ("   t   __doc__R   R-   R2   R&   t   BioR    R   R   R   R#   R$   R,   R<   t   __name__R   R   R4   t   intR5   R.   t   splitextt   basepathR*   R6   t   ranget   jR   R   t
   IndexErrort   LOG_FILENAMEt   basicConfigt   DEBUGR	   (    (    (    s]   /woldlab/castor/data00/home/georgi/programs/RNA-Seq-Simulator.v1.0/lib/repackageSimulation.pyt   <module>   s:   $								&

