
,\c           @   s  d  d l  Z  d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l	 Z	 d  d l
 Z
 d  d l m Z d  d l Z d  d l m Z m Z m Z d  d l m Z d Z d Z d Z d Z d a d a d e e d	  Z d d d
  Z d   Z d   Z d   Z  d d e d  Z! d d e d  Z" d d  Z# d   Z$ d   Z% d   Z& d   Z' d   Z( d   Z) d   Z* e j+ j, d  d  Z- d   Z. d d d  Z/ d d d d  Z0 d   Z1 e2 d  Z3 d    Z4 e d!  Z5 d"   Z6 d#   Z7 d$   Z8 d d d%  Z9 d&   Z: d'   Z; d(   Z< d)   Z= d*   Z> d+   Z? d,   Z@ d-   ZA d.   ZB dN d1 d2  ZC d3   ZD d4   ZE d5   ZF d6   ZG d7   ZH d8   ZI d9   ZJ d:   ZK d;   ZL d<   ZM d=   ZN d>   ZO d?   ZP d@   ZQ dA   ZR dB dC  ZS e dD  ZT dE   ZU dF e dG  ZV dH   ZW dI   ZX dJ   ZY dK   ZZ dL   Z[ dM   Z\ d S(O   iN(   t   SeqIO(   t   abspatht
   expandusert   join(   t   LooseVersions   == Error == s   == Warning == s    ERROR s    WARN c         C   s0  | s d } n d } | r | j  d | d |   t | d t | j  d | d  | j  d  | j  d	 | j   d
  ns t j j d | d |  d  t j j d | d  t j j d  t j j d	 | j   d  t j j   t rt	 j
 j t  rt j t  n  t j d  d  S(   Nt   SPAdest	   dipSPAdess   

t    t
   with_errors#   
In case you have troubles running s-   , you can write to spades.support@cab.spbu.rusC   or report an issue on our GitHub repository github.com/ablab/spadess&   Please provide us with params.txt and s%   .log files from the output directory.s.   , you can write to spades.support@cab.spbu.ru
sD   or report an issue on our GitHub repository github.com/ablab/spades
s&   .log files from the output directory.
i   (   t   infot   log_warningst   Truet   lowert   syst   stderrt   writet   flusht   current_tmp_dirt   ost   patht   isdirt   shutilt   rmtreet   exit(   t   err_strt   logt	   dipspadest   prefixt   binary_name(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   error%   s"    	 c         C   sW   | r& | j  d | d |  d  n- t j j d | d |  d  t j j   d  S(   Ns   

R   s   


(   R	   R   t   stdoutR   R   (   t   warn_strR   R   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   warning;   s      c          C   s  d   }  t  j j   d } g  } x t j D] } | d } d | k r` | j d  \ } } n- | j d  r | d  | } } n
 | } } |  |  } | j d | | j d d  f  t |  t |  k o t |  k  n r, t	 Sq, Wt
 d | d	 d
 d j |   d  S(   Nc         S   sq   |  j  d  } xR t t t |    D]8 } | | j   r( t t | |  d  | | <Pq( q( Wd j |  S(   Nt   .i   (   t   splitt   reversedt   ranget   lent   isdigitt   strt   intR   (   t   versiont
   componentst   i(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   __next_versionD   s    i    t   -t   +is   Python%s: %ss    and highers   Python version s    is not supported!
s   Supported versions are s   , (   R   R)   R"   t   options_storaget   SUPPORTED_PYTHON_VERSIONSt   endswitht   appendt   replaceR   R   R   R   (   R,   t   current_versiont   supported_versions_msgt   supported_versionst   majort   min_inct   max_inct   max_exc(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   check_python_versionC   s     	

#.c           C   s   d d d S(   Ns2   You can obtain SPAdes binaries in one of two ways:s:   
1. Download them from http://cab.spbu.ru/software/spades/s5   
2. Build source code with ./spades_compile.sh script(    (    (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt    get_spades_binaries_info_message^   s    c         C   sj   xc d d d d d g D]L } t  j j |  |  } t  j j |  s t d | d t   |  q q Wd  S(   Ns   spades-hammers   spades-ionhammers   spades-cores
   spades-bwas   spades-dipspades-cores   SPAdes binaries not found: s   
(   R   R   R   t   isfileR   R<   (   t
   binary_dirR   t   binaryt   binary_path(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   check_binariesd   s    t    c         C   se   t  t |    } t | |  t j j |  sT t d | | f d | d | n  | t j |  <| S(   Ns   file not found: %s (%s)R   R   (	   R   R   t   check_path_is_asciiR   R   R=   R   R/   t   dict_of_rel2abs(   t   input_filenamet   messageR   R   t   filename(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   check_file_existencek   s    #c         C   se   t  t |    } t | |  t j j |  sT t d | | f d | d | n  | t j |  <| S(   Ns   directory not found: %s (%s)R   R   (	   R   R   RC   R   R   R   R   R/   RD   (   t   input_dirnameRF   R   R   t   dirname(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   check_dir_existencet   s    #c         C   s'   t  |   s# t d |  | f  n  d  S(   Ns+   path contains non-ASCII characters: %s (%s)(   t   is_ascii_stringR   (   R   RF   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyRC   }   s    c         C   sH   t  j j |   r" t  j |   n  t  j j |   sD t  j |   n  d  S(   N(   R   R   R=   t   removet   existst   makedirs(   RJ   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   ensure_dir_existence   s    c         C   s3   t  j j |   r" t j |   n  t  j |   d  S(   N(   R   R   RN   R   R   RO   (   RJ   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   recreate_dir   s    c         C   s>   x7 |  D]/ } |  j  |  d k r t d | |  q q Wd  S(   Ni   s$   file %s was specified at least twice(   t   countR   (   t	   filenamesR   RG   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   check_files_duplication   s    c         C   s  |  t  j k r t  j |  } n t j j |   d } | j   d k r t j j |  t |    d } | | j   t  j k r | | } q t j j |  t | |    d } | | } n  | j   t  j k rt d d j	 t  j  d |  | f |  n  | r\| j   t  j
 k r\| t  j k r\t d d j	 t  j
  d |  | f |  n  | j d  r| j   t  j k rt d | d	 d j	 t  j  d |  | f |  n  d  S(
   Ni   s   .gzs-   file with reads has unsupported format (only s   , s    are supported): %s (%s)s?   to run read error correction, reads should be in FASTQ format (t   contigss
   file with s    should be in FASTA format  ((   R/   t   dict_of_prefixesR   R   t   splitextR   R%   t   ALLOWED_READS_EXTENSIONSR   R   t   BH_ALLOWED_READS_EXTENSIONSt   LONG_READS_TYPESR1   t    CONTIGS_ALLOWED_READS_EXTENSIONS(   RG   RF   t   only_assemblert   library_typeR   t   extt   pre_extt   pre_pre_ext(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   check_reads_file_format   s*    !%$c         C   s   d   } t  j j |   \ } } | r: | |   r |  Snj d t  j k r xX t  j d j t  j  D]: } | j d  } t  j j | |   } | |  rc | Sqc Wn  d  S(   Nc         S   s%   t  j j |   o$ t  j |  t  j  S(   N(   R   R   R=   t   accesst   X_OK(   t   fpath(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   is_exe   s    t   PATHt   "(   R   R   R"   t   environt   pathsept   stripR   t   None(   t   programRe   Rd   t   fnameR   t   exe_file(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   which   s    	 c          C   s   d }  d } t  j j |   r yX xQ t |   D]C } | j |  r. t | t |  j   d  } | d :} | Sq. WWq t k
 r d  St
 k
 r d  SXn  d  S(   Ns   /proc/meminfos	   MemTotal:i    i   i   (   R   R   R=   t   opent
   startswithR(   R%   R"   t
   ValueErrorRk   t   IOError(   t   mem_info_filenamet   avail_mem_headert   linet	   avail_mem(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   get_available_memory   s     
c         C   s?   y |  j  d  Wn# t k
 r% t St k
 r6 t SXt Sd  S(   Nt   ascii(   t   encodet   UnicodeDecodeErrort   Falset   UnicodeEncodeErrorR   (   Rv   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyRL      s    s   3.c         C   s#   | r t  |  d  j   S|  j   S(   Ns   utf-8(   R'   t   rstrip(   Rv   t
   is_python3(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   process_readline   s    c         C   s*   |  j  d  d k r& d |  d }  n  |  S(   NR   iRg   (   t   find(   R'   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   process_spaces   s    c   	      C   sQ  d d  l  } d d  l } t |  t  r0 |  } n | j |   } | j | d | j d | j d | } d } xe | j   s t	 | j
 j    } | r | r | j |  q | | d 7} n  | j d  k	 ro Pqo qo WxP | j
 j   D]? } t	 |  } | r | r| j |  q#| | d 7} q q W| j rMt d |  | j f |  n  | S(   NiR   R   t   cwdRB   s   
s7   system call for: "%s" finished abnormally, err code: %d(   t   shlext
   subprocesst
   isinstancet   listR"   t   Popent   PIPEt   STDOUTt   pollR   R   t   readlineR	   t
   returncodeRk   t	   readlinesR   (	   t   cmdR   R   R   R   t   cmd_listt   proct   outputRv   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   sys_call   s0    	'	c         C   s+  d d l  } d d l } t |  t  r0 |  } n | j |   } | rW t | d  } n	 | j } | rx t | d  }	 n	 | j }	 | j | d | d |	 d | }
 | r| s | rx |
 j   sC| s t	 |
 j
 j    } | r | j |  q n  | s-t	 |
 j j    } | r-| j |  q-n  |
 j d k	 r Pq q W| sx< |
 j
 j   D]( } | d k rZ| j t	 |   qZqZWn  | sx< |
 j j   D]( } | d k r| j t	 |   qqWqn
 |
 j   | r| j   n  | r|	 j   n  |
 j r't d |  |
 j f |  n  d S(	   s   
    Runs cmd and redirects stdout to out_filename (if specified), stderr to err_filename (if specified), or to log otherwise
    iNt   wR   R   R   RB   s7   system call for: "%s" finished abnormally, err code: %d(   R   R   R   R   R"   Rp   R   R   R   R   R   R   R	   R   R   Rk   R   t   waitt   closeR   (   R   R   t   out_filenamet   err_filenameR   R   R   R   R   R   R   Rv   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   universal_sys_call  sN    			! 
	c         C   s_   t  | d  } | j |  j    | j   t j | t j t j Bt j	 Bt j
 Bt j B d  S(   Nt   wb(   Rp   R   t   readR   R   t   chmodt   statt   S_IWRITEt   S_IREADt   S_IXUSRt   S_IXGRPt   S_IXOTH(   t   datat   fileR   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   save_data_to_fileD  s    
c         C   s  d   } | r t  } t } n t } t } t r t |  d  } | j t  | j   } x | j   sv | j   } q[ W| j	   } | j
   t |  d  j	   } | j |  }	 | |	  | } n t |  d  j	   } g  }
 g  } d } x| D] } | j |  rq n  | j |  d k r| | j |  t |  j   } | j | d  j   } | |
 |  s|
 j | |  qq | j |  d k r | | j |  t |  j   } | j   } | | |  s| j | |  qq q W|
 | f S(   Nc         S   s(   x! |  D] } | j  |  r t Sq Wt S(   N(   R1   R   R|   (   t   list_to_checkt   suffixt   item(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   already_savedL  s    t   rs    * iRB   (   t   SPADES_PY_WARN_MESSAGEt   SPADES_WARN_MESSAGEt   SPADES_PY_ERROR_MESSAGEt   SPADES_ERROR_MESSAGEt   continue_logfile_offsetRp   t   seekR   Rj   R   R   t   indexRq   R   R%   R3   R2   (   t   log_filenamet   warningsR   t   spades_py_messaget   spades_messaget   continued_logt   continued_stage_phraset   lines_to_checkt	   all_linest   failed_stage_indext   spades_py_msgst   spades_msgst    IMPORTANT_MESSAGE_SUMMARY_PREFIXRv   R   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   get_important_messages_from_logK  sF    		
##c         C   s@   d  } x3 |  j d D]$ } | j j d k r | j } q q W| S(   Nt   handlerst   FileHandler(   Rk   t   __dict__t	   __class__t   __name__t   baseFilename(   R   t   log_filet   h(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   get_logger_filename|  s
    c         C   s  t  |   } | s t Sx |  j d D] } | j   q$ Wt | d t \ } } | s\ | r| rr |  j d  n |  j d  t j j	 t j j
 |  d  } t j | d d } |  j |  |  j d  | r|  j d	  x | D] } |  j |  q Wn  | r7|  j d
  x | D] } |  j |  qWn  |  j d |  |  j |  | rt | d t \ }	 }
 |  j d  |  j d  x" |
 |	 D] } |  j |  qWn  t St S(   NR   R   s?   
======= SPAdes pipeline finished abnormally and WITH WARNINGS!s0   
======= SPAdes pipeline finished WITH WARNINGS!s   warnings.logt   modeR   RB   s   === Pipeline warnings:s-   === Error correction and assembling warnings:s   ======= Warnings saved to s   === ERRORs:(   R   R|   R   R   R   R   R	   R   R   R   RJ   t   loggingR   t
   addHandlert   removeHandler(   R   R   R   R   t   spades_py_warnst   spades_warnst   warnings_filenamet   warnings_handlerRv   t   spades_py_errorst   spades_errors(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyR
     s@    !c         C   sY   t  j rU t t  _ t |   } | rU t | d  } | j d d  | j   a qU n  d  S(   NR   i    i   (   R/   t   continue_modeR|   R   Rp   R   t   tellR   (   R   R   R   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   continue_from_here  s    		c         C   s!   |  j  d t j  t t _ d  S(   Ns   
======= Skipping the rest of SPAdes pipeline (--stop-after was set to '%s'). You can continue later with --continue or --restart-from options
(   R	   R/   t
   stop_afterR   t   run_completed(   R   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   finish_here  s    	c            sj   d       f d   } d  } xE t t j |   d | d t D]" } t j j |  r@ | } Pq@ q@ W| S(   Nc         S   s   |  j    r t |   S|  S(   N(   R&   R(   (   t   text(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   atoi  s    
c            s)   g  t  j d |   D] }   |  ^ q S(   Ns   (\d+)(   t   reR"   (   R   t   c(   R   (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   natural_keys  s    t   keyt   reverse(   Rk   t   sortedt   globR   R   R   R   (   t   patternR   t
   latest_dirt   dir_to_test(    (   R   sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   get_latest_dir  s    	(c         C   sP   | s t  j } n  t j j |  s4 t j |  n  t j d | d |   a t S(   Nt   dirR   (	   R/   t   tmp_dirR   R   R   RO   t   tempfilet   mkdtempR   (   R   t   base_dir(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   get_tmp_dir  s    c         C   sc   x\ t  j j   D]K } |  j d |  r |  t d |  t d |  d !j   r[ | Sq q Wd  S(   Ns   --i   (   R/   t   SHORT_READS_TYPESt   keysRq   R%   R&   Rk   (   t   optiont   short_reads_type(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   get_short_reads_type  s
    +c         C   s;   x4 t  j D]) } |  j d  r
 |  d | k r
 | Sq
 Wd  S(   Ns   --(   R/   RZ   Rq   Rk   (   R   t   long_reads_type(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   get_long_reads_type  s    c         C   s   |  j  d  o |  d j   S(   Ns   --si   (   Rq   R&   (   R   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   is_single_read_type  s    c         C   sn   d } d } t  |   rI t  |   } t |  t j d |   j    } n t |   rd t |   } n  | | f S(   Nt   pei   s   \d(   R   R(   R   t   searcht   startR   (   R   t   lib_typet
   lib_number(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   get_lib_type_and_number  s    %c         C   s   |  j  d  r d } n |  j  d  r0 d } nu |  j  d  rH d } n] |  j  d  so t |   so t |   rx d } n- |  j  d	  s |  j  d
  r d } n d } | S(   Ns   -12s   interlaced readss   -1s
   left readss   -2s   right readss   -ss   single readss   -ms   -mergeds   merged readst   orientation(   R1   R   R   (   R   t	   data_type(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   get_data_type  s    			'		c         C   sr   d  } |  j d  d k rh d |  |  j d   t j k rh |  |  j d   } |  |  j d  d }  n  |  | f S(   Nt   :iR!   i   (   Rk   R   R/   RX   (   R   R   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   get_option_prefix  s
    5c         C   s  t  |   \ } } t |   } | d k r7 |  d } n  | t j k rv t j t t j j    j |  | d } nL | t j k r t j t	 t j j    t j j |  } n t
 d |   | | s| t j k r t j | | | d <q| | | d <n  | j d  r{t |  \ } } | r?d | t j | <n  | | | k rg| | | j |  q| g | | | <n | | | | <d  S(   NR   ii   s)   can't detect library type from option %s!t   typet   readsR!   (   R   R   R/   R   t   MAX_LIBS_NUMBERR   R   R   RZ   R%   R   R1   R   RV   R2   (   R   R   t   dataset_dataR   R   R   t	   record_idR   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   add_to_dataset  s,    0
c         C   s4  g  } x'|  D]} | s q n  t  } t  } xB | j   D]4 } | j d  rV t } n  | d k r8 t } Pq8 q8 W| s| q n  | r | d d k r d | d <d	 | k r | d	 =q n  d	 | k r| d d k s | d d
 k s | d d k r d | d	 <q| d d k rd | d	 <qn  | j |  q W| S(   NR   s   interlaced readss   merged readss
   left readss   right readsR   s
   paired-endt   singleR   s   hq-mate-pairst	   clouds10xt   frs
   mate-pairst   rf(   s   interlaced readss   merged readss
   left readss   right reads(   R|   R   R1   R   R2   (   R  t   corrected_dataset_datat   reads_libraryt	   has_readst   has_paired_readsR   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   correct_dataset0  s2    	
0c   	      C   s   t  t |   } g  } x |  D] } x | j   D] \ } } | j d  r2 g  } x | D]w } t  t | t |    } | t j | <| t j k r | | k r t j | t j | <t j | =n  | j |  qZ W| | | <q2 q2 W| j |  q W| S(   NR   (	   R   R   t   itemsR1   R   R/   RD   RV   R2   (	   R  RJ   t   abs_paths_dataset_dataR	  R   t   valuet   abs_paths_readst
   reads_filet   abs_path(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   relative2abs_pathsM  s     i
   i   i   c   
      C   s   g  t  |  | |  D] } t | | |  ^ q } t |  t |  } xS | D]K } t j | |  | k rN t d t |  d t |  d |  qN qN Wt |  }	 | j	 d t |	  d  |	 S(   Ns1   Read lengths differ more than allowable. Length: s   . Avg. length: R!   s   
Reads length: s   
(
   t   get_reads_filest   get_max_reads_lengtht   sumR%   t   matht   fabsR    R'   t   minR	   (
   R  R   t   ignored_typest   num_checkedt   diff_len_allowableR  t   max_reads_lenghtst   avg_lent   max_lent   reads_length(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   get_reads_length`  s    10c         c   s   xz |  D]r } xi | j    D][ \ } } | | k rM | j d | d  q q | j d  r x | D] } | Vqc Wq q Wq Wd  S(   Ns   Files with s    were ignored.R   (   R  R	   R1   (   R  R   R  R	  R   R  R  (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyR  l  s    c         C   s   t  j |   } | s) t d |  |  n  t g  t j t  j t  j |  d  |  |  D] } t |  ^ qW  } | j	 |  d t
 |   | S(   Ns#   Incorrect extension of reads file: R   s   : max reads length: (   R    t   get_read_file_typeR   t   maxt	   itertoolst   islicet   parset   OpenR%   R	   R'   (   R  R   R  t	   file_typet   rect   max_reads_length(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyR  w  s    Ic      	   C   s  g  } xIt  |   D];\ } } d } d } x | j   D] \ } }	 | j d  r8 x |	 D]y }
 t |
 | d t | d  d | d |  t |
 | d t | d  d | d | | d |  | j |
  qZ W| d k r t |	  } q| d k rt |	  } qq8 q8 W| | k r t d	 t | d  d | d d
 |  q q Wt |  snt d |  n  t	 | |  d  S(   Ni    R   s   , library number: i   s   , library type: R   s
   left readss   right readssx   the number of files with left paired reads is not equal to the number of files with right paired reads (library number: s   )!s0   You should specify at least one file with reads!(
   t	   enumerateR  R1   RH   R'   Ra   R2   R%   R   RT   (   R  R\   R   t	   all_filest   idR	  t   left_numbert   right_numberR   R  R  (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   check_dataset_reads  s,    (2/c         C   s   t  } t } xP |  D]H } | t j k r. q n  | t j k rU | d k r[ t  } q[ q t } q W| r| | r| t d |  n  d  S(   Ns   -ssq   It is recommended to specify single reads with --pe<#>-s, --mp<#>-s, --hqmp<#>-s, or --s<#> option instead of -s!(   R   R|   R/   t   reads_optionst   OLD_STYLE_READS_OPTIONSR    (   t   optionsR   t   only_old_style_optionst   old_style_single_readsR   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   check_single_reads_in_options  s    
c         C   se   t  |  t k	 r | g } n  g  } x: t |   D], \ } } | d | k r1 | j |  q1 q1 W| S(   NR   (   R   R   R+  R2   (   R  t   typest   lib_idsR-  R	  (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   get_lib_ids_by_type  s    c         C   s;   t  |  |  } g  } x | D] } | j |  |  q W| S(   N(   R9  R2   (   R  R7  t   idst   resultR-  (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   get_libs_by_type  s
    c         C   s7   t  |  |  } x! t | d t D] } |  | =q" W|  S(   NR   (   R9  R   R   (   R  R7  R:  R-  (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   rm_libs_by_type  s    c         C   s   x |  D] } | r t  Sq Wt S(   N(   R|   R   (   R  R	  (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   dataset_is_empty  s    c         C   s`   xY |  D]Q } xH | D]@ } | j  d  r x( | | D] } | j  d  r4 t Sq4 Wq q Wq Wt S(   NR   s   .gz(   R1   R   R|   (   R  R	  R   R  (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   dataset_has_gzipped_reads  s    c         C   s%   x |  D] } d | k r t  Sq Wt S(   Ns   interlaced reads(   R   R|   (   R  R	  (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   dataset_has_interlaced_reads  s    c         C   s,   x% |  D] } | d j  d  r t Sq Wt S(   NR   RU   (   R1   R   R|   (   R  R	  (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   dataset_has_additional_contigs  s    c         C   s)   x" |  D] } | d d k r t  Sq Wt S(   NR   t   nxmate(   R   R|   (   R  R	  (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   dataset_has_nxmate_reads  s    c      	   C   s  t    } x|  D]} t |  } | d j d  rg  } xk| d D]_} | t j k rq t j | } | }	 n t j j |  \ }	 } t }
 | j d  r t	 }
 | t j k r t j j |	  \ }	 } q n  t
 | t j d d d |
 \ } } | rt j j |  st j |  n  t j j | t j j |	  d  } | t j k r^t j | =n  | j d	 | t j | f  t | |  | j |  qF | j |  qF W| | d <n  | j |  q W| S(
   NR   RU   s   single readss   .gzt   replace_chart   At   gzippeds   .fastas   == Processing additional contigs (%s): changing Ns to As and splitting by continues (>= %d) Ns fragments (results are in %s directory)(   R   t   dictR1   R/   RV   R   R   RW   R|   R   t   break_scaffoldst)   THRESHOLD_FOR_BREAKING_ADDITIONAL_CONTIGSR   RO   R   t   basenameR	   t   write_fastaR2   (   R  t   dstR   t   new_dataset_dataR	  t   new_reads_libraryt	   new_entryRU   R^   RJ  RF  t   _t   modifiedt	   new_fastat   new_filename(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt    process_Ns_in_additional_contigs  s@    		%	c      
      s  d  t t   f d  } t   } xw|  D]o} t |  } xM| j   D]?\ } } | d k rG d | k r g  | d <g  | d <n  x| D]}	 |	 t j k r t j |	 }
 n t j j	 |	  d }
 t } |
 j
 d  r.t } t j |	 d  } t j j	 |	  d } t j j	 t j j |   \ } }
 n3 t |	 d  } t j j	 t j j |	   \ } }
 |	 t j k rt j |	 }
 n  |
 j   j d	  s|
 j   j d
  rt } d
 }
 n t } d }
 t j j | | d |
  } t j j | | d |
  } t j o)t j j |  o)t j j |  s0t t _   j d |	 d | d  t | d  t | d  g } d } | | | | d  | t j j d  o|  } xF | r| d d } | | | | | | t j j d  o|  } qW| d k rt d |	 d    n  | d j   | d j   n  | j   | d j |  | d j |  |	 t j k r t j |	 =q q W| d =qG qG W| j |  q( W| S(   Nc   	         s  | d  k r$ t |  j   |  } n  | s. d St |  j   |  } t |  j   |  } |  j   } x{ | r} | j d  s | r | j d  r | | 7} t |  j   |  } | sg | |  j   k r Pn  |  j   } qg qg W| j | d  | j | d  | rt |  j   |  } t |  j   |  } x] | j d  s| | 7} t |  j   |  } | s7| |  j   k rPn  |  j   } q7q7Wt |  t |  k rt d | t |  t |  f    n  | j d  | j | d  n  | S(   NRB   R.   t   >s   
t   @sp   The length of sequence and quality lines should be the same! Check read %s (SEQ length is %d, QUAL length is %d)s   +
(   Rk   R   R   R   Rq   R   R%   R   (	   t   in_filet   out_filet	   read_namet   is_fastqR   t
   read_valueRv   t   fpost   read_quality(   R   (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   write_single_read  s@    0

#s   interlaced readss
   left readss   right readsi   s   .gzR   i    s   .fqs   .fastqs   .fastat   _1t   _2s   == Splitting s    into left and right reads (in s    directory)R   s   3.i   s3   The number of reads in file with interlaced reads (s   ) should be EVEN!(   Rk   R|   R   RG  R  R/   RV   R   R   RW   R1   R   t   gzipRp   RJ  R   Rq   R   R   R=   R	   R   R)   R   R   R2   (   R  RL  R   R^  RM  R	  RN  R   R  t   interlaced_readsR^   t   was_compressedt
   input_filet	   ungzippedt   out_basenameRZ  t   out_left_filenamet   out_right_filenamet	   out_filesR+   t   next_read_name(    (   R   sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   split_interlaced_reads  sj    $	
'$*	-		
c         C   s@  yd d  l  } t   } x |  D] } t |  } | d d k r| d } | d } g  | d <g  | d <g  | d <xx t |  D]j \ }	 }
 | |	 } | j |
 | | |  \ } } } | d j |  | d j |  | d j |  q Wd | d <d | d	 <n  | j |  q W| SWn t k
 r;t d
 |  n Xd  S(   NiR   RB  s
   left readss   right readss   single readss
   mate-pairsR  R   sA   Can't process Lucigen NxMate reads! lucigen_nxmate.py is missing!(   t   lucigen_nxmateR   RG  R+  t   process_readsR2   t   ImportErrorR   (   R  RL  R   Rl  RM  R	  RN  t   raw_left_readst   raw_right_readsR-  t   left_reads_fpatht   right_reads_fpatht   processed_left_reads_fpatht   processed_right_reads_fpatht   single_reads_fpath(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   process_nxmate_readsp  s.    	





!
s       c         C   s   d d d d d g } x t  |   D] \ } } | j | d t | d  d | d	  d
 | k r | j | d | d
  n  xP | D]H } | | k r d } n t | |  } | j | d | d |  q Wq" Wd  S(   Ns
   left readss   right readss   interlaced readss   single readss   merged readss   Library number: i   s   , library type: R   R   s     orientation: s   not specifieds     s   : (   R+  R	   R'   (   R  R   t   indentt   READS_TYPESR-  R	  t
   reads_typeR  (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   pretty_print_reads  s    +	c         C   s   g  } g  } t  } d } | r0 t j |   } n t |   } x | D] } t | | od t j j d   } | sv qC n  | d d k r | j | j    | s | j |  n t	 } d } qC | | j   7} qC W| j |  | j
   t | |  S(   NRB   s   3.i    RU  (   R   Ra  Rp   R   R   R)   Rq   R2   Rj   R|   R   t   zip(   RG   RF  t   res_namet   res_seqt   firstt   seqt   file_handlerRv   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt
   read_fasta  s*    !	
c         C   s   t  |  d  } xd | D]\ \ } } | j | d  x< t d t |  d  D]" } | j | | | d !d  qL Wq W| j   d  S(   NR   s   
i    i<   (   Rp   R   R$   R%   R   (   RG   t   fastat   outfilet   nameR  R+   (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyRK    s    $t   Nc         C   s  g  } t  } xt t |  |   D]\ } \ } } d }	 d }
 d } x-|	 t |  k  ru| j d |	  d k ru| d k r t } n  | j d |	  } | d } x0 | t |  k r | | d k r | d 7} q W| d }	 | | | k rI t } | | k ri| j | j   d d t |
  d d j	 | j   d  | | | !j
 d |  f  |
 d 7}
 n  | } qI qI W| t |  k  r" | j | j   d d t |
  d d j	 | j   d  | | j
 d |  f  q" q" W| | f S(   Ni    i   R  iRP  R   (   R|   R+  R  R%   R   R   R2   R"   R'   R   R3   (   RE   t	   thresholdRD  RF  RR  RQ  R-  R  R  R+   t   cur_contig_numbert   cur_contig_startR   t   end(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyRH    s2    (-	
%
<<!c         C   s1   i d d 6d d 6d d 6d d 6d d 6|  j    S(   Nt   TRE  t   Gt   CR  (   t   upper(   t   letter(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   comp  s    c         C   s&   d j  t j t |  d  d  d    S(   NRB   i(   R   R$  t   imapR  (   R  (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   rev_comp  s    c         C   s|   |  j  d  } t |  d k  sA | d d k rS | d d k rS t d |   d  S|  j d  d k rt | d	 d S| d	 S(
   NRP  i   i    s   >NODEt   NODEs   Contig %s has unknown ID formatt   'ii   (   R"   R%   R    Rk   R   (   t   st   values(    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   get_contig_id  s    2c         C   s   |  j  d  r |  d S|  S(   NRU  i   (   Rq   (   R  (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   remove_fasta_pref  s    c         C   s+   y t  |   t SWn t k
 r& t SXd  S(   N(   t   floatR   Rr   R|   (   R  (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   is_float  s
    
c         C   s+   y t  |   t SWn t k
 r& t SXd  S(   N(   R(   R   Rr   R|   (   R  (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   is_int  s
    
i'  (]   R   R   R   R   R   R   Ra  R   R   R/   R$  t   commonR    R  t   os.pathR   R   R   t   distutils.versionR   R   R   R   R   Rk   R   R   R|   R   R    R;   R<   RA   RH   RK   RC   RP   RQ   RT   Ra   Ro   Rx   RL   R)   Rq   R   R   R   R   R   R   R   R   R
   R   R   R   R   R   R   R   R   R   R   R  R  R  R!  R  R  R0  R6  R9  R<  R=  R>  R?  R@  RA  RC  RT  Rk  Rv  Rz  R  RK  RH  R  R  R  R  R  R  (    (    (    sW   /srv/scratch/marinovg/spades-cloudspades-paper/assembler/src/spades_pipeline/support.pyt   <module>
   s   													#7	1	%											 				
			
				
				%	a						
		