ó
°=ú[c           @   sY   d  d l  Z  d  d l Z d  d l Z d  d l Z d  d l Z d „  Z d „  Z e ƒ  d S(   iÿÿÿÿNc         C   s“   i
 d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d	 6d	 d 6d
 d
 6} d } x: t  t |  ƒ ƒ D]& } | | |  t |  ƒ | d } qe W| S(   Nt   Tt   At   Ct   Gt   Nt   tt   at   ct   gt   nt    i   (   t   ranget   len(   t   preliminarysequencet   DNAt   sequencet   j(    (    s   guidesMismatches.pyt   getReverseComplement   s
    L$c          C   sB  t  t j ƒ d k  r: d t j d GHd GHt j d ƒ n  t j d }  t j d } t t j d ƒ } t j d } g  } d } | j d ƒ r¡ t j | ƒ } n t | ƒ } d } x§ | D]Ÿ } | j d	 ƒ sº | j	 ƒ  d
 k rç qº n  | j	 ƒ  j
 d ƒ d }	 | j |	 ƒ | d k r(t  |	 ƒ } n' t  |	 ƒ | k rOd GHt j d ƒ n  | d 7} qº Wd GHi  }
 d
 } |  j d ƒ rt j |  ƒ } n t |  ƒ } xƒ | D]{ } | d d k r
| d
 k rÚd
 j | ƒ |
 | <n  | j	 ƒ  j
 d ƒ d } | GHg  } t } q¢q¢| j | j	 ƒ  ƒ q¢Wd
 j | ƒ |
 | <d GHi  } d } i  } xå |
 j ƒ  D]× } | d 7} | | | <xº t t  |
 | ƒ | ƒ D]ž } | d d k rÁd G| Gt | d ƒ d GHn  |
 | | | | !} | j | ƒ rèn
 i  | | <| | j | ƒ rn g  | | | <| | | j | ƒ qWqXWd GHt | d ƒ } d } | j | d ƒ d } xÍ| D]Å}	 d } | d 7} | d d k r| Gd GHn  t |	 ƒ } x„| D]|} | d 7} | d d k rä|	 G| Gd G|	 GHn  t j | |	 ƒ } | | k rˆxƒ | | j ƒ  D]n } xe | | | D]U } |	 d | d t | ƒ d | | d t | ƒ d d } | j | d ƒ q(WqWn  t j | | ƒ } | | k r°xƒ | | j ƒ  D]n } xe | | | D]U } |	 d | d t | ƒ d | | d t | ƒ d d } | j | d ƒ qÌWq·Wq°q°WqkW| j ƒ  d  S(   Ni   s@   usage: python %s fasta inputfilename N_mismatches outputfilenamei    s(   	Note: guides have to be the same lengthi   i   i   s   .gzt   #R
   s   	s,   guides of different length detected, exitings   finished parsing guidest   >s   finished parsing genomei@B s   kmer parsings   M positionss   finished parsing kmerst   ws1   #sgRNA	genomic_sequence	mismatches	chr	pos	strands   
id   s   sgRNAs processeds   kmers processed for sgRNAt   +t   -(   R   t   syst   argvt   exitt   intt   endswitht   gzipt   opent
   startswitht   stript   splitt   appendt   joint   Falset   keysR   t   strt   has_keyt   writeR   t   Levenshteint   hammingt   close(   t   fastat   guidest   MMt   outfilenamet	   GuideListt   Kt   linelistt   it   linet   sgRNAt
   GenomeDictR   t   inputdatafilet   chrt   Keept   KmerDictR   t   CDictt   kmert   outfilet   outlinet   PSGRt   KKt   RCsgRNAt   H(    (    s   guidesMismatches.pyt   run   s°    !

!


>>$(   R   t   ost   stringR   R(   R   RB   (    (    (    s   guidesMismatches.pyt   <module>	   s   		y