##################################
#                                #
# Last modified 2021/08/09       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import os
import gzip
from sets import Set
import Levenshtein
import numpy as np

def getReverseComplement(preliminarysequence):
    
    DNA = {'A':'T','T':'A','G':'C','C':'G','N':'N','X':'X','a':'t','t':'a','g':'c','c':'g','n':'n','x':'x','R':'R','r':'r','M':'M','m':'m','Y':'Y','y':'y','S':'S','s':'s','K':'K','k':'k','W':'W','w':'w'}
    sequence=''
    for j in range(len(preliminarysequence)):
        sequence=sequence+DNA[preliminarysequence[len(preliminarysequence)-j-1]]
    return sequence

def run():

    if len(sys.argv) < 2:
        print 'usage: python %s sequence editDist [-splitSequence bp]' % sys.argv[0]
        print '\t the script will read stdin by default'
        print '\t the script will print out std out by default'
        print '\t use the [-splitSequence] option to only print the portion of the script after the last match; use the bp parameter if you want additional bases removed'
        sys.exit(1)

    sequence = sys.argv[1]
    ED = int(sys.argv[2])

    doSS = False
    if '-splitSequence' in sys.argv:
        doSS = True
        SSbp = int(sys.argv[sys.argv.index('-splitSequence') + 1])

    input_stream = sys.stdin
    i=0
    HasMatch = False
    for line in input_stream:
        i+=1
        if i == 1:
            if line.startswith('@'):
                pass
            else:
                print 'fastq input is broken, exiting'
                sys.exit(1)
            ID = line.strip()
            continue
        if i == 2:
            seq = line.strip()
            if sequence in seq:
                HasMatch = True
                if doSS:
                    k = len(seq.rpartition(sequence)[0])
                    if len(seq) > k + len(sequence) + SSbp:
                       seq = seq[k +len(sequence) + SSbp:]
                    else:
                       HasMatch = False
            else:
                if len(seq) > len(sequence):
                    for k in range(len(seq)-len(sequence),0,-1):
                        LDist = Levenshtein.distance(sequence,seq[k:k+len(sequence)])
                        if LDist <= ED:
                            HasMatch = True
                            if doSS:
                                if len(seq) > k + len(sequence) + SSbp:
                                    seq = seq[k +len(sequence) + SSbp:]
                                else:
                                    HasMatch = False
                            break
            continue
        if i == 3:
            if line.startswith('+'):
                pass
            else:
                print 'fastq input is broken, exiting'
                sys.exit(1)
            continue
        if i == 4:
            qscores = line.strip()
            if HasMatch:
                print ID
                print seq
                print '+'
                print qscores[-len(seq):]
            i=0
            HasMatch = False
            continue
            
run()
