##################################
#                                #
# Last modified 2020/02/23       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import os
import subprocess
import regex

def _read_fasta(path):
    """Read a FASTA file into a dict mapping record ID to its sequence.

    The ID is the text between the leading '>' and the next '>' (if any) on
    the header line, with surrounding whitespace stripped.  Sequence lines
    are stripped and concatenated.  Lines that appear before the first
    header are ignored because there is no record to attach them to.  If an
    ID occurs more than once, the last record with that ID wins.
    """
    records = {}
    current_id = None
    chunks = []
    with open(path) as handle:
        for line in handle:
            if line.startswith('>'):
                # A new header closes the record collected so far.
                if current_id is not None:
                    records[current_id] = ''.join(chunks)
                current_id = line.strip().split('>')[1]
                chunks = []
            elif current_id is not None:
                chunks.append(line.strip())
    # Flush the final record (nothing to flush for a header-less file).
    if current_id is not None:
        records[current_id] = ''.join(chunks)
    return records


def _trim_adapter(sequence, probe, min_match, max_dist):
    """Repeatedly cut *sequence* just past the first occurrence of *probe*.

    A cut is made only while the first occurrence ends within the first
    *max_dist* characters (pos + min_match <= max_dist).  Each cut removes
    pos + min_match characters, so with min_match >= 1 the loop always
    terminates.
    NOTE: when min_match exceeds len(probe), min_match characters are still
    removed after the match position, as in the original implementation.
    """
    while True:
        pos = sequence.find(probe)
        if pos == -1 or pos + min_match > max_dist:
            return sequence
        sequence = sequence[pos + min_match:]


def run():
    """Command-line entry point.

    Expects sys.argv to hold: fasta adapter minMatch maxDist outfilename.
    Reads every record from the FASTA file, strips leading occurrences of
    the last minMatch characters of the adapter that end within the first
    maxDist bases, and writes the records to outfilename wrapped at 50
    characters per line.

    Exits with status 1 (after printing a message) when too few arguments
    are given, when minMatch is smaller than 1, or when the adapter is
    empty.  A non-integer minMatch/maxDist raises ValueError from int().
    """
    # Five user arguments plus the script name are required; the original
    # check (< 5) let a missing outfilename through to an IndexError.
    if len(sys.argv) < 6:
        print('usage: python %s fasta adapter minMatch maxDist outfilename' % sys.argv[0])
        sys.exit(1)

    fasta = sys.argv[1]
    adapter = sys.argv[2]
    MM = int(sys.argv[3])
    maxD = int(sys.argv[4])
    outputfilename = sys.argv[5]

    # With minMatch < 1 or an empty adapter the trimming loop can stop
    # making progress and never terminate, so reject those inputs up front.
    if MM < 1 or not adapter:
        sys.exit('error: minMatch must be >= 1 and adapter must not be empty')

    seqDict = _read_fasta(fasta)

    # Only the last MM characters of the adapter are searched for.
    probe = adapter[-MM:]

    with open(outputfilename, 'w') as outfile:
        for ID, sequence in seqDict.items():
            sequence = _trim_adapter(sequence, probe, MM, maxD)
            outfile.write('>' + ID + '\n')
            # Wrap the sequence at 50 characters per line.
            for i in range(0, len(sequence), 50):
                outfile.write(sequence[i:i + 50] + '\n')

# Run only when executed as a script, so that importing this module does not
# immediately parse sys.argv and exit.
if __name__ == '__main__':
    run()

