##################################
#                                #
# Last modified 04/30/2012       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

try:
	import psyco
	psyco.full()
except:
	pass

def run():

    if len(sys.argv) < 7:
        print 'usage: python %s <input filename?  <-f | -q>  <adapter filename> <minimal match to adapter> <maximum trimmed read length> <minimal trimmed read length> outfilename' % sys.argv[0]
        sys.exit(1)

    DNA = {'A':'T','T':'A','G':'C','C':'G','N':'N'}

    inputfilename = sys.argv[1]
    type = sys.argv[2]
    adapterfilename = sys.argv[3]
    MinMatch = int(sys.argv[4])
    bptokeep = int(sys.argv[5])
    MinLength = int(sys.argv[6])
    outputfilename = sys.argv[7]

    listoflines = open(adapterfilename)
    lineslist = listoflines.readlines()
    adapter = lineslist[0].split('\n')[0].split('\t')[0].split(' ')[0]

    outfile = open(outputfilename, 'w')

    lineslist = open(inputfilename)
    lIndex=0
    bad=0
    if type =='-f':
        for line in lineslist:
            if lIndex % 2000000 == 0:
                print int(lIndex/2.0), 'reads processeds' 
            lIndex+=1
            if line[0]=='>':
                line1 = line
            else:
                fields = line.rpartition(adapter[0:MinMatch])
                if len(fields[0])<MinLength or len(fields[0])>bptokeep or len(fields[len(fields)-2]+fields[len(fields)-1])<MinMatch:
                    bad+=1
                    continue
                else:
                    outfile.write(line1)
                    outfile.write(fields[0]+'\n')
    if type =='-q':
        for line in lineslist:
            if lIndex % 4000000 == 0:
                print int(lIndex/4.0), 'reads processeds' 
            if lIndex % 4 == 0 and line[0]=='@':
                readID1 = line.strip()
                linegood=True
                lIndex+=1
                continue
            if lIndex % 4 == 0 and line[0]!='@':
                print 'broken fastq file '
                sys.exit(1)
            if lIndex % 4 == 1:
                fields = line.rpartition(adapter[0:MinMatch])
                if len(fields[0])<MinLength or len(fields[0])>bptokeep or len(fields[len(fields)-2]+fields[len(fields)-1])<MinMatch:
                    bad+=1
                    linegood=False
                else:
                    sequence=fields[0]
                lIndex+=1
                continue
            if lIndex % 4 == 2 and line[0]=='+':
                readID2 = line
                lIndex+=1
                continue
            if lIndex % 4 == 3:
                if linegood:
                    quality=line[0:len(sequence)]
                    outfile.write(readID1+'\n')
                    outfile.write(sequence+'\n')
                    outfile.write('+'+readID1[1:-1]+'\n')
                    outfile.write(quality+'\n')
                lIndex+=1
                continue

    print bad, ' reads failed conversion'
            
    outfile.close()

run()

