##################################
#                                #
# Last modified 08/31/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys

# psyco is an optional Python 2 JIT accelerator; the script runs unchanged
# without it. Catch Exception rather than using a bare except so that
# KeyboardInterrupt and SystemExit are not swallowed during start-up.
try:
    import psyco
    psyco.full()
except Exception:
    pass

def run():
    """Convert a tab-delimited reads file into FASTA with trimmed sequences.

    Arguments are read from ``sys.argv``:

        inputfilename   tab-delimited text file, one read per line
        outfilename     FASTA file to write (truncated if it already exists)
        bpToKeep        number of leading characters of the sequence field
                        to keep
        -seqfield N     optional; 0-based index of the tab-separated field
                        that holds the sequence (default 0)

    For each input line the chosen field is cut to its first ``bpToKeep``
    characters, every '.' is replaced with 'N', and everything from the first
    space onwards is dropped. The result is written as a ``>read<N>`` header
    line followed by the sequence, where N is the 1-based input line number.
    Lines that do not contain the requested field are skipped but still
    counted, so headers always match input line numbers.

    Prints a usage message and exits with status 1 when a required
    positional argument is missing or ``-seqfield`` has no value.
    """
    usage = ('usage: python %s inputfilename outfilename bpToKeep '
             '[-seqfield fieldID]' % sys.argv[0])

    # Three positional arguments are mandatory: sys.argv[3] is read below.
    if len(sys.argv) < 4:
        print(usage)
        sys.exit(1)

    inputfilename = sys.argv[1]
    outputfilename = sys.argv[2]
    trim = int(sys.argv[3])

    fieldID = 0
    if '-seqfield' in sys.argv:
        value_pos = sys.argv.index('-seqfield') + 1
        # A trailing '-seqfield' with no value is a usage error, not a crash.
        if value_pos >= len(sys.argv):
            print(usage)
            sys.exit(1)
        fieldID = int(sys.argv[value_pos])
        print('will use string in field %d as sequence' % fieldID)

    # Nested 'with' blocks guarantee both files are closed even on error.
    # The output is opened first, as before, so it is created/truncated
    # before the input is read.
    with open(outputfilename, 'w') as outfile:
        with open(inputfilename) as infile:
            for i, line in enumerate(infile, 1):
                if i % 1000000 == 0:
                    print('%d reads processed' % i)
                fields = line.split('\t')
                try:
                    seqfield = fields[fieldID]
                except IndexError:
                    # Line has too few fields; skip it but keep counting.
                    continue
                # Trim first, then mask '.' and cut at the first space.
                read = seqfield[0:trim].replace('.', 'N').split(' ')[0]
                outfile.write('>read%d\n%s\n' % (i, read.strip()))

# Run the conversion only when executed as a script, so that importing this
# module does not parse sys.argv or exit the importing process.
if __name__ == '__main__':
    run()

