##################################
#                                #
# Last modified 07/22/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def run():

    if len(sys.argv) < 3:
        print 'usage: python %s SAM errorlog outfilename' % sys.argv[0]
        sys.exit(1)

    SAM = sys.argv[1]
    error = sys.argv[2]
    outputfilename = sys.argv[3]

    ErrorLineList={}
    ErrorDict={}
    lineslist = open(error)
    for line in lineslist:
        if line.startswith('Error [file'):
            chr=line.strip().split(') (')[1].split(':')[0]
            pos=line.strip().split(') (')[1].split(': ')[1]
        if line.startswith('query'):
            query=line.strip().split('query ')[1].split('  dna')[0]
        if line.startswith('align: ciglen'):
            cigar=line.strip().split('cigar ')[1].split(' ')[0]
            ErrorDict[(cigar,chr,pos,query)]=''

    i=0
    outfile = open(outputfilename, 'w')
    lineslist = open(SAM)
    for line in lineslist:
        if i % 1000000 == 0:
            print i, 'alignments processed'
        fields=line.strip().split('\t')
        i+=1
        try:
            query=fields[9]
        except:
            continue
        chr=fields[2]
        pos=str(int(fields[3])-1)
        cigar=fields[5]
        if ErrorDict.has_key((cigar,chr,pos,query)):
            print 'skipping line', i
            continue
        outfile.write(line)

    outfile.close()

run()

