##################################
#                                #
# Last modified 2022/03/20       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import os
import pysam
import random
import string
from sets import Set

def getReverseComplement(preliminarysequence):
    
    DNA = {'A':'T','T':'A','G':'C','C':'G','N':'N','a':'t','t':'a','g':'c','c':'g','n':'n'}
    sequence=''
    for j in range(len(preliminarysequence)):
        sequence=sequence+DNA[preliminarysequence[len(preliminarysequence)-j-1]]
    return sequence

def run():

    if len(sys.argv) < 7:
        print 'usage: python %s barcodes(1+2+3) fieldID wanted_barcodes columnID 1|2|3 rev|for outfile [-fullBC]' % sys.argv[0]
        print '\tNote: the position of the barcodes refers to the ligation rounds'
        print '\t\t so if 1 is indicated, but [rev] is listed in the last argument, the script will look at the third sequenced barcode in a reverse complement direction'
        print '\tuse the [-fullBC] option if you want full barcodes and not just one of the rounds; this option is only compatible with the [for] argument'
        sys.exit(1)

    BCs = sys.argv[1]
    fieldID = int(sys.argv[2])
    BCfile = sys.argv[3]
    BCfileID = int(sys.argv[4])
    BCpos = int(sys.argv[5])
    ForRev = sys.argv[6]
    outfilename = sys.argv[7]

    doFullBC = False
    if '-fullBC' in sys.argv:
        doFullBC = True
        print 'will use full barcodes'

    i=0
    BCDict={}

    linelist=open(BCfile)
    for line1 in linelist:
        if line1.startswith('#'):
            continue
        fields = line1.strip().split('\t')
        if ForRev == 'rev':
            BC = getReverseComplement(fields[BCfileID])
        if ForRev == 'for':
            BC = fields[BCfileID]
        BCDict[BC] = 1

    outfile = open(outfilename, 'w')

    linelist=open(BCs)
    for line1 in linelist:
        if line1.startswith('#'):
            continue
        fields = line1.strip().split('\t')
        if doFullBC:
            BC = fields[fieldID]
        else:
            BC = fields[fieldID].split('+')
        if ForRev == 'for':
            if doFullBC:
                if BCDict.has_key(BC):
                    outfile.write(line1)
            else:
                if BCDict.has_key(BC[BCpos-1]):
                    outfile.write(line1)
        if ForRev == 'rev':
            if BCDict.has_key(BC[-BCpos]):
                outfile.write(line1)

run()
