##################################
#                                #
# Last modified 2026/01/19       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import pysam
import string
from sets import Set
import os

def getReverseComplement(preliminarysequence):
    """Return the reverse complement of a DNA sequence.

    Each base is complemented (A<->T, G<->C, N->N) with its case preserved,
    and the order of the bases is reversed.

    Raises KeyError if the sequence contains any character other than
    A, C, G, T, N (upper or lower case).
    """

    DNA = {'A':'T','T':'A','G':'C','C':'G','N':'N','a':'t','t':'a','g':'c','c':'g','n':'n'}
    # A single join keeps this linear in the sequence length; growing the
    # result with repeated string concatenation is quadratic.
    return ''.join([DNA[base] for base in reversed(preliminarysequence)])

def _getTag(fields, tag):
    """Return the value of the optional SAM field `tag`:Z:, or None if absent."""
    prefix = tag + ':Z:'
    # Optional fields start after the 11 mandatory SAM columns.
    for field in fields[11:]:
        if field.startswith(prefix):
            return field[len(prefix):]
    return None

def run():
    """Convert SAM records read from stdin into FASTQ records on stdout.

    Command-line flags (read from sys.argv):
        -noBC         emit the plain read name; do not require barcode tags
        -noUMI        leave the UMI out of the read name
        -appendUMI    put the UMI in front of the read sequence instead of
                      in the read name
        -trimFirst N  drop the first N bases (and qualities) of each read

    Unless -noBC is given, only reads carrying the ba:Z:, bb:Z: and bc:Z:
    barcode tags (and the um:Z: tag, when the UMI is needed) are emitted;
    reads lacking any required tag are skipped.  SAM header lines (those
    starting with '@') are always skipped.
    """

    # sys.argv always contains the script name, so a `len(sys.argv) < 1`
    # check can never fire.  Show the usage text when the script is started
    # with no arguments and nothing piped in; piped invocations without
    # arguments keep working.
    if len(sys.argv) < 2 and sys.stdin.isatty():
        sys.stdout.write('usage: python %s stdin [-trimFirst bp] [-noUMI] [-appendUMI] [-noBC]\n' % sys.argv[0])
        sys.stdout.write('\tNote: the script assumes the following barcode+UMI format\n')
        sys.stdout.write('\t\tba:Z:CTGAGCC\tbb:Z:CATACCAA\tbc:Z:ACCTCCAA\tum:Z:GGGTTAGGGT\n')
        sys.stdout.write('\tthe script takes stdin (from samtools)\n')
        sys.stdout.write('\tthe script will print to stdout\n')
        sys.exit(1)

    doBC = '-noBC' not in sys.argv
    doUMI = '-noUMI' not in sys.argv
    doUMIappend = '-appendUMI' in sys.argv

    TFBP = 0
    if '-trimFirst' in sys.argv:
        TFBP = int(sys.argv[sys.argv.index('-trimFirst') + 1])

    # The UMI is needed either for the read name or for the sequence prefix.
    needUMI = doUMI or doUMIappend

    for line in sys.stdin:
        # Header lines (e.g. from `samtools view -h`) are not reads.
        if line.startswith('@'):
            continue
        fields = line.strip().split('\t')

        if doBC:
            BC1 = _getTag(fields, 'ba')
            BC2 = _getTag(fields, 'bb')
            BC3 = _getTag(fields, 'bc')
            if BC1 is None or BC2 is None or BC3 is None:
                continue
            barcodes = BC1 + '+' + BC2 + '+' + BC3
            UMI = None
            if needUMI:
                UMI = _getTag(fields, 'um')
                if UMI is None:
                    continue
            if doUMIappend:
                readID = fields[0] + ':::[' + barcodes + ']'
                # NOTE(review): -trimFirst is not applied in this mode, as in
                # the original code -- confirm whether that is intended.
                sequence = UMI + fields[9]
                # The UMI bases have no qualities of their own; reuse the
                # qualities of the first len(UMI) read bases as placeholders.
                quality = fields[10][0:len(UMI)] + fields[10]
            else:
                if doUMI:
                    readID = fields[0] + ':::[' + barcodes + '+' + UMI + ']'
                else:
                    readID = fields[0] + ':::[' + barcodes + ']'
                # Previously left unset, which raised NameError on output.
                sequence = fields[9][TFBP:]
                quality = fields[10][TFBP:]
        else:
            readID = fields[0]
            sequence = fields[9][TFBP:]
            quality = fields[10][TFBP:]

        sys.stdout.write('@' + readID + '\n' + sequence + '\n+\n' + quality + '\n')

# Script entry point. The call is unconditional (no __main__ guard), so it
# also executes if this file is imported as a module.
run()

