##################################
#                                #
# Last modified 2022/11/19       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import pysam
import string
from sets import Set
import os

def getReverseComplement(preliminarysequence):
    """Return the reverse complement of a DNA sequence.

    The input is read back to front and every base is replaced by its
    complement. Letter case is preserved ('a' -> 't', 'A' -> 'T') and
    'N'/'n' map to themselves.

    Args:
        preliminarysequence: string made of A, C, G, T, N in either case.

    Returns:
        The reverse-complemented string ('' for empty input).

    Raises:
        KeyError: if the sequence contains any other character.
    """
    DNA = {'A':'T','T':'A','G':'C','C':'G','N':'N','a':'t','t':'a','g':'c','c':'g','n':'n'}
    # Join once instead of growing a string base by base, which is quadratic
    # on long sequences.
    return ''.join([DNA[base] for base in reversed(preliminarysequence)])

# Optional SAM tags that must all be present for a read to be emitted, in
# the order they appear in the output read ID: three barcode tags
# (n1, n2, n3) followed by the UMI tag (b2).
_BARCODE_TAGS = ('n1:Z:', 'n2:Z:', 'n3:Z:', 'b2:Z:')


def _samLineToFastq(line):
    """Turn one SAM text line into a four-line FASTQ record.

    Returns the record as a single newline-terminated string, or None when
    the line has no optional-field columns or lacks any tag listed in
    _BARCODE_TAGS.
    """
    fields = line.strip().split('\t')
    # Columns 1-11 are the mandatory SAM fields; tags can only come after.
    if len(fields) < 12:
        return None
    values = {}
    for field in fields[11:]:
        prefix = field[:5]
        # Keep the first occurrence of each tag, as the original parser did.
        if prefix in _BARCODE_TAGS and prefix not in values:
            values[prefix] = field[5:]
    if len(values) < len(_BARCODE_TAGS):
        return None
    readID = fields[0] + ':::[' + '+'.join([values[tag] for tag in _BARCODE_TAGS]) + ']'
    return '@' + readID + '\n' + fields[9] + '\n' + '+' + '\n' + fields[10] + '\n'


def run():
    """Convert SAM records read from stdin into FASTQ records on stdout.

    Each emitted read is named '<QNAME>:::[<n1>+<n2>+<n3>+<b2>]' and carries
    the sequence (column 10) and quality string (column 11) of the SAM line.
    Lines without a 'b2:Z:' tag are ignored. Lines that mention 'b2:Z:' but
    cannot be converted (missing n1/n2/n3 tag, or too few columns) are
    skipped and reported once on stderr instead of aborting the stream.

    The usage text is printed, and the process exits with status 1, when
    '-h' or '--help' is given or when stdin is a terminal (nothing piped in).
    The original 'len(sys.argv) < 1' guard could never be true.
    """
    wantsHelp = any(arg in ('-h', '--help') for arg in sys.argv[1:])
    if wantsHelp or sys.stdin.isatty():
        usage = (
            'usage: python %s stdin\n'
            '\tNote: the script assumes the following barcode+UMI format\n'
            '\t\tn1:Z:CTGAGCC\tn2:Z:CATACCAA\tn3:Z:ACCTCCAA\tb2:Z:GGGTTAGGGT\n'
            '\tthe script takes stdin (from samtools)\n'
            '\tthe script will print to stdout\n'
        ) % sys.argv[0]
        sys.stdout.write(usage)
        sys.exit(1)

    skipped = 0
    for line in sys.stdin:
        # Cheap pre-filter: reads without a UMI tag are dropped silently.
        if 'b2:Z:' not in line:
            continue
        record = _samLineToFastq(line)
        if record is None:
            skipped += 1
            continue
        sys.stdout.write(record)

    if skipped:
        sys.stderr.write(
            'warning: skipped %d line(s) containing b2:Z: but lacking a '
            'complete n1/n2/n3/b2 tag set\n' % skipped
        )

# Start the conversion unconditionally at module load: there is no
# __main__ guard, so importing this file also runs it.
run()

