##################################
#                                #
# Last modified 2025/04/21       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import os
from sets import Set
import Levenshtein
import numpy as np

def run():
    """Append a UMI to every read ID arriving on stdin and print the result.

    Command line: ``UMIlen read1|read2 [-BCformat] [-FASTQ]``

    * ``UMIlen``  -- number of leading bases taken as the UMI.
    * ``read1|read2`` -- which read carries the UMI in the tab-delimited
      modes (sequence taken from column index 1 or 3 respectively).  It is
      still required positionally with ``-FASTQ`` but is not consulted there.
    * ``-BCformat`` -- tab-delimited mode, tag written as ``+[UMI]``.
    * ``-FASTQ``  -- input is 4-line FASTQ; the UMI is also trimmed off the
      sequence and quality strings.  Takes precedence over ``-BCformat``.

    Without either flag the input is tab-delimited and the tag is ``+UMI]``.

    Exits with status 1 when fewer than two positional arguments are given,
    or when a tab-delimited mode is requested with a read selector other than
    ``read1``/``read2``.  ``int()`` raises ValueError for a non-numeric
    ``UMIlen``.
    """
    # Two positional arguments are read below (argv[1] and argv[2]), so at
    # least three argv entries are required; checking "< 2" let a single
    # argument through and crashed with IndexError instead of showing usage.
    # NOTE: single-argument print(...) is used throughout because it emits
    # identical text under the Python 2 print statement and Python 3.
    if len(sys.argv) < 3:
        print('usage: python %s UMIlen read1|read2 [-BCformat] [-FASTQ]' % sys.argv[0])
        print('\t stream the output of PEFastqToTabDelimited.py, then capture the output of this script with PEFastqToTabDelimited-reverse.py')
        sys.exit(1)

    lenUMI = int(sys.argv[1])
    readUMI = sys.argv[2]

    # The modes are mutually exclusive: once stdin has been consumed as
    # FASTQ there is nothing left for a tab-delimited pass to read.
    if '-FASTQ' in sys.argv:
        _tag_fastq_records(sys.stdin, lenUMI)
        return

    # Column index of the sequence that carries the UMI.
    # presumably the columns are ID, seq1, qual1, seq2, qual2 -- confirm
    # against PEFastqToTabDelimited.py.
    umi_columns = {'read1': 1, 'read2': 3}
    if readUMI not in umi_columns:
        # Fail up front instead of hitting an unbound UMI on the first line.
        sys.stderr.write(
            "error: second argument must be 'read1' or 'read2', got %r\n" % readUMI)
        sys.exit(1)

    opener = '+[' if '-BCformat' in sys.argv else '+'
    _tag_tabular_records(sys.stdin, lenUMI, umi_columns[readUMI], opener)


def _tag_fastq_records(lines, lenUMI):
    """Print each 4-line FASTQ record with the UMI moved into the header."""
    header = sequence = None
    for index, line in enumerate(lines):
        phase = index % 4
        if phase == 0:
            header = line.strip()
        elif phase == 1:
            sequence = line.strip()
        elif phase == 3:
            quality = line.strip()
            UMI = sequence[:lenUMI]
            # The header's final character is dropped and replaced by
            # '+UMI]' (presumably the incoming header ends in ']').
            print(header[:-1] + '+' + UMI + ']')
            print(sequence[lenUMI:])
            # The original separator line (phase 2) is not echoed; a bare
            # '+' is always written instead.
            print('+')
            print(quality[lenUMI:])
    # A trailing record with fewer than four lines is dropped silently.


def _tag_tabular_records(lines, lenUMI, umi_column, opener):
    """Print each 5-column tab-delimited line with the UMI added to the ID."""
    for line in lines:
        fields = line.strip().split('\t')
        UMI = fields[umi_column][:lenUMI]
        # Keep only the first space-separated token of the ID, minus its
        # last character, then append the tag.  The sequences themselves
        # are passed through untrimmed in this mode.
        read_id = fields[0].split(' ')[0][:-1]
        # Index each column explicitly so a short line still raises
        # IndexError rather than being emitted truncated.
        print('\t'.join((read_id + opener + UMI + ']',
                         fields[1], fields[2], fields[3], fields[4])))
# Script entry point: the filter runs immediately when this file is executed
# (and, since the call is unguarded, also whenever the module is imported).
run()
