##################################
#                                #
# Last modified 2019/01/22       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import os
import random

def _open_input(path):
    """Open the FASTQ input named by *path* for line-by-line text reading.

    Returns a ``(stream, proc)`` pair.  ``stream`` is a file-like object
    with a ``readline`` method; ``proc`` is the decompressor child process
    when one was started, otherwise ``None``.

    * ``-``      -> ``sys.stdin``
    * ``*.bz2``  -> output of ``bzip2 -cd path``
    * ``*.gz``   -> output of ``gunzip -c path``
    * otherwise  -> the file itself, opened for reading
    """
    # Imported locally so this block does not depend on a file-level import.
    import subprocess

    if path == '-':
        return sys.stdin, None

    if path.endswith('.bz2'):
        argv = ['bzip2', '-cd', path]
    elif path.endswith('.gz'):
        argv = ['gunzip', '-c', path]
    else:
        # A plain file needs no helper process; unlike piping through
        # ``cat``, a missing file raises here instead of yielding no output.
        return open(path, 'r'), None

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.  universal_newlines=True gives text-mode lines.
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE,
                            universal_newlines=True)
    return proc.stdout, proc


def run():
    """Move the cell barcode and UMI of every FASTQ read into its read ID.

    Command line: ``input CellBCLength UMILength readLength``.

    For each 4-line FASTQ record the first space-delimited token of the
    header is extended with ``---<cell barcode>---<UMI>``, where the cell
    barcode is the first ``CellBCLength`` bases of the sequence and the UMI
    is the following ``UMILength`` bases.  The sequence and quality lines
    are cut down to their last ``readLength`` characters and the separator
    line is written as a bare ``+``.  Records are printed to stdout.

    Exits with status 1 after printing a usage message when fewer than four
    arguments are given, or after printing an error when a header line does
    not start with ``@``.  A trailing incomplete record is silently dropped.
    """
    # Four positional arguments are read below (sys.argv[1..4]), so the
    # argument vector must hold at least five entries.
    if len(sys.argv) < 5:
        # Single-argument parenthesised print behaves the same under
        # Python 2 (print statement) and Python 3 (print function).
        print('usage: python %s input CellBCLength UMILength readLength' % sys.argv[0])
        print('\tThe script can read compressed files as long as they have the correct suffix - .bz2 or .gz')
        print('\tThe script can also read stdin (-)')
        sys.exit(1)

    path = sys.argv[1]
    cbc_len = int(sys.argv[2])
    umi_len = int(sys.argv[3])
    read_len = int(sys.argv[4])

    stream, proc = _open_input(path)
    try:
        # Position inside the current record:
        # 0 = header, 1 = sequence, 2 = separator, 3 = qualities.
        field = 0
        read_id = ''
        seq = ''
        # readline() returns '' only at end of input; blank lines are '\n'.
        for line in iter(stream.readline, ''):
            if field == 0:
                if not line.startswith('@'):
                    print('FASTQ file broken, exiting')
                    sys.exit(1)
                # Keep only the first space-delimited token of the header.
                read_id = line.strip().split(' ')[0]
            elif field == 1:
                seq = line.strip()
            elif field == 3:
                scores = line.strip()
                barcode = seq[0:cbc_len]
                umi = seq[cbc_len:cbc_len + umi_len]
                print(read_id + '---' + barcode + '---' + umi)
                # NOTE: with readLength 0 the slice [-0:] keeps the whole
                # string rather than nothing (unchanged from the original).
                print(seq[-read_len:])
                print('+')
                print(scores[-read_len:])
            # field == 2 (the separator line) is skipped; a fixed '+' is
            # emitted with the record instead.
            field = (field + 1) % 4
    finally:
        # Release the file/pipe and reap the decompressor, also on sys.exit.
        if stream is not sys.stdin:
            stream.close()
        if proc is not None:
            proc.wait()

# Script entry point: executes unconditionally whenever this file is run
# (or imported), reading its arguments from sys.argv.
run()