##################################
#                                #
# Last modified 2017/03/31       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import os

def run():
    """Concatenate FASTQ files to stdout, tagging every read ID with a cell label.

    Command line: ``list_of_fastq_files IDsuffix [-config]``.

    Each non-blank line of the list file names one cell; several files for
    the same cell are separated by commas.  Without ``-config`` the first
    tab-separated field holds the file(s) and every read ID ``@X`` becomes
    ``@<IDsuffix>_<line number>:::X``.  With ``-config`` the line is
    ``label <tab> file(s)`` and the ID becomes ``@<label>:::X``.

    Files ending in ``.bz2`` or ``.gz`` are decompressed through ``bzip2 -cd``
    or ``zcat``; everything else is read through ``cat``.  File names are
    passed to those programs literally (no shell is involved), so names
    containing spaces or shell metacharacters are handled correctly.

    Exits with status 1 after printing the usage text when fewer than two
    arguments are given, and exits with an error message when a ``-config``
    line has no tab-separated file field.
    """
    import subprocess

    emit = sys.stdout.write

    # Both the list file and the ID suffix are read positionally below, so
    # both must be present (the original check allowed IDsuffix to be missing
    # and then crashed with an IndexError).
    if len(sys.argv) < 3:
        usage = [
            'usage: python %s list_of_fastq_files IDsuffix [-config]' % sys.argv[0],
            '\tNote: the script will follow the order of files in the list; if you have paired-end reads, make sure you run the script on list of reads for each end that are in identical order',
            '\tNote: if you have multiple files per cell, separate them by comma on the same line',
            '\tNote: the script can read .gz and .bz2 files, just make sure they end with those precise same file extensions',
            '\tNote: the [-config] option assumes the list of fastq files is the following format:',
            '\t\t\t label (has to be unique) <tab> file(s)',
            '\t\tand it will use that label instead of the IDsuffix plus a counter',
            '\tNote: the script will print to stdout',
        ]
        for usage_line in usage:
            emit(usage_line + '\n')
        sys.exit(1)

    listfilename = sys.argv[1]
    suffixID = sys.argv[2]
    doConfig = '-config' in sys.argv

    with open(listfilename) as inputlines:
        # f is the 1-based line number of the list file; blank lines still
        # consume a number so the counter matches the line position.
        for f, inputline in enumerate(inputlines, 1):
            fields = inputline.strip().split('\t')
            if fields == ['']:
                # Blank line: nothing to read (previously this ran a bare
                # `cat`, which reads stdin, or crashed in -config mode).
                continue

            if doConfig:
                if len(fields) < 2:
                    sys.exit('error: line %d of %s is not in "label <tab> file(s)" format'
                             % (f, listfilename))
                prefix = '@' + fields[0] + ':::'
                inputfilenames = fields[1].split(',')
            else:
                prefix = '@' + suffixID + '_' + str(f) + ':::'
                inputfilenames = fields[0].split(',')

            for inputfilename in inputfilenames:
                if inputfilename == '':
                    # Stray comma; an empty name would make the reader
                    # program fall back to stdin.
                    continue

                if inputfilename.endswith('.bz2'):
                    cmd = ['bzip2', '-cd', inputfilename]
                elif inputfilename.endswith('.gz'):
                    cmd = ['zcat', inputfilename]
                else:
                    cmd = ['cat', inputfilename]

                # Argument list instead of a shell string: the file name is
                # never interpreted by a shell.
                proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                        universal_newlines=True)
                try:
                    # state tracks the expected line of the 4-line FASTQ
                    # record: 1 = '@' header, 2 = sequence, 3 = '+' line,
                    # 4 = qualities.  Lines that do not match the expected
                    # marker in states 1 and 3 are dropped and the state is
                    # left unchanged.
                    state = 1
                    for line in proc.stdout:
                        if state == 1:
                            if line[0] == '@':
                                emit(prefix + line.strip()[1:] + '\n')
                                state = 2
                        elif state == 2:
                            emit(line.strip() + '\n')
                            state = 3
                        elif state == 3:
                            if line[0] == '+':
                                emit(line.strip() + '\n')
                                state = 4
                        else:
                            emit(line.strip() + '\n')
                            state = 1
                finally:
                    # Close the pipe and reap the child so neither leaks.
                    proc.stdout.close()
                    proc.wait()

# Script entry point: this call is unguarded, so it also executes when the
# module is imported, not only when it is run as a script.
run()

