##################################
#                                #
# Last modified 2024/10/28       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys

# psyco is an optional JIT accelerator for legacy Python 2 interpreters.
# The script works without it, so any ordinary failure to load or enable it
# is ignored.  Catching Exception (rather than using a bare except) keeps
# the best-effort behaviour while letting KeyboardInterrupt and SystemExit
# propagate.
try:
    import psyco
    psyco.full()
except Exception:
    pass

def _extract_barcode(header, doSHARE, do7, do5):
    """Pull the barcode string out of a FASTQ header line.

    header  -- the '@...' line of a read, with or without trailing newline.
    doSHARE -- if true, the barcode is the text between '[' and ']' in the
               last ':'-separated field of the first space-separated token.
    do7/do5 -- otherwise the second space-separated token is used: its last
               ':'-separated field is split on '+', the part before the '+'
               being the 7XX barcode and the part after it the 5XX barcode.

    Returns the selected barcode ('7XX+5XX' when both are requested), or
    None when neither do7 nor do5 is set.  Raises IndexError when the
    header does not contain the pieces the selected mode needs.
    """
    tokens = header.strip().split(' ')
    if doSHARE:
        return tokens[0].split(':')[-1].split('[')[1].split(']')[0]

    index_parts = tokens[1].split(':')[-1].split('+')
    # Only touch the half of the index that was requested, so that
    # -7only also works on single-index headers without a '+'.
    if do7 and do5:
        return index_parts[0] + '+' + index_parts[1]
    if do7:
        return index_parts[0]
    if do5:
        return index_parts[1]
    return None


def run():
    """Count barcode occurrences in a FASTQ stream and write a ranked table.

    Command line (read from sys.argv):
        inputfilename outfilename [-7only] [-5only] [-SHARE]
    An input filename of '-' means standard input.

    Each read's header line is parsed for its barcode, and the number of
    reads per barcode is tallied.  The output file receives one
    'count<TAB>barcode' line per barcode, sorted by descending
    (count, barcode).

    Exits with status 1 when the two required arguments are missing, or
    when the flags leave no barcode selected and a read is encountered.
    A record whose first line does not start with '@' stops the parsing;
    the counts collected so far are still written.
    """
    # Both positional arguments are required; sys.argv[2] is read below.
    if len(sys.argv) < 3:
        print('usage: python %s inputfilename outfilename [-7only] [-5only] [-SHARE]' % sys.argv[0])
        print('\tnote: use - instead of an input filename to indicate standard input')
        sys.exit(1)

    inputfilename = sys.argv[1]
    outputfilename = sys.argv[2]

    doStdInput = (inputfilename == '-')

    doSHARE = False
    do5 = True
    do7 = True

    if '-SHARE' in sys.argv:
        doSHARE = True
        print('will treat barcodes as SHARE-seq single-cell barcodes')
        do7 = False
        do5 = False

    if '-5only' in sys.argv:
        do7 = False
        print('will only consider 5XX barcodes')

    if '-7only' in sys.argv:
        do5 = False
        print('will only consider 7XX barcodes')

    # Single-argument print keeps the output identical on Python 2 and 3.
    print('%s %s' % (do7, do5))

    BarcodeDict = {}

    reads = 0
    # Position inside the current record: 1 = header, 2 = sequence,
    # 3 = waiting for the '+' separator, 4 = quality string.
    pos = 1

    if doStdInput:
        input_stream = sys.stdin
    else:
        input_stream = open(inputfilename)
    try:
        for line in input_stream:
            if pos == 1:
                if not line.startswith('@'):
                    print('invalid read %s' % line)
                    break
                barcode = _extract_barcode(line, doSHARE, do7, do5)
                if barcode is None:
                    print('no barcode selected, exiting')
                    sys.exit(1)
                BarcodeDict[barcode] = BarcodeDict.get(barcode, 0) + 1
                pos = 2
            elif pos == 2:
                reads += 1
                if reads % 10000000 == 0:
                    print(str(reads // 1000000) + 'M reads processed')
                pos = 3
            elif pos == 3:
                # Lines are skipped until the '+' separator shows up.
                if line.startswith('+'):
                    pos = 4
            else:
                # The quality line is consumed blindly; it may legitimately
                # start with '@', so it must not be mistaken for a header.
                pos = 1
    finally:
        # Never close stdin; only release the file opened here.
        if not doStdInput:
            input_stream.close()

    # (count, barcode) pairs are unique, so one descending sort is
    # equivalent to an ascending sort followed by a reversal.
    ranked = sorted(((count, BC) for BC, count in BarcodeDict.items()),
                    reverse=True)

    with open(outputfilename, 'w') as outfile:
        for (counts, BC) in ranked:
            outfile.write(str(counts) + '\t' + BC + '\n')

# Script entry point: run() is invoked unconditionally at module level, so it
# also executes if this file is imported rather than run directly.
run()

