##################################
#                                #
# Last modified 2017/05/12       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import os
import subprocess
import sys

def _read_end(read_id):
    """Return 1 or 2 for the end a FASTQ header line belongs to, else None.

    Two header styles are recognised: a trailing ``/1`` or ``/2``, and a
    second space-separated field that starts with ``1:N:`` or ``2:N:``.
    """
    if read_id.endswith('/1'):
        return 1
    if read_id.endswith('/2'):
        return 2
    fields = read_id.split(' ')
    # A header without a space has no second field; report it as unknown
    # instead of failing with an IndexError.
    if len(fields) > 1:
        if fields[1].startswith('1:N:'):
            return 1
        if fields[1].startswith('2:N:'):
            return 2
    return None


def _open_input(inputfilename):
    """Open the input FASTQ and return ``(stream, decompressor)``.

    ``-`` selects standard input.  Names ending in ``.bz2`` or ``.gz`` are
    streamed through ``bzip2 -cd`` or ``gunzip -c``; in that case the second
    item is the child process, otherwise it is None.
    """
    if inputfilename == '-':
        return sys.stdin, None
    if inputfilename.endswith('.bz2'):
        argv = ['bzip2', '-cd', inputfilename]
    elif inputfilename.endswith('.gz'):
        argv = ['gunzip', '-c', inputfilename]
    else:
        return open(inputfilename, 'r'), None
    # An argument list (no shell) keeps file names with spaces or shell
    # metacharacters intact.
    decompressor = subprocess.Popen(argv, stdout=subprocess.PIPE,
                                    universal_newlines=True)
    return decompressor.stdout, decompressor


def _split_mixed_fastq(stream, outfile1, outfile2):
    """Write every record of ``stream`` as one tab-separated line per end.

    Each record becomes ``ID<TAB>sequence<TAB>scores`` in ``outfile1`` or
    ``outfile2`` depending on its end.  The script exits with status 1 when a
    record does not start with ``@`` or its end cannot be determined.
    """
    state = 1
    read_id = sequence = None
    # readline() rather than file iteration, so that standard input is
    # consumed line by line.
    for line in iter(stream.readline, ''):
        if state == 1:
            if line[0] != '@':
                print('FASTQ broken, exiting')
                sys.exit(1)
            read_id = line.strip()
            state = 2
        elif state == 2:
            sequence = line.strip()
            state = 3
        elif state == 3:
            # Any line before the '+' separator is ignored.
            if line[0] == '+':
                state = 4
        else:
            scores = line.strip()
            state = 1
            end = _read_end(read_id)
            if end == 1:
                target = outfile1
            elif end == 2:
                target = outfile2
            else:
                print('incorrect read ID format, exiting')
                print(read_id)
                sys.exit(1)
            target.write(read_id + '\t' + sequence + '\t' + scores + '\n')


def _sort_into_fastq(temp_path, fastq_path):
    """Sort the one-line-per-record file and expand it back into FASTQ.

    Raises subprocess.CalledProcessError if ``sort`` fails, so that the
    unsorted data is not deleted after an unsuccessful sort.
    """
    sorted_path = temp_path + '.sorted'
    # Byte-order collation makes the ordering independent of the user's
    # locale, so both ends are ordered by the same rule.
    sort_env = dict(os.environ, LC_ALL='C')
    with open(sorted_path, 'w') as sorted_file:
        subprocess.check_call(['sort', temp_path], stdout=sorted_file,
                              env=sort_env)
    os.remove(temp_path)

    with open(sorted_path) as records, open(fastq_path, 'w') as outfile:
        for line in records:
            # Only the newline is removed so that empty trailing fields
            # survive; splitting from the right tolerates tabs in the ID.
            read_id, sequence, scores = line.rstrip('\n').rsplit('\t', 2)
            outfile.write(read_id + '\n')
            outfile.write(sequence + '\n')
            outfile.write('+' + '\n')
            outfile.write(scores + '\n')
    os.remove(sorted_path)


def _remove_if_exists(path):
    """Delete ``path`` if it is present."""
    if os.path.exists(path):
        os.remove(path)


def run():
    """Split a mixed paired-end FASTQ file into two sorted per-end files.

    Reads ``sys.argv[1]`` (input file name, or ``-`` for standard input) and
    ``sys.argv[2]`` (output prefix), and writes ``<prefix>.end1.fastq`` and
    ``<prefix>.end2.fastq``.  Records are sorted with the external ``sort``
    utility on their ``ID<TAB>sequence<TAB>scores`` line.

    Exits with status 1 after printing a message when arguments are missing,
    when a record does not start with ``@``, or when a read ID matches
    neither supported format.  Intermediate ``.temp`` files are removed on
    both success and failure.
    """
    # Both positional arguments are required.
    if len(sys.argv) < 3:
        print('usage: python %s <inputfilename> outfile_prefix' % sys.argv[0])
        print('\tUse - to specify standard input')
        print('\tThe script can read compressed files as long as they have the correct suffix - .bz2 or .gz')
        print('\tThe script will take a FASTQ file that has the two ends mixed and will resort them properly into two separate FASTQ files for each end')
        sys.exit(1)

    inputfilename = sys.argv[1]
    outprefix = sys.argv[2]

    temp1 = outprefix + '.end1.fastq.temp'
    temp2 = outprefix + '.end2.fastq.temp'

    stream, decompressor = _open_input(inputfilename)
    try:
        with open(temp1, 'w') as outfile1, open(temp2, 'w') as outfile2:
            _split_mixed_fastq(stream, outfile1, outfile2)
        _sort_into_fastq(temp1, outprefix + '.end1.fastq')
        _sort_into_fastq(temp2, outprefix + '.end2.fastq')
    finally:
        if stream is not sys.stdin:
            stream.close()
        if decompressor is not None:
            # Reap the child so it does not linger as a zombie.
            decompressor.wait()
        for leftover in (temp1, temp1 + '.sorted', temp2, temp2 + '.sorted'):
            _remove_if_exists(leftover)
# Script entry point: run() is called unconditionally at module level, so
# importing this file also executes it against the current sys.argv.
run()

