##################################
#                                #
# Last modified 2018/11/25       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import random
import os
from sets import Set

def _decompress_command(path):
    """Return the argv list of the command that streams *path* to stdout.

    The tool is picked from the file extension: ``bzip2 -cd`` for ``.bz2``,
    ``zcat`` for ``.gz``/``.bgz``, ``unzip -p`` for ``.zip`` and ``cat`` for
    everything else.
    """
    if path.endswith('.bz2'):
        return ['bzip2', '-cd', path]
    if path.endswith(('.gz', '.bgz')):
        return ['zcat', path]
    if path.endswith('.zip'):
        return ['unzip', '-p', path]
    return ['cat', path]


def _read_lines(path):
    """Yield each non-blank line of *path* with surrounding whitespace removed.

    The file is streamed through the external tool chosen by
    _decompress_command().  Blank (or whitespace-only) lines are skipped
    instead of being treated as end-of-input, so an empty line in the middle
    of a file no longer truncates it.
    """
    # Function-scope import: the module header does not import subprocess.
    import subprocess

    # An argv list (no shell) keeps paths containing spaces or shell
    # metacharacters intact; universal_newlines gives text-mode output.
    proc = subprocess.Popen(_decompress_command(path),
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        for raw in proc.stdout:
            # NOTE: strip() also removes leading/trailing tabs, exactly as the
            # original code did, so empty first/last columns are dropped.
            line = raw.strip()
            if line:
                yield line
    finally:
        # Close the pipe and reap the child so no zombie process is left.
        proc.stdout.close()
        proc.wait()


def run():
    """Write the lines of a tab-delimited file grouped in a prescribed order.

    Command-line arguments (read from sys.argv):
        values         input file; may be .bz2, .gz, .bgz or .zip compressed
        fields         comma-separated integer column indexes (as used to
                       index the tab-split line) that together form the
                       "state" key of a line
        ordering_file  one state per line, its components tab-delimited
        outfile        path of the file to write

    Lines of ``values`` starting with '#' are copied to the output as they are
    encountered.  All other lines are grouped by state; then, for every state
    listed in the ordering file (its '#' lines are ignored), the matching
    lines are written in sorted order.  Lines whose state is not listed in the
    ordering file are not written.

    Exits with status 1 after printing a usage message when fewer than four
    arguments are supplied.  Raises ValueError for a non-integer field index
    and IndexError when a line has fewer columns than a requested index.
    """
    # Four arguments are required, i.e. sys.argv must hold five entries.
    if len(sys.argv) < 5:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('usage: python %s values fields(comma-separated) ordering_file outfile' % sys.argv[0])
        print('\tNote: the ordering file should contain each of the states in its own line, tab-delimited')
        sys.exit(1)

    values = sys.argv[1]
    IDfields = [int(ID) for ID in sys.argv[2].split(',')]
    ordering = sys.argv[3]
    outfilename = sys.argv[4]

    # The context manager closes the output even if parsing fails midway.
    with open(outfilename, 'w') as outfile:
        # state tuple -> list of input lines carrying that state
        LineDict = {}
        for peakline in _read_lines(values):
            if peakline.startswith('#'):
                outfile.write(peakline + '\n')
                continue
            fields = peakline.split('\t')
            state = tuple([fields[ID] for ID in IDfields])
            LineDict.setdefault(state, []).append(peakline)

        for orderline in _read_lines(ordering):
            if orderline.startswith('#'):
                continue
            state = tuple(orderline.split('\t'))
            if state in LineDict:
                for line in sorted(LineDict[state]):
                    outfile.write(line + '\n')
            
# Script entry point: run() is invoked unconditionally, so it executes both
# when this file is run as a script and when it is imported as a module.
run()

