##################################
#                                #
# Last modified 2017/08/30       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import os
from sets import Set

def _parse_field_spec(spec):
    """Convert a field specification into a sorted list of 0-based indices.

    ``spec`` is either a comma-separated list of integers (``'0,3,5'``) or an
    inclusive ``start:end`` range (``'2:6'``).
    """
    if ':' in spec:
        bounds = spec.split(':')
        return list(range(int(bounds[0]), int(bounds[1]) + 1))
    return sorted(int(item) for item in spec.split(','))


def _read_column_order(columnorder, fieldID):
    """Map each label of the column-order file to its position in that file.

    Lines starting with '#' are skipped.  The label is taken from
    tab-separated column ``fieldID``.  When a label occurs more than once the
    first position is kept, which matches what ``list.index`` would return.
    """
    positions = {}
    rank = 0
    with open(columnorder) as handle:
        for line in handle:
            if line.startswith('#'):
                continue
            label = line.strip().split('\t')[fieldID]
            positions.setdefault(label, rank)
            rank += 1
    return positions


def _iter_data_lines(datafilename):
    """Yield the lines of the data file, decompressing .bz2/.gz on the fly.

    Compressed files are streamed through the external ``bzip2``/``gunzip``
    tools.  The command is passed as an argument list (no shell), so file
    names containing spaces or shell metacharacters are handled literally.
    """
    import subprocess

    if datafilename.endswith('.bz2'):
        cmd = ['bzip2', '-cd', datafilename]
    elif datafilename.endswith('.gz'):
        cmd = ['gunzip', '-c', datafilename]
    else:
        cmd = None

    if cmd is None:
        if datafilename == '-':
            # '-' means standard input, as it did when the file went via cat.
            for line in sys.stdin:
                yield line
            return
        with open(datafilename) as handle:
            for line in handle:
                yield line
        return

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        for line in proc.stdout:
            yield line
    finally:
        # Close the pipe and reap the child so no descriptor/zombie is left.
        proc.stdout.close()
        proc.wait()


def run():
    """Reorder the value columns of a tab-separated table and print it.

    Command-line arguments (``sys.argv``):
        1. datafilename  -- tab-separated table, optionally .gz/.bz2
        2. labelfields   -- columns copied first, in ascending index order
        3. valuefields   -- columns to reorder (comma list or start:end)
        4. columns_order_file -- file listing column labels in desired order
        5. fieldID       -- column of the order file that holds the label

    Every line starting with '#' is treated as a header: the labels found in
    its value columns are looked up in the order file, and the value columns
    are emitted in that order for this and all following lines.  Each input
    line (headers included) produces one tab-separated output line.

    Exits with status 1 after printing usage when arguments are missing, and
    with an error message when a data line precedes the first header.
    Raises ValueError when a header label is absent from the order file.
    """
    # argv[5] is required, so anything shorter than 6 entries is incomplete.
    if len(sys.argv) < 6:
        print('usage: python %s datafilename labelfields valuefields columns_order_files fieldID' % sys.argv[0])
        print('\tvaluefields format: either comma separated, or start:end (including start and end, 0-based)')
        print('\tthe script can read compressed input files')
        sys.exit(1)

    datafilename = sys.argv[1]
    labelFields = _parse_field_spec(sys.argv[2])
    valueFields = _parse_field_spec(sys.argv[3])
    columnorder = sys.argv[4]
    fieldID = int(sys.argv[5])

    orderRank = _read_column_order(columnorder, fieldID)

    orderedValueFields = None
    for line in _iter_data_lines(datafilename):
        fields = line.strip().split('\t')
        if line.startswith('#'):
            # (Re)compute the output order of the value columns from this
            # header; a later column with the same rank overrides an earlier.
            byRank = {}
            for ID in valueFields:
                label = fields[ID]
                if label not in orderRank:
                    raise ValueError('column label %r not found in %s'
                                     % (label, columnorder))
                byRank[orderRank[label]] = ID
            orderedValueFields = [byRank[rank] for rank in sorted(byRank)]
        if orderedValueFields is None:
            sys.exit('error: data line encountered before any header line '
                     'starting with "#" in ' + datafilename)
        cells = [fields[ID] for ID in labelFields]
        cells.extend(fields[ID] for ID in orderedValueFields)
        # Joining (rather than concatenating and stripping) keeps empty
        # leading/trailing cells, so every row has the same column count.
        print('\t'.join(cells))
        
# Run only when executed as a script, so that importing this module does not
# parse sys.argv or call sys.exit as a side effect.
if __name__ == '__main__':
    run()

