##################################
#                                #
# Last modified 2017/09/03       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import os

def _parse_value_fields(spec):
    """Return the sorted list of column indices described by spec.

    spec is either an inclusive range written as 'start:end' or a
    comma-separated list of indices such as '3,5,8'.
    """
    if ':' in spec:
        bounds = spec.split(':')
        columns = list(range(int(bounds[0]), int(bounds[1]) + 1))
    else:
        columns = [int(token) for token in spec.split(',')]
    columns.sort()
    return columns


def _iter_input_lines(path):
    """Yield the lines of path, decompressing .bz2 / .gz files on the fly.

    Compressed files are streamed through the external 'bzip2' / 'zcat'
    programs. The command is passed as an argument list (no shell), so
    paths containing spaces or shell metacharacters are handled safely.
    Plain files are opened directly, which raises IOError for a missing
    file instead of silently yielding no lines.
    """
    import subprocess

    if path.endswith('.bz2'):
        command = ['bzip2', '-cd', path]
    elif path.endswith('.gz'):
        command = ['zcat', path]
    else:
        with open(path) as handle:
            for line in handle:
                yield line
        return

    # universal_newlines=True makes the pipe deliver text on Python 2 and 3.
    proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        for line in proc.stdout:
            yield line
    finally:
        proc.stdout.close()
        status = proc.wait()
    if status != 0:
        # Best effort: keep whatever was decoded, but do not hide the failure.
        sys.stderr.write('warning: %s exited with status %d while reading %s\n'
                         % (command[0], status, path))


def _read_order_labels(path, field_id):
    """Return the labels found in column field_id of the ordering file.

    Lines starting with '#' are skipped; the remaining lines are taken in
    file order.
    """
    labels = []
    with open(path) as handle:
        for line in handle:
            if line.startswith('#'):
                continue
            labels.append(line.strip().split('\t')[field_id])
    return labels


def _read_table(path, label_field, value_fields):
    """Read the input table and return (header, labels, data).

    header collects the value_fields columns of every line starting with
    '#'; labels lists the row labels in input order; data maps each label
    to the list of its value_fields columns (a repeated label keeps the
    values of its last occurrence).
    """
    header = []
    labels = []
    data = {}
    for line in _iter_input_lines(path):
        fields = line.strip().split('\t')
        if line.startswith('#'):
            header.extend([fields[column] for column in value_fields])
            continue
        label = fields[label_field]
        labels.append(label)
        data[label] = [fields[column] for column in value_fields]
    return header, labels, data


def _write_cdt(path, header, labels, data):
    """Write the three header rows and one GENE row per label to path."""
    with open(path, 'w') as outfile:
        outfile.write('\t'.join(['GID', '#', 'NAME', 'GWEIGHT'] + header) + '\n')
        array_ids = ['ARRY%dX' % k for k in range(len(header))]
        outfile.write('\t'.join(['AID', '', '', 'G'] + array_ids) + '\n')
        outfile.write('EWEIGHT\t\t\t' + '\t1' * len(header) + '\n')
        for index, label in enumerate(labels):
            row = ['GENE%dX' % index, label, label, '1'] + data[label]
            outfile.write('\t'.join(row) + '\n')


def run():
    """Convert a tab-delimited table into a CDT-style file.

    Command line (read from sys.argv):
        input labelFieldID valueFieldIDs outfilename [-order filename fieldID]

    input may be plain text, or end in .bz2 / .gz to be decompressed.
    labelFieldID is the 0-based column holding the row label.
    valueFieldIDs is 'start:end' (inclusive) or a comma-separated list of
    0-based columns. Lines starting with '#' supply the column headers.
    With -order, rows are written in the order of the labels found in
    column fieldID of the given file; otherwise input order is used.

    Exits with status 1 after printing the usage line when required
    arguments are missing. Raises KeyError when an ordering label does
    not occur in the input; the output file is not touched in that case.
    """
    usage = ('usage: python %s input labelFieldID valueFieldIDs outfilename '
             '[-order filename fieldID]' % sys.argv[0])

    # outfilename is sys.argv[4], so five entries are the minimum.
    if len(sys.argv) < 5:
        print(usage)
        sys.exit(1)

    input_path = sys.argv[1]
    label_field = int(sys.argv[2])
    value_fields = _parse_value_fields(sys.argv[3])
    outfilename = sys.argv[4]

    order_path = None
    order_field = None
    if '-order' in sys.argv:
        flag = sys.argv.index('-order')
        # -order needs two operands: the file name and the column index.
        if len(sys.argv) < flag + 3:
            print(usage)
            sys.exit(1)
        order_path = sys.argv[flag + 1]
        print('will order entries using externally supplied file: %s' % order_path)
        order_field = int(sys.argv[flag + 2])

    order_labels = None
    if order_path is not None:
        order_labels = _read_order_labels(order_path, order_field)

    header, labels, data = _read_table(input_path, label_field, value_fields)

    if order_labels is not None:
        # Fail before opening the output so a bad ordering file cannot
        # leave a truncated result behind.
        for label in order_labels:
            if label not in data:
                raise KeyError('label %r from %s not found in %s'
                               % (label, order_path, input_path))
        labels = order_labels

    _write_cdt(outfilename, header, labels, data)
   
# Script entry point: the conversion runs as soon as this file is executed
# (there is no __main__ guard, so importing the file runs it as well).
run()
