##################################
#                                #
# Last modified 02/26/2015       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

# Best-effort: try to switch psyco on; if it cannot be imported or enabled,
# report that and carry on without it.
try:
    import psyco
    psyco.full()
except Exception:
    # Deliberately not a bare ``except:`` so that Ctrl-C / SystemExit raised
    # while loading are not swallowed.
    print('psyco not running')

def _parse_field_spec(spec):
    """Expand a field specification into a list of column indices.

    ``spec`` is a comma separated list whose items are either a single
    integer or an inclusive ``start:end`` range.  The indices are returned
    in the order given (no sorting, no de-duplication) and are used as
    0-based positions into the tab-split data line.
    """
    wanted = []
    for item in spec.split(','):
        if ':' in item:
            bounds = item.split(':')
            wanted.extend(range(int(bounds[0]), int(bounds[1]) + 1))
        else:
            wanted.append(int(item))
    return wanted


def _with_leading_id(line):
    """Return ``line`` prefixed by a copy of its first tab-separated field."""
    return line.strip().split('\t')[0] + '\t' + line


def run():
    """Write the rows of a data file in the order their profiles occur in a cdt file.

    Command line: ``cdt data fields outfile`` (read from ``sys.argv``).

    * ``data``: tab-separated file.  Lines starting with ``#`` are copied
      to the output straight away; every other line is indexed by the tuple
      of floats found in the columns listed in ``fields``.
    * ``cdt``: tab-separated file.  The line starting with ``GID`` locates
      the ``GWEIGHT`` column; the line starting with ``EWEIGHT`` is skipped.
      For every other line, the ``len(fields)`` values following the
      ``GWEIGHT`` column form the profile that is looked up in the index.
    * ``outfile``: receives each emitted line prefixed with a copy of its
      own first field.

    Exits with status 1 after printing usage when too few arguments are
    given.  Raises ``ValueError`` if a cdt row precedes the ``GID`` header
    and ``KeyError`` if a cdt profile has no matching data row.
    """
    # Four positional arguments are needed, i.e. sys.argv[0] .. sys.argv[4].
    if len(sys.argv) < 5:
        print('usage: python %s cdt data fields outfile' % sys.argv[0])
        print('\tfields: any combination of comma separated and start:end (including) will work')
        print('\tthe fields will not be sorted!!!')
        sys.exit(1)

    cdt = sys.argv[1]
    data = sys.argv[2]
    wantedfields = _parse_field_spec(sys.argv[3])

    with open(sys.argv[4], 'w') as outfile:
        # profile (tuple of floats) -> data lines sharing that profile,
        # in file order.
        lines_by_profile = {}
        with open(data) as datafile:
            for data_line in datafile:
                if data_line.startswith('#'):
                    outfile.write(_with_leading_id(data_line))
                    continue
                fields = data_line.strip().split('\t')
                profile = tuple([float(fields[i]) for i in wantedfields])
                lines_by_profile.setdefault(profile, []).append(data_line)

        # Diagnostic dump of every distinct profile found in the data file.
        print(list(lines_by_profile.keys()))

        first_value_column = None
        with open(cdt) as cdtfile:
            for cdt_line in cdtfile:
                fields = cdt_line.strip().split('\t')
                if cdt_line.startswith('GID\t'):
                    # Profile values start right after the GWEIGHT column.
                    first_value_column = fields.index('GWEIGHT') + 1
                    continue
                if cdt_line.startswith('EWEIGHT\t'):
                    continue
                if first_value_column is None:
                    raise ValueError(
                        'cdt file %s has a data row before the GID header line' % cdt)
                profile = tuple([float(fields[first_value_column + k])
                                 for k in range(len(wantedfields))])
                # Profiles are matched by exact float equality.
                try:
                    matches = lines_by_profile[profile]
                except KeyError:
                    raise KeyError(
                        'no line in %s matches cdt profile %r' % (data, profile))
                for matched_line in matches:
                    outfile.write(_with_leading_id(matched_line))

# Entry point: there is no ``__main__`` guard, so this call executes whenever
# the file is run or imported.
run()
