##################################
#                                #
# Last modified 03/25/2015       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math

def _parse_field_spec(spec):
    """Expand a column specification string into a list of column indices.

    ``spec`` is a comma-separated list whose items are either a single
    integer (``3``) or an inclusive ``from:to`` range (``2:5``).  Order and
    duplicates are kept exactly as given.  The resulting integers are used
    directly as list indices into the tab-split fields of a line.

    Raises ValueError if an item is not an integer (or integer range).
    """
    indices = []
    for item in spec.split(','):
        if ':' in item:
            bounds = item.split(':')
            # The range is inclusive on both ends, hence the +1.
            indices.extend(range(int(bounds[0]), int(bounds[1]) + 1))
        else:
            indices.append(int(item))
    return indices


def run():
    """Append columns looked up in a second tab-separated file to a first one.

    Command line (read from ``sys.argv``)::

        file1 fieldID(s) dataFieldID(s) file2 fieldID(s) dataFieldID(s) outfile

    For every line of file1 the selected ID columns followed by the selected
    data columns are written to outfile, followed by the data columns of the
    file2 line that has the same ID tuple.  If file2 has no such line, one
    ``nan`` per file2 data column is written instead.  When an ID tuple occurs
    several times in file2, the last occurrence wins.

    Lines starting with ``#`` are treated as headers: the file2 data-column
    names are collected from them and appended to the file1 header, which is
    written with a leading ``#``.

    Prints the usage text and exits with status 1 when too few arguments are
    given.  Raises IndexError if a line has fewer columns than a requested
    index, and ValueError on a malformed field specification.
    """
    # Seven user arguments are needed (argv[1] .. argv[7]); the output file
    # is argv[7], so anything shorter than 8 entries is incomplete.
    if len(sys.argv) < 8:
        print('usage: python %s files fieldID(s) dataFieldID(s) file2 fieldID(s) dataFieldID(s) outfile' % sys.argv[0])
        print('\t The multiple entries are in the first file; nan will be added if nothing is found in the second file')
        print('\t fields can be comma separated or from:to (including)')
        print('\t The presence of headers is assumed')
        sys.exit(1)

    file1 = sys.argv[1]
    file2 = sys.argv[4]
    outpath = sys.argv[7]

    IDfields1 = _parse_field_spec(sys.argv[2])
    Datafields1 = _parse_field_spec(sys.argv[3])
    IDfields2 = _parse_field_spec(sys.argv[5])
    Datafields2 = _parse_field_spec(sys.argv[6])

    # Echo the parsed column selections so the user can verify them.
    print(IDfields1)
    print(Datafields1)
    print(IDfields2)
    print(Datafields2)

    Header = []
    DataDict = {}

    # NOTE: line.strip() also removes leading/trailing tabs, so a row whose
    # first or last column is empty loses that column.  This is kept as-is so
    # that existing output does not change.
    with open(file2) as lookup:
        for line in lookup:
            fields = line.strip().split('\t')
            if line.startswith('#'):
                # Every header line contributes its data-column names.
                Header.extend([fields[f] for f in Datafields2])
                continue
            ID = tuple([fields[f] for f in IDfields2])
            DataDict[ID] = [fields[f] for f in Datafields2]

    # Placeholder row used when an ID from file1 is absent from file2.
    missing = ['nan'] * len(Datafields2)

    # The output file is opened before file1, as in the original flow; the
    # with-blocks guarantee both handles are closed even if a line fails.
    with open(outpath, 'w') as outfile:
        with open(file1) as primary:
            for line in primary:
                fields = line.strip().split('\t')
                ids = [fields[f] for f in IDfields1]
                columns = ids + [fields[f] for f in Datafields1]
                if line.startswith('#'):
                    outline = '#' + '\t'.join(columns + Header)
                    outfile.write(outline.strip() + '\n')
                    continue
                columns.extend(DataDict.get(tuple(ids), missing))
                outfile.write('\t'.join(columns).strip() + '\n')

# Run the merge only when this file is executed as a script, so that
# importing the module does not parse sys.argv, exit, or touch any files.
if __name__ == '__main__':
    run()
