##################################
#                                #
# Last modified 5/6/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from cistematic.core.geneinfo import geneinfoDB
from cistematic.genomes import Genome

def _write_gene_set(filename, headerline, keys, genes, with_chromosome=True):
    """Write the header and the stored input lines of *keys* to *filename*.

    filename        -- path of the output file (truncated/created).
    headerline      -- header line of the expression file, newline included.
    keys            -- gene identifiers to write, in output order.
    genes           -- dict mapping identifier -> {'line': ..., 'chromosome': ...}.
    with_chromosome -- when true, prefix the header with 'chromosome\\t' and
                       every row with the gene's chromosome label and a tab.
    """
    outfile = open(filename, 'w')
    try:
        if with_chromosome:
            outfile.write('chromosome\t')
        outfile.write(headerline)
        for k in keys:
            if with_chromosome:
                outfile.write(genes[k]['chromosome'])
                outfile.write('\t')
            # The stored line still carries its original line terminator.
            outfile.write(genes[k]['line'])
    finally:
        # Close the handle even if a write fails part-way through.
        outfile.close()


def run():
    """Split an expression table into three chromosome-based sets.

    Command-line arguments (sys.argv[1:]):
        genome, expressionfilename, Set1outfilename, Set2outfilename,
        Set3outfilename, garbooutfilename

    The first line of the expression file is treated as a header; the first
    tab-separated field of every other line is the gene identifier.  Each
    gene found in the genome's feature dictionary is assigned to Set1, Set2
    or Set3 according to its chromosome; genes missing from the feature
    dictionary are written (without a chromosome column) to the "garbo"
    file.  A gene whose chromosome is in none of the three lists is not
    written anywhere.

    Exits with status 1 if too few arguments are given or the expression
    file is empty.
    """
    # Six positional arguments are read below (argv[1]..argv[6]), so argv
    # must hold at least seven entries including the script name.
    if len(sys.argv) < 7:
        print('usage: python %s genome expressionfilename Set1outfilename Set2outfilename Set3outfilename garbooutfilename' % sys.argv[0])
        sys.exit(1)

    genome = sys.argv[1]
    expressiondatafilename = sys.argv[2]
    outfilename1 = sys.argv[3]
    outfilename2 = sys.argv[4]
    outfilename3 = sys.argv[5]
    garbooutfilename = sys.argv[6]

    expressiondatafile = open(expressiondatafilename)
    try:
        expressiondatalist = expressiondatafile.readlines()
    finally:
        expressiondatafile.close()

    if not expressiondatalist:
        print('%s is empty: no header line found' % expressiondatafilename)
        sys.exit(1)
    headerline = expressiondatalist[0]
    datalines = expressiondatalist[1:]

    hg = Genome(genome)
    idb = geneinfoDB()
    geneinfoDict = idb.getallGeneInfo(genome)
    # Diagnostic output only; geneinfoDict is not used for the split itself.
    print('max(geneinfoDict.keys()) %s' % (max(geneinfoDict.keys()),))
    featDict = hg.getallGeneFeatures()

    genes = {}
    Set1 = []
    Set2 = []
    Set3 = []
    Set4 = []
    # Chromosome names are compared as given by the feature dictionary
    # (no 'chr' prefix); each list of names feeds the set paired with it.
    assignments = [
        (['1', '4', '7', '10', '13', '16'], Set1),
        (['2', '5', '8', '11', '14', '17'], Set2),
        (['3', '6', '9', '12', '15', '18', '19'], Set3),
    ]

    for line in datalines:
        fields = line.split('\n')[0].split('\t')
        k = fields[0]
        genes[k] = {'line': line}
        # Direct membership test avoids building a key list for every row.
        if k not in featDict:
            Set4.append(k)
            continue
        # Chromosome is taken from the second element of the first feature
        # record of the gene.
        chrom = featDict[k][0][1]
        genes[k]['chromosome'] = 'chr' + str(chrom)
        for chromosomes, members in assignments:
            if chrom in chromosomes:
                members.append(k)

    _write_gene_set(outfilename1, headerline, Set1, genes)
    _write_gene_set(outfilename2, headerline, Set2, genes)
    _write_gene_set(outfilename3, headerline, Set3, genes)
    # Unmapped genes have no chromosome, so that column is omitted.
    _write_gene_set(garbooutfilename, headerline, Set4, genes,
                    with_chromosome=False)

# Script entry point: executed unconditionally when this file is loaded
# (there is no __name__ guard), so importing the module also runs it.
run()
