##################################
#                                #
# Last modified 10/31/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys

# psyco is an optional JIT accelerator; the script works without it.
# Catch Exception rather than using a bare except so that KeyboardInterrupt
# and SystemExit are not silently swallowed during start-up.
try:
    import psyco
    psyco.full()
except Exception:
    pass

def _exit_with_usage():
    """Print the command-line synopsis and terminate with exit status 1."""
    print('usage: python %s transcripts.gtf transcripts.tmap outfilename [-classes j,u,... (default j,u)]' % sys.argv[0])
    sys.exit(1)


def _read_tmap(tmap, classes):
    """Collect the tmap rows whose class code marks a novel isoform or gene.

    Rows are tab-separated; column 3 is the class code and column 5 is the
    transcript ID used as the lookup key (presumably the cuffcompare .tmap
    layout -- confirm against the producer of the file).

    Returns a ``(novel_isoforms, novel_genes)`` pair of dicts mapping the
    transcript ID to the full list of row fields.  Only class 'j' rows are
    stored as isoforms and only class 'u' rows as genes, and only when that
    code is listed in *classes*; any other requested class is ignored.
    """
    novel_isoforms = {}
    novel_genes = {}
    with open(tmap) as rows:
        for row in rows:
            if row.startswith('ref_gene_id'):
                # Header line.
                continue
            fields = row.strip().split('\t')
            if len(fields) < 5:
                # Blank or truncated row: nothing usable to index.
                continue
            class_code = fields[2]
            if class_code not in classes:
                continue
            if class_code == 'j':
                novel_isoforms[fields[4]] = fields
            elif class_code == 'u':
                novel_genes[fields[4]] = fields
    return novel_isoforms, novel_genes


def _transcript_id(line):
    """Return the transcript_id value from column 9 of a GTF line, or None.

    None is returned when the line has fewer than nine tab-separated columns
    or its attribute column carries no ``transcript_id "..."`` entry.
    """
    columns = line.strip().split('\t')
    if len(columns) < 9:
        return None
    pieces = columns[8].split('transcript_id "')
    if len(pieces) < 2:
        return None
    return pieces[1].split('";')[0]


def _novel_gene_record(line):
    """Build the output record for a transcript of a novel gene.

    Everything from the first 'FPKM' onwards is dropped, and a copy of the
    remaining attribute column with every '_id' replaced by '_name' is
    appended after a single space.
    """
    kept = line.split('FPKM')[0]
    names = kept.split('\t')[8].replace('_id', '_name')
    return kept + ' ' + names


def _novel_isoform_record(line, transcript_id, tmap_fields):
    """Build the output record for a novel isoform of a reference gene.

    The text before 'gene_id "' is kept and the attributes are rewritten
    from the tmap row: column 1 becomes gene_name, column 2 becomes gene_id,
    and the transcript is named '<gene_name>-<transcript_id>'.
    """
    gene_name = tmap_fields[0]
    gene_id = tmap_fields[1]
    transcript_name = gene_name + '-' + transcript_id
    # The closing attribute used to be emitted as  transcript_name" NAME ,
    # which is not a valid key "value"; pair.
    return (line.split('gene_id "')[0]
            + 'gene_id "' + gene_id
            + '"; transcript_id "' + transcript_name
            + '"; gene_name "' + gene_name
            + '"; transcript_name "' + transcript_name + '";')


def run():
    """Extract novel-gene and novel-isoform records from a GTF file.

    Command line (read from ``sys.argv``):
        transcripts.gtf transcripts.tmap outfilename [-classes j,u,...]

    The tmap file selects transcripts by class code (default 'u' and 'j');
    matching GTF lines are rewritten with name attributes and written to
    *outfilename*.  GTF lines without a transcript_id are reported on stderr
    and skipped.  Exits with status 1 and a usage message when the three
    positional arguments are missing or '-classes' has no value.
    """
    # Three positional arguments are required, i.e. at least four argv items.
    if len(sys.argv) < 4:
        _exit_with_usage()

    gtf = sys.argv[1]
    tmap = sys.argv[2]
    outfilename = sys.argv[3]

    classes = ['u', 'j']
    if '-classes' in sys.argv:
        value_index = sys.argv.index('-classes') + 1
        if value_index >= len(sys.argv):
            _exit_with_usage()
        classes = sys.argv[value_index].split(',')

    novel_isoforms, novel_genes = _read_tmap(tmap, classes)

    print('finished processing tmap file')

    with open(gtf) as records:
        with open(outfilename, 'w') as outfile:
            for line_number, raw_line in enumerate(records, 1):
                # Drop the line terminator so it can never end up in the
                # middle of an output record.
                line = raw_line.rstrip('\r\n')
                transcript_id = _transcript_id(line)
                if transcript_id is None:
                    # Skip instead of falling through with the ID of the
                    # previous line (or no ID at all).
                    sys.stderr.write('warning: no transcript_id on gtf line %d; line skipped\n' % line_number)
                    continue
                if transcript_id in novel_genes:
                    outfile.write(_novel_gene_record(line) + '\n')
                if transcript_id in novel_isoforms:
                    outfile.write(_novel_isoform_record(line, transcript_id, novel_isoforms[transcript_id]) + '\n')

    print('finished processing gtf file')

# Script entry point: run() is invoked unconditionally, so it also executes
# (reading sys.argv) if this file is imported as a module.
run()

