#
#  getNovelSNPs.py
#  ENRAGE
#
# This script attempts to annotate the novel sncs/snps from the snp summary file 
# Written by: Wendy Lee
# Written on: Aug 7th, 2008

import sys
import string
from cistematic.genomes import Genome 
from commoncode import writeLog

print "getNovelSNPs: version 1.6"

try:
    import psyco
    psyco.full()
except:
    pass


def main(argv=None):
    if not argv:
        argv = sys.argv

    usage = "usage: python %s genome snpsfile nondbsnp_geneinfo_outfile" % argv[0]

    if len(argv) < 4:
        print usage
        sys.exit(2)

    genome = argv[1]
    snpfile = argv[2]
    outfilename = argv[3]

    getNovelSNPsFromFile(genome, snpfile, outfilename)


def getNovelSNPsFromFile(genome, snpfile, outfilename): 
    infile = file(snpfile, "r")
    writeNovelSNPFile(genome, infile, outfilename)
    writeLog("snp.log", sys.argv[0], "outputfile: %s" % outfilename)
    infile.close()


def writeNovelSNPFile(genome, snpPropertiesList, outfilename):
    hg = Genome(genome)
    outString = ""
    outfile  = open(outfilename, "w")
    outfile.write("#Sl\tCl\tchrom\tmis pos\t\tmatch\tuniq_mis\ttot_mis\tbase_chg\tknown_snp\tfunction\tgene\tgeneId\trpkm\n") 
    for line in snpPropertiesList:
        if doNotProcessLine(line):
            continue

        outString = getNovelSNPInfo(genome, line, hg)
        if outString == line:
            outfile.write(outString)
        else:
            outfile.write("%s\n" % outString)

    outfile.close()


def doNotProcessLine(line):
    return line[0] == "#"


def getNovelSNPInfo(genome, snpEntry, hg):
    fields = snpEntry.split()
    if fields[8].find("N\A") == -1: 
        return snpEntry
    else:
        snpInfo = ""
        gid = fields[11]
        snc_start = int(fields[3])
        featuresList = hg.getGeneFeatures((genome, gid))
        func = "N\A"
        for (ftype, chromosome, start, stop, orientation) in featuresList:
            if int(start) <= snc_start <= int(stop):
                func = ftype
                break 
 
        for i in range (0, 9):
            snpInfo = string.join([snpInfo, fields[i]], "\t")

        snpInfo = string.join([snpInfo, func], "\t")
        for i in range (10, 13):
            snpInfo = string.join([snpInfo, fields[i]], "\t")

    return snpInfo


if __name__ == "__main__":
    main(sys.argv)
