import sys
import optparse
from erange.commoncode import getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption

print "processvelvet: version 1.2"

def main(argv=None):
    if not argv:
        argv = sys.argv

    usage = "usage: python %prog infile outfile [--prefix contigpref] [--filter pslfile] [--min bp] [--keepcov]"

    parser = getParser(usage)
    (options, args) = parser.parse_args(argv[1:])

    if len(args) < 2:
        print usage
        sys.exit(2)

    infile = args[0]
    outfile = args[1]

    processvelvet(infile, outfile, options.contigPrefix, options.filterFileName, options.minSize, options.keepCoverage)


def getParser(usage):
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("--prefix", dest="contigPrefix")
    parser.add_option("--filter", dest="filterFileName")
    parser.add_option("--min", type="int", dest="minSize")
    parser.add_option("--keepcov", action="store_true", dest="keepCoverage")

    configParser = getConfigParser()
    section = "processvelvet"
    contigPrefix = getConfigOption(configParser, section, "contigPrefix", "chr")
    filterFileName = getConfigOption(configParser, section, "filterFileName", "")
    minSize = getConfigIntOption(configParser, section, "minSize", 0)
    keepCoverage = getConfigBoolOption(configParser, section, "keepCoverage", False)

    parser.set_defaults(contigPrefix=contigPrefix, filterFileName=filterFileName, minSize=minSize, keepCoverage=keepCoverage)

    return parser


def processvelvet(inFileName, outFileName, contigPrefix="chr", filterFileName="", minSize=0, keepCoverage=False):
    infile = open(inFileName)
    outfile = open(outFileName, "w")
    filterList = getFilterList(filterFileName)

    node = {"contigPrefix": contigPrefix,
            "completeID": "",
            "currentSeq": ""
    }

    counts = {"acceptedSize": 0,
              "nSize": 0,
              "contigsAccepted": 0,
              "filteredSize": 0
    }

    for line in infile:
        if ">NODE" in line:
            writeNode(outfile, node, filterList, counts, minSize, keepCoverage)
            node["completeID"] = line.strip()[1:]
            node["currentSeq"] = ""
        else:
            node["currentSeq"] += line

    writeNode(outfile, node, filterList, counts, minSize, keepCoverage)

    infile.close()
    outfile.close()

    print "%d contigs accepted" % counts["contigsAccepted"]
    print "%d bp original" % (counts["acceptedSize"] + counts["filteredSize"])
    print "%d bp accepted" % counts["acceptedSize"]
    print "%d bp accepted N" % counts["nSize"]
    print "%d bp filtered\n" % counts["filteredSize"]


def getFilterList(filterFileName=""):
    filterList = []

    if filterFileName:
        try:
            filterFile = open(filterFileName)
        except IOError:
            return filterList

        for line in filterFile:
            if "NODE" in line:
                fields = line.strip().split()
                try:
                    exclude = fields[9]
                except IndexError:
                    continue

                if exclude not in filterList:
                    filterList.append(exclude)

        filterFile.close()

    return filterList


def writeNode(outfile, node, filterList, counts, minSize=0, keepCoverage=False):
    completeID = node["completeID"]
    currentSeq = node["currentSeq"]
    sequenceLength = len(currentSeq) - currentSeq.count("\n")
    if len(completeID) > 5 and completeID not in filterList:
        fields = completeID.split("_")
        newID = fields[1]
        if keepCoverage:
            newID = fields[1] + "_" + fields[-1].strip()

        if sequenceLength >= minSize:
            outfile.write(">%s%s\n%s" % (node["contigPrefix"], newID, currentSeq))
            counts["acceptedSize"] += sequenceLength
            counts["nSize"] += currentSeq.count("N")
            counts["contigsAccepted"] += 1
    else:
        counts["filteredSize"] += sequenceLength

if __name__ == "__main__":
    main(sys.argv)