#!/usr/bin/env python2.2
########################################
# The contents of this file are subject to the MLX PUBLIC LICENSE version
# 1.0 (the "License"); you may not use this file except in
# compliance with the License.
# 
# Software distributed under the License is distributed on an "AS IS"
# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
# the License for the specific language governing rights and limitations
# under the License.
# 
# The Original Source Code is "compClust", released 2003 September 03.
# 
# The Original Source Code was developed by the California Institute of
# Technology (Caltech).  Portions created by Caltech are Copyright (C)
# 2002-2003 California Institute of Technology. All Rights Reserved.
########################################
#
# Written By : Christopher Hart
# Date       : June 2001
#
###################################

"""
A parser for the confusionMatrix summary stats
"""

import string
import Numeric
import sys

from compClust.util import Usage
from compClust.util import WrapperUtil
from compClust.visualize import plot

def generateKeyDict(keyline):

    """
    Build a column-name --> column-position dictionary from a key line.

    keyline is the column key found on the first line of a
    confusionMatrix output file: a whitespace separated list of column
    names, normally introduced by a '#' comment marker, for example
    "#File1 File2 NMI".

    Returns a dictionary mapping every column name to its zero-based
    position on the line.  If a name occurs more than once the position
    of its last occurrence is kept.  An empty key line gives an empty
    dictionary.
    """

    # Drop the comment marker only when it is really there, so that a
    # key line written without '#' does not lose the first character of
    # its first column name.
    if keyline[:1] == '#':
        keyline = keyline[1:]

    # split() without an argument splits on any run of whitespace (tabs
    # or spaces) and ignores the trailing newline.
    tokens = keyline.split()

    key = {}
    for position in range(len(tokens)):
        key[tokens[position]] = position

    return key


def parseConfusionMatrixStats(filename, statistic):

    """
    Parse one statistic out of a confusionMatrix summary file.

    filename  -- path of the summary file.  Its first line is the column
                 key (e.g. "#File1 File2 NMI ..."), which must name the
                 columns File1 and File2; every following non-blank
                 line holds one File1/File2 pair and its scores.
    statistic -- name of the column to extract, spelled as on the key
                 line.  statistic is in the set
                 [NMI, TransposedNMI, AverageNMI, LinearAssignment]

    Returns a tuple (stats, index):

      stats -- square Numeric float array; stats[row][col] is the value
               of the statistic for the pair File1 = index[row],
               File2 = index[col].  Pairs that do not occur in the file
               are left at 0.
      index -- dictionary giving a translation both ways:
               fileName --> indexValue and indexValue --> fileName.

    If statistic is not a column of the file, a warning is written to
    stderr and Usage.showHelp is called with exit=1.
    """

    # Read the whole file up front and close it again straight away.
    infile = open(filename, 'r')
    try:
        keyline = infile.readline()
        lines = infile.readlines()
    finally:
        infile.close()

    # generate a key for the data ordering within the file
    key = generateKeyDict(keyline)

    # Check and find out if the statistic is in this file
    if statistic not in key:
        sys.stderr.write("**WARNING STATISTIC NOT FOUND***\n")
        Usage.showHelp(parseConfusionMatrixStats, exit=1)

    # Tokenize every data line once; blank lines carry no data and are
    # skipped instead of failing on a missing column.
    rows = []
    for line in lines:
        tokens = line.split()
        if tokens:
            rows.append(tokens)

    # Generate the <fileName> to <array index> dictionary.  File1 names
    # are numbered first, in order of first appearance; names that only
    # ever show up in the File2 column are numbered after them so that
    # every name used below has an index.
    index = {}
    count = 0
    for column in ('File1', 'File2'):
        for tokens in rows:
            name = tokens[key[column]]
            if name not in index:
                index[name] = count
                index[count] = name
                count = count + 1

    # count is the number of distinct file names (index holds two
    # entries per name), i.e. the side length of the square matrix.
    stats = Numeric.zeros((count, count), 'float')

    # fill the array with the requested statistic
    for tokens in rows:
        row = index[tokens[key['File1']]]
        col = index[tokens[key['File2']]]
        stats[row][col] = float(tokens[key[statistic]])

    return (stats, index)

def printStatsMatrix(statsArray, statsIndex, stream=sys.stdout):

    """
    Write statsArray to stream as tab separated text, one line per row.

    statsArray is iterated row by row and statsIndex[n] is looked up
    for the n-th row (counting from 0); both are normally the array and
    the index dictionary created by parseConfusionMatrixStats.

    Each output line starts with the row's label from statsIndex,
    followed by the row's values formatted with %f.  Every field,
    including the last one, is followed by a tab, and the line is ended
    with a newline.
    """

    emit = stream.write
    rowNumber = 0
    for values in statsArray:
        # label first, then the numbers of this row
        emit("%s\t" % statsIndex[rowNumber])
        rowNumber += 1
        for entry in values:
            emit("%f\t" % entry)
        emit("\n")

def createColorMap(stats, index, filename, title=None, abbreviate=0):

    """
    Parses the index into a list of labels and sends it, together with
    the data, off to plot.colorMap (plot is compClust.visualize.plot).

    stats      -- array of scores; its first dimension gives the number
                  of labels.  It is handed to plot.colorMap unchanged.
    index      -- dictionary in which index[i] is the label of
                  row/column i.
    filename   -- passed straight through to plot.colorMap.
    title      -- passed to plot.colorMap as title.
    abbreviate -- when true, a label containing '_a_' is replaced by the
                  text between its first and second '_a_' (or up to the
                  end of the label when there is only one).  Labels
                  that cannot be abbreviated are used unchanged.

    The same labels are used for rows and columns, and the call fixes
    dataMin=0, dataMax=1 and printValues=1.
    """
    labels = []
    for i in range(stats.shape[0]):
        label = index[i]
        if abbreviate:
            try:
                label = label.split('_a_')[1]
            except (AttributeError, IndexError):
                # IndexError: the label has no '_a_' separator.
                # AttributeError: the label is not a string.
                # In both cases keep the label as it is; anything else
                # is a real error and is no longer hidden.
                pass
        labels.append(label)
    plot.colorMap(stats, filename, rowLabels=labels, colLabels=labels,
                  dataMin=0, dataMax=1, printValues=1, title=title)
    

def main():
    """
    Usage: confusionMatrixScoresParser.py -s <statistic> -f <Summary filename>

       -s indicates what statistic to grab valid entries are:
           NMI, TransposedNMI, AverageNMI, or LinearAssignment

       -f is the summary file which contains data of the form
          #File1  \t File2   \t <scoreName> \t ...
          <file1> \t <file2> \t <score> \t ...   

       -p <filename> this optional argument generates a postscript
          heat map display of all the confusionMatrix scores

       -a abbriviate file names by splitting on _a_ 
    """
    # NOTE: the docstring above is handed to Usage.showHelp/showUsage
    # below, so it is kept exactly as written.

    # args is not used by this script.
    (opts, args) = WrapperUtil.createOptTree("as:f:hp:")

    # opts is only ever queried through has_key() and [] here, because
    # the type returned by createOptTree is not visible in this file.
    if (opts.has_key('-h')):
        Usage.showHelp(main, exit=1)

    # -f and -s are both required; say so explicitly rather than relying
    # on a KeyError from the lookups below.
    if not (opts.has_key('-f') and opts.has_key('-s')):
        Usage.showUsage(main, exit=1)
        # Only reached if showUsage comes back instead of exiting.
        return

    try:
        (stats, index) = parseConfusionMatrixStats(opts['-f'], opts['-s'])
    except (SystemExit, KeyboardInterrupt):
        # A requested exit or a user interrupt is not a usage error.
        raise
    except Exception:
        # Report what actually went wrong (unreadable file, malformed
        # line, ...) before falling back to the usage message.
        # sys.exc_info() is used so the same code works with old and
        # new exception-binding syntax.
        sys.stderr.write("Error: %s\n" % (sys.exc_info()[1],))
        Usage.showUsage(main, exit=1)
        # Only reached if showUsage comes back instead of exiting.
        return

    printStatsMatrix(stats, index)

    # Optional heat map, titled with the name of the summary file.
    if (opts.has_key('-p')):
        createColorMap(stats, index, opts['-p'], title=opts['-f'],
                       abbreviate=opts.has_key('-a'))
        
# Run the command line interface only when this file is executed as a
# script; importing it as a module just defines the functions above.
if __name__ == "__main__":
    main()
