#!/usr/bin/env python2.2
########################################
# The contents of this file are subject to the MLX PUBLIC LICENSE version
# 1.0 (the "License"); you may not use this file except in
# compliance with the License.
# 
# Software distributed under the License is distributed on an "AS IS"
# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
# the License for the specific language governing rights and limitations
# under the License.
# 
# The Original Source Code is "compClust", released 2003 September 03.
# 
# The Original Source Code was developed by the California Institute of
# Technology (Caltech).  Portions created by Caltech are Copyright (C)
# 2002-2003 California Institute of Technology. All Rights Reserved.
########################################
#
#
#  Written By :  Christopher Hart
#  Date       :  April 2001
#

## Module importation 

import re
import os
import sys
import getopt
import string
from compClust.util  import Usage
from compClust.util  import Assert
from string    import atoi
from compClust.score.ConfusionMatrix2 import ConfusionMatrix

def previouslyRun(previousRuns, file1, file2):

    """
    previouslyRun(previousRuns, file1, file2)

    Returns 1 if confusion matrix scores were already calculated for the
    pair (file1, file2) and every recorded score is a valid number in the
    range [0, 1]; otherwise returns 0.  This function is meant to be used
    with scoreAllLikeClusters only and therefore assumes that all NMI/LA
    scores were required (ie. a total of 4 scores).
    """

    # Filenames matching '@...c' denote the reference confusion matrix;
    # normalize them to the canonical key used in previousRuns.
    if re.search(r'\@[\w,\.]*c', file1):
        file1 = 'reference_confMat'
    if re.search(r'\@[\w,\.]*c', file2):
        file2 = 'reference_confMat'

    # No record of this pair at all -> not previously run.
    try:
        scores = previousRuns[(file1, file2)]
    except KeyError:
        return 0

    # Every recorded score must parse as a number and fall in [0, 1];
    # anything else means the previous run was incomplete or corrupt.
    for item in scores:
        try:
            value = float(item)
        except (TypeError, ValueError):
            return 0
        if not (0 <= value <= 1):
            return 0

    return 1
    


def performComparison(fileList, printNMI=0, printAvgNMI=0, printTransposeNMI = 0,
                      printLA=0, printConfMatrixes = 0, verbose=0,
                      printConfMatrixWithTitles=0, reference = 0,
                      outputStream = sys.stdout, append=0,
                      previousRuns={}):

    """
    performComparison(fileList, printNMI=0, printAvgNMI=0,
                      printTransposeNMI = 0, printLA=0, printConfMatrixes = 0,
                      verbose=0, printConfMatrixWithTitles=0, reference = 0,
                      outputStream = sys.stdout, append=0, previousRuns={})

    Constructs the pairwise confusion matrixes for the files in fileList
    and calculates and prints the requested scores (NMI, transposed NMI,
    average NMI, linear assignment) as tab-separated columns, one row per
    file pair, to outputStream.

    When reference is 1, the last entry of fileList is taken to be the
    reference file and every other file is compared only against it.

    The parameters append and previousRuns are meant to be used with
    scoreAllLikeClusters: when append is true the header line is
    suppressed and pairs already recorded with valid scores in
    previousRuns are skipped (see previouslyRun).

    NOTE(review): previousRuns={} is a mutable default argument; this
    function only reads it, but confirm no caller relies on sharing it.
    """

    STDERR = sys.stderr

    # Write the tab-separated header row, unless we are appending to the
    # output of a previous run (which already has a header).
    if not append:
        outputStream.write("#File1\tFile2\t")
        if printNMI == 1:
            outputStream.write("NMI\t")
        if printTransposeNMI == 1:
            outputStream.write("TransposedNMI\t")
        if printAvgNMI == 1:
            outputStream.write("AverageNMI\t")
        if printLA == 1:
            outputStream.write("LinearAssignment")
        outputStream.write("\n")
    

    # this is a little clause to keep the code from
    # comparing the reference to itself - which is
    # uninteresting.  (The reference is the last list entry.)
    stoppingPoint = len(fileList)
    if (reference == 1):
        stoppingPoint = stoppingPoint-1

    for k in range(0, stoppingPoint):

        # Pick where the inner loop starts: reference mode compares each
        # file against the reference only; plain NMI is not symmetric so
        # all ordered pairs are needed; the remaining scores are
        # symmetric so the inner loop can start at k.
        if (reference == 1):
            # this points l to be the reference file, because the reference was appended to args
            startOfInnerLoop = len(fileList)-1
        elif (printNMI == 1):
            # not symmetric
            startOfInnerLoop = 0
        else:
            # symetric
            startOfInnerLoop = k

        # Compare file k against each file l in the inner range, skipping
        # pairs already scored in previousRuns when appending.
        for l in range(startOfInnerLoop, len(fileList)): 
            if (not previouslyRun(previousRuns, os.path.basename(fileList[k]), os.path.basename(fileList[l])) or not append):
                if (verbose==1):
                    STDERR.write('\n\tComparing Files: '+os.path.basename(fileList[k]) +' and '+os.path.basename(fileList[l]))
                # Build the confusion matrix for this pair; note the
                # argument order (l, k) matches the column order written
                # below.
                tempConfusionMatrix = ConfusionMatrix()
                tempConfusionMatrix.createConfusionMatrixFromFile(fileList[l], fileList[k])
                outputStream.write( os.path.basename(fileList[l])+"\t"+ os.path.basename(fileList[k])+"\t")
                if (printNMI == 1):
                    outputStream.write("%1.4f\t"%tempConfusionMatrix.NMI())
                if (printTransposeNMI == 1):
                    outputStream.write("%1.4f\t"%tempConfusionMatrix.transposeNMI())            
                if (printAvgNMI ==1):
                    outputStream.write("%1.4f\t"%tempConfusionMatrix.averageNMI())
                if (printLA == 1):
                    outputStream.write("%1.4f\t"%tempConfusionMatrix.linearAssignment())
                outputStream.write('\n')
                # Optionally dump the full matrix of counts after the
                # score row, with or without cluster labels.
                if (printConfMatrixes == 1):
                    tempConfusionMatrix.printCounts(outputStream=outputStream)
                if (printConfMatrixWithTitles==1):
                    tempConfusionMatrix.printCounts(labels=1, outputStream=outputStream)
            else:
                if (verbose==1):
                    STDERR.write('\n\tSkipping  Files: '+os.path.basename(fileList[k]) +' and '+os.path.basename(fileList[l]))


def createOptTree(flags):
    """
    createOptTree(flags)

    Parse sys.argv[1:] with getopt using the short-option spec `flags`
    and return (optTree, args), where optTree maps each option flag
    (eg. '-n') to its argument string ('' for flags that take no
    argument) and args is the list of remaining positional arguments.

    On a malformed command line, prints the usage message for main() and
    exits with status 2.
    """

    try:
        opts, args = getopt.getopt(sys.argv[1:], flags)
    except getopt.GetoptError:
        # Only option-parsing errors should produce the usage message;
        # anything else is a real bug and must propagate.
        Usage.showUsage(main)
        sys.exit(2)

    optTree = {}
    for flag, value in opts:
        optTree[flag] = value

    return (optTree, args)

def main():
    """
    confusionMatrix.py <options> <filelist>

    Purpose    :  This is a wrapper around the confusion matrix utilities.  It
                  can construct the confusion matrixes and it can also perform
                  various scoring operations on those confusion matrixes.

    options    :  -c print out the confusion matrix 
                  -t print out the confusion matrix with titles
                  -n print out the NMI score
                  -a print out the average NMI score
                  -l print out the Linear assignment score
                  -p print out the transposed confusion matrix's NMI score
                  -v verbose messages to stderr
                  -h display help usage
                  -r <referenceFileName> perform the comparison of the filelist
                                         only against the reference opposed to
                                         pairwise

    requires   : mlx  (on our caltech systems source
                           /proj/code/python/setup.sh)

                  environment variable WMATCH_COMMAND be set.
                    (eg. export WMATCH_COMMAND=/proj/code/bin/wmatch)
"""

    optTree, args = createOptTree('cnalvthr:p')

    # -h: show this docstring as help and exit.
    if '-h' in optTree:
        Usage.showHelp(main, exit=1)

    # At least one cluster file is required.
    if len(args) == 0:
        Usage.showUsage(main, exit=1)

    # The reference file is appended to the end of the file list;
    # performComparison treats the last entry as the reference when
    # reference is true.
    if '-r' in optTree:
        args.append(optTree['-r'])

    # All input files must exist and be readable.
    Assert.fs_objects_have_permissions(args, os.R_OK)

    performComparison(args,
                      printNMI = '-n' in optTree,
                      printAvgNMI = '-a' in optTree,
                      printTransposeNMI = '-p' in optTree,
                      printLA = '-l' in optTree,
                      printConfMatrixes = '-c' in optTree,
                      verbose = '-v' in optTree,
                      printConfMatrixWithTitles = '-t' in optTree,
                      reference = '-r' in optTree,
                      append=0
                      )

if __name__ == "__main__":
    # Script entry point: run the command-line interface.
    main()
    
    