########################################
# The contents of this file are subject to the MLX PUBLIC LICENSE version
# 1.0 (the "License"); you may not use this file except in
# compliance with the License.
# 
# Software distributed under the License is distributed on an "AS IS"
# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
# the License for the specific language governing rights and limitations
# under the License.
# 
# The Original Source Code is "compClust", released 2003 September 03.
# 
# The Original Source Code was developed by the California Institute of
# Technology (Caltech).  Portions created by Caltech are Copyright (C)
# 2002-2003 California Institute of Technology. All Rights Reserved.
########################################
import Numeric
import LinearAlgebra
import MLab
from compClust.util.DistanceMetrics import MahalanobisDistance

# kinds was temporarily removed from python2.3 so try and provide a reasonable
# default DBL_MIN & DBL_MAX (assuming a 64 bit double).
try:
  import kinds
except ImportError:
  # hard-coded IEEE 754 double-precision limits
  DBL_MIN = 2.2250738585072014e-308
  DBL_MAX = 1.7976931348623157e+308
else:
  DBL_MIN = kinds.default_float_kind.MIN
  DBL_MAX = kinds.default_float_kind.MAX

def make_diag_matrix(matrix, k=0):
  """Zero every element of a matrix except those on its k-th diagonal.
  """
  # Same trick MLab's diag code uses: multiply elementwise by a 0/1 mask
  # matrix that is one only on the k-th diagonal.
  m = Numeric.asarray(matrix)
  shape = m.shape
  mask = MLab.eye(shape[0], shape[1], k=k)
  return mask * m

def vector_prob_gauss(data, mean, covar, min_diag=1.0e-6):
  """Compute the conditional probability of the datum assuming it was drawn from a 
     multivariate normal distribution with the specified mean and covariance matrix,
     using the normal probability density function.  Returns the vector of proba-
     bilities, one for each datum.

     Note: covar is perturbed in place by restrict_illcond when its diagonal
     is ill-conditioned.
  """
  data_rows = data.shape[0]
  data_cols = data.shape[1]

  # guard against ill-conditioned covariance matrices (modifies covar in place)
  restrict_illcond(covar, min_diag)

  cholesky_covar = LinearAlgebra.cholesky_decomposition(covar)
  cholesky_covar_diag = MLab.diag(cholesky_covar)

  # compute the square root of the determinant: it is the product of the
  # diagonal elements of the Cholesky factor
  denom = 1.0
  for i in range(data_cols):
    denom *= cholesky_covar_diag[i]
  #denom = Numeric.sqrt(LinearAlgebra.determinant(covar))

  # compute the constant normalization factor of the multivariate normal pdf
  denom = pow(2.0*Numeric.pi, data_cols/2.0)*denom

  probabilities = Numeric.zeros((data_rows),'d')
  
  # compute probabilities for each datum in the dataset
  for k in xrange(data_rows):
    datum = Numeric.take(data, [k])
    mahalanobis = MahalanobisDistance(datum, mean, covar)[0]
    probabilities[k] = Numeric.exp(-0.5*mahalanobis)/denom
    # clamp the k-th entry to the representable double range.
    # BUG FIX: the original rebound the whole `probabilities` array to the
    # scalar DBL_MIN, and compared the array object (not element k) to DBL_MAX.
    if probabilities[k] == 0.0:
      probabilities[k] = DBL_MIN
    elif probabilities[k] > DBL_MAX:
      probabilities[k] = DBL_MAX

  return probabilities

def restrict_illcond(covar, min_diag=1e-06):
  """Force each element on the diagonal of covar up to at least min_diag.

  The matrix is modified in place; it is also returned for convenience.
  """
  for idx in xrange(len(covar)):
    diag_value = covar[idx][idx]
    if diag_value < min_diag:
      covar[idx][idx] = min_diag
  return covar
  
def mixture_likelihood(data, means, covariances, weights, min_diag=1.0e-6):
  """Returns the log-likelihood of data given an N dimensional mixture of
     k-Gaussian models.
     
     Where:
       - data is a M-by-N matrix containing N-dimensional datapoints (one per
         row) to use when computing the log-likelihood.
     
       - means is a k-by-N matrix containing the means (one per row) of the
         k-Gaussian models.
     
       - covariances is a 3D matrix composed of k N-by-N covariance matrices.
     
       - weights is a 1-by-k vector of model weights.
     
       - min_diag is a small number for perturbing ill-conditioned covariance
         matrices.  Optional.  If not specified, defaults to 1.0e-6.
     Returns:
       - log_likelihood :- the scalar log-likelihood of the data under the
                           mixture model.
  """
  ######### 
  # massage & test parameters
  data = Numeric.asarray(data)
  data_rows = data.shape[0]
  data_cols = data.shape[1]

  means = Numeric.asarray(means)
  if means.shape[1] != data_cols:
    raise ValueError("Data and means must have the same number of dimensions")
  
  num_classes = means.shape[0]

  covars = Numeric.asarray(covariances)
  if covars.shape[0] != num_classes:
    raise ValueError("The number of covariance matricies does not match the "\
                     "number of models (classes).")
  elif covars.shape[1] != covars.shape[2]:
    # BUG FIX: the original constructed this exception but never raised it
    raise ValueError("Each covariance matrix must be square.")
  # FIXME: C version tests for symmetry. I'm going to ignore this for the moment
  # FIXME: C version tests for positive definiteness

  weights = Numeric.asarray(weights)
  if num_classes != weights.shape[0]:
    raise ValueError("The number of class weights does not match the "\
                     "number of models (classes).")

  ############
  # create scratch space.
  # BUG FIX: the scratch arrays must be floating point ('d'); the original
  # integer arrays silently truncated the probability sums.
  probabilities = Numeric.zeros((num_classes, data_rows),'d')
  probability_sums = Numeric.zeros((num_classes),'d')

  ###########
  # compute probability data
  for k in xrange(num_classes):
    # A. first compute the likelihood of the data given the parameters
    # for this class; store it in the array probabilities[k]
    probabilities[k] = vector_prob_gauss(data, means[k], covars[k], min_diag)

    # B. multiply the class prior, or weight, to get the joint probability
    # of the data and the parameters; store it in probabilities[k].
    # BUG FIX: this belongs inside the loop -- the original applied only the
    # last class's weight to the entire probabilities matrix.
    probabilities[k] *= weights[k]

  # the probability of each datum across all models (Numeric.sum reduces
  # over the first axis, i.e. over classes, giving a length-M vector)
  probability_data = Numeric.sum(probabilities)
  
  # C. normalize by the probability of the data across all models to get the
  # posterior probability of each datum; store it in probabilities[k]
  # (skip zero entries to avoid division by zero)
  for k in xrange(num_classes):
    for i in xrange(data_rows):
      if probability_data[i] != 0:
        probabilities[k][i] /= probability_data[i]

  # compute the sum of the posterior probabilities for each class, which is
  # used in computing the parameters later
  for k in xrange(num_classes):
    probability_sums[k] = Numeric.sum(probabilities[k])

  # DBL_MIN keeps log() defined for data with zero probability
  log_likelihood = Numeric.sum(Numeric.log((probability_data+DBL_MIN)))
  return log_likelihood

def covar_weights_estimate(data, means, class_labels):
  """Estimate per-class covariance matrices and mixture weights from the
  cluster means and hard cluster memberships.

  Input:
    data (2D matrix) [num_data x num_dimensions]
    means (2D matrix) [num_classes x num_dimensions]
    class_labels (1D matrix) [num_data]
  Output:
    covariance matrix (3D matrix) [num_classes x num_dims x num_dims]
    weights (1D matrix) [num_classes]
  """
  data = Numeric.asarray(data)
  if len(data.shape) != 2:
    raise ValueError("Data matrix needs to be 2D not %dD" %(len(data.shape)))
  (num_data, num_dims) = data.shape

  means = Numeric.asarray(means)
  (num_classes, means_cols) = means.shape
  if num_dims != means_cols:
    raise ValueError("Data and means must have the same number of dimensions "\
                     "(matrix columns)." );

  class_labels = Numeric.asarray(class_labels)

  weights = Numeric.zeros((num_classes),'d')
  covars = Numeric.zeros((num_classes, num_dims, num_dims),'d')

  # accumulate class counts and the upper triangle of each scatter matrix
  for row in xrange(num_data):
    label = class_labels[row]
    weights[label] += 1
    for a in xrange(num_dims):
      deviation_a = data[row][a] - means[label][a]
      for b in xrange(a, num_dims):
        covars[label][a][b] += deviation_a * (data[row][b] - means[label][b])

  # unbiased sample covariance: divide each scatter matrix by (count - 1)
  for label in xrange(num_classes):
    covars[label] = covars[label] / (weights[label]-1)

  # turn the per-class counts into mixture weights
  weights = weights / num_data

  # mirror the upper triangle into the lower triangle (covariance is symmetric)
  for label in xrange(num_classes):
    for a in xrange(num_dims):
      for b in xrange(a):
        covars[label][a][b] = covars[label][b][a]
  return (covars, weights)
