########################################
# The contents of this file are subject to the MLX PUBLIC LICENSE version
# 1.0 (the "License"); you may not use this file except in
# compliance with the License.
# 
# Software distributed under the License is distributed on an "AS IS"
# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
# the License for the specific language governing rights and limitations
# under the License.
# 
# The Original Source Code is "compClust", released 2003 September 03.
# 
# The Original Source Code was developed by the California Institute of
# Technology (Caltech).  Portions created by Caltech are Copyright (C)
# 2002-2003 California Institute of Technology. All Rights Reserved.
########################################

"""
Code to create histogram labelings.  Allows very convenient
selections based on distribution criteria.

"""

import Numeric
import MLab

from compClust.mlx.labelings import Labeling

def _createLabelingList(data, nBins, dataRange=None):

  """
  _createLabelingList(data, nBins, dataRange=None)

  Helper function which assigns every value in data to one of nBins
  equal-width bins and returns a list holding, for each value and in
  the same order, the center of the bin that value falls into.

  data      -- flat sequence of numeric values
  nBins     -- number of bins; must be at least 1
  dataRange -- optional (min, max) pair giving the span covered by the
               bins.  When omitted, the min and max of data are used.

  Values outside the range are kept in the outermost bins: anything
  below the minimum goes to the first bin, anything at or above the
  maximum goes to the last bin.  If the range has zero width, every
  value is labeled with that single point.  Empty data yields an empty
  list.

  Raises ValueError if nBins is smaller than 1.
  """

  #
  # This code is largely modeled after ScientificPython's Histogram
  # module
  #

  binCount = int(nBins)
  if binCount < 1:
    raise ValueError('nBins must be at least 1, got %r' % (nBins,))

  values = [float(value) for value in data]
  if not values:
    return []

  # work out the span covered by the bins and the width of one bin
  if dataRange is None:
    dataMin = min(values)
    dataMax = max(values)
  else:
    dataMin, dataMax = [float(bound) for bound in dataRange]

  binWidth = (dataMax - dataMin) / binCount
  if binWidth == 0.0:
    # degenerate range: every bin center collapses onto dataMin, and
    # dividing by the width below would fail
    return [dataMin] * len(values)

  # the label of a bin is its center
  centers = [dataMin + binWidth * (position + 0.5)
             for position in range(binCount)]
  lastBin = binCount - 1

  labels = []
  for value in values:
    offset = (value - dataMin) / binWidth
    # Clamp explicitly instead of relying on an IndexError: a small
    # negative index would otherwise wrap around and select a bin from
    # the high end.  Written as "not >= 0" so that NaN also lands in
    # the first bin rather than reaching int().
    if not offset >= 0.0:
      binIndex = 0
    elif offset >= binCount:
      binIndex = lastBin
    else:
      binIndex = int(offset)
    labels.append(centers[binIndex])
  return labels

def binOnRowFunction(ds, func, nBins, dataRange=None, name=None):

  """
  binOnRowFunction(ds, func, nBins, dataRange=None, name=None)

  Builds a row labeling on ds from a per-row score.  func is called as
  func(ds, rowIndex) for every row index of ds and should return a
  numeric value; the scores are split into nBins bins and each row is
  labeled according to the bin its score falls into.

  dataRange optionally fixes the (min, max) span of the bins.  Binning
  is delegated to _createLabelingList; scores outside the span are
  intended to end up in the outermost bins.

  name is the name given to the labeling and defaults to
  'histogram row function'.

  Returns the new Labeling.
  """

  rowCount = ds.getNumRows()
  scores = Numeric.array([func(ds, rowIndex) for rowIndex in range(rowCount)])

  labelingName = name
  if labelingName is None:
    labelingName = 'histogram row function'

  rowLabeling = Labeling(ds, labelingName)
  rowLabeling.labelRows(_createLabelingList(scores, nBins, dataRange))
  return rowLabeling

def binOnColFunction(ds, func, nBins, dataRange=None, name=None):

  """
  binOnColFunction(ds, func, nBins, dataRange=None, name=None)

  Builds a column labeling on ds from a per-column score.  func is
  called as func(ds, colIndex) for every column index of ds and should
  return a numeric value; the scores are split into nBins bins and
  each column is labeled according to the bin its score falls into.

  dataRange optionally fixes the (min, max) span of the bins.  Binning
  is delegated to _createLabelingList; scores outside the span are
  intended to end up in the outermost bins.

  name is the name given to the labeling and defaults to
  'histogram col function'.

  Returns the new Labeling.
  """

  colCount = ds.getNumCols()
  scores = Numeric.array([func(ds, colIndex) for colIndex in range(colCount)])

  labelingName = name
  if labelingName is None:
    labelingName = 'histogram col function'

  colLabeling = Labeling(ds, labelingName)
  colLabeling.labelCols(_createLabelingList(scores, nBins, dataRange))
  return colLabeling

def binOnColData(ds, col, nBins, dataRange=None, name=None):

  """
  binOnColData(ds, col, nBins, dataRange=None, name=None)

  Builds a row labeling on ds from the contents of one column.  The
  values returned by ds.getColData(col) are split into nBins bins and
  the resulting labels are applied to the rows of ds.

  dataRange optionally fixes the (min, max) span of the bins.  Binning
  is delegated to _createLabelingList; values outside the span are
  intended to end up in the outermost bins.

  name is the name given to the labeling and defaults to
  'histogram column <col> values', where col is formatted as an
  integer.

  Returns the new Labeling.
  """

  columnValues = ds.getColData(col)

  labelingName = name
  if labelingName is None:
    labelingName = 'histogram column %i values' % col

  rowLabeling = Labeling(ds, labelingName)
  binLabels = _createLabelingList(columnValues, nBins, dataRange)
  rowLabeling.labelRows(binLabels)
  return rowLabeling

def binOnRowData(ds, row, nBins, dataRange=None, name=None):

  """
  binOnRowData(ds, row, nBins, dataRange=None, name=None)

  Builds a column labeling on ds from the contents of one row.  The
  values returned by ds.getRowData(row) are split into nBins bins and
  the resulting labels are applied to the columns of ds.

  dataRange optionally fixes the (min, max) span of the bins.  Binning
  is delegated to _createLabelingList; values outside the span are
  intended to end up in the outermost bins.

  name is the name given to the labeling and defaults to
  'histogram row <row> values', where row is formatted as an integer.

  Returns the new Labeling.
  """

  rowValues = ds.getRowData(row)

  labelingName = name
  if labelingName is None:
    labelingName = 'histogram row %i values' % row

  colLabeling = Labeling(ds, labelingName)
  binLabels = _createLabelingList(rowValues, nBins, dataRange)
  colLabeling.labelCols(binLabels)
  return colLabeling

def binOnRowVector(ds, rowVector, nBins, dataRange=None, name=None):

  """
  binOnRowVector(ds, rowVector, nBins, dataRange=None, name=None)

  Builds a row labeling on ds from an externally supplied vector.
  rowVector is a sequence of numeric values, expected to have one
  entry per row of ds; the values are split into nBins bins by
  _createLabelingList and the resulting labels are applied to the
  rows of ds.

  dataRange optionally fixes the (min, max) span of the bins.  name is
  the name given to the labeling and defaults to
  'histogram by rowVector'.

  Returns the new Labeling.
  """

  labelingName = name
  if labelingName is None:
    labelingName = 'histogram by rowVector'

  rowLabeling = Labeling(ds, labelingName)
  binLabels = _createLabelingList(rowVector, nBins, dataRange)
  rowLabeling.labelRows(binLabels)
  return rowLabeling

def binOnColVector(ds, colVector, nBins, dataRange=None, name=None):

  """
  binOnColVector(ds, colVector, nBins, dataRange=None, name=None)

  Builds a column labeling on ds from an externally supplied vector.
  colVector is a sequence of numeric values, presumably with one entry
  per column of ds (the labels are handed to labelCols); the values
  are split into nBins bins by _createLabelingList and the resulting
  labels are applied to the columns of ds.

  dataRange optionally fixes the (min, max) span of the bins.  name is
  the name given to the labeling and defaults to
  'histogram by colVector'.

  Returns the new Labeling.
  """

  labelingName = name
  if labelingName is None:
    labelingName = 'histogram by colVector'

  colLabeling = Labeling(ds, labelingName)
  binLabels = _createLabelingList(colVector, nBins, dataRange)
  colLabeling.labelCols(binLabels)
  return colLabeling


