#!/usr/bin/env python2.2
########################################
# The contents of this file are subject to the MLX PUBLIC LICENSE version
# 1.0 (the "License"); you may not use this file except in
# compliance with the License.
# 
# Software distributed under the License is distributed on an "AS IS"
# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
# the License for the specific language governing rights and limitations
# under the License.
# 
# The Original Source Code is "compClust", released 2003 September 03.
# 
# The Original Source Code was developed by the California Institute of
# Technology (Caltech).  Portions created by Caltech are Copyright (C)
# 2002-2003 California Institute of Technology. All Rights Reserved.
########################################

"""
Test suite for the old Dataset schema (obsolete).
"""

import unittest
import os
import string
import tempfile

import Numeric

from compClust.mlx.datasets import Dataset
from compClust.mlx.labelings import Labeling
import compClust.mlx

# Fixture text used by check_write_data: eight tab-delimited rows, each a
# single-letter field (a-h) followed by three floating point values, some
# of them padded with a leading space.  The literal starts with a newline.
# NOTE(review): the leading letter is presumably a row label -- confirm
# against the Dataset parser.
simple_data = """
a\t 1.0\t 2.0\t 3.0
b\t 4.0\t 5.0\t 6.0
c\t 7.0\t 8.0\t 9.0
d\t10.0\t11.0\t12.0
e\t13.0\t14.0\t15.0
f\t16.0\t17.0\t18.0
g\t19.0\t20.0\t21.0
h\t22.0\t23.0\t24.0
"""

# Fixture text with the same eight-row, tab-delimited layout, where some
# values are replaced by "nan" written in assorted upper/lower case
# spellings (nan, naN, nAn, nAN, Nan, NaN, NAn, NAN).
# NOTE(review): no test in the visible part of this file references
# nan_data -- confirm whether it is still needed.
nan_data = """
a\t1\t2\tnan
b\t4\tnaN\t6
c\tnAn\t8\t9
d\t10\tnAN\t12
e\t13\t14\tNan
f\t16\tNaN\t18
g\tNAn\tNAN\t21
h\t22\t23\t24
"""

# Fixture text used by check_read_array_spurious_whitespace: eight
# tab-delimited rows containing nan entries plus deliberate noise -- a
# trailing tab (row a), a doubled tab (row b), and stray spaces before or
# after values (rows c, e and g).  The test expects this to load as
# 8 rows by 3 columns.
spurious_tab_nan_data = """
a\t1\t2\tNaN\t
b\t4\tnan\t\t6
c\tnan\t8\t  9
d\t10\tnan\t12
e\t13\t  14\tnan
f\t16\tnan\t18
g\tnan \tnan\t21
h\t22\t23\t24
"""

class DatasetTestCases(unittest.TestCase):
  """Checks for loading a Dataset from a text file and writing it back out.

  Every test runs with the working directory switched to the
  compClust.mlx package directory; tearDown restores the directory that
  was current beforehand.
  """

  def setUp(self):
    """Remember the current directory, then chdir into the mlx package."""
    self.original_dir = os.getcwd()
    os.chdir(compClust.mlx.__path__[0])

  def tearDown(self):
    """Return to the directory that was current before the test ran."""
    os.chdir(self.original_dir)

  def _load_dataset(self, text):
    """Write text to a fresh temporary file and return Dataset(filename).

    The descriptor is closed before the file is handed to Dataset, and
    the file is removed afterwards even when writing or loading raises,
    so a failing test does not leak descriptors or temporary files.
    """
    fd, filename = tempfile.mkstemp()
    try:
      try:
        os.write(fd, text)
      finally:
        os.close(fd)
      return Dataset(filename)
    finally:
      os.unlink(filename)

  def check_read_array_spurious_whitespace(self):
    """Try parsing an array with trailing delimiters.

    Read in a tab delimited array that has extra
    delimiters at the end and has scattered extra spaces
    in the file.
    """
    dataset = self._load_dataset(spurious_tab_nan_data)

    self.failUnless(dataset.getNumRows() == 8,
                    "num rows %d" % (dataset.getNumRows()))
    self.failUnless(dataset.getNumCols() == 3,
                    "num cols %d" % (dataset.getNumCols()))

    for row in dataset.getData():
      self.failUnless(len(row) == 3, "len(%s) = %d" % (str(row), len(row)))

  def check_write_data(self):
    """Verify that we can write out a dataset correctly.

    The dataset is loaded from simple_data, written to a temporary
    stream with writeDataset, and the stream is parsed back into an
    array of floats that must match dataset.getData() in shape and in
    every element.
    """
    dataset = self._load_dataset(simple_data)

    # The temporary stream is closed even if writing or parsing fails.
    output_stream = tempfile.TemporaryFile()
    try:
      dataset.writeDataset(output_stream)

      # seek to the beginning and read back what was written
      output_stream.seek(0)
      data = []
      for row in output_stream.readlines():
        # split() with no argument already discards surrounding whitespace
        data.append([float(field) for field in row.split()])
    finally:
      output_stream.close()

    written = Numeric.asarray(data, 'd')
    expected = Numeric.asarray(dataset.getData())

    # Comparing arrays with == is elementwise, and truth-testing that
    # result does not require every element to match, so check the shape
    # and then reduce the elementwise comparison explicitly.
    self.failUnless(Numeric.shape(expected) == Numeric.shape(written),
                    "shape %s != %s" % (str(Numeric.shape(expected)),
                                        str(Numeric.shape(written))))
    self.failUnless(
      Numeric.alltrue(Numeric.ravel(Numeric.equal(expected, written))),
      "written data differs from dataset.getData()")

    # NOTE: the diff-based whitespace checks below are disabled; they refer
    # to a file_arguments name that is not defined in this method.
    #status = os.system("diff -q " + file_arguments + " > /dev/null")
    #if os.WEXITSTATUS(status) == 0:
    #  fail("write_data preserved meaningless whitespace.")

    #status = os.system("diff -w -q " + file_arguments + " > /dev/null")
    #if os.WEXITSTATUS(status) != 0:
    #  fail("write_data didn't preserve non-whitespace correctly.")
    
def suite(**kw):
  """Assemble the Dataset checks into a TestSuite, in execution order.

  Keyword arguments are accepted but not used.
  """
  check_names = ("check_read_array_spurious_whitespace",
                 "check_write_data")
  dataset_suite = unittest.TestSuite()
  for check_name in check_names:
    dataset_suite.addTest(DatasetTestCases(check_name))
  return dataset_suite

# When executed as a script, hand control to unittest and run the tests
# returned by suite().
if __name__ == "__main__":
  unittest.main(defaultTest="suite")
