import cPickle
import math
import os

from compClust.mlx import datasets
from compClust.mlx import labelings
from compClust.mlx import views

from compClust.gui.DataSource import DataSource

from matplotlib.pylab import size

def loadGNF(directory='/home/diane/proj/pca-bmc-2005/'):
  """Load the GNF U133A+GNF1B dataset, building and caching it on first use.

  Three files under ``directory`` act as successive caches:

  1. ``...AD.data.pkl`` -- if it exists, it is unpickled and returned
     immediately.
  2. ``...AD.filtered.log2.data.tab`` -- if it is missing, it is generated
     from ``...AD.data.tab`` by replacing values below 1 with 0.5 and
     writing out a log2 ``FunctionView`` of the dataset.
  3. The log2 file is then wrapped in a ``DataSource``, given a default
     ``RowPCAView``, the column/row labelings, and primary/secondary label
     names; the result is pickled to (1) and returned.

  directory -- folder holding the ``U133A+GNF1B_101402.AD.*`` files; the
               generated log2 table and the pickle are written there too.

  Returns the object loaded from the pickle, or the freshly built
  ``DataSource``.

  NOTE: unpickling can execute arbitrary code, so only point ``directory``
  at a location whose contents you trust.
  """
  # Every file involved shares this stem.
  prefix = '%s/U133A+GNF1B_101402.AD' % (directory)
  picklefile = prefix + '.data.pkl'

  if os.path.exists(picklefile):
    print("loading pickle file")
    # try/finally (rather than 'with') keeps this runnable on old Python 2
    # interpreters while still closing the handle deterministically.
    stream = open(picklefile, 'r')
    try:
      return cPickle.load(stream)
    finally:
      stream.close()

  datafile = prefix + '.data.tab'
  datafile_log2 = prefix + '.filtered.log2.data.tab'
  if not os.path.exists(datafile_log2):
    print("loading gnf dataset")
    ds = datasets.Dataset(datafile)

    # log2 is undefined at zero.  This dataset has 6 values that are zero
    # and the next lowest value is 1, so everything below 1 is replaced
    # with 0.5 before the transform.
    # NOTE(review): the view below is built on ds, not on data, so this
    # relies on getData() handing back the dataset's own array rather than
    # a copy -- confirm.
    print("starting filtering")
    data = ds.getData()
    nRows = size(data, 0)
    nCols = size(data, 1)
    for r in range(nRows):
      for c in range(nCols):
        if data[r, c] < 1:
          data[r, c] = 0.5

    print("doing function transform")
    logged_ds = views.FunctionView(ds, log2)
    print("done log2 transform")

    print("saving xformed ds")
    # Close explicitly: the same path is read back just below, so the
    # output must be flushed to disk before DataSource opens it.
    out = open(datafile_log2, 'w')
    try:
      logged_ds.writeDataset(out)
    finally:
      out.close()
    logged_ds = None

  print("loading xformed ds")
  ds = DataSource(datafile_log2)
  ds.name = 'gnf-human-U133_GNF1B_101402-33689-genes-158-cond'

  # add row PCA view
  print("creating pca view")
  from compClust.mlx.views import RowPCAView
  ds.dataset.addViewDefault('RowPCAView', RowPCAView, ds.dataset)

  # (labeling name, file path, is-row-labeling) triples.  Apart from the
  # first two, each row labeling lives in a file named after the labeling.
  labelFiles = [
    ('Tissue', prefix + '.column_labeling.tab', False),
    ('ProbeId', prefix + '.row_labeling.tab', True),
  ]
  row_label_names = [
    'Aliases', 'Description', 'Ensembl', 'Function', 'Genome Location',
    'LocusLink', 'Name', 'Protein Families', 'RefSeq', 'Reporters',
    'Taxon', 'UniGene', 'UniProt',
  ]
  for label in row_label_names:
    labelFiles.append(
      (label, '%s.row_labeling-%s.tab' % (prefix, label), True))

  print("loading labelings")
  for name, path, isrow in labelFiles:
    ds.add_labeling(name, path, isrow, True, None)
  ds.primary = "ProbeId"
  ds.secondary = "Name"

  print("creating ginsu object")
  # The value is not used here; the attribute is still read before
  # pickling, as the original did.
  # NOTE(review): presumably outlier_analysis is built on first access and
  # is meant to be included in the pickle -- confirm before removing.
  ginsu = ds.outlier_analysis

  print("dumping pickle file")
  stream = open(picklefile, 'w')
  try:
    cPickle.dump(ds, stream)
  finally:
    stream.close()
  return ds

def log2(x):
  """Return the base-2 logarithm of x, computed with math.log."""
  base = 2
  return math.log(x, base)
