/* Copyright (C) 2014 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */

#include "common.h"
#include "obscure.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "jksql.h"
#include "bed.h"
#include "cart.h"
#include "expData.h"
#include "expRecord.h"
#include "hdb.h"
#include "microarray.h"

/* Make a 2D array out of the list of expDatas' expScores. */
struct maExpDataMatrix
{
    int nrow;
    int ncol;
    float **data;
};

float **maExpDataArray(struct expData *exps, int *pNrow, int *pNcol)
/* Make a 2D array out of a list of expDatas' expScores. */
{
struct expData *cur;
float **ret;
int i = 0;
if (!exps)
    return NULL;
*pNrow = slCount(exps);
*pNcol = exps->expCount;
AllocArray(ret, *pNrow);
for (cur = exps; cur != NULL; cur = cur->next)
    {
    float *thecopy = CloneArray(cur->expScores, *pNcol);
    ret[i++] = thecopy;
    }
return ret;
}

struct maExpDataMatrix *makeMatrix(struct expData *exps)
/* Convert the expData list into an easier-to-manipulate matrix. */
{
struct maExpDataMatrix *ret;
AllocVar(ret);
ret->data = maExpDataArray(exps, &ret->nrow, &ret->ncol);
return ret;
}

void freeExpDataMatrix(struct maExpDataMatrix **pMat)
/* Free up an maExpDataMatrix. */
{
int i;
struct maExpDataMatrix *mat;
if (!pMat || !(*pMat))
    return;
mat = *pMat;
for (i = 0; i < mat->nrow; i++)
    freeMem(mat->data[i]);
freeMem(mat->data);
freez(pMat);
}

void transposeExpDataMatrix(struct maExpDataMatrix **pMat)
/* Transpose the matrix i.e. mat[i][j] = mat[j][i]. */
/* This may come in handy later if the job is to combine genes instead of */
/* arrays... or both. */
{
int i, j;
struct maExpDataMatrix *mat, *newMat;
if (!pMat || !(*pMat))
    return;
mat = *pMat;
AllocVar(newMat);
AllocArray(newMat->data, mat->ncol);
for (i = 0; i < mat->ncol; i++)
    {
    AllocArray(newMat->data[i], mat->nrow);
    for (j = 0; j < mat->nrow; j++)
	newMat->data[i][j] = mat->data[j][i];
    }
newMat->nrow = mat->ncol;
newMat->ncol = mat->nrow;
for (i = 0; i < mat->nrow; i++)
    freeMem(mat->data[i]);
*pMat = newMat;
}

/* 2D int array to keep track of indeces in a grouping situation. FOr example */
/* the mapArray as follows x = */
/* [[ 2 0 2 0 0 0 ]  */
/*  [ 1 4 0 0 0 0 ]  */
/*  [ 2 1 3 0 0 0 ]  */
/* x is 3 x 6, so the original array had 5 columns.  The number of rows x has */
/* is the new number of columns.  The first number in each row of x keeps */
/* track of y number of columns being grouped, and the next y numbers in that */
/* row are the column indeces of the columns being grouped.  For example, */
/* using x, then if a = [[ 2 3 4 5 6 ]], then b grouped by x is */
/* b = [[ 3 6 4 ]]. */
struct mapArray 
{
    int nrow; 
    int ncol;
    int **data;
};

double maDoubleMeanHandleNA(int count, double *array)
/* Calculate the mean value of an array, skipping the NA vals. */
{
int i, num = 0;
double result = MICROARRAY_MISSING_DATA, sum = 0;
for (i = 0; i < count; i++)
    if (array[i] > MICROARRAY_MISSING_DATA)
	{
	sum += array[i];
	num++;
	}
if (num > 0) 
    result = sum/num;
return result;
}

double maDoubleMedianHandleNA(int count, double *array)
/* Return median value in array, skipping the NAs. */
/* This one will sort the inputted array as a side-effect. */
{
double median, *pastNA;
int countSansNA = count;
doubleSort(count, array);
pastNA = array;
while ((countSansNA > 0) && (*pastNA <= MICROARRAY_MISSING_DATA))
    {
    pastNA++;
    countSansNA--;
    }
if (countSansNA < 1)
    return MICROARRAY_MISSING_DATA;
if ((countSansNA&1) == 1)
    median = pastNA[countSansNA>>1];
else
    {
    countSansNA >>= 1;
    median = (pastNA[countSansNA] + pastNA[countSansNA-1]) * 0.5;
    }
return median;
}

static double *floatArrayToDoubleArray(int count, float *array)
/* Pretty stupid function but saves a line or two later. */
{
double *ret;
int i;
AllocArray(ret, count);
for (i = 0; i < count; i++)
    ret[i] = array[i];
return ret;
}

static float maCalcNewScore(float *expScores, int *mapRow, enum maCombineMethod method, int minExps)
/* Calculate a median, mean, etc for a group of scores and return it. */
{
double ret = MICROARRAY_MISSING_DATA;
double *grouping;
int k;
int realCount = 0;
if (!expScores || !mapRow)
    return MICROARRAY_MISSING_DATA;
AllocArray(grouping, mapRow[0]);
for (k = 0; k < mapRow[0]; k++)
    {
    grouping[k] = (double)expScores[mapRow[k+1]];
    if (grouping[k] > MICROARRAY_MISSING_DATA)
	realCount++;
    }
if ((realCount == 0) || ((minExps > 0) && (realCount < minExps)))
    ret = MICROARRAY_MISSING_DATA;
else if (method == useMedian)
    ret = maDoubleMedianHandleNA(mapRow[0], grouping);
else if (method == useMean)
    ret = maDoubleMeanHandleNA(mapRow[0], grouping);
freez(&grouping);
return (float)ret;
}

struct maExpDataMatrix *maCombineCols(struct maExpDataMatrix *origMat, struct mapArray *mapping, enum maCombineMethod method, int minExps)
/* Make a new matrix combining the columns of the given one however then */
/* return it. */
{
struct maExpDataMatrix *newMat;
int i, j;
if (!origMat)
    return NULL;
AllocVar(newMat);
newMat->nrow = origMat->nrow;
newMat->ncol = mapping->nrow;
AllocArray(newMat->data, newMat->nrow);
for (i = 0; i < newMat->nrow; i++)
    {
    AllocArray(newMat->data[i], newMat->ncol);
    for (j = 0; j < newMat->ncol; j++)
	newMat->data[i][j] = maCalcNewScore(origMat->data[i], mapping->data[j], method, minExps);
    }
return newMat;
}

struct expData *maExpDataCombineCols(struct expData *exps, struct mapArray *mapping, enum maCombineMethod method, int minExps)
/* median, mean, etc. the columns of the expData according to the grouping */
/* from the mapping array. Return a new expData list. */
{
struct expData *cur, *newList = NULL;
struct maExpDataMatrix *mat, *combinedMat;
int i;
if (!exps)
    return NULL;
mat = makeMatrix(exps);
combinedMat = maCombineCols(mat, mapping, method, minExps);
for (cur = exps, i = 0; cur != NULL; cur = cur->next, i++)
    {
    struct expData *newExp;
    AllocVar(newExp);
    newExp->name = cloneString(cur->name);
    newExp->expCount = mapping->nrow;
    newExp->expScores = combinedMat->data[i];
    slAddHead(&newList, newExp);
    }
slReverse(&newList);
freeMem(mat->data);
freez(&mat);
freez(&combinedMat);
return newList;
}

struct mapArray *maMappingFromGrouping(struct maGrouping *grouping)
/* Convert a microarrayGroups.ra style grouping to the array. */
{
struct mapArray *ret;
int i, offset = 0;
AllocVar(ret);
ret->nrow = grouping->numGroups;
ret->ncol = grouping->size + 1;
AllocArray(ret->data, ret->ncol);
for (i = 0; i < ret->nrow; i++)
    {
    int j; /* Iterator for grouping->groupSizes */
    int k = 1; /* Iterator for ret->data[i][] */
    AllocArray(ret->data[i], ret->ncol);
    ret->data[i][0] = grouping->groupSizes[i];
    for (j = offset; j < offset+grouping->groupSizes[i]; j++)
	ret->data[i][k++] = grouping->expIds[j];
    offset += grouping->groupSizes[i];
    }
return ret;
}

struct mapArray *mappingFromExpRecords(struct expRecord *erList, int extrasIndex)
/* Make a mapping matrix out of a list of expRecord structs using the */
/* specific grouping provided by the expRecord->extras[extrasIndex].  */
{
struct expRecord *er;
struct mapArray *ret;
struct slName *erNames = NULL;
int i;
if (!erList)
    return NULL;
if ((extrasIndex < 0) || (extrasIndex >= erList->numExtras))
    return NULL;
for (er = erList; er != NULL; er = er->next)
    slNameStore(&erNames, er->extras[extrasIndex]);
slReverse(&erNames);
AllocVar(ret);
ret->nrow = slCount(erNames);
ret->ncol = slCount(erList) + 1;
AllocArray(ret->data, ret->nrow);
for (i = 0; i < ret->nrow; i++)
    AllocArray(ret->data[i], ret->ncol);
for (er = erList, i = 0; er != NULL; er = er->next, i++)
    {
    int rowIx = slNameFindIx(erNames, er->extras[extrasIndex]);
    ret->data[rowIx][++ret->data[rowIx][0]] = i;    
    }
slNameFreeList(&erNames);
return ret;
}

struct maMedSpec *maMedSpecReadAll(char *fileName)
/* Read in file and parse it into maMedSpecs. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct maMedSpec *medList = NULL, *med;
char *row[256], *line;
int i, count;
while (lineFileNextReal(lf, &line))
    {
    char *name = nextQuotedWord(&line);
    char *group = nextQuotedWord(&line);
    if (group == NULL)
	lineFileShort(lf);
    count = chopLine(line, row);
    if (count < 1)
        lineFileShort(lf);
    if (count >= ArraySize(row))
        errAbort("Too many experiments in median line %d of %s",
		lf->lineIx, lf->fileName);
    AllocVar(med);
    med->name = cloneString(name);
    med->group = cloneString(group);
    med->count = count;
    AllocArray(med->ids, count);
    for (i=0; i<count; ++i)
        {
	char *asc = row[i];
	if (!isdigit(asc[0]))
	    errAbort("Expecting number got %s line %d of %s", asc,
	    	lf->lineIx, lf->fileName);
	med->ids[i] = atoi(asc);
	}
    slAddHead(&medList, med);
    }
lineFileClose(&lf);
slReverse(&medList);
return medList;
}

struct mapArray *mappingFromMedSpec(struct maMedSpec *specList)
/* Make an array for mapping out of the maMedSpec.ra files */
/* used by hgMedianMicroarray for the gene sorter. */
{
struct maMedSpec *spec;
struct mapArray *ret;
int numTotalExps = 0;
int i;
if (specList == NULL)
    return NULL;
for (spec = specList; spec != NULL; spec = spec->next)
    numTotalExps += spec->count;
AllocVar(ret);
ret->nrow = slCount(specList);
ret->ncol = numTotalExps + 1;
AllocArray(ret->data, ret->nrow);
for (spec = specList, i = 0; spec != NULL; spec = spec->next, i++)
    {
    int j;
    AllocArray(ret->data[i], ret->ncol);
    for (j = 0; j < spec->count; j++)
	{
	ret->data[i][0]++;
	ret->data[i][j+1] = spec->ids[j];
	}
    }
return ret;
}

struct expData *maExpDataGroupByExtras(struct expData *exps, struct expRecord *erList, int extrasIndex, enum maCombineMethod method, int minExps)
/* Combine experiments using mean or median and the grouping defined by the */
/* expRecord/extrasIndex combination. */
{
struct mapArray *mapping = mappingFromExpRecords(erList, extrasIndex);
struct expData *ret = maExpDataCombineCols(exps, mapping, method, minExps);
freeMem(mapping->data);
freez(&mapping);
return ret;
}

struct expData *maExpDataMeanByExtras(struct expData *exps, struct expRecord *erList, int extrasIndex, int minExps)
/* Given the data, the expRecords, and the index for the grouping, mean the */
/* columns in the data. */
{
return maExpDataGroupByExtras(exps, erList, extrasIndex, useMean, minExps);
}

struct expData *maExpDataMedianByExtras(struct expData *exps, struct expRecord *erList, int extrasIndex, int minExps)
/* Given the data, the expRecords, and the index for the grouping, median the */
/* columns in the data. */
{
return maExpDataGroupByExtras(exps, erList, extrasIndex, useMedian, minExps);
}

struct expData *maExpDataMedianFromSpec(struct expData *exps, struct maMedSpec *specs, int minExps)
/* Given the data, the and a maMedSpec list, median the columns in the data. */
{
struct mapArray *mapping = mappingFromMedSpec(specs);
struct expData *ret = maExpDataCombineCols(exps, mapping, useMedian, minExps);
freeMem(mapping->data);
freez(&mapping);
return ret;
}

int maExpDataNumBadScoresForProbe(struct expData *exps, char *probeName)
/* Given a probe name, find that probe's data in the list and report */
/* the # of NA (< -9999) values in the expScores. I.e. how many bad */
/* experiments are there. Return -1 if name not found. */
{
struct expData *exp;
int numBad = 0;
boolean foundName = FALSE;
if (probeName == NULL)
    return -1;
for (exp = exps; exp != NULL; exp = exp->next)
    if (sameString(probeName, exp->name))
	{
	int i;
	foundName = TRUE;
	for (i = 0; i < exp->expCount; i++)
	    if (exp->expScores[i] <= MICROARRAY_MISSING_DATA)
		numBad++;
	break;
	}
return (foundName) ? numBad : -1;
}

int maExpDataNumBadScoresForOneArray(struct expData *exps, struct expRecord *ers, char *arrayName)
/* Find # of bad datapoints for a certain column (array) in the dataset. */
/* return -1 if not found. */
{
int expId = -1, numBad = 0;
struct expRecord *findMe;
struct expData *exp;
if (arrayName == NULL)
    return -1;
for (findMe = ers; findMe != NULL; findMe = findMe->next)
    if (sameString(findMe->name, arrayName))
	{
	expId = findMe->id;
	break;
	}
if (expId < 0)
    return -1;
for (exp = exps; exp != NULL; exp = exp->next)
    {
    if (expId >= exp->expCount)
	return -1;
    if (exp->expScores[expId] <= MICROARRAY_MISSING_DATA)
	numBad++;
    }
return numBad;
}

float maExpDataOverallMean(struct expData *exps)
/* Return the overall mean for the expression data. */
{
double sum = 0;
long goodVals = 0;
int i;
struct expData *exp;
if (exps == NULL)
    return -1;
for (exp = exps; exp != NULL; exp = exp->next)
    {
    for (i = 0; i < exp->expCount; i++)
	{
	if (exp->expScores[i] > MICROARRAY_MISSING_DATA)
	    {
	    goodVals++;
	    sum += exp->expScores[i];
	    }
	}
    }
return (float)(sum/(double)goodVals);
}

float maExpDataOverallMedian(struct expData *exps)
/* Return the overall median for the expression data. */
{
double *expVals;
int goodVals = 0;
int i;
float ret = 0;
struct expData *exp;
if (exps == NULL)
    return -1;
AllocArray(expVals, slCount(exps) * exps->expCount);
for (exp = exps; exp != NULL; exp = exp->next)
    {
    for (i = 0; i < exp->expCount; i++)
	{
	if (exp->expScores[i] > MICROARRAY_MISSING_DATA)
	    expVals[goodVals++] = exp->expScores[i];
	}
    }
ret = (float)doubleMedian(goodVals, expVals);
freez(&expVals);
return ret;
}

void maExpDataClipMin(struct expData *exps, float minGood, float minVal)
/* If an expData->expScores datapoint is < minGood, set it to minVal. */
{
struct expData *exp;
for (exp = exps; exp != NULL; exp = exp->next)
    {
    int i;
    for (i = 0; i < exp->expCount; i++)
	if ((exp->expScores[i] > MICROARRAY_MISSING_DATA) && (exp->expScores[i] < minGood))
	    exp->expScores[i] = minVal;
    }
}

struct expData *maExpDataTranspose(struct expData *exps)
/* Return a transposed copy of the expData. */
{
struct expData *transposed = NULL;
int probeCount, expCount, i;
if (!exps)
    errAbort("Tried to transpose empty expData");
expCount = exps->expCount;
probeCount = slCount(exps);
for (i = 0; i < expCount; i++)
    {
    struct expData *newProbe, *cur;
    char name[256];
    int j;
    AllocVar(newProbe);
    safef(name, sizeof(name), "array-%d", i);
    newProbe->name = cloneString(name);
    newProbe->expCount = probeCount;
    AllocArray(newProbe->expScores, newProbe->expCount);
    for (j = 0, cur = exps; (cur != NULL) && (j < probeCount); cur = cur->next, j++)
	{
	if (i >= cur->expCount)
	    errAbort("There needs to be a uniform expCount in given expData in order to transpose");
	newProbe->expScores[j] = cur->expScores[i];
	}
    slAddHead(&transposed, newProbe);
    }
slReverse(&transposed);
return transposed;
}

void maExpDataUntranspose(struct expData *exps, struct expData *transpose)
/* Copy values over from the transposed expData. */
{
int expCount = transpose->expCount;
int probeCount = exps->expCount;
int i; 
struct expData *exp;
for (exp = exps, i = 0; (exp != NULL) && (i < expCount); exp = exp->next, i++)
    {
    struct expData *trans;
    int j;
    for (j = 0, trans = transpose; (j < probeCount) && (trans != NULL); j++, trans = trans->next)
	exp->expScores[j] = trans->expScores[i];
    }
}

void maExpDataDoLogRatioMeanOrMedian(struct expData *exps, boolean mean)
/* For the M x N sized expression matrix, change each value to be the ratio */
/* of that value to the mean or median of values in that value's row (gene). */
{
struct expData *exp;
for (exp = exps; exp != NULL; exp = exp->next)
    {
    double *tmpVals;
    int i;
    double q, invQ;
    AllocArray(tmpVals, exp->expCount);
    for (i = 0; i < exp->expCount; i++)
	tmpVals[i] = exp->expScores[i];
    q = (mean) ? maDoubleMeanHandleNA(exp->expCount, tmpVals) : maDoubleMedianHandleNA(exp->expCount, tmpVals);
    freez(&tmpVals);
    if (q <= 0)
	{
	for (i = 0; i < exp->expCount; i++)
	    exp->expScores[i] = MICROARRAY_MISSING_DATA;
	}
    else
	{
	invQ = 1/q;
	for (i = 0; i < exp->expCount; i++)
	    if (exp->expScores[i] > MICROARRAY_MISSING_DATA)
		exp->expScores[i] = logBase2(exp->expScores[i] * invQ);
	}
    }
}

void maExpDataDoLogRatioTranspose(struct expData *exps, boolean mean)
/* For the M x N sized expression matrix, change each value to be the ratio */
/* of that value to the mean or median of values in that value's column (probe). */
/* This involves no clumping of experiments to calculate the median/mean. */
{
struct expData *transpose = maExpDataTranspose(exps);
maExpDataDoLogRatioMeanOrMedian(transpose, mean);
maExpDataUntranspose(exps, transpose);
expDataFreeList(&transpose);
}

void maExpDataDoLogRatioClumping(struct expData *exps, struct mapArray *mapping, enum maCombineMethod method)
/* Sort of an intermediary function to be called by maExpDataDoLogRatioMedianOfMedians or */
/* maExpDataDoLogRatioMeanOfMeans. */
{
struct expData *cur;
struct maExpDataMatrix *originalMat = makeMatrix(exps);
struct maExpDataMatrix *clumpedMat = maCombineCols(originalMat, mapping, method, 0);
int i, j;
if (!clumpedMat)
    return;
for (i = 0, cur = exps; (cur != NULL) && (i < clumpedMat->nrow); cur = cur->next, i++)
    {
    double *tmpArray = floatArrayToDoubleArray(clumpedMat->ncol, clumpedMat->data[i]);
    double q = (method == useMean) ? maDoubleMeanHandleNA(clumpedMat->ncol, tmpArray) :
	maDoubleMedianHandleNA(clumpedMat->ncol, tmpArray);
    double invQ;
    freeMem(tmpArray);
    if (q <= 0)
	{
	for (j = 0; j < cur->expCount; j++) 
	    cur->expScores[j] = MICROARRAY_MISSING_DATA;
	}
    else
	{
	invQ = 1/q;
	for (j = 0; j < cur->expCount; j++)
	    cur->expScores[j] = (cur->expScores[j] > 0) ? 
		logBase2(invQ * cur->expScores[j]) : cur->expScores[j];
	}
    }
freeExpDataMatrix(&originalMat);
freeExpDataMatrix(&clumpedMat);
}

void maExpDataDoLogRatioClumpExpRecord(struct expData *exps, struct expRecord *erList, int extrasIndex, enum maCombineMethod method)
/* Log ratio the expData list.  The ratio of the denominator is the median of */
/* medians or the mean of means of each group of experiments.  This grouping */
/* is defined by the expRecord/extrasIndex combination. The log is base 2. */
{
struct mapArray *mapping = mappingFromExpRecords(erList, extrasIndex);
maExpDataDoLogRatioClumping(exps, mapping, method);
freeMem(mapping->data);
freez(&mapping);
}

void maExpDataDoLogRatioGivenMedSpec(struct expData *exps, struct maMedSpec *specList, enum maCombineMethod method)
/* Same as maExpDataDoLogRatioClumpExpRecord except use the maMedSpec as the */
/* thing to base the groupings off of. */
{
struct mapArray *mapping;
if (specList == NULL) 
    {
    if (method == useMedian)
	maExpDataDoLogRatioMeanOrMedian(exps, FALSE);
    else if (method == useMean)
	maExpDataDoLogRatioMeanOrMedian(exps, TRUE);
    return;
    }
mapping = mappingFromMedSpec(specList);
maExpDataDoLogRatioClumping(exps, mapping, method);
freeMem(mapping->data);
freez(&mapping);
}

/* static void uglyPrintSlInt(struct slInt *list) */
/* { */
/* struct slInt *item; */
/* int i = 0; */
/* for (item = list; item != NULL; item = item->next) */
/*     uglyf("i = %d, item->val = %d\n", i++, item->val); */
/* } */

static struct slRef *groupingLoL(struct maGrouping *grouping)
/* return the grouping groups array as an slRef list of slInts */
{
struct slRef *list = NULL;
int i;
int off = 0;
for (i = 0; i < grouping->numGroups; i++)
    {
    struct slInt *ints = NULL;
    int j;
    for (j = off; j < off + grouping->groupSizes[i]; j++)
	{
	struct slInt *oneInt = slIntNew(grouping->expIds[j]);
	slAddHead(&ints, oneInt);
	}
    slReverse(&ints);
    refAdd(&list, ints);
    off += grouping->groupSizes[i];
    }
slReverse(&list);
return list;
}

static void maPruneLoL(struct slRef *lol, struct slInt *subsetList)
/* remove things from list where the subset */
{
struct slRef *ref;
for (ref = lol; ref != NULL; ref = ref->next)
    {
    struct slInt *intList = (struct slInt *)ref->val;
    struct slInt *cur;
    struct slInt *newList = NULL;
    while ((cur = slPopHead(&intList)) != NULL)
	{
	if (slIntFind(subsetList, cur->val))
	    slAddHead(&newList, cur);
	else
	    freeMem(cur);
	}
    if (newList)
	slSort(&newList, slIntCmp);
    ref->val = newList;
    }
}

static struct slInt *makeSubsetSlInt(struct maGrouping *subset, int subsetOffset)
/* just make a linked-list out of an array */
{
struct slInt *list = NULL;
if (subset)
    {
    int i;
    int j = 0;
    for (i = 0; i < subsetOffset; i++)
	j += subset->groupSizes[i];
    for (i = j; i < j + subset->groupSizes[subsetOffset]; i++)
	{
	struct slInt *newint = slIntNew(subset->expIds[i]);
	slAddHead(&list, newint);
	}
    slReverse(&list);
    }
return list;
}

static void maPruneGroupingWithSubset(struct maGrouping *grouping, struct maGrouping *subset, int subsetOffset)
/* remove all the expIds not contained in the subset from the given grouping, */
/* and remove groups that have no expIds left */
{
if (grouping && subset && (subsetOffset < subset->numGroups) && (subsetOffset >= 0))
    {
    int i, j, k;
    int newSize = subset->groupSizes[subsetOffset];
    struct slRef *lol = groupingLoL(grouping);
    struct slRef *cur;
    struct slInt *ssList = makeSubsetSlInt(subset, subsetOffset);
    int *newExpIds;
    int newNumGroups = 0;
    char **newNames;
    int *newGroupSizes;
    maPruneLoL(lol, ssList);
    for (cur = lol; cur != NULL; cur = cur->next)
	{
	struct slInt *curList = cur->val;
	if (slCount(curList) > 0)
	    newNumGroups++;
	}
    AllocArray(newGroupSizes, newNumGroups);
    AllocArray(newNames, newNumGroups);
    AllocArray(newExpIds, newSize);
    i = 0;
    j = 0;
    for (k = 0, cur = lol; (k < grouping->numGroups) && (cur != NULL); k++, cur = cur->next)
	{
	struct slInt *curList = cur->val;
	int groupSize = slCount(curList);
	if (groupSize > 0)
	    {
	    struct slInt *intcur;
	    for (intcur = curList; intcur != NULL; intcur = intcur->next)
		newExpIds[j++] = intcur->val;
	    newNames[i] = cloneString(grouping->names[k]);
	    newGroupSizes[i] = groupSize;
	    i++;
	    }
	freeMem(grouping->names[k]);
	}
    freeMem(grouping->names);
    freeMem(grouping->groupSizes);
    freeMem(grouping->expIds);
    grouping->names = newNames;
    grouping->groupSizes = newGroupSizes;
    grouping->numGroups = newNumGroups;
    grouping->expIds = newExpIds;
    grouping->size = newSize;
    }
}

struct expData *maExpDataClumpGivenGrouping(struct expData *exps, struct maGrouping *grouping)
/* Clump expDatas from a grouping from a .ra file. */
{
struct mapArray *mapping;
struct expData *ret = NULL;
enum maCombineMethod combine = useMedian;
char *combType;
/* if (!grouping->type || sameWord(grouping->type, "all")) */
/*     return NULL; */
combType = cloneString(grouping->type);
eraseWhiteSpace(combType);
if (sameWord(combType, "combinemean"))
    combine = useMean;
mapping = maMappingFromGrouping(grouping);
ret = maExpDataCombineCols(exps, mapping, combine, 0);
freeMem(mapping->data);
freez(&mapping);
freeMem(combType);
return ret;
}

void maExpDataAddConstant(struct expData *exps, double c)
/* Add a constant c to all of the microarray data. Ensure that NA values */
/* aren't inadvertantly created. */
{
struct expData *exp;
for (exp = exps; exp != NULL; exp = exp->next)
    {
    int i;
    for (i = 0; i < exp->expCount; i++)
	{
	if (c <= MICROARRAY_MISSING_DATA)
	    errAbort("error: Constant used caused expData value to fall into the NA range");
	if (exp->expScores[i] > MICROARRAY_MISSING_DATA)
	    exp->expScores[i] = exp->expScores[i] + c;
	}
    }
}

/* Functions for getting information from the microarrayGroups.ra files */
/* that are in the hgCgiData/<org>/<database> .ra tree. */

void maGroupingFree(struct maGrouping **pMag)
/* free up a maGrouping */
{
int i, size;
struct maGrouping *mag;
if (!pMag || !*pMag)
    return;
mag = *pMag;
size = sameString(mag->type, "all") ? mag->size : mag->numGroups;
freeMem(mag->name);
freeMem(mag->description);
freeMem(mag->expIds);
freeMem(mag->groupSizes);
freeMem(mag->type);
for (i = 0; i < size; i++)
    freeMem(mag->names[i]);
freeMem(mag->names);
freez(pMag);
}

void maGroupingFreeList(struct maGrouping **pList)
/* Free up a list of maGroupings. */
{
struct maGrouping *el, *next;
for (el = *pList; el != NULL; el = next)
    {
    next = el->next;
    maGroupingFree(&el);
    }
*pList = NULL;
}

void microarrayGroupsFree(struct microarrayGroups **pGroups)
/* Free up the microarrayGroups struct. */
{
struct microarrayGroups *groups;
if (!pGroups || ((groups = *pGroups) == NULL))
    return;
maGroupingFree(&groups->allArrays);
maGroupingFreeList(&groups->combineSettings);
maGroupingFreeList(&groups->subsetSettings);
freez(pGroups);
}

struct maGrouping *maHashToMaGrouping(struct hash *oneGroup)
/* This converts a single "stanza" of the microarrayGroups.ra file to a */
/* maGrouping struct. */
{
struct maGrouping *ret;
char *s;
struct slName *wordList = NULL, *oneWord;
int i = 0;
AllocVar(ret);
/* required settings. */
ret->name = cloneString((char *)hashMustFindVal(oneGroup, "name"));
ret->type = cloneString((char *)hashMustFindVal(oneGroup, "type"));
ret->description = cloneString((char *)hashMustFindVal(oneGroup, "description"));
s = hashMustFindVal(oneGroup, "expIds");
wordList = slNameListFromComma(s);
ret->size = slCount(wordList);
AllocArray(ret->expIds, ret->size);
for (oneWord = wordList; oneWord != NULL; oneWord = oneWord->next)
    ret->expIds[i++] = atoi(oneWord->name);
slNameFreeList(&wordList);
s = hashMustFindVal(oneGroup, "names");
wordList = slNameListFromComma(s);
i = 0;
ret->numGroups = slCount(wordList);
AllocArray(ret->names, ret->numGroups);
for (oneWord = wordList; oneWord != NULL; oneWord = oneWord->next)
    ret->names[i++] = cloneString(oneWord->name);
slNameFreeList(&wordList);
s = hashMustFindVal(oneGroup, "groupSizes");
wordList = slNameListFromComma(s);
i = 0;
AllocArray(ret->groupSizes, ret->numGroups);
if (ret->numGroups != slCount(wordList))
    errAbort("Bad format of microarrayGroups.ra in %s, groupSizes size "
	     "= %d, != to names size = %d", ret->name, slCount(wordList), ret->numGroups);
for (oneWord = wordList; oneWord != NULL; oneWord = oneWord->next)
    ret->groupSizes[i++] = atoi(oneWord->name);
slNameFreeList(&wordList);
return ret;
}

struct maGrouping *maGetGroupingsFromList(char *commaList, struct hash *allGroupings)
/* Load all the maGroupings from a comma-delimited list. */
{
struct slName *list = slNameListFromComma(commaList), *oneItem;
struct maGrouping *retList = NULL;
for (oneItem = list; oneItem != NULL; oneItem = oneItem->next)
    {
    struct hash *oneGrouping = hashMustFindVal(allGroupings, oneItem->name);
    struct maGrouping *newGroup = maHashToMaGrouping(oneGrouping);
    slAddHead(&retList, newGroup);
    }
slNameFreeList(&list);
slReverse(&retList);
return retList;
}

struct maGrouping *maGetGroupingFromCt(struct customTrack *ct)
/* Spoof an "all" maGrouping from a customTrack. */
{
struct maGrouping *ret;
char *words = trackDbRequiredSetting(ct->tdb, "expNames");
struct slName *list = slNameListFromComma(words), *oneItem;
int i;
AllocVar(ret);
/* ret->name not used in custom tracks. */
ret->type = cloneString("all");
ret->description = cloneString("Custom Track");
ret->size = slCount(list);
ret->numGroups = ret->size;
AllocArray(ret->expIds, ret->size);
AllocArray(ret->groupSizes, ret->size);
AllocArray(ret->names, ret->size);
for (oneItem = list, i = 0; (oneItem != NULL) && (i < ret->size); oneItem = oneItem->next, i++)
    {
    ret->expIds[i] = i;
    ret->groupSizes[i] = 1;
    ret->names[i] = cloneString(oneItem->name);
    }
slFreeList(&list);
return ret;
}

struct bed *ctLoadMultScoresBedDb(struct customTrack *ct, char *chrom, int start, int end)
/* If the custom track is stored in a database, load it. */
{
int rowOffset;
char **row;
struct sqlConnection *conn = hAllocConn(CUSTOM_TRASH);
struct sqlResult *sr;
struct bed *bed, *bedList = NULL;
sr = hRangeQuery(conn, ct->dbTableName, chrom, start, end,
		 NULL, &rowOffset);
while ((row = sqlNextRow(sr)) != NULL)
    {
    bed = bedLoadN(row+rowOffset, ct->fieldCount);
    slAddHead(&bedList, bed);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
slReverse(&bedList);
return bedList;
}

struct microarrayGroups *maGetTrackGroupings(char *database, struct trackDb *tdb)
/* Get the settings from the .ra files and put them in a convenient struct. */
{
struct microarrayGroups *ret;
char *groupings = trackDbSetting(tdb, "groupings");
char *s = NULL;
struct hash *allGroups;
struct hash *mainGroup;
struct hash *tmpGroup;
struct hash *hashList; 
if (groupings == NULL)
    return NULL;
hashList = 
    hgReadRa(hGenome(database), database, "hgCgiData", 
	     "microarrayGroups.ra", &allGroups);
if (allGroups == NULL)
    errAbort("Could not get group settings for track.");
AllocVar(ret);
mainGroup = hashMustFindVal(allGroups, groupings);
s = hashMustFindVal(mainGroup, "all");
tmpGroup = hashMustFindVal(allGroups, s);
ret->allArrays = maHashToMaGrouping(tmpGroup);
s = hashFindVal(mainGroup, "combine");
if (s)
    {
    ret->combineSettings = maGetGroupingsFromList(s, allGroups);
    s = hashFindVal(mainGroup, "combine.default");
    if (s)
	{
	struct maGrouping *cur;
	if (lastChar(s) == ',')
	    s[strlen(s)-1] = '\0';
	for (cur = ret->combineSettings; cur != NULL; cur = cur->next)
	    if (sameWord(s, cur->name))
		{
		ret->defaultCombine = cur;
		break;
		}
	if (ret->defaultCombine == NULL)
	    errAbort("$%s$ not a valid combine.default in %s", s, groupings);
	}
    }
s = hashFindVal(mainGroup, "subset");
if (s)
    ret->subsetSettings = maGetGroupingsFromList(s, allGroups);
ret->numCombinations = slCount(ret->combineSettings);
ret->numSubsets = slCount(ret->subsetSettings);
hashFreeList(&hashList);
return ret;
}

struct maGrouping *maGetGrouping(struct microarrayGroups *groupings, char *name)
/* Return the specfic grouping (combine or subset), or NULL if not found */
{
struct maGrouping *ret;
for (ret = groupings->combineSettings; ret != NULL; ret = ret->next)
    if (sameString(ret->name, name))
	return ret;
for (ret = groupings->subsetSettings; ret != NULL; ret = ret->next)
    if (sameString(ret->name, name))
	return ret;
return NULL;
}

struct maGrouping *maCombineGroupingFromCart(struct microarrayGroups *groupings, 
				    struct cart *cart, char *trackName)
/* Determine which grouping to use based on the cart status or lack thereof. */
{
char *setting = NULL;
char *cartVar = expRatioCombineDLName(trackName);
/* Possibly NULL from custom trackness. */
if (!groupings)
    return NULL;
setting = cartUsualString(cart, cartVar, NULL);
if (setting && sameWord(groupings->allArrays->name, setting))
    return groupings->allArrays;
if (setting)
    {
    struct maGrouping *cur;
    for (cur = groupings->combineSettings; cur != NULL; cur = cur->next)
	if (sameWord(cur->name, setting))
	    return cur;
    }
if (groupings->defaultCombine)
    return groupings->defaultCombine;
return groupings->allArrays;
}

/* int maSubsetOffsetFromCart(struct microarrayGroups *groupings, struct cart *cart, char *trackName) */

struct maGrouping *maSubsetGroupingFromCart(struct microarrayGroups *groupings, 
				    struct cart *cart, char *trackName)
/* Determine which grouping to use based on the cart status or lack thereof. */
{
char *setting = NULL;
char *cartVar = expRatioSubsetRadioName(trackName, groupings);
/* Possibly NULL from custom trackness. */
if (!groupings)
    return NULL;
setting = cartUsualString(cart, cartVar, NULL);
if (setting)
    {
    struct maGrouping *cur;
    for (cur = groupings->subsetSettings; cur != NULL; cur = cur->next)
	if (sameWord(cur->name, setting))
	    return cur;
    }
return NULL;
}

int maSubsetOffsetFromCart(struct maGrouping *subset, struct cart* cart, char *trackName)
{
int setting;
char *cartVar;
if (!subset)
    return -1;
cartVar = expRatioSubsetDLName(trackName, subset);
setting = cartUsualInt(cart, cartVar, -1);
return setting;
}

/********* Dealing with BED. ************/

struct expData *maExpDataFromExpBed(struct bed *oneBed)
/* Convert a bed record's expScores into an expData. */
{
struct expData *ret = NULL;
AllocVar(ret);
ret->name = cloneString(oneBed->name);
ret->expCount = oneBed->expCount;
if (ret->expCount > 0) 
    ret->expScores = CloneArray(oneBed->expScores, oneBed->expCount);
return ret;
}

struct expData *maExpDataListFromExpBedList(struct bed *bedList)
/* Convert list of bed 15 records to a list of expDatas. */
{
struct expData *newList = NULL;
struct bed *cur;
for (cur = bedList; cur != NULL; cur = cur->next)
    {
    struct expData *addMe = maExpDataFromExpBed(cur);
    slAddHead(&newList, addMe);
    }
slReverse(&newList);
return newList;
}

void maBedClumpGivenGrouping(struct bed *bedList, struct maGrouping *grouping, struct maGrouping *subset, int subsetOffset)
/* Clump (mean/median) a bed 15 given the grouping kind. */
{
struct expData *exps = maExpDataListFromExpBedList(bedList);
struct expData *clumpedExps;
struct bed *bed;
struct expData *exp;
if (subset)
    maPruneGroupingWithSubset(grouping, subset, subsetOffset);
clumpedExps = maExpDataClumpGivenGrouping(exps, grouping);
expDataFreeList(&exps);
if (!clumpedExps)
    return;
/* Go through each bed and copy over the new expDatas */
/* and free up what was there. */
for (bed = bedList, exp = clumpedExps; (bed != NULL) && (exp != NULL); 
     bed = bed->next, exp = exp->next)
    {
    int i;
    bed->expCount = exp->expCount;
    if (bed->expScores)
	freeMem(bed->expScores);
    if (bed->expIds)
	freeMem(bed->expIds);
    bed->expScores = CloneArray(exp->expScores, exp->expCount);
    AllocArray(bed->expIds, bed->expCount);
    for (i = 0; i < bed->expCount; i++)
	bed->expIds[i] = i;
    }
expDataFreeList(&clumpedExps);
}

enum expColorType getExpColorType(char *colorScheme)
/* From a color type return the respective enum. */
{
if (sameString(colorScheme, "redBlue"))
    return redBlue;
if (sameString(colorScheme, "yellowBlue"))
    return yellowBlue;
if (sameString(colorScheme, "redBlueOnWhite"))
    return redBlueOnWhite;
if (sameString(colorScheme, "redBlueOnYellow"))
    return redBlueOnYellow;
return redGreen;
}

char *expRatioCombineDLName(char *trackName)
{
char dropDownName[128];
safef(dropDownName, sizeof(dropDownName), "%s.combine", trackName);
return cloneString(dropDownName);
}

char *expRatioSubsetRadioName(char *trackName, struct microarrayGroups *groupings)
{
char radioVarName[128];
safef(radioVarName, sizeof(radioVarName), "%s.subset", trackName);
return cloneString(radioVarName);
}

char *expRatioSubsetDLName(char *trackName, struct maGrouping *group)
{
char dropVarName[256];
safef(dropVarName, sizeof(dropVarName), "%s.subset.%s", trackName, group->name);
return cloneString(dropVarName);
}
