/*
 * PeakSeq
 * Version 1.01
 * Paper by Joel Rozowsky, et. al.
 * Coded in C by Theodore Gibson.
 * simulator.c
 * This module deals with the simulation for one set of parameters.
 */


#include <stdio.h>
#include "simulator.h"
#include "window.h"
#include "random.h"
#include "util.h"
#include "config.h"


//----------------------------------------------------------------------------
// PRIVATE STRUCTURE DEFINITIONS
//----------------------------------------------------------------------------
/*
 * This structure holds the information for a position.
 * Pos is a pointer to this structure.
 */
typedef struct
{
	// The position.
	int pos;
	// The score at this position.  Score is incremented once for each read
	// that begins at this position and decremented once for each read that
	// ends at this position.
	int score;
}* Pos;

/*
 * This structure holds an array of positions.
 * Ps is a pointer to this structure.
 */
typedef struct
{
	// The array of positions.
	Pos* ps;
	// The number of positions in the array.
	int nps;
}* Ps;

struct thresh
{
	// This flag tells whether a stop or start position is currently sought.
	// 0 indicates that a start of a peak is sought and 1 indicates that a
	// stop is sought.
	int flag;
	// This counts the number of peaks found so far.
	int count;
	// This stores the most-recently-found stop position.  It is used to
	// determine whether the next peak should be merged with the previous peak
	// or if the new peak should be called a new peak.
	int stop;

	// At the end of all simulations this field will contain the average
	// count.
	double avg;
};


//----------------------------------------------------------------------------
// PRIVATE FUNCTION PROTOTYPES
//----------------------------------------------------------------------------
Thresh newThresh( void );
void initializeThreshArray( Thresh* ta );
void simulate( Window w, Thresh* ta );
Ps mergesortPs( Window w, const int num );
Ps newPs( const int nps );
Pos newPos( const int position, const int score );
void freePs( Ps array );
Ps mergePs( Ps left, Ps right );
void walk( Ps array, Thresh* ta );


//----------------------------------------------------------------------------
// PUBLIC FUNCTIONS
//----------------------------------------------------------------------------
/*
 * This function allocates memory for a new array of thresh structures.
 * Outputs a pointer to the array.
*/
Thresh* newThreshArray( void )
{
	// Allocate memory for the array.
	Thresh* ta = safe_malloc( N_THRESHES * sizeof(Thresh) );

	// Initialize each thresh structure.
	for( int i = 0; i < N_THRESHES; i++)
		ta[i] = newThresh();

	return ta;
}

void simulator( Window w, Thresh* ta )
{
	// This loop runs once for each simulation.
	for( int i = 0; i < N_SIMS; i++ )
	{
		// Reset all the fields in the threshes structure except for
		// the running average counts.
		initializeThreshArray( ta );

		simulate( w, ta );	// Run this simulation.
	}
}

int findThresh( Window w, Thresh* ta, long double* fdrp )
{
	// Check if the minimum threshold qualifies.
	/// Calculate the FDR at the minimum threshold.
	(*fdrp) = ((long double) ta[0]->avg) / getWCount( w, MIN_THRESH );

	// If the FDR is below the minimum threshold and the average count is
	// decreasing, the minimum threshold qualifies.
	if( ( (*fdrp) <= MIN_FDR ) && ( ta[1]->avg < ta[0]->avg ) )
		return MIN_THRESH;

	// Initialize the threshold index to the index of the second-lowest
	// threshold.
	int ti = 1;

	// Advance until the counts begin decreasing.
	while( (ti < N_THRESHES) && ( ta[ti]->avg >= ta[ti-1]->avg)  ) ti++;

	// Once the counts are decreasing, find the first value that qualifies as
	// below the minimum FDR.
	for(; ti < N_THRESHES; ti++)
	{
		// Correct for the case that the actual count (the denominator) is 0.
		if( getWCount( w, ti + MIN_THRESH ) == 0 )
		{
			(*fdrp) = 0.0L;
			return( ti + MIN_THRESH );
		}

		// Otherwise, calculate the FDR at this threshold.
		(*fdrp) = ((long double) ta[ti]->avg) /
			getWCount( w, ti + MIN_THRESH );

		// If the FDR is below the required FDR, then this is the threshold.
		if( (*fdrp) <= MIN_FDR )
			return (ti + MIN_THRESH);
	}

	// If no value is found, return -1 indicating a failure to find a match.
	return -1;
}

void freeThreshArray( Thresh* ta )
{
	// Free the thresh structures.
	for( int i = 0; i < N_THRESHES; i++ )
		free( ta[i] );

	// Free the array.
	free( ta );
}


//----------------------------------------------------------------------------
// PRIVATE FUNCTIONS
//----------------------------------------------------------------------------
/*
 * This function allocates memory for a new thresh structure and initializes
 * 	its fields.
 * Outputs a pointer to the new structure.
 */
Thresh newThresh( void )
{
	// Allocate memory for the structure.
	Thresh t = safe_malloc( sizeof(*t) );

	// Initialize the only field that is not initializes before each run of
	// the simulation.
	t->avg = 0.0;

	return t;
}

/*
 * This function initializes the fields of all the thresh structures at the
 * 	beginning of each simulation except the running average count field.
 * ta: a pointer to the array of thresh structures whose fields are to be
 * 	reset.
 */
void initializeThreshArray( Thresh* ta )
{
	// This loop runs for each thresh structure.
	for(int i = 0; i < N_THRESHES; i++)
	{
		// Reset the fields.
		ta[i]->flag = 0;
		ta[i]->count = 0;

		// Set the stop position so that even if the first peak starts at
		// position 0 it will be counted as a new peak.
		ta[i]->stop = -MAX_GAP - 1;
	}
}

/*
 * This function runs one simulation for a given set of parameters.
 * w: a pointer to the structure containing data about this window.
 * ta: a pointer to the thresh array containing all information that differs
 * 	between thresholds.
 */
void simulate( Window w, Thresh* ta )
{
	// Get the sorted array of simulated positions.
	Ps array = mergesortPs( w, getWNReads( w ) );

	// Walk down the positions beginning at the lowest position and ending at
	// the highest position keeping track of height and cataloguing when each
	// threshold is exceeded.
	walk( array, ta );

	// For each thresh structure, add the count to the numerator of the
	// average field.
	for( int i = 0; i < N_THRESHES; i++ )
		ta[i]->avg += ( ((double) ta[i]->count) / N_SIMS );
}

/*
 * This function simulates all the positions and sorts them using a standard
 * 	mergesort algorithm.
 * w: a pointer to the window structure containing all data about this window.
 * num: the number of reads to be simulated and sorted by this function.
 * Outputs a pointer to the sorted array of positions.
 */
Ps mergesortPs( Window w, const int num )
{
	// If there is one read to be simulated by this function, create the read.
	if( num == 1 )
	{
		// Create an array with room for the start and stop positions of this
		// simulated read.
		Ps array = newPs( 2 );

		// Randomly select a position in the availible region.
		int position = RandomInteger( 0, (int) (W_SIZE * getWMapFract( w )) );

		// Store the start and stop positions.
		array->ps[0] = newPos( position, START );
		array->ps[1] = newPos( position + READ_LENGTH, STOP );

		return array;	// Return a pointer to the array.
	}

	// If the number of reads to be simulated by this function is more than 1,
	// halve the number.
	int half = num / 2;

	// Run the function recursively on both halves.
	Ps left = mergesortPs( w, half );
	Ps right = mergesortPs( w, num - half );

	// Merge the resulting sorted arrays into a single sorted array and return
	// it.
	return( mergePs( left, right ) );
}

/*
 * This function allocates memory for a new ps structure.
 * nps: the number of position pointers for which memory must be allocated.
 * Outputs a pointer to the new structure.
 */
Ps newPs( const int nps )
{
	// Allocate memory for the structure.
	Ps array = safe_malloc( sizeof(*array) );

	// Initialize the field.
	array->nps = nps;

	// Allocate memory for the array.
	array->ps = safe_malloc( nps * sizeof(Pos) );

	return array;
}

/*
 * This function allocates initial memory for a new pos strucutre and
 * 	initializes all its fields.
 * position: the position.
 * score: its score (-1 for a stop position and +1 for a start position).
 * Outputs a pointer to the new pos structure.
 */
Pos newPos( const int position, const int score )
{
	// Allocate memory for the fields of the structure.
	Pos p = safe_malloc( sizeof(*p) );

	// Initialize the fields.
	p->pos = position;
	p->score = score;

	return p;
}

/*
 * This function merges two already-sorted, already-simulated position arrays
 * 	into a single sorted array.
 * left: one of the arrays to be merged.
 * right: the other array to be merged.
 * Outputs a pointer to the sorted array.
 */
Ps mergePs( Ps left, Ps right )
{
	// Allocate memory for the sorted array.
	Ps result = newPs( left->nps + right->nps );

	int i = 0;	// The index of the current element in the left array.
	int j = 0;	// The index of the current element in the right array.

	// This loop continues while these indices are within the bounds of their
	// respective arrays.
	while ( (i < left->nps) && (j < right->nps) )
	{
		// If the position of the current element in the left array is less
		// than or equal to the position of the current element in the right
		// array, store the element from the left array as the next element in
		// the result array and advance the left array pointer.
		if(left->ps[i]->pos < right->ps[j]->pos)
		{
			result->ps[i+j] = left->ps[i];
			i++;
		}

		// Otherwise, the element in the right array should be the next
		// element in the result array and the right array pointer is
		// advanced.
		else
		{
			result->ps[i+j] = right->ps[j];
			j++;
		}
	}

	// Once the end of one of the arrays is reached, copy the remainder of the
	// other array.
	while (i < left->nps)
	{
		result->ps[i+j] = left->ps[i];
		i++;
	}

	while (j < right->nps)
	{
		result->ps[i+j] = right->ps[j];
		j++;
	}

	// Free the memory that had been holding the left and right arrays.
	freePs( left );
	freePs( right );

	// Return the merged array.
	return result;
}

/*
 * This function frees all memory allocated to a single ps structure.  It does
 * 	NOT free any of the positions within the array.
 * array: a pointer to the structure to be freed.
 */
void freePs( Ps array )
{
	free( array->ps );
	free( array );
}

/*
 * This function walks down the array.  Where the height goes above one of the
 * 	thresholds, the peak is counted.
 * array: the array of positions.
 * ta: a pointer to the array of thresh structures.
 */
void walk( Ps array, Thresh* ta )
{
	int height = 0;		// Initialize the height.

	// This loop runs once for each element in the array.
	for(int i = 0; i < array->nps; i++)
	{
		// Increment the height by the score at this position.  Thus the
		// height field will always contain the number of reads at this
		// position.
		height += array->ps[i]->score;

		// This loop continues while the position is constant.  In this way,
		// position structures with the same location are treated at the same
		// time.
		while( (i+1 < array->nps) && (array->ps[i]->pos == array->ps[i+1]->pos) )
		{
			free( array->ps[i] );
			i++;
			height += array->ps[i]->score;
		}

		// This loop runs once for each threshold.
		for(int thresh = MIN_THRESH; thresh <= MAX_THRESH; thresh++)
		{
			// Get a pointer to the current threshold's structure.
			Thresh t = ta[thresh-MIN_THRESH];

			// If the height is above the threshold and we are looking for a
			// starting position of a peak, count the peak.
			if( (height >= thresh) && (t->flag == 0) )
			{
				// If this peak is further than the maximum gap from the most
				// recently found peak, then this peak is a new peak and must
				// be counted.
				if( array->ps[i]->pos - t->stop > MAX_GAP )
					t->count++;

				// Whether this is a new peak or not, set the flag to 1 to
				// look for the end of this peak.
				t->flag = 1;
			}

			// If the height is below the threshold and the end of a peak is
			// sought, then this is the end of the peak.
			else if( (t->flag == 1) && (height < thresh) )
			{
				t->stop = array->ps[i]->pos;
				t->flag = 0;
			}
		}

		free( array->ps[i] );	// Free this position.
	}

	freePs( array );			// Free the array.
}
