/*
 * PeakSeq
 * Version 1.01
 * Paper by Joel Rozowsky, et. al.
 * Coded in C by Theodore Gibson.
 * filter.c
 * This module deals with output to the OUTPUT file.
 */


#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <math.h>
#include "filter.h"
#include "analyze.h"
#include "io.h"
#include "util.h"
#include "config.h"


//----------------------------------------------------------------------------
// PRIVATE STRUCTURE DEFINITION
//----------------------------------------------------------------------------
struct starts
{
	// The array of positions
	int* ps;
	// The number of positions that have been stored.
	int nps;
	// The number of positions that can be stored without allocating more
	// memory.
	int size;
	// The current index.
	int i;
};


//----------------------------------------------------------------------------
// PRIVATE FUNCTION PROTOTYPES
//----------------------------------------------------------------------------
int* mergesortStarts( int* left, const int num );
int* mergeStarts( int* left, int* right, const int lnum, const int rnum );
int getTags( Starts starts, const int start, const int stop );
int getStopIndex( Starts starts, const int stop, const int left_index,
		const int right_index );
int getStartIndex( Starts starts, const int start, const int left_index,
		const int right_index );
long double getTagsExtended( Starts starts, const int start, const int stop );
long double binom( long double* zscore, long double* pCache,
		long double* factCache, const int sample_tags,
		const int adj_control_tags, const int max_fact, const int max_exp );
long double powCache( long double* cache, const long double n, const int k );
long double choose( long double* cache, const int n, const int k );
long double factCache( long double* cache, const int n );
long double getTerm( const int n, int k, const long double p_to_n );
long double getTermDiv( const int n, int k, const long double p_to_n,
		const long double p_to_n_div );


//----------------------------------------------------------------------------
// PUBLIC FUNCTIONS
//----------------------------------------------------------------------------
long double* newCache( const int num )
{
	// Allocate memory for the cache, allowing for num to be the highest index
	// of the array.
	long double* cache = safe_malloc( (num+1) * sizeof(long double) );

	// Initialize the first value to 1 since 0! and n^0 are both 1.
	cache[0] = 1.0L;

	// Initialize the other values to -1 to distinguish uncomputed values from
	// computed ones.
	for(int i = 1; i <= num; i++)
		cache[i] = -1.0L;

	// Return the pointer to the array.
	return cache;
}

int getMaxFact( void )
{
	// Create a cache.  This cache takes up 500 MB.  If more than this many
	// factorials can be computed, only 500 MB will be allocated.
	long double* cache = newCache( 50000 );

	// This loop runs until the end of the cache is reached.
	for( int i = 0; i < 50000; i++ )
	{
		// If the answer is infinity, return the previous factorial that was
		// computed successfully.
		if( isinf( factCache(cache, i) ) )
		{
			free(cache);
			return (i-1);
		}
	}

	free( cache );

	// If the function reaches this point, it is possible to compute 50000!
	// To compute a higher factorial would require more than 500 MB, so return
	// 50000.
	return 50000;
}

int getMaxExp( void )
{
	// Create a cache.  This cache takes up 500 MB.  If more than this many
	// powers of 0.5 can be computed, only 500 MB will be allocate
	long double* cache = newCache( 50000);

	long double pow = 0.0L;	// The value.

	// This loop runs until the end of the cache is reached.
	for( int i = 0; i < 50000; i++ )
	{
		// Compute the value.
		pow = powCache( cache, 0.5, i );

		// If the value is 0 or nan, return the previous exponent.
		if( pow == 0.0L || isnan(pow) )
		{
			free( cache );
			return (i-1);
		}
	}

	free( cache );

	// If the function reaches this point, it is possible to compute
	// 0.5^50000. To compute with a higher exponent would require more than
	// 500 MB, so return 50000.
	return 50000;
}

Starts newStarts( void )
{
	// Allocate memory for the structure.
	Starts starts = safe_malloc( sizeof(*starts) );

	// Initialize the fields.
	starts->nps = 0;
	starts->i = 0;
	starts->size = 10;

	// Allocate memory for the array.
	starts->ps = safe_malloc( starts->size * sizeof(int) );

	return starts;
}

void insertStart( Starts starts, const int pos )
{
	// Insert the position into the array.
	starts->ps[starts->nps] = pos;
	starts->nps++;

	// Allocate more memory if necessary to store the next position.
	if( starts->nps >= starts->size )
	{
		starts->size *= 2;
		starts->ps = safe_realloc( starts->ps, starts->size * sizeof(int) );
	}
}

void sortStarts( Starts starts )
{
	int* temp  = mergesortStarts( starts->ps, starts->nps );
	free( starts->ps );
	starts->ps = temp;
}

void getStarts( FILE* input, Starts starts )
{
	// These local variables are used for temporary storage.
	char seq[99];
	int pos = 0;
	char dir = '\0';

	// These variables are used in loop and scanf() syntax.
	int ret = 0;
	char ch = '\0';

	// This loop runs until the end of the file is reached.
	while( (ch = getc(input)) != EOF )
	{
		// Return the character used for testing to the file.
		ungetc( ch, input );

		// Get the information from this line of the file.
		ret = fscanf(input, "%*s%98s%*s%*s%*s%*s%*s%d %c", seq, &pos, &dir);

		// If one argument was not successfully read, return 0, indicating
		// that an error has occurred.
		if( ret != 3 )
		{
			getNewline( input );
			printf("Error reading Input file. Line skipped.\n");
			continue;
		}

		// If this is a reverse read, correct the position start.
		if( dir == 'R' )
			pos += strlen( seq ) - READ_LENGTH;

		// If the direction is not reverse or forward, there is an error in
		// this line, so skip the line.
		else if( dir != 'F' )
		{
			getNewline( input );
			printf("Error reading Input file. Line skipped.\n");
			continue;
		}

		// Insert this position into the position array.
		insertStart( starts, pos );

		getNewline( input );	// Skip to the next line.
	}

	sortStarts( starts );	// Sort the position array.
}

long double getScalingFactor( Starts control_starts, Starts sample_starts,
		const int bin_size )
{
	// This variable stores the number of bins containing reads in both files.
	long double n = 0.0L;
	// This variable stores the number of input control reads in the
	// overlapping bins.
	long double sx = 0.0L;
	// This variable stores the number of sample Eland reads in the
	// overlapping bins.
	long double sy = 0.0L;
	// These variables store calculations on these numbers.
	long double sxx = 0.0L;
	long double sxy = 0.0L;

	// These variables are used in loop control.
	// The start position of this bin.
	int bin_start = 0;
	// The stop position of this bin.
	int bin_stop = 0;
	// The last position counted from the control_starts array.
	int c_last_pos = -1;
	// The last position counted from the sample_starts array.
	int s_last_pos = -1;
	// The number of reads in this bin in the control_starts array.
	int c_bin = 0;
	// The number of reads in this bin in the sample_starts array.
	int s_bin = 0;
	// This tells the loop whether the end of one of the arrays has been
	// reached.  If the end of an array is reached it becomes no longer
	// possible to find a bin with reads in both files and the loop exits.
	bool done = false;

	// This loop runs once for each bin.
	for(int bin = 0; bin < N_BINS; bin++)
	{
		// If the end of one of the arrays has been reached, exit the loop.
		if( done ) break;

		// Calculate the start and stop positions of this bin.
		bin_start = bin * bin_size;
		bin_stop = ((bin+1) * bin_size) - 1;

		// Since the control_starts array is checked first, the index at the
		// beginning of every loop must be a position beyond the last bin
		// checked.  Thus the current position must be located after the start
		// of this bin and it is only necessary to check whether the position
		// is located before the end of the current bin.
		while( (control_starts->i < control_starts->nps) &&
				(control_starts->ps[control_starts->i] <= bin_stop) )
		{
			// Check if this position has been counted.
			if( control_starts->ps[control_starts->i] == c_last_pos )
			{
				control_starts->i++;
				continue;
			}

			// If this position has not yet been counted, count it.
			c_bin++;
			c_last_pos = control_starts->ps[control_starts->i++];

			// If the end of the control_starts array has been reached, break
			// this loop.  In addition, it will be impossible to need to check
			// any further bins beyond the current, so tell the for loop to
			// exit once the current loop is completed using the done flag.
			if( control_starts->i >= control_starts->nps )
			{
				done = true;
				break;
			}
		}

		// If there were no elements in the control_starts array in this bin,
		// skip this bin.
		if( c_bin == 0 )
			continue;

		// Skip to the first element beyond the start position of this bin.
		while( (sample_starts->i < sample_starts->nps) &&
				(sample_starts->ps[sample_starts->i] < bin_start) )
			sample_starts->i++;

		// Run the same loop for the sample_starts array as for the
		// control_starts array.
		while( (sample_starts->i < sample_starts->nps) &&
				(sample_starts->ps[sample_starts->i] <= bin_stop) )
		{
			if( sample_starts->ps[sample_starts->i] == s_last_pos )
			{
				sample_starts->i++;
				continue;
			}

			s_bin++;
			s_last_pos = sample_starts->ps[sample_starts->i++];
			if( sample_starts->i >= sample_starts->nps )
			{
				done = true;
				break;
			}
		}

		// If there were no elements in the sample_starts array in this bin,
		// skip this bin.
		if( s_bin == 0 )
		{
			c_bin = 0;
			continue;
		}

		// Otherwise, deal with the fields that determine the scaling factor.
		n++;
		sx += c_bin;
		sy += s_bin;
		sxx += c_bin * c_bin;
		sxy += c_bin * s_bin;

		// Reset the fields that count the number of reads in this bin.
		c_bin = 0;
		s_bin = 0;
	}

	// Reset the pointers.
	sample_starts->i = 0;
	control_starts->i = 0;

	// Return the scaling factor.
	return ( (n * sxy - sx * sy) / (n * sxx - sx * sx) );
}

void fprintfTagData( FILE* out, Starts sample_starts, Starts control_starts,
		Data data, long double* pCache, long double* factCache,
		const int start, const int stop, const long double scaling_factor,
		const int max_fact, const int max_exp, const String cname )
{
	// Get the number of reads from the sample Eland file in this peak.
	const int sample_tags = getTags( sample_starts, start, stop );

	long double control_tags = 0.0L;

	// Get the number of reads from the control input file in this peak.  If
	// the size of the region is less than the specified amount, use the same
	// function as for the sample file.
	if( stop - start > MAX_REG_EXT )
		control_tags = (long double) getTags( control_starts, start, stop );
	else
		control_tags = getTagsExtended( control_starts, start, stop );

	// Adjust the control tags with the scaling factor.
	int adj_control_tags = ceil(control_tags * scaling_factor);

	// Make sure there is at least one tag in both fields.
	if (control_tags == 0)
		control_tags = 1;
	if (adj_control_tags == 0)
		adj_control_tags = 1;

	// Calculate the enrichment and excess.
	long double enrich = ( (long double) sample_tags) /
				(control_tags * ((long double) scaling_factor) );
	int excess = sample_tags - adj_control_tags;

	// Initialize the z-score.  The z-score will only be used for very small
	// p-values.
	long double zscore = 0.0L;

	// Calculate the pvalue.
	long double pval = binom( &zscore, pCache, factCache, sample_tags,
			adj_control_tags, max_fact, max_exp );

	// Print the peak to the file.
	fprintf(out, "%s\t%d\t%d\t%d\t%d\t%.15Lg\t%d\t%.15Lg\n", cname, start,
			stop, sample_tags, adj_control_tags, enrich, excess, pval);

	// Insert the peak into the data structure.
	insertData( data, cname, start, stop, sample_tags, adj_control_tags,
			enrich, excess, pval, zscore );
}

void freeStarts( Starts starts )
{
	free( starts->ps );	// Free the array.
	free( starts );		// Free the structure.
}


//-------------------------------------------------
// PRIVATE FUNCTIONS
//-------------------------------------------------
/*
 * This function implements a standard mergesort algorithm on a positions
 * 	array.
 * left: a pointer to the array to be sorted.
 * num: the number of elements in the array.
 * Outputs a pointer to the sorted array.
 */
int* mergesortStarts( int* left, const int num )
{
	// If the array contains only one element, return an array containing only
	// that element.
	if ( num <= 1)
	{
		int* res = safe_malloc( sizeof(int) );
		res[0] = left[0];
		return res;
	}

	// Otherwise split the array in half to sort each half of the list.
	int half = num / 2;

	// Get a pointer to the right half of the list.
	int* right = &left[half];

	// Sort both halves of the array.
	left = mergesortStarts( left, half );
	right = mergesortStarts( right, num - half );

	// Merge the two sorted halves.
	return ( mergeStarts( left, right, half, num - half ) );
}

/*
 * This function merges two already sorted arrays into a single sorted array.
 * left: a pointer to one of the arrays.
 * right: a pointer to the other array.
 * lnum: the number of elements in the left array.
 * rnum: the number of elements in the right array.
 * Outputs a pointer to the sorted array.
 */
int* mergeStarts( int* left, int* right, const int lnum, const int rnum )
{
	// The number of regions in the merged array must be calculated in order
	// to allocate enough storage in the new array to hold every element in
	// both of the input arrays.
	int num = lnum + rnum;
	int* result = safe_malloc( num * sizeof(int) );

	int i = 0;	// The index of the current element in the left array.
	int j = 0;	// The index of the current element in the right array.

	// This loop continues while these indices are within the bounds of their
	// respective arrays.
	while ( (i < lnum) && (j < rnum) )
	{
		// If this position in the left array comes before this position in
		// the right array, the position in the left array comes next.
		if (left[i] < right[j])
		{
			result[i+j] = left[i];
			i++;
		}
		// Otherwise, the current element in the right array should be the
		// next element in the result array.
		else
		{
			result[i+j] = right[j];
			j++;
		}
	}

	// Once the end of one of the arrays is reached, copy the remainder of the
	// other array.
	while (i < lnum)
	{
		result[i+j] = left[i];
		i++;
	}

	while (j < rnum)
	{
		result[i+j] = right[j];
		j++;
	}

	// Free the old arrays.
	free( left );
	free( right );

	return result;	// Return the merged array.
}

/*
 * This function finds the number of reads that are located within a peak.
 * starts: a pointer to the structure containing an array of all read start
 * 	positions from a file.
 * start: the start position of the peak.
 * stop: the stop position of the peak.
 * Outputs the number of reads located within the peak.
*/
int getTags( Starts starts, const int start, const int stop )
{
	// These local variables are used for temporary storage.
	int count = 0;
	int last_pos = -1;

	int tags = 0;	// Initialize the returned field.

	// Get the index of the last position in the array.  This must be between
	// the last position of the last peak and the end of the array.
	int stop_i = getStopIndex( starts, stop, starts->i, starts->nps - 1 );

	// Get the index of the start position in the array.  This must be between
	// the last position of the last peak and the stop of this peak.
	int start_i = getStartIndex( starts, start - READ_LENGTH, starts->i,
			stop_i );

	// Now for each index between the start and stop indices, increment the
	// number of reads.
	for( starts->i = start_i; starts->i <= stop_i; starts->i++ )
	{
		// Make sure that reads beginning at the same location are not counted
		// more than max_count times.
		if( starts->ps[starts->i] == last_pos )
		{
			if( count == MAX_COUNT )
				continue;
			count++;
		}

		else
		{
			count = 1;
			last_pos = starts->ps[starts->i];
		}

		// Increment the number of tags as long as max_count has not yet been
		// reached for this position.
		tags++;
	}

	starts->i = stop_i;	// Reset the pointer.

	// Return the number of reads that are located within the peak.
	return tags;
}

/*
 * This function finds the index of the last element in a region.
 * starts: a pointer to the structure containing the array.
 * stop: the stop position of the region.
 * left_index: the leftmost index that could possibly be the correct index.
 * right_index: the rightmost index that could possibly be the correct index.
 * Outputs the index of the last element in the region.
 */
int getStopIndex( Starts starts, const int stop, const int left_index,
		const int right_index )
{
	// If this is a leaf, return the correct index.
	if( right_index - left_index <= 0 )
	{
		// If the stop position is less than this position, then the first
		// element in the region is the position just to the left of this
		// position.
		if( stop < starts->ps[right_index] )
			return (right_index - 1);

		// Otherwise this position is the first position in the region.
		else
			return right_index;
	}

	// The number of elements between the right and left indices, inclusive.
	int n_elements = right_index - left_index + 1;

	// The index of the father of this branch in the tree.
	int father_index = (n_elements / 2) + left_index;

	int index = 0;	// The index.

	if( stop < starts->ps[father_index] )
		index = getStopIndex( starts, stop, left_index, father_index - 1 );
	else
		index = getStopIndex( starts, stop, father_index + 1, right_index );

	return index;	// Return the index.
}


/*
 * This function finds the index of the first element in a region.
 * starts: a pointer to the structure containing the array.
 * start: the starting position of the region.
 * left_index: the leftmost index that could possibly be the correct index.
 * right_index: the rightmost index that could possibly be the correct index.
 * Outputs the index of the first element in the region.
 */
int getStartIndex( Starts starts, const int start, const int left_index,
		const int right_index )
{
	// If this is a leaf, return the correct index.
	if( right_index - left_index <= 0 )
	{
		// If the starting position is greater than this position, then the
		// first element in the region is the position just to the right of
		// this position.
		if( start > starts->ps[right_index] )
			return (right_index + 1);

		// Otherwise this position is the first position in the region.
		else
			return right_index;
	}

	// The number of elements between the right and left indices, inclusive.
	int n_elements = right_index - left_index + 1;

	// The index of the father of this branch in the tree.
	int father_index = (n_elements / 2) + left_index;

	int index = 0;	// The index.

	if( start > starts->ps[father_index] )
		index = getStartIndex( starts, start, father_index + 1, right_index );
	else
		index = getStartIndex( starts, start, left_index, father_index - 1 );

	return index;	// Return the index.
}

/*
 * This function finds the number of reads that are located within a peak and
 * 	within an extended region surrounding the peak.  It returns the higher of
 * 	the adjusted extended region and the actual region.
 * starts: a pointer to the structure containing an array of all positions
 * 	from the control input file.
 * start: the start position of the peak.
 * stop: the stop position of the peak.
 * Outputs the correct adjusted number of reads.
 */
long double getTagsExtended( Starts starts, const int start, const int stop )
{
	// These local variables are used for temporary storage.
	int count = 0;
	int last_pos = 0;

	// The number of control tags in the region.
	long double tags = 0;

	// The number of control tags in the extended region.
	long double ext_tags = 0;

	// Get the pointer to the end of the extended region.  This must be
	// between the start of last peak's extended region and the final index of
	// the array.
	int ext_stop_i = getStopIndex( starts, stop + EXTENDED_REGION_SIZE,
			starts->i, starts->nps - 1 );

	// Now get the start index of the extended region.  This must be between
	// the start of last peak's extended region and the index of the last read
	// within the extended region.
	int ext_start_i = getStartIndex( starts,
			start - READ_LENGTH - EXTENDED_REGION_SIZE, 0, ext_stop_i );

	// Now that the start and stop indices of the extended region have been
	// found, the start and stop positions of the actual region must be
	// between these two indices.
	int stop_i = getStopIndex( starts, stop, ext_start_i, ext_stop_i );
	int start_i = getStartIndex( starts, start - READ_LENGTH, ext_start_i,
			stop_i );

	// This loop continues while the position is within the external region bu
	// t before the start of the actual region.
	for( starts->i = ext_start_i; starts->i < start_i; starts->i++ )
	{
		if( starts->ps[starts->i] == last_pos )
		{
			if( count == MAX_COUNT )
				continue;
			count++;
		}

		else
		{
			count = 1;
			last_pos = starts->ps[starts->i];
		}

		ext_tags++;
	}

	// Now that the actual peak has been reached, increment both the
	// ext_control_tags and control_tags fields until the end of the peak is
	// reached.
	for(; starts->i <= stop_i; starts->i++ )
	{
		if( starts->ps[starts->i] == last_pos )
		{
			if( count == MAX_COUNT )
				continue;
			count++;
		}

		else
		{
			count = 1;
			last_pos = starts->ps[starts->i];
		}

		ext_tags++;
		tags++;
	}

	// Now continue incrementing the ext_control_tags field until the end of
	// the extended region is reached.
	for(; starts->i <= ext_stop_i; starts->i++ )
	{
		if( starts->ps[starts->i] == last_pos )
		{
			if( count == MAX_COUNT )
				continue;
			count++;
		}

		else
		{
			count = 1;
			last_pos = starts->ps[starts->i];
		}

		ext_tags++;
	}

	// Reset the pointer so that it lines up with the normal getTags()
	// function.
	starts->i = stop_i;

	// Adjust the ext_control_tags field so that it is scaled in order to be
	// compared with the control_tags field.
	ext_tags *= ( ((long double) (stop - start + 1)) /
				(stop - start + 1 + (2 * EXTENDED_REGION_SIZE)) );

	// Return the maximum of the adjusted ext_control_tags and the normal
	// control_tags.
	tags = ext_tags > tags ? ext_tags : tags;
	return tags;
}

/*
 * This function returns the correct pval for a given set of parameters.  It
 * 	does so by computing the tail of the binomial distribution.  It minimizes
 * 	the time required to do the calculation without sacrificing accuracy.
 * zscore: a pointer to the field that will contain the z-score for very large
 * 	or very small p-values.
 * sample_tags: the number of reads from the sample eland file located in this
 * 	peak.
 * adj_control_tags: the adjusted number of reads from the control input file
 * 	located in this peak.
 * max_fact: the maximum factorial that can be computed on this computer.
 * max_exp: the maximum exponent to which 0.5 can be raised to in order to
 * 	output a non-zero answer on this computer.
 * Outputs the correct p-value.
 */
long double binom( long double* zscore, long double* pCache,
		long double* factCache, const int sample_tags,
		const int adj_control_tags, const int max_fact, const int max_exp )
{
	const int n = sample_tags + adj_control_tags;
	const int k = adj_control_tags;
	long double pval = 0.0L;

	// Since p == (1-p), the term p^a*(1-p)^(n-a) becomes p^a * p ^ (n-a),
	// which always equals the constant p^n no matter what value a takes on
	// within the loop.  To speed up calculation time, the p^a*(1-p)^(n-a)
	// will be replaced with the constand p^n.
	const long double p = 0.5L;

	// If the factorial can be computed, then this method is much faster.
	if( n <= max_fact )
	{
		// If p^n can be computed, then this method is simpler and faster.
		if( n <= max_exp )
		{
			// Compute p^n.
			const long double p_to_n = powCache( pCache, p, n );

			// This loop runs once for each of the adjusted tags from the
			// control file.  Without the shortcut this line would read
			// choose(n, a, cache) * pow(p, a) * pow(p, n-a).
			for (int a = 0; a <= k; a++)
				pval += choose( factCache, n, a ) * p_to_n;
		}

		// Otherwise, split up p^n into quantities that can be computed and
		// use them separately.
		else
		{
			// Split n into 50 somewhat equal pieces.
			const int increment = n/50;

			// Compute p to the divided powers.
			const long double p_to_n_div = powCache( pCache, p, increment);
			const long double p_to_n = powCache( pCache, p,
					n - (49 * increment));

			// The value of a single term in the summation.
			long double value = 0.0L;

			// This loop runs once for each of the adjusted tags from the
			// control file.
			for( int a = 0; a <= k; a++ )
			{
				// Find n choose a, which can be computed since all the
				// factorials can be computed.  Then multiply it by p_to_n
				// once and by p_to_n_div 49 times.  This accounts for the
				// step size of n/50.
				value = choose( factCache, n, a ) * p_to_n;
				for( int i = 0; i < 49; i++ )
					value *= p_to_n_div;

				// Do the summation and reset the value for the next loop.
				pval += value;
				value = 0.0L;
			}
		}
	}

	// Otherwise use the non-cache version.  Using the cache version would
	// return infinity as the factorial in the choose function could not be
	// computed.
	else
	{
		// If p^n can be computed, this way is faster and simpler.
		if( n <= 0 )
		{
			// Find p^n.
			const long double p_to_n = powCache( pCache, p, n );

			// Do the summation.
			for(int a = 0; a <= k; a++)
				pval += getTerm( n, a, p_to_n );
		}

		// If p^n cannot be computed, break it up into pieces as before.
		else
		{
			// Divide n into 50 pieces and compute each value separately.
			const int increment = n/50;
			const long double p_to_n_div = powCache( pCache, p, increment);
			const long double p_to_n = powCache( pCache, p,
					n - (49 * increment));

			// Do the summation in a way that will not return 0 unless the
			// value is truly smaller than can be represented by a long
			// double.
			for (int a = 0; a <= k; a++)
				pval += getTermDiv( n, a, p_to_n, p_to_n_div );
		}
	}

	// If the pvalue is 0 or infinite, make sure it outputs the correct value.
	if( isnan(pval) || pval == 0.0L || isinf(pval) )
	{
		if( adj_control_tags < sample_tags )
		{
			pval = 0.0L;
			(*zscore) = (k - n*p) / sqrt(n*p*(1-p));
		}
		else
			pval = 1.0L;
	}

	return pval;	// Return the pvalue.
}

/*
 * This function improves the speed of calculation for computing n^k by saving
 * 	all intermediate results in a cache array.
 * cache: a pointer to the cache of already-calculated powers of this integer
 * 	n.
 * n: the integer.
 * k: the exponent.
 * Outputs n^k.
 */
long double powCache( long double* cache, const long double n, const int k )
{
	// Initialize the index to the index that should hold this answer.
	int i = k;

	// Continue decreasing the index until a position is reached with a value.
	while( (i > 0) && (cache[i] < 0.0) )
		i--;

	// Once this value is reached, compute more until i becomes k.
	while( i < k )
	{
		cache[i+1] = cache[i] * n;
		i++;
	}

	return cache[i];
}

/*
 * This function computes n choose k.
 * cache: a cache of all factorials as they are computed.
 * n: n in n choose k.
 * k: k in n choose k.
 */
long double choose( long double* cache, const int n, const int k )
{
	return ( factCache(cache, n) /
			(factCache(cache, k) * factCache(cache, n - k)) );
}

/*
 * This function improves the speed of calculation for computing n! by saving
 * 	all intermediate results in a cache array.
 * cache: a pointer to the cache of already-calculated factorials.
 * n: the factorial that needs to be computed.
 * Outputs n!.
 */
long double factCache( long double* cache, const int n )
{
	// Initialize the index to the index that should hold this answer.
	int i = n;

	// Continue decreasing the index until a position is reached with a value.
	while( (i > 0) && (cache[i] < 0.0) )
		i--;

	// Once this value is reached, compute more until i becomes n.
	while( i < n )
	{
		cache[i+1] = cache[i] * (i+1);
		i++;
	}

	// Return n!.
	return cache[i];
}

/*
 * This function calculates n choose k where n! cannot be computed.
 * n: the n in n choose k.
 * k: the k in n choose k.
 * p_to_n: the value p^n.
 * Outputs (n choose k) * p^n.
 */
long double getTerm( const int n, int k, const long double p_to_n )
{
	// Multiply the term by p_to_n at the beginning as opposed to the end.
	// Since p_to_n is very small, this will prevent the term from reaching
	// infinity.
	long double res = p_to_n;

	// If n - k is greater than i, insert n - k for k.
	k = n - k > k ? n - k : k;

	for(int j = 0; j < k; j++)
		res *= (((long double) n) - j) / (k - j);

	return res;
}

/*
 * This function calculates n choose k where neither n! nor n^p can be
 * 	computed.
 * n: the n in n choose k.
 * k: the k in n choose k.
 * p_to_n: p^(n - 49 * (n/50))
 * p_to_n_div: p^(n/50)
 * Outputs (n choose k) * p^n.
 */
long double getTermDiv( const int n, int k, const long double p_to_n,
		const long double p_to_n_div )
{
	// Multiply the term by p_to_n first instead of last in order to keep the
	// term from reaching infinity.
	long double res = p_to_n;

	// If n - k is greater than i, insert n - k for k.
	k = n - k > k ? n - k : k;

	// Now, n choose k needs to be multiplied by res as well as p_to_n_div 49
	// times. In order to keep the term under infinity, intersperse the
	// multiplications of the small value of p_to_n_div evenly within the
	// computations to compute n choose k.  This will never allow the value to
	// get too high and will keep the computation manageable by the computer.
	int step_size = k / 49;

	// This loop runs until p_to_n_div has been multiplied by res 49 times and
	// most of n choose k has been computed as well.
	for (int i = 0; i < (step_size) * 49;)
	{
		// Do some computation of n choose k along the way.
		for(int j = 0; j < step_size; j++)
		{
			res *= (((long double) n) - i) / (k - i);
			i++;
		}

		// Multiply the intermediate result by p_to_n_div.
		res *= p_to_n_div;
	}

	// Do the rest of the computation.
	for(int j = (step_size) * 49; j < k; j++)
		res *= (((long double) n) - j) / (k - j);

	// Return the result.
	return res;
}
