/*
 * PeakSeq
 * Version 1.01
 * Paper by Joel Rozowsky, et. al.
 * Coded in C by Theodore Gibson.
 * main.c
 * Main program.
 */

#include <stdio.h>
#include <string.h>
#include "window.h"
#include "simulator.h"
#include "filter.h"
#include "analyze.h"
#include "random.h"
#include "io.h"
#include "util.h"
#include "config.h"


//----------------------------------------------------------------------------
// MAIN PROGRAM
//----------------------------------------------------------------------------
int main( void )
{
	// Randomize the simulated data in this program.
	Randomize();

	// Open the filter_hits_PolII.pl output file.
	FILE* out = fopen( OUTPUT, "w" );

	// Print header of columns to file.
	fprintf(out, "Chr\tStart\tStop\tSample_tags\tAdj_control_tags\t"
			"Enrichment\tExcess\tp_value\n");

	// Check that the file opens correctly.
	if( out == NULL )
		fatal("Error opening file %s for output.\nProgram can create files "
				"but not directories. Program exiting.");

	// Open the analysis final output file.
	FILE* final = fopen( FINAL, "w" );

	// Check that the file opens correctly.
	if( final == NULL )
	{
		fclose( out );
		fatal("Error opening file %s for output.\nProgram can create files "
				"but not directories. Program exiting.");
	}

	// Find the highest factorial and power of 0.5 that can be computed on
	// this computer.
	const int max_fact = getMaxFact();
	const int max_exp = getMaxExp();

	// Create cache arrays to store factorial and power values so they only
	// need be computed once.
	long double* factCache = newCache( max_fact );
	long double* pCache = newCache( max_exp );

	// Create a double array of window structures.
	Window** wa = newWA();

	// Create a new data structure to store every peak for later analysis and
	// output to the FINAL file.
	Data data = newData();


//*****
//*****GET MAPPABILITY FRACTION DATA FROM MAP FILE.
//*****

	// Open the file with the mappability fraction data and check that it
	// opens correctly.
	FILE* map = fopen( MAP_FILENAME, "r" );
	if( map == NULL )
	{
		free( factCache );
		free( pCache );
		freeData( data );
		fclose( out );
		fclose( final );
		fatal("Error opening file %s. Program exiting.", MAP_FILENAME);
	}

	// Store the mappability fractions in the double array.
	getMapFracts( map, wa );

	// Close the file.
	fclose( map );


//*****
//*****THE REST OF THE PROGRAM LOOPS OVER EACH CHROMOSOME SEPARATELY.
//*****

	// Print headers of streaming output to terminal.
	printf("Chr\tWindow\tAct_reads\tThresh\tFDR\tCount_at_thresh\n");

	for(int cnum = MIN_CNUM; cnum <= MAX_CNUM; cnum++)
	{
		// Get the string representation of the chromosome number in the
		// format chr1 to chrM for input/output purposes.
		String cname = getCname( cnum );

		// If the getCname() function fails, go to the next chromosome.  This
		// will only happen if getCname is passed a number less than 1 or
		// greater than 25.
		if( cname == NULL )
			continue;

		// Get the bin_size in this chromosome.
		int bin_size = BIN_SIZE;
		if( cnum == 25 ) bin_size = BIN_SIZE_M;


//*****
//*****GET DATA FROM SAMPLE ELAND FILE: NUMBER OF READS AND READ START
//*****POSITIONS.
//*****

		// Open the sample Eland file for this chromosome and check that it
		// opens correctly.
		char eland_filename[FILENAME_LENGTH];
		strcpy( eland_filename, ELAND_PREFIX );
		strcat( eland_filename, cname );
		strcat( eland_filename, ELAND_SUFFIX );
		FILE* eland = fopen( eland_filename, "r" );
		if( eland == NULL )
		{
			freeWA( wa );
			freeData( data );
			free( cname );
			free( pCache );
			free( factCache );
			fclose( out );
			fclose( final );
			fatal("Error opening file %s. Program exiting.", eland_filename);
		}

		// Allocate memory for a new dynamic array to store all the start
		// positions of reads in the sample Eland file.
		Starts sample_starts = newStarts();

		// Get the number of reads of each window and the starting positions
		// of these reads from the sample Eland file.
		getNreadsStarts( eland, wa[cnum-MIN_CNUM], sample_starts );

		fclose( eland );	// Close the file.


//*****
//*****GET READ START POSITIONS FROM INPUT (CONTROL) FILE DATA.
//*****

		// Open the input control Eland file for this chromosome and check
		// that it opens correctly.
		char input_filename[FILENAME_LENGTH];
		strcpy( input_filename, INPUT_PREFIX );
		strcat( input_filename, cname );
		strcat( input_filename, INPUT_SUFFIX );
		FILE* input = fopen( input_filename, "r" );
		if( input == NULL )
		{
			freeWA( wa );
			freeStarts( sample_starts );
			freeData( data );
			free( cname );
			free( pCache );
			free( factCache );
			fclose( out );
			fclose( final );
			fatal("Error opening file %s. Program exiting.", input_filename);
		}

		// Allocate memory for a new dynamic array of positions to store all
		// the start positions of reads in the input file.
		Starts control_starts = newStarts();

		// Get the peak positions from the input file.
		getStarts( input, control_starts );

		fclose( input );	// Close the input file.

		// Get the scaling factor.
		const long double scaling_factor = getScalingFactor( control_starts,
				sample_starts, bin_size );

		// If scaling factor is negative, don't do the remainder of that chromosome
		// and print out a runtime error message.
		if( scaling_factor < 0 )
		{
			printf("Error: Scaling factor negative on chromosome %s.  "
					"Chromosome skipped.\n", cname);
			freeStarts( sample_starts );
			freeStarts( control_starts );
			free( cname );
			continue;
		}


//*****
//*****GET ACTUAL PEAK DATA FROM SGR FILE.
//*****

		// Open the sgr file for this chromosome and check that it opens
		// correctly.
		char sgr_filename[FILENAME_LENGTH];
		strcpy( sgr_filename, SGR_PREFIX );
		strcat( sgr_filename, cname );
		strcat( sgr_filename, SGR_SUFFIX );
		FILE* sgr = fopen( sgr_filename, "r" );
		if( sgr == NULL )
		{
			freeWA( wa );
			freeStarts( sample_starts );
			freeStarts( control_starts );
			freeData( data );
			free( cname );
			free( pCache );
			free( factCache );
			fclose( out );
			fclose( final );
			fatal("Error opening file %s. Program exiting.", sgr_filename);
		}

		// Allocate memory for and initialize a new actThreshes structure.
		// This structure stores all fields that differ between thresholds
		// including the peak start and stop locations.  The count in each
		// window at each threshold, however, is stored in the window
		// structure's counts field.
		ActThresh* ata = newActThreshArray();

		// Get the number of peaks and the peak locations from the sgr file.
		getActCounts( sgr, wa[cnum-MIN_CNUM], ata );

		fclose( sgr );	// Close the sgr file.


//*****
//*****RUN THE SIMULATION TO GET SIMULATED PEAK DATA AND OUTPUT THE RESULTS.
//*****

		// For output purposes, these variables will contain the start and
		// stop locations of the last peak printed.
		int last_stop = -MAX_GAP - 1;
		int last_start = 0;

		// This loop runs once for each window within the current chromosome.
		for(int wi = 0; wi < W_PER_C; wi++)
		{
			// Get a pointer to the current window;
			Window w = wa[cnum-MIN_CNUM][wi];

			// If there are no reads in this window, continue to the next
			// window.
			if( getWNReads( w ) <= 0 )
				continue;

			// Get a new thresh array.
			Thresh* ta = newThreshArray();

			// Run the simulation.
			simulator( w, ta );

			// After the findThresh() function, this variable will contain the
			// FDR at the threshold.
			long double fdr = 0.0L;

			// Get the qualifying threshold.
			int thresh = findThresh( w, ta, &fdr );

			freeThreshArray( ta );	// Free the threshes structure.

			// If no threshold was found, continue to the next window.
			if( thresh == -1 )
				continue;

			// Find the start and stop positions of this window.
			int w_start = (wi * W_SIZE);
			int w_stop = ((wi + 1) * W_SIZE) - 1;

			// Find the peaks for this window.  For each peak, compute and
			// print tag information to the file.
			findPeaks( out, ata[thresh-MIN_THRESH], sample_starts,
					control_starts, data, pCache, factCache, &last_start,
					&last_stop, w_start, w_stop, scaling_factor, max_fact,
					max_exp, cname );

			// Print the results from this window to the terminal.
			printf("%s\t%d\t%d\t%d\t%.15Lg\t%d\n", cname, wi, getWNReads( w ),
					thresh, fdr, getWCount( w, thresh ));
		}

		// As long as there was at least one peak, run the findPeaks()
		// function on the last peak.
		if( last_stop > 0 )
			fprintfTagData( out, sample_starts, control_starts, data, pCache,
					factCache, last_start, last_stop, scaling_factor,
					max_fact, max_exp, cname );

		// Clean up.
		freeActThreshArray( ata );
		freeStarts( sample_starts );
		freeStarts( control_starts );
		freeCounts( wa[cnum-MIN_CNUM] );
		free( cname );
	}

	sortData( data );			// Sort the data in the data structure.
	fprintfData( final, data );	// Print the sorted data.

	// Clean up.
	freeWA( wa );
	free( pCache );
	free( factCache );
	fclose( out );
	fclose( final );
}
