/*
 * PEAK-SEQ -- PREPROCESSING
 * Paper by Joel Rozowsky, et. al.
 * Coded in C by Theodore Gibson.
 * This module deals with sorting and output to the sgr files.
*/


#include <stdio.h>
#include <string.h>
#include "sgr.h"
#include "io.h"
#include "util.h"
#include "config.h"


//-------------------------------------------------
// PRIVATE STRUCTURE DEFINITIONS
//-------------------------------------------------
/*
 * This structure holds the information for a position.
 * Pos is a pointer to this structure.
*/
typedef struct pos
{
	// The position.
	int pos;
	// The score at this position.  Score is incremented once for
	// each read that begins at this position and decremented once
	// for each read that ends at this position.
	int score;
}* Pos;

/*
 * This structure holds an array of positions.
 * Ps is a pointer to this strucutre.
*/
struct ps
{
	// The array of positions.
	Pos* ps;
	// The number of positions in the array.
	int nps;
	// The number of positions that can be stored in
	// the array without allocating more memory.
	int size;
};


//-------------------------------------------------
// PRIVATE FUNCTION PROTOTYPES
//-------------------------------------------------
Ps newPs( void );
Pos newPos( const int pos, const int score );
void sortPsArray( Ps* array );
Pos* mergesortPs( Pos* left, const int num );
Pos* mergePs( Pos* left, Pos* right, const int lnum, const int rnum );
void freePs( Ps ps );


//-------------------------------------------------
// PUBLIC FUNCTIONS
//-------------------------------------------------
/*
 * This function allocates memory for and initializes a new
 * array of Ps structures, including one for each chromosome.
 * Outputs a pointer to the array.
*/
Ps* newPsArray( void )
{
	// Allocate memory for the array.
	Ps* array = safe_malloc( N_CHRS * sizeof(Ps) );

	// Allocate memory for a structure for each chromosome.
	for(int i = 0; i < N_CHRS; i++)
		array[i] = newPs();

	// Return the pointer.
	return array;
}

/*
 * This function inserts a read into the pos array corresponding
 * to its chromosome.  It inserts both the the start and stop
 * locations of the read separately.
 * ps: a pointer to the structure containing the pos array for the
 *	chromosome containing this read.
 * pos: the position of this read on this chromosome.
 * score: the score indicating whether this is a start or stop of a read.
*/
void insertPos( Ps ps, const int pos, const int score )
{
	// Insert these two positions into the array.
	ps->ps[ps->nps] = newPos( pos, score );
	ps->nps++;

	//  Allocate more memory if necessary.
	if( ps->nps + 1 >= ps->size )
	{
		ps->size *= 2;
		ps->ps = safe_realloc( ps->ps, ps->size * sizeof(Pos) );
	}
}

/*
 * This function prints to the sgr files.
 * array: a pointer to the array of ps structures.
*/
void fprintfSGR( Ps* array )
{
	sortPsArray( array );

	// Allocate memory for the string holding each filename.
	char filename[FILENAME_LENGTH];

	// This loop runs once for each chromosome.
	for(int cnum = MIN_CNUM; cnum <= MAX_CNUM; cnum++)
	{
		// Get a pointer to the ps structure for this chromosome.
		Ps ps = array[cnum-MIN_CNUM];

		// If there are no positions in this array, skip to the next
		// chromosome.
		if( ps->nps <= 0 )
		{
			freePs( ps );
			continue;
		}

		// Get the string representation (from chr1 to chrM)
		// for this chromosome.
		String cname = getCname( cnum );

		// Open this file for output.
		strcpy( filename, SGR_PREFIX );
		strcat( filename, cname);
		strcat( filename, SGR_SUFFIX );
		FILE* sgr = fopen( filename, "w" );

		// Read the first position from the array.
		int height = ps->ps[0]->score;
		int last_pos = ps->ps[0]->pos;

		// Free this pos structure.
		free( ps->ps[0] );

		// This loop runs over all pos structures in this array.	
		for(int i = 1; i < ps->nps; i++)
		{
			// Get a pointer to this position.
			Pos p = ps->ps[i];

			// If this position is equal to the last position,
			// increment the height and go to the next position.
			if( p->pos == last_pos )
			{
				height += p->score;
				free( p );
				continue;
			}

			// Otherwise, print the last position and the current
			// height to the sgr file, then record the current position.
			fprintf(sgr, "%s\t%d\t%d\n", cname, last_pos, height);
			height += p->score;
			last_pos = p->pos;

			// Free this position.
			free( p );
		}

		// Print the last position.
		fprintf(sgr, "%s\t%d\t%d\n", cname, last_pos, height);

		// Free this ps structure and close the file.
		freePs( ps );
		fclose( sgr );
	}

	// Free the array of ps structures and close the files.
	free( array );
}


//-------------------------------------------------
// PRIVATE FUNCTIONS
//-------------------------------------------------
/*
 * This function allocates memory for and initializes a new
 * Ps structure.
 * Outputs a pointer to the new structure.
*/
Ps newPs( void )
{
	// Allocate memory for the structure.
	Ps ps = safe_malloc( sizeof(*ps) );

	// Initialize the fields.
	ps->nps = 0;
	ps->size = 10;

	// Allocate memory for the array of positions.
	ps->ps = safe_malloc( ps->size * sizeof(struct pos) );

	// Return the pointer.
	return ps;
}

/*
 * This function allocates memory for and initializes a new pos structure.
 * pos: the position.
 * score: the score telling whether this is the start or stop of a read.
 * Outputs a pointer to the new structure.
*/
Pos newPos( const int pos, const int score )
{
	// Allocate memory for the structure.
	Pos p = safe_malloc( sizeof(*p) );

	// Initialize the fields.
	p->pos = pos;
	p->score = score;

	// Return the pointer.
	return p;
}

/*
 * This function sorts all the position within each ps structure
 * in an array of ps structures.
 * array: a pointer to the array of ps structures.
*/
void sortPsArray( Ps* array )
{
	// This loop runs once for each chromosome.
	for(int i = 0; i < N_CHRS; i++)
	{
		Pos* temp  = mergesortPs( array[i]->ps, array[i]->nps );
		free( array[i]->ps );
		array[i]->ps = temp;
	}
}

/*
 * This function implements a standard mergesort algorithm
 * on a ps array, sorting by position.
 * left: a pointer to the array to be sorted.
 * num: the number of elements in the array.
 * Outputs a pointer to the sorted array.
*/
Pos* mergesortPs( Pos* left, const int num )
{
	// If the array contains only one element, return an array
	// with just this element.
	if ( num <= 1)
	{
		Pos* res = safe_malloc( sizeof(*res) );
		res[0] = left[0];
		return res;
	}

	// Otherwise split the array in half and sort each half of the list.
	int half = num / 2;

	// Get a pointer to the right half of the list.
	Pos* right = &left[half];

	// Sort both halves of the array.
	left = mergesortPs( left, half );
	right = mergesortPs( right, num - half );

	// Merge the two sorted halves.
	return ( mergePs( left, right, half, num - half ) );
}

/*
 * This function merges two already sorted arrays into
 * a single sorted array.
 * left: a pointer to one of the arrays.
 * right: a pointer to the other array.
 * lnum: the number of elements in the left array.
 * rnum: the number of elements in the right array.
 * Outputs a pointer to the sorted array.
*/
Pos* mergePs( Pos* left, Pos* right, const int lnum, const int rnum )
{
	// The number of positions in the merged array must be calculated
	// in order to allocate enough storage in the new array to hold
	// pointers to every element in both of the input arrays.
	int num = lnum + rnum;
	Pos* result = safe_malloc( num * sizeof(*result) );

	// The index of the current element in the left array.
	int i = 0;
	// The index of the current element in the right array.
	int j = 0;

	// While these indices are within the bounds of their respective arrays.
	while ( (i < lnum) && (j < rnum) )
	{
		// If this position in the left array is less than this position in
		// the right array, the position in the left array comes next.
		if (left[i]->pos < right[j]->pos)
		{
			result[i+j] = left[i];
			i++;
		}
		// Otherwise, the current element in the right array should be
		// the next element in the result array.
		else
		{
			result[i+j] = right[j];
			j++;
		}
	}

	// Once the end of one of the arrays is reached, copy the remainder of the
	// other array.
	while (i < lnum)
	{
		result[i+j] = left[i];
		i++;
	}
	while (j < rnum)
	{
		result[i+j] = right[j];
		j++;
	}

	// Free the old arrays.
	free( left );
	free( right );

	// Store the resulting array in the left pointer.
	return result;
}

/*
 * This function frees memory associated with a ps structure except
 * for the individual pos structures within its array which are freed
 * within the fprintfSGR() function.
 * ps: a pointer to the structure to be freed.
*/
void freePs( Ps ps )
{
	free( ps->ps );
	free( ps );
}
