/** pslToPileup -- Extract pileup locations from PSL, output as BED. */

/* Copyright (C) 2011 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */

#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "psl.h"
#include "bed.h"
#include "bits.h"
#include "options.h"


int minHits = 50;
int maxGap = 20;

static struct optionSpec optionSpecs[] =  
/* option for pslToPileup */
{
    {"minHits", OPTION_INT},
    {"maxGap", OPTION_INT},
    {"help", OPTION_BOOLEAN},
    {NULL, 0}
};

void usage()
/** provide usage information */
{
errAbort("pslToPileup: Extract pileup locations from PSL (target), output as BED 5+.\n"
	 "usage:\n"
	 "    pslToPileup [options] in.psl chrom.sizes out.bed\n"
	 "options:\n"
	 "    -minHits=N  Minimum number of alignments covering a given base in order for it\n"
	 "                to be considered part of a pileup.  Default: %d.\n"
	 "    -maxGap=N   Maximum number of consecutive bases within a pileup which may have\n"
	 "                fewer than minHits.  Default: %d.\n"
	 "    \n",
	 minHits, maxGap
	 );
} 

/** Allocating 4 bytes per base for entire chroms proved too big even for 
 * kolossus, so use short (and watch out for count overflow -- very unlikely, 
 * but just in case). */
#define MAXSHORT 0x7fff

short *getCountArr(struct hash *chromCounts, struct hash *chromSizes,
		 char *chrom, int chromSize)
/** Return the array of counts associated with chrom.  Hash sizes too. */
{
struct hashEl *el = hashLookup(chromCounts, chrom);
short *arr = el ? el->val : NULL;
if (arr == NULL)
    {
    arr = needLargeZeroedMem(chromSize * sizeof(short));
    hashAdd(chromCounts, chrom, arr);
    hashAddInt(chromSizes, chrom, chromSize);
    }
return arr;
}

void pslToPileup(char *pslFile, char *chromSizeFile, char *outBedName)
/** Extract pileup locations from PSL, output as BED. */
{
FILE *outBed = NULL;
struct hash *chromCounts = hashNew(8);
struct hash *chromSizes = hashNew(8);
struct hashCookie cookie;
struct hashEl *el = NULL;
struct lineFile *lf = pslFileOpen(pslFile);
struct psl *psl = NULL;

outBed = mustOpen(outBedName, "w");

while ((psl = pslNext(lf)) != NULL)
    {
    short *countArr = getCountArr(chromCounts, chromSizes, psl->tName,
				psl->tSize);
    int i=0, j=0;
    for (i=0;  i < psl->blockCount;  i++)
	{
	for (j=0;  j < psl->blockSizes[i];  j++)
	    {
	    //#*** if tStrand (strand[1]) is -, do we need to flip??
	    if (countArr[psl->tStarts[i]+j] < MAXSHORT)
		countArr[psl->tStarts[i]+j]++;
	    }
	}
    }

cookie = hashFirst(chromCounts);
while ((el = hashNext(&cookie)) != NULL)
    {
    char *chrom = el->name;
    short *countArr = (short *)(el->val);
    int i=0;
    boolean inPileup = FALSE;
    int pStart=0, pEnd=0;
    short pMin=MAXSHORT, pMax=0;
    int pTotal=0;
    int gapCount = 0;
    int chromSize = hashIntVal(chromSizes, chrom);
    for (i=0;  i < chromSize;  i++)
	{
	if (countArr[i] >= minHits)
	    {
	    if (! inPileup)
		pStart = i;
	    pEnd = i+1;
	    if (countArr[i] < pMin)
		pMin = countArr[i];
	    if (countArr[i] > pMax)
		pMax = countArr[i];
	    pTotal += countArr[i];
	    inPileup = TRUE;
	    }
	else if (inPileup)
	    {
	    gapCount++;
	    if (gapCount > maxGap)
		{
		fprintf(outBed, "%s\t%d\t%d\t%s:%d-%d\t%d\t%d\t%.1f\t%d\n",
			chrom, pStart, pEnd, chrom, pStart+1, pEnd,
			(int)(pTotal/(pEnd-pStart)),
			pMin, (double)pTotal/(pEnd-pStart), pMax);
		pStart = pEnd = pMax = pTotal = 0;
		pMin = MAXSHORT;
		gapCount = 0;
		inPileup = FALSE;
		}
	    }
	}
    if (inPileup)
	fprintf(outBed, "%s\t%d\t%d\t%s:%d-%d\t%d\t%d\t%.1f\t%d\n",
		chrom, pStart, pEnd, chrom, pStart+1, pEnd,
		(int)(pTotal/(pEnd-pStart)),
		pMin, (double)pTotal/(pEnd-pStart), pMax);
    }

lineFileClose(&lf);
carefulClose(&outBed);
}


int main(int argc, char* argv[])
{
optionInit(&argc, argv, optionSpecs);
if (argc != 4)
    usage();
if (optionExists("help"))
    usage();
minHits = optionInt("minHits", minHits);
maxGap = optionInt("maxGap", maxGap);
pslToPileup(argv[1], argv[2], argv[3]);
return 0;
}
    

