/* chromAnn - chomosome annotations, generic object to store annotations from
 * other formats */

/* Copyright (C) 2011 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
#include "common.h"
#include "chromAnn.h"
#include "binRange.h"
#include "rowReader.h"
#include "psl.h"
#include "bed.h"
#include "chain.h"
#include "genePred.h"
#include "coordCols.h"
#include "verbose.h"


static struct chromAnnBlk* chromAnnBlkNew(struct chromAnn *ca, int start, int end)
/* create new block object and add to chromAnn object */
{
struct chromAnnBlk* caBlk;
AllocVar(caBlk);
if (end < start)
    errAbort("invalid block coordinates for %s: start=%d end=%d", ca->name, start, end);

caBlk->ca = ca;;
caBlk->start = start;
caBlk->end = end;

if (ca->blocks == NULL)
    {
    ca->start = start;
    ca->end = end;
    }    
else
    {
    ca->start = min(ca->start, start);
    ca->end = max(ca->end, end);
    }
ca->totalSize += (end - start);
slAddHead(&ca->blocks, caBlk);
return caBlk;
}

static void chromAnnBlkFreeList(struct chromAnnBlk *blks)
/* free list of objects */
{
struct chromAnnBlk *blk;
while ((blk = slPopHead(&blks)) != NULL)
    freeMem(blk);
}

static struct chromAnn* chromAnnNew(char* chrom, char strand, char* name, void *rec,
                                    void (*recWrite)(struct chromAnn*, FILE *, char),
                                    void (*recFree)(struct chromAnn *))
/* create new object, ownership of rawCols is passed */
{
struct chromAnn* ca;
AllocVar(ca);
ca->chrom = cloneString(chrom);
ca->strand = strand;
if (name != NULL)
    ca->name = cloneString(name);
ca->start = 0;
ca->end = 0; 
ca->rec = rec;
ca->recWrite = recWrite;
ca->recFree = recFree;
return ca;
}

static int chromAnnBlkCmp(const void *va, const void *vb)
/* sort compare of two chromAnnBlk objects */
{
const struct chromAnnBlk *a = *((struct chromAnnBlk **)va);
const struct chromAnnBlk *b = *((struct chromAnnBlk **)vb);
int diff = a->start - b->start;
if (diff == 0)
    diff = a->end - b->end;
return diff;
}

static void chromAnnFinish(struct chromAnn* ca)
/* finish creation of a chromAnn after all blocks are added */
{
slSort(&ca->blocks, chromAnnBlkCmp);
}

void chromAnnFree(struct chromAnn **caPtr)
/* free an object */
{
struct chromAnn *ca = *caPtr;
if (ca != NULL)
    {
    ca->recFree(ca);
    freeMem(ca->chrom);
    freeMem(ca->name);
    chromAnnBlkFreeList(ca->blocks);
    freez(caPtr);
    }
}

int chromAnnTotalBlockSize(struct chromAnn* ca)
/* count the total bases in the blocks of a chromAnn */
{
int bases = 0;
struct chromAnnBlk *cab;
for (cab = ca->blocks; cab != NULL; cab = cab->next)
    bases += (cab->end - cab->start);
return bases;
}

static void strVectorWrite(struct chromAnn *ca, FILE *fh, char term)
/* write a chromAnn that is represented as a vector of strings */
{
char **cols = ca->rec;
assert(cols != NULL);
int i;
for (i = 0; cols[i] != NULL; i++)
    {
    if (i > 0)
        putc_unlocked('\t', fh);
    fputs(cols[i], fh);
    }
putc_unlocked(term, fh);
}

static void strVectorFree(struct chromAnn *ca)
/* free chromAnn data that is represented as a vector of strings */
{
freez(&ca->rec);
}

static void addBedBlocks(struct chromAnn* ca, unsigned opts, struct bed* bed)
/* add blocks from a bed */
{
int iBlk;
for (iBlk = 0; iBlk < bed->blockCount; iBlk++)
    {
    int start = bed->chromStart + bed->chromStarts[iBlk];
    int end = start + bed->blockSizes[iBlk];
    if (opts & chromAnnCds)
        {
        start = max(start, bed->thickStart);
        end = min(end, bed->thickEnd);
        }
    if (start < end)
        chromAnnBlkNew(ca, start, end);
    }
}

static struct chromAnn* chromAnnBedReaderRead(struct chromAnnReader *car)
/* read next BED and convert to a chromAnn */
{
struct rowReader *rr = car->data;
if (!rowReaderNext(rr))
    return NULL;
rowReaderExpectAtLeast(rr, 3);

char **rawCols = (car->opts & chromAnnSaveLines) ? rowReaderCloneColumns(rr) : NULL;
struct bed *bed = bedLoadN(rr->row, min(rr->numCols, rr->maxParsedCols));
struct chromAnn *ca = chromAnnNew(bed->chrom, bed->strand[0], bed->name, rawCols,
                                  strVectorWrite, strVectorFree);

if ((bed->blockCount == 0) || (car->opts & chromAnnRange))
    {
    if (car->opts & chromAnnCds)
        {
        if (bed->thickStart < bed->thickEnd)
            chromAnnBlkNew(ca, bed->thickStart, bed->thickEnd);
        }
    else
        chromAnnBlkNew(ca, bed->chromStart, bed->chromEnd);
    }
else
    addBedBlocks(ca, car->opts, bed);

chromAnnFinish(ca);
bedFree(&bed);
return ca;
}

static void chromAnnBedReaderFree(struct chromAnnReader **carPtr)
/* free object */
{
struct chromAnnReader *car = *carPtr;
if (car != NULL)
    {
    struct rowReader *rr = car->data;
    rowReaderFree(&rr);
    freez(carPtr);
    }
}

struct chromAnnReader *chromAnnBedReaderNew(char *fileName, unsigned opts,
                                            unsigned maxParsedCols)
/* construct a reader for a BED file */
{
struct chromAnnReader *car;
AllocVar(car);
car->caRead = chromAnnBedReaderRead;
car->carFree = chromAnnBedReaderFree;
car->opts = opts;
car->data = rowReaderOpen(fileName, maxParsedCols, FALSE);
return car;
}

static void addGenePredBlocks(struct chromAnn* ca, unsigned opts, struct genePred* gp)
/* add blocks from a genePred */
{
int iExon;
for (iExon = 0; iExon < gp->exonCount; iExon++)
    {
    int start = gp->exonStarts[iExon];
    int end = gp->exonEnds[iExon];
    if (opts & chromAnnCds)
        {
        start = max(start, gp->cdsStart);
        end = min(end, gp->cdsEnd);
        }
    if (start < end)
        chromAnnBlkNew(ca, start, end);
    }
}

static struct chromAnn* chromAnnGenePredReaderRead(struct chromAnnReader *car)
/* Read the next genePred row and create a chromAnn object row read from a
 * GenePred file or table.  If there is no CDS, and chromAnnCds is specified,
 * it will return a record with zero-length range.*/
{
struct rowReader *rr = car->data;
if (!rowReaderNext(rr))
    return NULL;
rowReaderExpectAtLeast(rr, GENEPRED_NUM_COLS);

char **rawCols = (car->opts & chromAnnSaveLines) ? rowReaderCloneColumns(rr) : NULL;
struct genePred *gp = genePredLoad(rr->row);
struct chromAnn* ca = chromAnnNew(gp->chrom, gp->strand[0], gp->name, rawCols,
                                  strVectorWrite, strVectorFree);

if (car->opts & chromAnnRange)
    {
    if (car->opts & chromAnnCds)
        {
        if (gp->cdsStart < gp->cdsEnd)
            chromAnnBlkNew(ca, gp->cdsStart, gp->cdsEnd);
        }
    else
        chromAnnBlkNew(ca, gp->txStart, gp->txEnd);
    }
else
    addGenePredBlocks(ca, car->opts, gp);

chromAnnFinish(ca);
genePredFree(&gp);
return ca;
}

static void chromAnnGenePredReaderFree(struct chromAnnReader **carPtr)
/* free object */
{
struct chromAnnReader *car = *carPtr;
if (car != NULL)
    {
    struct rowReader *rr = car->data;
    rowReaderFree(&rr);
    freez(carPtr);
    }
}

struct chromAnnReader *chromAnnGenePredReaderNew(char *fileName, unsigned opts)
/* construct a reader for a genePred file */
{
struct chromAnnReader *car;
AllocVar(car);
car->caRead = chromAnnGenePredReaderRead;
car->carFree = chromAnnGenePredReaderFree;
car->opts = opts;
car->data = rowReaderOpen(fileName, GENEPREDX_NUM_COLS, FALSE);
return car;
}

static void addPslBlocks(struct chromAnn* ca, unsigned opts, struct psl* psl)
/* add blocks from a psl */
{
boolean blkStrand = (opts & chromAnnUseQSide) ? pslQStrand(psl) : pslTStrand(psl);
int size = (opts & chromAnnUseQSide) ? psl->qSize : psl->tSize;
unsigned *blocks = (opts & chromAnnUseQSide) ? psl->qStarts : psl->tStarts;
boolean blkSizeMult = pslIsProtein(psl) ? 3 : 1;
int iBlk;
for (iBlk = 0; iBlk < psl->blockCount; iBlk++)
    {
    int start = blocks[iBlk];
    int end = start + (blkSizeMult * psl->blockSizes[iBlk]);
    if (blkStrand == '-')
        reverseIntRange(&start, &end, size);
    chromAnnBlkNew(ca, start, end);
    }
}

static char getPslTSideStrand(struct psl *psl)
/* get the strand to use for a PSL when doing target side overlaps */
{
if (psl->strand[1] != '\0')
    {
    // translated
    char strand = pslTStrand(psl);
    if (pslQStrand(psl) == '-')
        strand = (strand == '-') ? '+' : '-'; // query reverse complemented
    return strand;
    }
else
    return pslQStrand(psl);  // untranslated
}

static char getPslQSideStrand(struct psl *psl)
/* get the strand to use for a PSL when doing query side overlaps */
{
if (psl->strand[1] != '\0')
    {
    // translated
    char strand = pslQStrand(psl);
    if (pslTStrand(psl) == '-')
        strand = (strand == '-') ? '+' : '-'; // query reverse complemented
    return strand;
    }
else
    return pslTStrand(psl);  // untranslated
}

static struct chromAnn* chromAnnPslReaderRead(struct chromAnnReader *car)
/* read next chromAnn from a PSL file  */
{
struct rowReader *rr = car->data;
if (!rowReaderNext(rr))
    return NULL;
rowReaderExpectAtLeast(rr, PSL_NUM_COLS);

char **rawCols = (car->opts & chromAnnSaveLines) ? rowReaderCloneColumns(rr) : NULL;

struct psl *psl = pslLoad(rr->row);
struct chromAnn* ca;
if (car->opts & chromAnnUseQSide)
    ca = chromAnnNew(psl->qName, getPslQSideStrand(psl), psl->tName, rawCols,
                     strVectorWrite, strVectorFree);
else
    ca = chromAnnNew(psl->tName, getPslTSideStrand(psl), psl->qName, rawCols,
                     strVectorWrite, strVectorFree);

if (car->opts & chromAnnRange)
    {
    if (car->opts & chromAnnUseQSide)
        chromAnnBlkNew(ca, psl->qStart, psl->qEnd);
    else
        chromAnnBlkNew(ca, psl->tStart, psl->tEnd);
    }
else    
    addPslBlocks(ca, car->opts, psl);
chromAnnFinish(ca);
pslFree(&psl);
return ca;
}

static void chromAnnPslReaderFree(struct chromAnnReader **carPtr)
/* free object */
{
struct chromAnnReader *car = *carPtr;
if (car != NULL)
    {
    struct rowReader *rr = car->data;
    rowReaderFree(&rr);
    freez(carPtr);
    }
}

struct chromAnnReader *chromAnnPslReaderNew(char *fileName, unsigned opts)
/* construct a reader for a PSL file */
{
struct chromAnnReader *car;
AllocVar(car);
car->caRead = chromAnnPslReaderRead;
car->carFree = chromAnnPslReaderFree;
car->opts = opts;
car->data = rowReaderOpen(fileName, PSL_NUM_COLS, FALSE);
return car;
}

static void addChainQBlocks(struct chromAnn* ca, unsigned opts, struct chain* chain)
/* add query blocks from a chain */
{
struct cBlock *blk;
for (blk = chain->blockList; blk != NULL; blk = blk->next)
    {
    int start = blk->qStart;
    int end = blk->qEnd;
    if (chain->qStrand == '-')
        reverseIntRange(&start, &end, chain->qSize);
    chromAnnBlkNew(ca, start, end);
    }
}

static void addChainTBlocks(struct chromAnn* ca, unsigned opts, struct chain* chain)
/* add target blocks from a chain */
{
struct cBlock *blk;
for (blk = chain->blockList; blk != NULL; blk = blk->next)
    chromAnnBlkNew(ca, blk->tStart, blk->tEnd);
}

struct chromAnnChainReader
/* reader data for chain files */
{
    struct lineFile *lf;
};

static void chainRecWrite(struct chromAnn *ca, FILE *fh, char term)
/* write a chromAnn that is chain */
{
struct chain *chain = ca->rec;
assert(term == '\n');
chainWrite(chain, fh);
}

static void chainRecFree(struct chromAnn *ca)
/* free chromAnn chain data */
{
chainFree((struct chain**)&ca->rec);
}

static struct chromAnn* chromAnnChainReaderRead(struct chromAnnReader *car)
/* read a chromAnn object from a tab file or table */
{
struct chromAnnChainReader *carr = car->data;
struct chain *chain = chainRead(carr->lf);
if (chain == NULL)
    return NULL;

struct chromAnn* ca;
if (car->opts & chromAnnUseQSide)
    ca = chromAnnNew(chain->qName, '+', chain->tName,
                     ((car->opts & chromAnnSaveLines) ? chain : NULL),
                     chainRecWrite, chainRecFree);
else
    ca = chromAnnNew(chain->tName, chain->qStrand, chain->qName,
                     ((car->opts & chromAnnSaveLines) ? chain : NULL),
                     chainRecWrite, chainRecFree);

if (car->opts & chromAnnRange)
    {
    if (car->opts & chromAnnUseQSide)
        chromAnnBlkNew(ca, chain->qStart, chain->qEnd);
    else
        chromAnnBlkNew(ca, chain->tStart, chain->tEnd);
    }
else    
    {
    if (car->opts & chromAnnUseQSide)
        addChainQBlocks(ca, car->opts, chain);
    else
        addChainTBlocks(ca, car->opts, chain);
    }
chromAnnFinish(ca);
if (!(car->opts & chromAnnSaveLines))
    chainFree(&chain);
return ca;
}

static void chromAnnChainReaderFree(struct chromAnnReader **carPtr)
/* free object */
{
struct chromAnnReader *car = *carPtr;
if (car != NULL)
    {
    struct chromAnnChainReader *carr = car->data;
    lineFileClose(&carr->lf);
    freeMem(carr);
    freez(carPtr);
    }
}

struct chromAnnReader *chromAnnChainReaderNew(char *fileName, unsigned opts)
/* construct a reader for an arbitrary tab file. */
{
struct chromAnnChainReader *carr;
AllocVar(carr);
carr->lf = lineFileOpen(fileName, TRUE);

struct chromAnnReader *car;
AllocVar(car);
car->caRead = chromAnnChainReaderRead;
car->carFree = chromAnnChainReaderFree;
car->opts = opts;
car->data = carr;
return car;
}

struct chromAnnTabReader
/* reader data for tab files */
{
    struct coordCols  cols;  // column spec
    struct rowReader *rr;    // tab row reader
};

static struct chromAnn* chromAnnTabReaderRead(struct chromAnnReader *car)
/* read a chromAnn object from a tab file or table */
{
struct chromAnnTabReader *catr = car->data;
if (!rowReaderNext(catr->rr))
    return NULL;
rowReaderExpectAtLeast(catr->rr, catr->cols.minNumCols);

char **rawCols = (car->opts & chromAnnSaveLines) ? rowReaderCloneColumns(catr->rr) : NULL;
struct coordColVals colVals = coordColParseRow(&catr->cols, catr->rr);

struct chromAnn *ca = chromAnnNew(colVals.chrom, colVals.strand, colVals.name, rawCols,
                                  strVectorWrite, strVectorFree);
chromAnnBlkNew(ca, colVals.start, colVals.end);
coordColsValsRelease(&colVals);
return ca;
}

static void chromAnnTabReaderFree(struct chromAnnReader **carPtr)
/* free object */
{
struct chromAnnReader *car = *carPtr;
if (car != NULL)
    {
    struct chromAnnTabReader *catr = car->data;
    rowReaderFree(&catr->rr);
    freeMem(catr);
    freez(carPtr);
    }
}

struct chromAnnReader *chromAnnTabReaderNew(char *fileName, struct coordCols* cols, unsigned opts)
/* construct a reader for an arbitrary tab file. */
{
struct chromAnnTabReader *catr;
AllocVar(catr);
catr->cols = *cols;
catr->rr = rowReaderOpen(fileName, 0, FALSE);

struct chromAnnReader *car;
AllocVar(car);
car->caRead = chromAnnTabReaderRead;
car->carFree = chromAnnTabReaderFree;
car->opts = opts;
car->data = catr;
return car;
}

int chromAnnRefLocCmp(const void *va, const void *vb)
/* Compare location of two chromAnnRef objects. */
{
const struct chromAnnRef *a = *((struct chromAnnRef **)va);
const struct chromAnnRef *b = *((struct chromAnnRef **)vb);
int diff = strcmp(a->ref->chrom, b->ref->chrom);
if (diff == 0)
    diff = a->ref->start - b->ref->start;
if (diff == 0)
    diff = a->ref->end - b->ref->end;
return diff;
}
