/* xao.c - Manage cross-species alignments in Intronerator database. 
 *
 * This file is copyright 2002 Jim Kent, but license is hereby
 * granted for all use - public, private or commercial. */

#include "common.h"
#include "sig.h"
#include "xa.h"


void xaAliFree(struct xaAli *xa)
/* Free up a single xaAli. */
{
freeMem(xa->name);
freeMem(xa->query);
freeMem(xa->target);
freeMem(xa->qSym);
freeMem(xa->tSym);
freeMem(xa->hSym);
freeMem(xa);
}

void xaAliFreeList(struct xaAli **pXa)
/* Free up a list of xaAlis. */
{
struct xaAli *xa, *next;
for (xa = *pXa; xa != NULL; xa = next)
    {
    next = xa->next;
    xaAliFree(xa);
    }
*pXa = NULL;
}

int xaAliCmpTarget(const void *va, const void *vb)
/* Compare two xaAli's to sort by ascending target positions. */
{
const struct xaAli *a = *((struct xaAli **)va);
const struct xaAli *b = *((struct xaAli **)vb);
int diff;
if ((diff = strcmp(a->target, b->target)) == 0)
    diff = a->tStart - b->tStart;
return diff;
}


FILE *xaOpenVerify(char *fileName)
/* Open file, verify it's the right type, and
 * position file pointer for first xaReadNext(). */
{
FILE *f = mustOpen(fileName, "rb");
return f;
}

FILE *xaIxOpenVerify(char *fileName)
/* Open file, verify that it's a good xa index. */
{
FILE *f;
bits32 sig;
f = mustOpen(fileName, "rb");
mustReadOne(f, sig);
if (sig != xaoSig)
    errAbort("Bad signature on %s", fileName);
return f;
}

static void eatLf(FILE *f)
/* Read next char and make sure it's a lf. */
{
int c;
c = fgetc(f);
if (c == '\r')
    c = fgetc(f);
if (c != '\n')
    errAbort("Expecting new line in cross-species alignment file.");
}

static void eatThroughLf(FILE *f)
/* Read through next lf (discarding results). */
{
int c;
while ((c = fgetc(f)) != EOF)
    if (c == '\n')
        break;
}

/* An example line from .st file. 
   G11A11.SEQ.c1 align 53.9% of 6096 ACTIN2~1\G11A11.SEQ:0-4999 - v:9730780-9736763 +
         0         1     2    3   4             5               6        7          8
 */

struct xaAli *xaReadNext(FILE *f, boolean condensed)
/* Read next xaAli from file. If condensed
 * don't fill int query, target, qSym, tSym, or hSym. */
{
char line[512];
char *words[16];
int wordCount;
struct xaAli *xa;
char *parts[5];
int partCount;
double percentScore;
int symCount;
int newOffset = 0;
char *s, *e;

/* Get first line and parse out everything but the sym lines. */
if (fgets(line, sizeof(line), f) == NULL)
    return NULL;
wordCount = chopLine(line, words);
if (wordCount < 9)
    errAbort("Short line in cross-species alignment file");
if (wordCount == 10)
    newOffset = 1;
if (!sameString(words[1], "align"))
    errAbort("Bad line in cross-species alignment file");
AllocVar(xa);
xa->name = cloneString(words[0]);
s = words[5+newOffset];
e = strrchr(s, ':');
if (e == NULL)
    errAbort("Bad line (no colon) in cross-species alignment file");
*e++ = 0;
partCount = chopString(e, "-", parts, ArraySize(parts));
if (partCount != 2)
    errAbort("Bad range format in cross-species alignment file");
if (!condensed)
    xa->query = cloneString(s);
xa->qStart = atoi(parts[0]);
xa->qEnd = atoi(parts[1]);
xa->qStrand = words[6+newOffset][0];
partCount = chopString(words[7+newOffset], ":-", parts, ArraySize(parts));
if (!condensed)
    xa->target = cloneString(parts[0]);
xa->tStart = atoi(parts[1]);
xa->tEnd = atoi(parts[2]);
xa->tStrand = words[8+newOffset][0];
percentScore = atof(words[2]);
xa->milliScore = round(percentScore*10);    
xa->symCount = symCount = atoi(words[4]);

/* Get symbol lines. */
if (condensed)
    {
    eatThroughLf(f);
    eatThroughLf(f);
    eatThroughLf(f);
    }
else
    {
    xa->qSym = needMem(symCount+1);
    mustRead(f, xa->qSym, symCount);
    eatLf(f);

    xa->tSym = needMem(symCount+1);
    mustRead(f, xa->tSym, symCount);
    eatLf(f);

    xa->hSym = needMem(symCount+1);
    mustRead(f, xa->hSym, symCount);
    eatLf(f);
    }
return xa;
}

struct xaAli *xaRdRange(FILE *ix, FILE *data, 
    int start, int end, boolean condensed)
/* Return list of all xaAlis that range from start to end.  
 * Assumes that ix and data files are open. If condensed
 * don't fill int query, target, qSym, tSym, or hSym. */
{
int s, e;
int maxS, minE;
long offset;
struct xaAli *list = NULL, *xa;


/* Scan through index file looking for things in range.
 * When find one read it from data file and add it to list. */
fseek(ix, sizeof(bits32), SEEK_SET);
for (;;)
    {
    if (!readOne(ix, s))
        break;
    mustReadOne(ix, e);
    mustReadOne(ix, offset);
    if (s >= end)
        break;
    maxS = max(s, start);
    minE = min(e, end);
    if (minE - maxS > 0)
        {
        fseek(data, offset, SEEK_SET);
        xa = xaReadNext(data, condensed);
        slAddHead(&list, xa);
        }
    }

slReverse(&list);
return list;
}

struct xaAli *xaReadRange(char *rangeIndexFileName, char *dataFileName, 
    int start, int end, boolean condensed)
/* Return list of all xaAlis that range from start to end.  If condensed
 * don't fill int query, target, qSym, tSym, or hSym. */
{
FILE *ix = xaIxOpenVerify(rangeIndexFileName);
FILE *data = xaOpenVerify(dataFileName);
struct xaAli *xa = xaRdRange(ix, data, start, end, condensed);
fclose(data);
fclose(ix);
return xa;
}


char *xaAlignSuffix()
/* Return suffix of file with actual alignments. */
{
return ".st";
}

char *xaChromIxSuffix()
/* Return suffix of files that index xa's by chromosome position. */
{
return ".xao";
}



