/* psl.c was originally generated by the autoSql program, which also 
 * generated as_psl.h and as_psl.sql.  This module links the database and the RAM 
 * representation of objects. 
 *
 * This file is copyright 2002 Jim Kent, but license is hereby
 * granted for all use - public, private or commercial. */

#include "common.h"
#include "sqlNum.h"
#include "sqlList.h"
#include "localmem.h"
#include "psl.h"
#include "hash.h"
#include "linefile.h"
#include "dnaseq.h"
#include "dystring.h"
#include "fuzzyFind.h"
#include "aliType.h"
#include "binRange.h"
#include "rangeTree.h"


struct psl *pslxLoad(char **row)
/* Load a psl from row fetched with select * from psl
 * from database.  Dispose of this with pslFree(). */
{
struct psl *ret = pslLoad(row);
int retSize;
sqlStringDynamicArray(row[21],&ret->qSequence, &retSize);
sqlStringDynamicArray(row[22],&ret->tSequence, &retSize);
return ret;
}

struct psl *pslLoad(char **row)
/* Load a psl from row fetched with select * from psl
 * from database.  Dispose of this with pslFree(). */
{
struct psl *ret;
int sizeOne;

AllocVar(ret);
ret->blockCount = sqlUnsigned(row[17]);
ret->match = sqlUnsigned(row[0]);
ret->misMatch = sqlUnsigned(row[1]);
ret->repMatch = sqlUnsigned(row[2]);
ret->nCount = sqlUnsigned(row[3]);
ret->qNumInsert = sqlUnsigned(row[4]);
ret->qBaseInsert = sqlSigned(row[5]);
ret->tNumInsert = sqlUnsigned(row[6]);
ret->tBaseInsert = sqlSigned(row[7]);
strcpy(ret->strand, row[8]);
ret->qName = cloneString(row[9]);
ret->qSize = sqlUnsigned(row[10]);
ret->qStart = sqlUnsigned(row[11]);
ret->qEnd = sqlUnsigned(row[12]);
ret->tName = cloneString(row[13]);
ret->tSize = sqlUnsigned(row[14]);
ret->tStart = sqlUnsigned(row[15]);
ret->tEnd = sqlUnsigned(row[16]);
sqlUnsignedDynamicArray(row[18], &ret->blockSizes, &sizeOne);
if (sizeOne != ret->blockCount)
    {
    printf("sizeOne bloxksizes %d bs %d block=%s\n",sizeOne, ret->blockCount,row[18]);
    }
assert(sizeOne == ret->blockCount);
sqlUnsignedDynamicArray(row[19], &ret->qStarts, &sizeOne);
if (sizeOne != ret->blockCount)
    {
    printf("sizeOne qStarts %d bs %d\n",sizeOne, ret->blockCount);
    }
assert(sizeOne == ret->blockCount);
sqlUnsignedDynamicArray(row[20], &ret->tStarts, &sizeOne);
if (sizeOne != ret->blockCount)
    {
    printf("sizeOne tStarts %d bs %d\n",sizeOne, ret->blockCount);
    }
assert(sizeOne == ret->blockCount);
return ret;
}

struct psl *pslCommaIn(char **pS, struct psl *ret)
/* Create a psl out of a comma separated string. 
 * This will fill in ret if non-null, otherwise will
 * return a new psl */
{
char *s = *pS;
int i;

if (ret == NULL)
    AllocVar(ret);
ret->match = sqlUnsignedComma(&s);
ret->misMatch = sqlUnsignedComma(&s);
ret->repMatch = sqlUnsignedComma(&s);
ret->nCount = sqlUnsignedComma(&s);
ret->qNumInsert = sqlUnsignedComma(&s);
ret->qBaseInsert = sqlSignedComma(&s);
ret->tNumInsert = sqlUnsignedComma(&s);
ret->tBaseInsert = sqlSignedComma(&s);
sqlFixedStringComma(&s, ret->strand, sizeof(ret->strand));
ret->qName = sqlStringComma(&s);
ret->qSize = sqlUnsignedComma(&s);
ret->qStart = sqlUnsignedComma(&s);
ret->qEnd = sqlUnsignedComma(&s);
ret->tName = sqlStringComma(&s);
ret->tSize = sqlUnsignedComma(&s);
ret->tStart = sqlUnsignedComma(&s);
ret->tEnd = sqlUnsignedComma(&s);
ret->blockCount = sqlUnsignedComma(&s);
s = sqlEatChar(s, '{');
AllocArray(ret->blockSizes, ret->blockCount);
for (i=0; i<ret->blockCount; ++i)
    {
    ret->blockSizes[i] = sqlUnsignedComma(&s);
    }
s = sqlEatChar(s, '}');
s = sqlEatChar(s, ',');
s = sqlEatChar(s, '{');
AllocArray(ret->qStarts, ret->blockCount);
for (i=0; i<ret->blockCount; ++i)
    {
    ret->qStarts[i] = sqlUnsignedComma(&s);
    }
s = sqlEatChar(s, '}');
s = sqlEatChar(s, ',');
s = sqlEatChar(s, '{');
AllocArray(ret->tStarts, ret->blockCount);
for (i=0; i<ret->blockCount; ++i)
    {
    ret->tStarts[i] = sqlUnsignedComma(&s);
    }
s = sqlEatChar(s, '}');
s = sqlEatChar(s, ',');
*pS = s;
return ret;
}

void pslFree(struct psl **pEl)
/* Free a single dynamically allocated psl such as created
 * with pslLoad(). */
{
struct psl *el;

if ((el = *pEl) == NULL) return;
freeMem(el->qName);
freeMem(el->tName);
freeMem(el->blockSizes);
freeMem(el->qStarts);
freeMem(el->tStarts);
if (el->qSequence)
    {
    freeMem(el->qSequence[0]);
    freeMem(el->qSequence);
    }
if (el->tSequence)
    {
    freeMem(el->tSequence[0]);
    freeMem(el->tSequence);
    }
freez(pEl);
}

void pslFreeList(struct psl **pList)
/* Free a list of dynamically allocated psl's */
{
struct psl *el, *next;

for (el = *pList; el != NULL; el = next)
    {
    next = el->next;
    pslFree(&el);
    }
*pList = NULL;
}

void pslOutputJson(struct psl *el, FILE *f) 
/* print out psl as a json array */
{
fputs("[", f);
fprintf(f, "%u,", el->match);
fprintf(f, "%u,", el->misMatch);
fprintf(f, "%u,", el->repMatch);
fprintf(f, "%u,", el->nCount);
fprintf(f, "%u,", el->qNumInsert);
fprintf(f, "%d,", el->qBaseInsert);
fprintf(f, "%u,", el->tNumInsert);
fprintf(f, "%d,", el->tBaseInsert);
fprintf(f, "\"%s\",", el->strand);
fprintf(f, "\"%s\",", el->qName);
fprintf(f, "%u,", el->qSize);
fprintf(f, "%u,", el->qStart);
fprintf(f, "%u,", el->qEnd);
fprintf(f, "\"%s\",", el->tName);
fprintf(f, "%u,", el->tSize);
fprintf(f, "%u,", el->tStart);
fprintf(f, "%u,", el->tEnd);
fprintf(f, "%u,", el->blockCount);

fputs("\"", f);
for (int i=0; i<el->blockCount; ++i)
    {
    fprintf(f, "%u", el->blockSizes[i]);
    if (i < el->blockCount-1)
        fputc(',', f);
    }
fputs("\",", f);

fputs("\"", f);
for (int i=0; i<el->blockCount; ++i)
    {
    fprintf(f, "%u", el->qStarts[i]);
    if (i < el->blockCount-1)
        fputc(',', f); // json does not allow trailing commas
    }
fputs("\",", f);

fputs("\"", f);
for (int i=0; i<el->blockCount; ++i)
    {
    fprintf(f, "%u", el->tStarts[i]);
    if (i < el->blockCount-1)
        fputc(',', f);
    }
fputs("\"", f);

if (el->qSequence)
    {
    fputc(',',f);
    fputc('[',f);
    for (int i=0; i<el->blockCount; ++i)
	{
	fprintf(f, "'%s'", el->qSequence[i]);
        if (i < el->blockCount-1)
            fputc(',', f);
	}
    fputc(']',f);
    fputc(',',f);

    fputc('[',f);
    for (int i=0; i<el->blockCount; ++i)
	{
	fprintf(f, "\"%s\"", el->tSequence[i]);
        if (i < el->blockCount-1)
            fputc(',', f);
	}
    fputc(']',f);
    }

if (ferror(f))
    {
    perror("Error writing psl file\n");
    errAbort("\n");
    }

fputs("]", f);
}

void pslOutput(struct psl *el, FILE *f, char sep, char lastSep) 
/* Print out psl.  Separate fields with sep. Follow last field with lastSep. */
{
int i;
fprintf(f, "%u", el->match);
fputc(sep,f);
fprintf(f, "%u", el->misMatch);
fputc(sep,f);
fprintf(f, "%u", el->repMatch);
fputc(sep,f);
fprintf(f, "%u", el->nCount);
fputc(sep,f);
fprintf(f, "%u", el->qNumInsert);
fputc(sep,f);
fprintf(f, "%d", el->qBaseInsert);
fputc(sep,f);
fprintf(f, "%u", el->tNumInsert);
fputc(sep,f);
fprintf(f, "%d", el->tBaseInsert);
fputc(sep,f);
if (sep == ',') fputc('"',f);
fprintf(f, "%s", el->strand);
if (sep == ',') fputc('"',f);
fputc(sep,f);
if (sep == ',') fputc('"',f);
fprintf(f, "%s", el->qName);
if (sep == ',') fputc('"',f);
fputc(sep,f);
fprintf(f, "%u", el->qSize);
fputc(sep,f);
fprintf(f, "%u", el->qStart);
fputc(sep,f);
fprintf(f, "%u", el->qEnd);
fputc(sep,f);
if (sep == ',') fputc('"',f);
fprintf(f, "%s", el->tName);
if (sep == ',') fputc('"',f);
fputc(sep,f);
fprintf(f, "%u", el->tSize);
fputc(sep,f);
fprintf(f, "%u", el->tStart);
fputc(sep,f);
fprintf(f, "%u", el->tEnd);
fputc(sep,f);
fprintf(f, "%u", el->blockCount);
fputc(sep,f);
if (sep == ',') fputc('{',f);
for (i=0; i<el->blockCount; ++i)
    {
    fprintf(f, "%u", el->blockSizes[i]);
    fputc(',', f);
    }
if (sep == ',') fputc('}',f);
fputc(sep,f);
if (sep == ',') fputc('{',f);
for (i=0; i<el->blockCount; ++i)
    {
    fprintf(f, "%u", el->qStarts[i]);
    fputc(',', f);
    }
if (sep == ',') fputc('}',f);
fputc(sep,f);
if (sep == ',') fputc('{',f);
for (i=0; i<el->blockCount; ++i)
    {
    fprintf(f, "%u", el->tStarts[i]);
    fputc(',', f);
    }
if (sep == ',') fputc('}',f);
if (el->qSequence)
    {
    fputc(sep,f);
    if (sep == ',') fputc('{',f);
    for (i=0; i<el->blockCount; ++i)
	{
	fprintf(f, "%s", el->qSequence[i]);
	fputc(',', f);
	}
    if (sep == ',') fputc('}',f);
    fputc(sep,f);
    if (sep == ',') fputc('{',f);
    for (i=0; i<el->blockCount; ++i)
	{
	fprintf(f, "%s", el->tSequence[i]);
	fputc(',', f);
	}
    if (sep == ',') fputc('}',f);
    }

fputc(lastSep,f);
if (ferror(f))
    {
    perror("Error writing psl file\n");
    errAbort("\n");
    }
}

/* ----- end autoSql generated part --------------- */

void pslOutputShort(struct psl *el, FILE *f, char sep, char lastSep) 
/* Print out psl.  Separate fields with sep. Follow last field with lastSep. */
{
fprintf(f, "%u", el->match);
fputc(sep,f);
fprintf(f, "%u", el->misMatch);
fputc(sep,f);
fprintf(f, "%u", el->repMatch);
fputc(sep,f);
fprintf(f, "%u", el->qNumInsert);
fputc(sep,f);
fprintf(f, "%d", el->qBaseInsert);
fputc(sep,f);
fprintf(f, "%u", el->tNumInsert);
fputc(sep,f);
fprintf(f, "%d", el->tBaseInsert);
fputc(sep,f);
if (sep == ',') fputc('"',f);
fprintf(f, "%s", el->strand);
if (sep == ',') fputc('"',f);
fputc(sep,f);
if (sep == ',') fputc('"',f);
fprintf(f, "%s", el->qName);
if (sep == ',') fputc('"',f);
fputc(sep,f);
fprintf(f, "%u", el->qStart);
fputc(sep,f);
fprintf(f, "%u", abs(el->qEnd - el->qStart));
fputc(sep,f);
if (sep == ',') fputc('"',f);
fprintf(f, "%s", el->tName);
if (sep == ',') fputc('"',f);
fputc(sep,f);
fprintf(f, "%u", el->tStart);
fputc(sep,f);
fprintf(f, "%u", abs(el->tEnd - el->tStart));
fputc(sep,f);
fprintf(f, "%u", el->blockCount);
fputc(sep,f);
if (sep == ',') fputc('{',f);
fputc(lastSep,f);
if (ferror(f))
    {
    perror("Error writing psl file\n");
    errAbort("\n");
    }
}

void pslOutFormat(struct psl *el, FILE *f, char sep, char lastSep) 
/* Print out selected psl values.  Separate fields with sep. Follow last field with lastSep. */
/* Prints out a better format with bold field headings followed by value */
/* Requires further upstream work to ensure that only the field headers */
/* declared here are printed if replacing an existing psl print function*/
{
const char *headers[] = {"Matches", "Mismatches", "Matches in repeats", "Number of N bases", "Query name", "Size", "Start", "End", "Chromosome", "Strand", "Start", "End"};
char *hformat = "<B>%s:</B> "; /* string for formatted print for headers */
char *uformat = "<B>%s:</B> %u%c"; /* string for formatted print for unsigned variable */
char *targName;

fprintf(f, uformat, headers[0], el->match, sep);
fprintf(f, uformat, headers[1], el->misMatch, sep);
fprintf(f, uformat, headers[2], el->repMatch, sep);
fprintf(f, uformat, headers[3], el->nCount, sep);

fprintf(f, hformat, headers[4]);
if (sep == ',') fputc('"',f);
fprintf(f, "%s", el->qName);
if (sep == ',') fputc('"',f);
fputc(sep,f);

fprintf(f, uformat, headers[5], el->qSize, sep);
fprintf(f, uformat, headers[6], el->qStart, sep);
fprintf(f, uformat, headers[7], el->qEnd, sep);

fprintf(f, hformat, headers[8]);
if (sep == ',') fputc('"',f);
/* skip leading 'chr' in string to get only chromosome part */
targName = el->tName;
if (startsWith("chr", el->tName))
   targName += 3;
fprintf(f, "%s", targName);

if (sep == ',') fputc('"',f);
fputc(sep,f);

fprintf(f, hformat, headers[9]);
if (sep == ',') fputc('"',f);
fprintf(f, "%s", el->strand);
if (sep == ',') fputc('"',f);
fputc(sep,f);

fprintf(f, uformat, headers[10], el->tStart, sep);
fprintf(f, uformat, headers[11], el->tEnd, sep);

fputc(lastSep,f);

if (ferror(f))
    {
    perror("Error writing psl file\n");
    errAbort("\n");
    }

}

struct psl *pslLoadAll(char *fileName)
/* Load all psl's in file. */
{
struct lineFile *lf = pslFileOpen(fileName);
struct psl *pslList = NULL, *psl;
while ((psl = pslNext(lf)) != NULL)
    {
    slAddHead(&pslList, psl);
    }
slReverse(&pslList);
lineFileClose(&lf);
return pslList;
}


int pslCmpQuery(const void *va, const void *vb)
/* Compare to sort based on query start. */
{
const struct psl *a = *((struct psl **)va);
const struct psl *b = *((struct psl **)vb);
int dif;
dif = strcmp(a->qName, b->qName);
if (dif == 0)
    dif = a->qStart - b->qStart;
return dif;
}

int pslCmpTarget(const void *va, const void *vb)
/* Compare to sort based on target start. */
{
const struct psl *a = *((struct psl **)va);
const struct psl *b = *((struct psl **)vb);
int dif;
dif = strcmp(a->tName, b->tName);
if (dif == 0)
    dif = a->tStart - b->tStart;
return dif;
}

int pslCmpTargetAndStrand(const void *va, const void *vb)
/* Compare to sort based on target, strand,  tStart. */
{
const struct psl *a = *((struct psl **)va);
const struct psl *b = *((struct psl **)vb);
int dif;
dif = strcmp(a->tName, b->tName);
if (dif == 0)
    dif = strcmp(a->strand, b->strand);
if (dif == 0)
    dif = a->tStart - b->tStart;
return dif;
}


int pslCmpScore(const void *va, const void *vb)
/* Compare to sort based on score (descending). */
{
const struct psl *a = *((struct psl **)va);
const struct psl *b = *((struct psl **)vb);
return pslScore(b) - pslScore(a);
}

int pslCmpQueryScore(const void *va, const void *vb)
/* Compare to sort based on query then score (descending). */
{
const struct psl *a = *((struct psl **)va);
const struct psl *b = *((struct psl **)vb);
int diff = strcmp(a->qName, b->qName);
if (diff == 0)
    diff = pslScore(b) - pslScore(a);
return diff;
}

int pslCmpMatch(const void *va, const void *vb)
/* Compare to sort based on match */
{
const struct psl *a = *((struct psl **)va);
const struct psl *b = *((struct psl **)vb);
return b->match - a->match;
}

static void pslLabelColumns(FILE *f)
/* Write column info. */
{
fputs("\n"
"match\tmis- \trep. \tN's\tQ gap\tQ gap\tT gap\tT gap\tstrand\tQ        \tQ   \tQ    \tQ  \tT        \tT   \tT    \tT  \tblock\tblockSizes \tqStarts\t tStarts\n"
"     \tmatch\tmatch\t   \tcount\tbases\tcount\tbases\t      \tname     \tsize\tstart\tend\tname     \tsize\tstart\tend\tcount\n" 
"---------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
f);
}

static void pslLabelColumnsJson(FILE *f) 
/* Write column info as a JSON array */
{
fputs("[\"matches\", \"misMatches\", \"repMatches\", \"nCount\", \"qNumInsert\", \"qBaseInsert\", "
        "\"tNumInsert\", \"tBaseInsert\", \"strand\", "
        "\"qName\", \"qSize\", \"qStart\", \"qEnd\", "
        "\"tName\", \"tSize\", \"tStart\", \"tEnd\", "
	"\"blockCount\", \"blockSizes\", \"qStarts\", \"tStarts\"]", f);
}

void pslxWriteHead(FILE *f, enum gfType qType, enum gfType tType)
/* Write header for extended (possibly protein) psl file. */
{
fprintf(f, "psLayout version 4 %s %s\n", gfTypeName(qType), gfTypeName(tType));
pslLabelColumns(f);
}

void pslWriteHead(FILE *f)
/* Write head of psl. */
{
fputs("psLayout version 3\n", f);
pslLabelColumns(f);
}

void pslWriteAll(struct psl *pslList, char *fileName, boolean writeHeader)
/* Write a psl file from list. */
{
FILE *f;
struct psl *psl;

f = mustOpen(fileName, "w");
if (writeHeader)
    pslWriteHead(f);
for (psl = pslList; psl != NULL; psl = psl->next)
    pslTabOut(psl, f);
fclose(f);
}

void pslWriteAllJson(struct psl *pslList, FILE *f, char *db, boolean writeHeader)
/* Write a psl file from list as a json array . */
{
fputs("{\n", f);
if (writeHeader) 
    {
    fputs("\"track\": \"blat\",\n", f);
    fprintf(f, "\"genome\": \"%s\",\n", db);
    fputs("\"fields\": ", f);
    pslLabelColumnsJson(f);
    fputs(",\n", f);
    }
fputs("\"blat\" : [\n", f);

for (struct psl *psl = pslList; psl; psl = psl->next)
    {
    pslOutputJson(psl, f);
    if (psl->next)
        fputs(",\n", f);
    }

puts("\n]\n}\n");
}

void pslxFileOpen(char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf)
/* Read header part of psl and make sure it's right.  Return
 * sequence types and file handle. */
{
char *line;
int lineSize;
char *words[30];
char *version;
int wordCount;
int i;
enum gfType qt = gftRna,  tt = gftDna;
struct lineFile *lf = lineFileOpen(fileName, TRUE);

if (!lineFileNext(lf, &line, &lineSize))
    warn("%s is empty", fileName);
else
    {
    if (startsWith("psLayout version", line))
	{
	wordCount = chopLine(line, words);
	if (wordCount < 3)
	    errAbort("%s is not a psLayout file", fileName);
	version = words[2];
	if (sameString(version, "3"))
	    {
	    }
	else if (sameString(version, "4"))
	    {
	    qt = gfTypeFromName(words[3]);
	    tt = gfTypeFromName(words[4]);
	    }
	else
	    {
	    errAbort("%s is version %s of psLayout, this program can only handle through version 4",
		fileName,  version);
	    }
	for (i=0; i<4; ++i)
	    {
	    if (!lineFileNext(lf, &line, &lineSize))
		errAbort("%s severely truncated", fileName);
	    }
	}
    else
	lineFileReuse(lf); 
    }
*retQueryType = qt;
*retTargetType = tt;
*retLf = lf;
}

static void pslxFileOpenWithMetaConfig(char *fileName, bool isMetaUnique, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f)
/* Read header part of psl and make sure it's right.  Return
 * sequence types and file handle and send meta data to output file f */
{
char *line;
int lineSize;
char *words[30];
char *version;
int wordCount;
int i;
enum gfType qt = gftRna,  tt = gftDna;
struct lineFile *lf = lineFileOpen(fileName, TRUE);

lineFileSetMetaDataOutput(lf, f);
if (isMetaUnique)
    lineFileSetUniqueMetaData(lf);
if (!lineFileNext(lf, &line, &lineSize))
    warn("%s is empty", fileName);
else
    {
    if (startsWith("psLayout version", line))
	{
	wordCount = chopLine(line, words);
	if (wordCount < 3)
	    errAbort("%s is not a psLayout file", fileName);
	version = words[2];
	if (sameString(version, "3"))
	    {
	    }
	else if (sameString(version, "4"))
	    {
	    qt = gfTypeFromName(words[3]);
	    tt = gfTypeFromName(words[4]);
	    }
	else
	    {
	    errAbort("%s is version %s of psLayout, this program can only handle through version 4",
		fileName,  version);
	    }
	for (i=0; i<4; ++i)
	    {
	    if (!lineFileNext(lf, &line, &lineSize))
		errAbort("%s severely truncated", fileName);
	    }
	}
    else
        {
	char *s = cloneString(line);
        boolean eof = FALSE;
        while ((line[0] == '#') && (!eof))
            {
            freeMem(s);
            if (!lineFileNext(lf, &line, &lineSize))
                eof = TRUE;
            s = cloneString(line);
            }
	wordCount = chopLine(s, words);
	if ((wordCount < 21 || wordCount > 23 || (words[8][0] != '+' && words[8][0] != '-')) && (!eof))
	    errAbort("%s is not a psLayout file", fileName);
	else
	    lineFileReuse(lf); 
	freeMem(s);
	}
    }
*retQueryType = qt;
*retTargetType = tt;
*retLf = lf;
}

void pslxFileOpenWithMeta(char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f)
/* Read header part of psl and make sure it's right.  Return
 * sequence types and file handle and send meta data to output file f */
{
pslxFileOpenWithMetaConfig(fileName, FALSE, retQueryType, retTargetType, retLf, f);
}

void pslxFileOpenWithUniqueMeta(char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f)
/* Read header part of psl and make sure it's right.  Return
 * sequence types and file handle and send only unique meta data to output f */
{
pslxFileOpenWithMetaConfig(fileName, TRUE, retQueryType, retTargetType, retLf, f);
}

struct lineFile *pslFileOpen(char *fileName)
/* Read header part of psl and make sure it's right. 
 * Return line file handle to it. */
{
enum gfType qt, tt;
struct lineFile *lf;
pslxFileOpen(fileName, &qt, &tt, &lf);
return lf;
}

struct lineFile *pslFileOpenWithMeta(char *fileName, FILE *f)
/* Read header part of psl and make sure it's right. 
 * Return line file handle to it. */
{
enum gfType qt, tt;
struct lineFile *lf;
pslxFileOpenWithMeta(fileName, &qt, &tt, &lf, f);
return lf;
}

struct lineFile *pslFileOpenWithUniqueMeta(char *fileName, FILE *f)
/* Read header part of psl and make sure it's right. 
 * Set flag to suppress duplicate header comments.
 * Return line file handle to it. */
{
enum gfType qt, tt;
struct lineFile *lf;
pslxFileOpenWithUniqueMeta(fileName, &qt, &tt, &lf, f);
return lf;
}

struct psl *pslNext(struct lineFile *lf)
/* Read next line from file and convert it to psl.  Return
 * NULL at eof. */
{
char *line;
int lineSize;
char *words[32];
int wordCount;
static int lineAlloc = 0;
static char *chopBuf = NULL;

if (!lineFileNextReal(lf, &line))
    {
    return NULL;
    }
lineSize = strlen(line);
if (lineSize >= lineAlloc)
    {
    lineAlloc = lineSize+256;
    chopBuf = needMoreMem(chopBuf, 0, lineAlloc);
    }
memcpy(chopBuf, line, lineSize+1);
wordCount = chopLine(chopBuf, words);
if (wordCount == 21)
    {
    return pslLoad(words);
    }
if (wordCount == 23)
    {
    return pslxLoad(words);
    }
else
    {
    errAbort("Bad line %d of %s wordCount is %d instead of 21 or 23\n", lf->lineIx, lf->fileName, wordCount);
    return NULL;
    }
}

struct psl *pslxLoadLm(char **row, struct lm *lm)
/* Load row into local memory pslx. */
{
struct psl *ret = pslLoadLm(row, lm);
ret->qSequence = lmAlloc(lm, sizeof(ret->qSequence[0]) * ret->blockCount);
sqlStringArray(lmCloneString(lm,row[21]),ret->qSequence, ret->blockCount);
ret->tSequence = lmAlloc(lm, sizeof(ret->tSequence[0]) * ret->blockCount);
sqlStringArray(lmCloneString(lm,row[22]),ret->tSequence, ret->blockCount);
return ret;
}

struct psl *pslLoadLm(char **row, struct lm *lm)
/* Load row into local memory psl. */
{
struct psl *ret;

ret = lmAlloc(lm, sizeof(*ret));
ret->blockCount = sqlUnsigned(row[17]);
ret->match = sqlUnsigned(row[0]);
ret->misMatch = sqlUnsigned(row[1]);
ret->repMatch = sqlUnsigned(row[2]);
ret->nCount = sqlUnsigned(row[3]);
ret->qNumInsert = sqlUnsigned(row[4]);
ret->qBaseInsert = sqlSigned(row[5]);
ret->tNumInsert = sqlUnsigned(row[6]);
ret->tBaseInsert = sqlSigned(row[7]);
strcpy(ret->strand, row[8]);
ret->qName = lmCloneString(lm,row[9]);
ret->qSize = sqlUnsigned(row[10]);
ret->qStart = sqlUnsigned(row[11]);
ret->qEnd = sqlUnsigned(row[12]);
ret->tName = lmCloneString(lm, row[13]);
ret->tSize = sqlUnsigned(row[14]);
ret->tStart = sqlUnsigned(row[15]);
ret->tEnd = sqlUnsigned(row[16]);
ret->blockSizes = lmAlloc(lm, sizeof(ret->blockSizes[0]) * ret->blockCount);
sqlUnsignedArray(row[18], ret->blockSizes, ret->blockCount);
ret->qStarts = lmAlloc(lm, sizeof(ret->qStarts[0]) * ret->blockCount);
sqlUnsignedArray(row[19], ret->qStarts, ret->blockCount);
ret->tStarts = lmAlloc(lm, sizeof(ret->tStarts[0]) * ret->blockCount);
sqlUnsignedArray(row[20], ret->tStarts, ret->blockCount);
return ret;
}

boolean pslIsProtein(const struct psl *psl)
/* is psl a protein psl (are it's blockSizes and scores in protein space) */
{
int lastBlock = psl->blockCount - 1;

return  (((psl->strand[1] == '+' ) &&
    (psl->tEnd == psl->tStarts[lastBlock] + 3*psl->blockSizes[lastBlock])) ||
   ((psl->strand[1] == '-') && 
    (psl->tStart == (psl->tSize-(psl->tStarts[lastBlock] + 3*psl->blockSizes[lastBlock])))));
}

int pslCalcMilliBad(struct psl *psl, boolean isMrna)
/* Calculate badness in parts per thousand. */
{
int sizeMul = pslIsProtein(psl) ? 3 : 1;
int qAliSize, tAliSize, aliSize;
int milliBad = 0;
int sizeDif;
int insertFactor;
int total;

qAliSize = sizeMul * (psl->qEnd - psl->qStart);
tAliSize = psl->tEnd - psl->tStart;
aliSize = min(qAliSize, tAliSize);
if (aliSize <= 0)
    return 0;
sizeDif = qAliSize - tAliSize;
if (sizeDif < 0)
    {
    if (isMrna)
	sizeDif = 0;
    else
	sizeDif = -sizeDif;
    }
insertFactor = psl->qNumInsert;
if (!isMrna)
    insertFactor += psl->tNumInsert;

total = (sizeMul * (psl->match + psl->repMatch + psl->misMatch));
if (total != 0)
    milliBad = (1000 * (psl->misMatch*sizeMul + insertFactor + round(3*log(1+sizeDif)))) / total;
return milliBad;
}

int pslScore(const struct psl *psl)
/* Return score for psl. */
{
int sizeMul = pslIsProtein(psl) ? 3 : 1;

return sizeMul * (psl->match + ( psl->repMatch>>1)) - 
	sizeMul * psl->misMatch - psl->qNumInsert - psl->tNumInsert;
}

int pslCmpScoreDesc(const void *va, const void *vb)
/* Compare to sort based on score. */
{
const struct psl *a = *((struct psl **)va);
const struct psl *b = *((struct psl **)vb);
return pslScore(b) - pslScore(a);
}


struct ffAli *pslToFakeFfAli(struct psl *psl, DNA *needle, DNA *haystack)
/* Convert from psl to ffAli format.  In some cases you can pass NULL
 * for needle and haystack - depending what the post-processing is going
 * to be. */
{
struct ffAli *ffList = NULL, *ff;
int blockCount = psl->blockCount;
unsigned *blockSizes = psl->blockSizes;
unsigned *qStarts = psl->qStarts;
unsigned *tStarts = psl->tStarts;
int size;
int i;

for (i=0; i<blockCount; ++i)
    {
    size = blockSizes[i];
    AllocVar(ff);
    ff->left = ffList;
    ffList = ff;
    ff->nStart = ff->nEnd = needle + qStarts[i];
    ff->nEnd += size;
    ff->hStart = ff->hEnd = haystack + tStarts[i];
    ff->hEnd += size;
    }
ffList = ffMakeRightLinks(ffList);
return ffList;
}

struct psl *pslFromFakeFfAli(struct ffAli *ff, 
	DNA *needle, DNA *haystack, char strand,
	char *qName, int qSize, char *tName, int tSize)
/* This will create a basic psl structure from a sorted series of ffAli
 * blocks.  The fields that would need actual sequence to be filled in
 * are left zero however - fields including match, repMatch, mismatch. */
{
struct psl *psl;
unsigned *blockSizes;
unsigned *qStarts;
unsigned *tStarts;
int blockCount;
int i;
int nStart, hStart;
int nEnd, hEnd;

AllocVar(psl);
psl->blockCount = blockCount = ffAliCount(ff);
psl->blockSizes = AllocArray(blockSizes, blockCount);
psl->qStarts = AllocArray(qStarts, blockCount);
psl->tStarts = AllocArray(tStarts, blockCount);
psl->qName = cloneString(qName);
psl->qSize = qSize;
psl->tName = cloneString(tName);
psl->tSize = tSize;
psl->strand[0] = strand;

for (i=0; i<blockCount; ++i)
    {
    nStart = ff->nStart - needle;
    nEnd = ff->nEnd - needle;
    hStart = ff->hStart - haystack;
    hEnd = ff->hEnd - haystack;
    blockSizes[i] = nEnd - nStart;
    qStarts[i] = nStart;
    tStarts[i] = hStart;
    if (i == 0)
       {
       psl->qStart = nStart;
       psl->tStart = hStart;
       }
    if (i == blockCount-1)
       {
       psl->qEnd = nEnd;
       psl->tEnd = hEnd;
       }
    ff = ff->right;
    }
if (strand == '-')
    {
    reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize);
    }
return psl;
}

struct ffAli *pslToFfAli(struct psl *psl, struct dnaSeq *query, struct dnaSeq *target,
	int targetOffset)
/* Convert from psl to ffAli format.  Clip to parts that we actually
 * have sequence for. */
{
struct ffAli *ffList = NULL, *ff;
DNA *needle = query->dna;
DNA *haystack = target->dna;
int blockCount = psl->blockCount;
unsigned *blockSizes = psl->blockSizes;
unsigned *qStarts = psl->qStarts;
unsigned *tStarts = psl->tStarts;
int size;
int i;
int tMin = targetOffset;
int tMax = targetOffset + target->size;
int tStart, tEnd;
int clipStart, clipEnd, clipOffset, clipSize;

for (i=0; i<blockCount; ++i)
    {
    clipStart = tStart = tStarts[i];
    size = blockSizes[i];
    clipEnd = tEnd = tStart + size;
    if (tStart < tMax && tEnd > tMin)
	{
	if (clipStart < tMin) clipStart = tMin;
	if (clipEnd > tMax) clipEnd = tMax;
	clipOffset = clipStart - tStart;
	clipSize = clipEnd - clipStart;
	AllocVar(ff);
	ff->left = ffList;
	ffList = ff;
	ff->nStart = ff->nEnd = needle + qStarts[i] + clipOffset;
	ff->nEnd += clipSize;
	ff->hStart = ff->hEnd = haystack + clipStart - targetOffset;
	ff->hEnd += clipSize;
	}
    }
ffList = ffMakeRightLinks(ffList);
ffCountGoodEnds(ffList);
return ffList;
}

int pslOrientation(struct psl *psl)
/* Translate psl strand + or - to orientation +1 or -1 */
{
if (psl->strand[1] != '\0')
    {
    /* translated blat */
    if (psl->strand[0] != psl->strand[1])
        return -1;
    else
        return 1;
    }
else
    {
    if (psl->strand[0] == '-')
        return -1;
    else
        return 1;
    }
}

int pslWeightedIntronOrientation(struct psl *psl, struct dnaSeq *genoSeq, int offset)
/* Return >0 if introns make it look like alignment is on + strand,
 *        <0 if introns make it look like alignment is on - strand,
 *        0 if can't tell.  The absolute value of the return indicates
 * how many splice sites we've seen supporting the orientation.
 * Sequence should NOT be reverse complemented.  */
{
int intronDir = 0;
int oneDir;
int i;
DNA *dna = genoSeq->dna;

/* code below doesn't support negative target strand (translated blat) */
if (psl->strand[1] == '-')
    errAbort("pslWeightedIntronOrientation doesn't support a negative target strand");

for (i=1; i<psl->blockCount; ++i)
    {
    int iStart, iEnd, blockSize = psl->blockSizes[i-1];
    if (psl->qStarts[i-1] + blockSize == psl->qStarts[i])
	{
	iStart = psl->tStarts[i-1] + psl->blockSizes[i-1] - offset;
	iEnd = psl->tStarts[i] - offset;
	oneDir = intronOrientation(dna+iStart, dna+iEnd);
	intronDir += oneDir;
	}
    }
return intronDir;
}

int pslIntronOrientation(struct psl *psl, struct dnaSeq *genoSeq, int offset)
/* Return 1 if introns make it look like alignment is on + strand,
 *       -1 if introns make it look like alignment is on - strand,
 *        0 if can't tell.
 * Sequence should NOT be reverse complemented.  */
{
int intronDir = pslWeightedIntronOrientation(psl, genoSeq, offset);
if (intronDir < 0)
    intronDir = -1;
else if (intronDir > 0)
    intronDir = 1;
return intronDir;
}

boolean pslHasIntron(struct psl *psl, struct dnaSeq *seq, int seqOffset)
/* Return TRUE if there's a probable intron. Sequence should NOT be
 * reverse complemented.*/
{
int blockCount = psl->blockCount, i;
unsigned *tStarts = psl->tStarts;
unsigned *blockSizes = psl->blockSizes;
unsigned *qStarts = psl->qStarts;
int blockSize, start, end;
DNA *dna = seq->dna;

for (i=1; i<blockCount; ++i)
    {
    blockSize = blockSizes[i-1];
    start = qStarts[i-1]+blockSize;
    end = qStarts[i];
    if (start == end)
        {
        start = tStarts[i-1] + blockSize;
        end = tStarts[i];
        if (psl->strand[1] == '-')
            reverseIntRange(&start, &end, psl->tSize);
        start -= seqOffset;
        end -= seqOffset;
	if (intronOrientation(dna+start, dna+end) != 0)
	    return TRUE;
	}
    }
return FALSE;
}

void pslTailSizes(struct psl *psl, int *retStartTail, int *retEndTail)
/* Find the length of "tails" (rather than extensions) implied by psl. */
{
int orientation = pslOrientation(psl);
int qFloppyStart, qFloppyEnd;
int tFloppyStart, tFloppyEnd;

if (orientation > 0)
    {
    qFloppyStart = psl->qStart;
    qFloppyEnd = psl->qSize - psl->qEnd;
    }
else
    {
    qFloppyStart = psl->qSize - psl->qEnd;
    qFloppyEnd = psl->qStart;
    }
tFloppyStart = psl->tStart;
tFloppyEnd = psl->tSize - psl->tEnd;
*retStartTail = min(qFloppyStart, tFloppyStart);
*retEndTail = min(qFloppyEnd, tFloppyEnd);
}

static void rcSeqs(char **seqs, unsigned blockCount, unsigned *blockSizes)
/* reverses complement sequences in list, maintain property that all strings
 * are in one malloc block.   blockSizes should already be reversed. */
{
char *buf, *next;
int i, memSz = 0;

/* get a new memory block for strings */
for (i = 0; i < blockCount; i++)
    memSz += blockSizes[i]+1;
next = buf = needLargeMem(memSz);

/* reverse compliment and copy to new memory block */
for (i = blockCount-1; i >= 0; i--)
    {
    int len = strlen(seqs[i]);
    reverseComplement(seqs[i], len);
    memcpy(next, seqs[i], len+1);
    next += len+1;
    }

/* swap memory and update pointers */
freeMem(seqs[0]);
seqs[0] = buf;
next = buf;

for (i = 0; i < blockCount; i++)
    {
    seqs[i] = next;
    next += blockSizes[i]+1;
    }
}

void pslRc(struct psl *psl)
/* Reverse-complement a PSL alignment.  This makes the target strand explicit. */
{
unsigned tSize = psl->tSize, qSize = psl->qSize;
unsigned blockCount = psl->blockCount, i;
unsigned *tStarts = psl->tStarts, *qStarts = psl->qStarts, *blockSizes = psl->blockSizes;
int mult = pslIsProtein(psl) ? 3 : 1;

/* swap strand, forcing target to have an explict strand */
psl->strand[0] = (psl->strand[0] != '-') ? '-' : '+';
psl->strand[1] = (psl->strand[1] != '-') ? '-' : '+';
psl->strand[2] = 0;

for (i=0; i<blockCount; ++i)
    {
    tStarts[i] = tSize - (tStarts[i] + mult * blockSizes[i]);
    qStarts[i] = qSize - (qStarts[i] + blockSizes[i]);
    }
reverseUnsigned(tStarts, blockCount);
reverseUnsigned(qStarts, blockCount);
reverseUnsigned(blockSizes, blockCount);
if (psl->qSequence != NULL)
    {
    rcSeqs(psl->qSequence, blockCount, blockSizes);
    rcSeqs(psl->tSequence, blockCount, blockSizes);
    }
}


/* macro to swap to variables */
#define swapVars(a, b, tmp) ((tmp) = (a), (a) = (b), (b) = (tmp))

static void swapBlocks(struct psl *psl)
/* Swap the blocks in a psl without reverse complementing them. */
{
int i;
unsigned utmp;
char *stmp; 
for (i = 0; i < psl->blockCount; i++)
    {
    swapVars(psl->qStarts[i], psl->tStarts[i], utmp);
    if (psl->qSequence != NULL)
        swapVars(psl->qSequence[i], psl->tSequence[i], stmp);
    }
}

static void swapRCBlocks(struct psl *psl)
/* Swap and reverse complement blocks in a psl. Other psl fields must
 * be modified first */
{
int i;
unsigned *uatmp;
char **satmp;
reverseUnsigned(psl->tStarts, psl->blockCount);
reverseUnsigned(psl->qStarts, psl->blockCount);
reverseUnsigned(psl->blockSizes, psl->blockCount);
swapVars(psl->tStarts, psl->qStarts, uatmp);

/* qSize and tSize have already been swapped */
for (i = 0; i < psl->blockCount; i++)
    {
    psl->qStarts[i] = psl->qSize - (psl->qStarts[i] + psl->blockSizes[i]);
    psl->tStarts[i] = psl->tSize - (psl->tStarts[i] + psl->blockSizes[i]);
    }
if (psl->qSequence != NULL)
    {
    /* note: all block sequences are stored in one malloc block, which is
     * entry zero */
    rcSeqs(psl->qSequence, psl->blockCount, psl->blockSizes);
    rcSeqs(psl->tSequence, psl->blockCount, psl->blockSizes);
    swapVars(psl->qSequence, psl->tSequence, satmp);
    }
}

void pslSwap(struct psl *psl, boolean noRc)
/* swap query and target in psl.  If noRc is TRUE, don't reverse-complement
 * PSL if needed, instead make target strand explict. */
{
int itmp;
unsigned utmp;
char ctmp, *stmp; 
swapVars(psl->qBaseInsert, psl->tBaseInsert, utmp);
swapVars(psl->tNumInsert, psl->qNumInsert, utmp);
swapVars(psl->qName, psl->tName, stmp);
swapVars(psl->qSize, psl->tSize, utmp);
swapVars(psl->qStart, psl->tStart, itmp);
swapVars(psl->qEnd, psl->tEnd, itmp);

/* handle strand and block copy */
if (psl->strand[1] != '\0')
    {
    /* translated */
    swapVars(psl->strand[0], psl->strand[1], ctmp);
    swapBlocks(psl);
    }
else if (noRc)
    {
    /* untranslated with no reverse complement */
    psl->strand[1] = psl->strand[0];
    psl->strand[0] = '+';
    swapBlocks(psl);
    }
else
    {
    /* untranslated */
    if (psl->strand[0] == '+')
        swapBlocks(psl);
    else
        swapRCBlocks(psl);
    }
}

void pslTargetOffset(struct psl *psl, int offset)
/* Add offset to target positions in psl. */
{
int i, blockCount = psl->blockCount;
unsigned *tStarts = psl->tStarts;
psl->tStart += offset;
psl->tEnd += offset;
for (i=0; i<blockCount; ++i)
   tStarts[i] += offset;
}

void pslDump(struct psl *psl, FILE *f)
/* Dump most of PSL to file - for debugging. */
{
int i;
fprintf(f, "<PRE>\n");
fprintf(f, "psl %s:%d-%d %s %s:%d-%d %d\n", 
	psl->qName, psl->qStart, psl->qEnd, psl->strand,
	psl->tName, psl->tStart, psl->tEnd, psl->blockCount);
for (i=0; i<psl->blockCount; ++i)
    fprintf(f, "  size %d, qStart %d, tStart %d\n", 
    	psl->blockSizes[i], psl->qStarts[i], psl->tStarts[i]);
fprintf(f, "</PRE>");
}

void pslRecalcBounds(struct psl *psl)
/* Calculate qStart/qEnd tStart/tEnd at top level to be consistent
 * with blocks. */
{
int qStart, qEnd, tStart, tEnd, size;
int last = psl->blockCount - 1;
qStart = psl->qStarts[0];
tStart = psl->tStarts[0];
size = psl->blockSizes[last];
qEnd = psl->qStarts[last] + size;
tEnd = psl->tStarts[last] + size;
if (psl->strand[0] == '-')
    reverseIntRange(&qStart, &qEnd, psl->qSize);
if (psl->strand[1] == '-')
    reverseIntRange(&tStart, &tEnd, psl->tSize);
psl->qStart = qStart;
psl->qEnd = qEnd;
psl->tStart = tStart;
psl->tEnd = tEnd;
}

void pslRecalcMatchCounts(struct psl *psl)
/* Update the match/mismatch counts in PSL, assuming everything is a match. */
{
psl->match = psl->misMatch = psl->repMatch = psl->nCount = 0;
for (int iBlk = 0; iBlk < psl->blockCount; iBlk++)
    psl->match += psl->blockSizes[iBlk];
}
    
struct psl *pslTrimToTargetRange(struct psl *oldPsl, int tMin, int tMax)
/* Return psl trimmed to fit inside tMin/tMax.  Note this does not
 * update the match/misMatch and related fields. */
{
int newSize;
int oldBlockCount = oldPsl->blockCount;
boolean tIsRc = (oldPsl->strand[1] == '-');
int newBlockCount = 0, completeBlockCount = 0;
int i;
struct psl *newPsl = NULL;
int tMn = tMin, tMx = tMax;   /* tMin/tMax adjusted for strand. */

/* Deal with case where we're completely trimmed out quickly. */
newSize = rangeIntersection(oldPsl->tStart, oldPsl->tEnd, tMin, tMax);
if (newSize <= 0)
    return NULL;

if (tIsRc)
    reverseIntRange(&tMn, &tMx, oldPsl->tSize);

/* Count how many blocks will survive trimming. */
oldBlockCount = oldPsl->blockCount;
for (i=0; i<oldBlockCount; ++i)
    {
    int s = oldPsl->tStarts[i];
    int e = s + oldPsl->blockSizes[i];
    int sz = e - s;
    int overlap;
    if ((overlap = rangeIntersection(s, e, tMn, tMx)) > 0)
        ++newBlockCount;
    if (overlap == sz)
        ++completeBlockCount;
    }

if (newBlockCount == 0)
    return NULL;

/* Allocate new psl and fill in what we already know. */
AllocVar(newPsl);
strcpy(newPsl->strand, oldPsl->strand);
newPsl->qName = cloneString(oldPsl->qName);
newPsl->qSize = oldPsl->qSize;
newPsl->tName = cloneString(oldPsl->tName);
newPsl->tSize = oldPsl->tSize;
newPsl->blockCount = newBlockCount;
AllocArray(newPsl->blockSizes, newBlockCount);
AllocArray(newPsl->qStarts, newBlockCount);
AllocArray(newPsl->tStarts, newBlockCount);

/* Fill in blockSizes, qStarts, tStarts with real data. */
newBlockCount = completeBlockCount = 0;
for (i=0; i<oldBlockCount; ++i)
    {
    int oldSz = oldPsl->blockSizes[i];
    int sz = oldSz;
    int tS = oldPsl->tStarts[i];
    int tE = tS + sz;
    int qS = oldPsl->qStarts[i];
    int qE = qS + sz;
    if (rangeIntersection(tS, tE, tMn, tMx) > 0)
        {
	int diff;
	if ((diff = (tMn - tS)) > 0)
	    {
	    tS += diff;
	    qS += diff;
	    sz -= diff;
	    }
	if ((diff = (tE - tMx)) > 0)
	    {
	    tE -= diff;
	    qE -= diff;
	    sz -= diff;
	    }
	newPsl->qStarts[newBlockCount] = qS;
	newPsl->tStarts[newBlockCount] = tS;
	newPsl->blockSizes[newBlockCount] = sz;
	++newBlockCount;
	if (sz == oldSz)
	    ++completeBlockCount;
	}
    }
pslRecalcBounds(newPsl);
return newPsl;
}

struct psl *pslTrimToQueryRange(struct psl *oldPsl, int qMin, int qMax)
/* Return psl trimmed to fit inside qMin/qMax.  Note this does not
 * update the match/misMatch and related fields. */
{
int newSize;
int oldBlockCount = oldPsl->blockCount;
boolean qIsRc = (oldPsl->strand[0] == '-');
int newBlockCount = 0, completeBlockCount = 0;
int i;
struct psl *newPsl = NULL;
int qMn = qMin, qMx = qMax;   /* qMin/qMax adjusted for strand. */

/* Deal with case where we're completely trimmed out quickly. */
newSize = rangeIntersection(oldPsl->qStart, oldPsl->qEnd, qMin, qMax);
if (newSize <= 0)
    return NULL;

if (qIsRc)
    reverseIntRange(&qMn, &qMx, oldPsl->qSize);

/* Count how many blocks will survive trimming. */
oldBlockCount = oldPsl->blockCount;
for (i=0; i<oldBlockCount; ++i)
    {
    int s = oldPsl->qStarts[i];
    int e = s + oldPsl->blockSizes[i];
    int sz = e - s;
    int overlap;
    if ((overlap = rangeIntersection(s, e, qMn, qMx)) > 0)
        ++newBlockCount;
    if (overlap == sz)
        ++completeBlockCount;
    }

if (newBlockCount == 0)
    return NULL;

/* Allocate new psl and fill in what we already know. */
AllocVar(newPsl);
strcpy(newPsl->strand, oldPsl->strand);
newPsl->qName = cloneString(oldPsl->qName);
newPsl->qSize = oldPsl->qSize;
newPsl->tName = cloneString(oldPsl->tName);
newPsl->tSize = oldPsl->tSize;
newPsl->blockCount = newBlockCount;
AllocArray(newPsl->blockSizes, newBlockCount);
AllocArray(newPsl->qStarts, newBlockCount);
AllocArray(newPsl->tStarts, newBlockCount);

/* Fill in blockSizes, qStarts, tStarts with real data. */
newBlockCount = completeBlockCount = 0;
for (i=0; i<oldBlockCount; ++i)
    {
    int oldSz = oldPsl->blockSizes[i];
    int sz = oldSz;
    int qS = oldPsl->qStarts[i];
    int qE = qS + sz;
    int tS = oldPsl->tStarts[i];
    int tE = tS + sz;
    if (rangeIntersection(qS, qE, qMn, qMx) > 0)
        {
	int diff;
	if ((diff = (qMn - qS)) > 0)
	    {
	    tS += diff;
	    qS += diff;
	    sz -= diff;
	    }
	if ((diff = (qE - qMx)) > 0)
	    {
	    tE -= diff;
	    qE -= diff;
	    sz -= diff;
	    }
	newPsl->qStarts[newBlockCount] = qS;
	newPsl->tStarts[newBlockCount] = tS;
	newPsl->blockSizes[newBlockCount] = sz;
	++newBlockCount;
	if (sz == oldSz)
	    ++completeBlockCount;
	}
    }
pslRecalcBounds(newPsl);
return newPsl;
}

static void printPslDesc(char* pslDesc, FILE* out, struct psl* psl)
/* print description of a PSL on first error */
{
fprintf(out, "Error: invalid PSL: %s:%u-%u %s:%u-%u %s %s\n",
        psl->qName, psl->qStart, psl->qEnd,
        psl->tName, psl->tStart, psl->tEnd,
        psl->strand, pslDesc);
}


static void chkError(char* pslDesc, FILE* out, struct psl* psl, int* errCount, char* format, ...)
/* forward needed to specify printf signature for gcc checking */
#if defined(__GNUC__)
__attribute__((format(printf, 5, 6)))
#endif
;

static void chkError(char* pslDesc, FILE* out, struct psl* psl, int* errCount, char* format, ...)
/* error handling on an pslCheck error, counting error and issuing description
 * of PSL on the first error. */
{
if (*errCount == 0)
    printPslDesc(pslDesc, out, psl);
va_list args;
va_start(args, format);
vfprintf(out, format, args);
va_end(args);
(*errCount)++;
}

static void chkBlkRanges(char* pslDesc, FILE* out, struct psl* psl,
                         char* pName, char* pLabel, char pCLabel, char pStrand,
                         unsigned pSize, unsigned pStart, unsigned pEnd,
                         unsigned iBlk, unsigned* pBlockStarts, int* errCount)
/* check the target or query ranges in a PSL incrementing errorCnt */
{
unsigned blkStart = pBlockStarts[iBlk];
unsigned blkEnd = blkStart+psl->blockSizes[iBlk];
/* translate stand to genomic coords */
unsigned gBlkStart = (pStrand == '+') ? blkStart : (pSize - blkEnd);
unsigned gBlkEnd = (pStrand == '+') ? blkEnd : (pSize - blkStart);

if ((pSize > 0) && (blkEnd > pSize))
    chkError(pslDesc, out, psl, errCount,
             "\t%s %s block %u end %u > %cSize %u\n",
             pName, pLabel, iBlk, blkEnd, pCLabel, pSize);
if (gBlkStart < pStart)
    chkError(pslDesc, out, psl, errCount,
             "\t%s %s block %u start %u < %cStart %u\n",
             pName, pLabel, iBlk, gBlkStart, pCLabel, pStart);
if (gBlkStart >= pEnd)
    chkError(pslDesc, out, psl, errCount,
             "\t%s %s block %u start %u >= %cEnd %u\n",
             pName, pLabel, iBlk, gBlkStart, pCLabel, pEnd);
if (gBlkEnd < pStart)
    chkError(pslDesc, out, psl, errCount,
             "\t%s %s block %u end %u < %cStart %u\n",
             pName, pLabel, iBlk, gBlkEnd, pCLabel, pStart);
if (gBlkEnd > pEnd)
    chkError(pslDesc, out, psl, errCount,
             "\t%s %s block %u end %u > %cEnd %u\n",
             pName, pLabel, iBlk, gBlkEnd, pCLabel, pEnd);
if (iBlk > 0)
    {
    unsigned prevBlkEnd = pBlockStarts[iBlk-1]+psl->blockSizes[iBlk-1];
    if (blkStart < prevBlkEnd)
        chkError(pslDesc, out, psl, errCount,
                 "\t%s %s block %u start %u < previous block end %u\n",
                 pName, pLabel, iBlk, blkStart, prevBlkEnd);
    }
}

static void chkRanges(char* pslDesc, FILE* out, struct psl* psl,
                      char* pName, char* pLabel, char pCLabel, char pStrand,
                      unsigned pSize, unsigned pStart, unsigned pEnd,
                      unsigned* pBlockStarts, int blockSizeMult, int* errCount)
/* check the target or query ranges in a PSL, increment errorCnt */
{
unsigned iBlk;
if (pStart >= pEnd)
    chkError(pslDesc, out, psl, errCount,
             "\t%s %cStart %u >= %cEnd %u\n",
             pName, pCLabel, pStart, pCLabel, pEnd);
if (pEnd > pSize)
    chkError(pslDesc, out, psl, errCount,
             "\t%s %cEnd %u >= %cSize %u\n",
             pName, pCLabel, pEnd, pCLabel, pSize);
// check that block start/end matches overall start end
unsigned pStartStrand = pStart, pEndStrand = pEnd;
if (pStrand != '+')
    reverseUnsignedRange(&pStartStrand, &pEndStrand, pSize);
unsigned lastBlkEnd = pBlockStarts[psl->blockCount-1] + (blockSizeMult * psl->blockSizes[psl->blockCount-1]);
if ((pStartStrand != pBlockStarts[0]) || (pEndStrand != lastBlkEnd))
    chkError(pslDesc, out, psl, errCount,
             "\t%s strand \"%c\" adjusted %cStart-%cEnd range %u-%u != block range %u-%u\n",
             pName, pStrand, pCLabel, pCLabel, pStartStrand, pEndStrand, pBlockStarts[0], lastBlkEnd);

for (iBlk = 0; iBlk < psl->blockCount; iBlk++)
    chkBlkRanges(pslDesc, out, psl, pName, pLabel, pCLabel, pStrand,
                 pSize, pStart, pEnd, iBlk, pBlockStarts, errCount);
}


static void chkInsertCounts(char* pslDesc, FILE* out, struct psl* psl,
                            char* pName, char pCLabel, unsigned* pBlockStarts,
                            unsigned pNumInsert, unsigned pBaseInsert,
                            int* errCount)
/* check the insert counts, incrementing errorCnt */
{
unsigned numInsert = 0, baseInsert = 0;
int iBlk;

for (iBlk = 1; iBlk < psl->blockCount; iBlk++)
    {
    unsigned gapSize = pBlockStarts[iBlk] - (pBlockStarts[iBlk-1]+psl->blockSizes[iBlk-1]);
    if (gapSize > 0)
        {
        numInsert++;
        baseInsert += gapSize;
        }
    }
if (numInsert != pNumInsert)
    chkError(pslDesc, out, psl, errCount,
             "\t%s %cNumInsert %u != expected %u\n",
             pName, pCLabel, pNumInsert, numInsert);
if (baseInsert != pBaseInsert)
    chkError(pslDesc, out, psl, errCount,
             "\t%s %cBaseInsert %u != expected %u\n",
             pName, pCLabel, pBaseInsert, baseInsert);
}

int pslCheck2(unsigned opts, char *pslDesc, FILE* out, struct psl* psl)
/* Validate a PSL for consistency.  pslDesc is printed the error messages to
 * file out (open /dev/null to discard). Return count of errors.  Option
 * PSL_CHECK_IGNORE_INSERT_CNTS doesn't validate problems insert counts fields
 * in each PSL.  Useful because protein PSL doesn't seen to compute these in a
 * consistent way.
 */
{
static char* VALID_STRANDS[] = {
    "+", "-", "++", "+-", "-+", "--", NULL
};
int i, errCount = 0;
int tBlockSizeMult = pslIsProtein(psl) ? 3 : 1;

/* check strand value */
for (i = 0; VALID_STRANDS[i] != NULL; i++)
    {
    if (strcmp(psl->strand, VALID_STRANDS[i]) == 0)
        break;
    }
if (VALID_STRANDS[i] == NULL)
    chkError(pslDesc, out, psl, &errCount,
             "\tinvalid PSL strand: \"%s\"\n", psl->strand);

/* check query */
chkRanges(pslDesc, out, psl, psl->qName, "query", 'q', pslQStrand(psl), psl->qSize, psl->qStart, psl->qEnd,
          psl->qStarts, 1, &errCount);
if ((opts & PSL_CHECK_IGNORE_INSERT_CNTS) == 0)
    chkInsertCounts(pslDesc, out, psl, psl->qName, 'q', psl->qStarts, psl->qNumInsert, psl->qBaseInsert, &errCount);

/* check target */
chkRanges(pslDesc, out, psl, psl->tName, "target", 't', pslTStrand(psl), psl->tSize, psl->tStart, psl->tEnd,
          psl->tStarts, tBlockSizeMult, &errCount);
if ((opts & PSL_CHECK_IGNORE_INSERT_CNTS) == 0)
    chkInsertCounts(pslDesc, out, psl, psl->tName, 't', psl->tStarts, psl->tNumInsert, psl->tBaseInsert, &errCount);

return errCount;
}

int pslCheck(char *pslDesc, FILE* out, struct psl* psl)
/* Validate a PSL for consistency.  pslDesc is printed the error messages
 * to file out (open /dev/null to discard). Return count of errors. */
{
return pslCheck2(0, pslDesc, out, psl);
}

struct hash *readPslToBinKeeper(char *sizeFileName, char *pslFileName)
/* read a list of psls and return results in hash of binKeeper structure for fast query*/
{
struct binKeeper *bk; 
struct psl *psl;
struct lineFile *sf = lineFileOpen(sizeFileName, TRUE);
struct lineFile *pf = lineFileOpen(pslFileName , TRUE);
struct hash *hash = newHash(0);
char *chromRow[2];
char *row[21] ;

while (lineFileRow(sf, chromRow))
    {
    char *name = chromRow[0];
    int size = lineFileNeedNum(sf, chromRow, 1);

    if (hashLookup(hash, name) != NULL)
        warn("Duplicate %s, ignoring all but first\n", name);
    else
        {
        bk = binKeeperNew(0, size);
        assert(size > 1);
	hashAdd(hash, name, bk);
        }
    }
while (lineFileNextRow(pf, row, ArraySize(row)))
    {
    psl = pslLoad(row);
    bk = hashMustFindVal(hash, psl->tName);
    binKeeperAdd(bk, psl->tStart, psl->tEnd, psl);
    }
lineFileClose(&pf);
lineFileClose(&sf);
return hash;
}

static boolean isDelChar(char c)
/* is this a indel character? */
{
return (c == '-') || (c == '.') || (c == '=') || (c == '_');
}

static void trimAlignment(struct psl* psl, char** qStrPtr, char** tStrPtr,
                          int* aliSizePtr)
/* remove leading or trailing indels from alignment */
{
char* qStr = *qStrPtr;
char* tStr = *tStrPtr;
int aliSize = *aliSizePtr;

// skip lending indels
while ((aliSize > 0) && (isDelChar(*qStr) || isDelChar(*tStr)))
    {
    if (!isDelChar(*qStr))
        psl->qStart++;
    else if (!isDelChar(*tStr))
        psl->tStart++;
    qStr++;
    tStr++;
    aliSize--;
    }

// skip trailing indels
while ((aliSize > 0) && (isDelChar(qStr[aliSize-1]) || isDelChar(tStr[aliSize-1])))
    {
    if (!isDelChar(qStr[aliSize-1]))
        psl->qEnd--;
    else if (!isDelChar(tStr[aliSize-1]))
        psl->tEnd--;
    aliSize--;
    }
*qStrPtr = qStr;
*tStrPtr = tStr;
*aliSizePtr = aliSize;
}

static void addBlock(struct psl* psl, int qs, int qe, int ts, int te,
                     int *blockSpace)
/* add a block to the psl, growing if necessary */
{
assert((qe-qs) == (te-ts));  /* same lengths? */
if (psl->blockCount == *blockSpace)
    pslGrow(psl, blockSpace);
psl->blockSizes[psl->blockCount] = qe - qs;
psl->qStarts[psl->blockCount] = qs;
psl->tStarts[psl->blockCount] = ts;
psl->blockCount++;
}

static void accumCounts(struct psl *psl, char prevQ, char prevT,
                        char q, char t, unsigned options)
/* accumulate block and base counts  */
{
if (!isDelChar(q) && !isDelChar(t))
    {
    /* aligned column. */
    char qu = toupper(q);
    char tu = toupper(t);
    if ((q == 'N') || (t == 'N'))
        psl->nCount++;
    else if (qu == tu)
        {
        if ((options & PSL_IS_SOFTMASK) && ((qu != q) || (tu != t)))
            psl->repMatch++;
        else
            psl->match++;
        }
    else
        psl->misMatch++;
    }
else if (isDelChar(q) && !isDelChar(t))
    {
    /* target insert */
    psl->tBaseInsert++;
    if (!isDelChar(prevQ))
        psl->tNumInsert++;
    }
else if (isDelChar(t) && !isDelChar(q))
    {
    /* query insert */
    psl->qBaseInsert++;
    if (!isDelChar(prevT))
        psl->qNumInsert++;
    }
}

struct psl* pslFromAlign(char *qName, int qSize, int qStart, int qEnd, char *qString,
                         char *tName, int tSize, int tStart, int tEnd, char *tString,
                         char* strand, unsigned options)
/* Create a PSL from an alignment.  Options PSL_IS_SOFTMASK if lower case
 * bases indicate repeat masking.  Returns NULL if alignment is empty after
 * triming leading and trailing indels.*/
{
/* Note, the unit tests for these programs exercise this function:
 *   hg/embossToPsl
 *   hg/mouseStuff/axtToPsl
 *   hg/spideyToPsl
 *   utils/est2genomeToPsl
 */

int blockSpace = 16;
struct psl* psl = NULL;
int aliSize = strlen(qString);
boolean eitherInsert = FALSE;	/* True if either in insert state. */
int i, qs,qe,ts,te;
char prevQ = '\0',  prevT = '\0';
AllocVar(psl);

if (strlen(tString) != aliSize)
    errAbort("query and target alignment strings are different lengths");

psl = pslNew(qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd,
             strand, blockSpace, 0);
trimAlignment(psl, &qString, &tString, &aliSize);

/* Don't create if either query or target is zero length after trim */
 if ((psl->qStart == psl->qEnd) || (psl->tStart == psl->tEnd))
     {
     pslFree(&psl);
     return NULL;
     }

/* Get range of alignment in strand-specified coordinates */
qs = psl->qStart;
qe = psl->qEnd;
if (strand[0] == '-')
    reverseIntRange(&qs, &qe, psl->qSize);
ts = psl->tStart;
te = psl->tEnd;
if (strand[1] == '-')
    reverseIntRange(&ts, &te, psl->tSize);

eitherInsert = FALSE;
qe = qs;  /* current block coords */
te = ts;
for (i=0; i<aliSize; ++i)
    {
    char q = qString[i];
    char t = tString[i];
    if (isDelChar(q) && isDelChar(t))
        {
        continue; /* nothing in this column, just ignore it */
        }
    else if (isDelChar(q) || isDelChar(t))
        {
        /* insert in one of the columns */
	if (!eitherInsert)
	    {
            /* end of a block */
            addBlock(psl, qs, qe, ts, te, &blockSpace);
	    eitherInsert = TRUE;
	    }
	if (!isDelChar(q))
            qe += 1;
	if (!isDelChar(t))
            te += 1;
	}
    else
        {
        /* columns aligned */
	if (eitherInsert)
	    {
            /* start new block */
	    qs = qe;
	    ts = te;
	    eitherInsert = FALSE;
	    }
	qe += 1;
	te += 1;
	}
    accumCounts(psl, prevQ, prevT, q, t, options);
    prevQ = q; /* will not include skipped empty columns */
    prevT = t;
    }
addBlock(psl, qs, qe, ts, te, &blockSpace);
return psl;
}

struct psl* pslNew(char *qName, unsigned qSize, int qStart, int qEnd,
                   char *tName, unsigned tSize, int tStart, int tEnd,
                   char *strand, unsigned blockSpace, unsigned opts)
/* create a new psl with space for the specified number of blocks allocated.
 * pslGrow maybe used to expand this space if needed.  Valid options are
 * PSL_XA_FORMAT. */
{
struct psl *psl;
AllocVar(psl);
assert(blockSpace > 0);
psl->qName = cloneString(qName);
psl->qSize = qSize;
psl->qStart = qStart;
psl->qEnd = qEnd;
psl->tName = cloneString(tName);
psl->tSize = tSize;
psl->tStart = tStart;
psl->tEnd = tEnd;
strncpy(psl->strand, strand, 2);
AllocArray(psl->blockSizes, blockSpace);
AllocArray(psl->qStarts, blockSpace);
AllocArray(psl->tStarts, blockSpace);
if (opts & PSL_XA_FORMAT)
    {
    AllocArray(psl->qSequence, blockSpace);
    AllocArray(psl->tSequence, blockSpace);
    }
return psl;
}

void pslGrow(struct psl *psl, int *blockSpacePtr)
/* Increase memory allocated to a psl to hold more blocks.  blockSpacePtr
 * should point the the current maximum number of blocks and will be
 * updated to with the new amount of space. */
{
int blockSpace = *blockSpacePtr;
int newSpace = 2 * blockSpace;
ExpandArray(psl->blockSizes, blockSpace, newSpace);
ExpandArray(psl->qStarts, blockSpace, newSpace);
ExpandArray(psl->tStarts, blockSpace, newSpace);
if (psl->qSequence != NULL)
    {
    ExpandArray(psl->qSequence, blockSpace, newSpace);
    ExpandArray(psl->tSequence, blockSpace, newSpace);
    }
*blockSpacePtr = newSpace;
}

void pslComputeInsertCounts(struct psl *psl)
/* compute numInsert and baseInsert fields from the blocks */
{
psl->qNumInsert = psl->qBaseInsert = 0;
psl->tNumInsert = psl->tBaseInsert = 0;
int iBlk;

for (iBlk = 1; iBlk < psl->blockCount; iBlk++)
    {
    unsigned qGapSize = psl->qStarts[iBlk] - (psl->qStarts[iBlk-1]+psl->blockSizes[iBlk-1]);
    if (qGapSize != 0)
        {
        psl->qNumInsert++;
        psl->qBaseInsert += qGapSize;
        }
    unsigned tGapSize = psl->tStarts[iBlk] - (psl->tStarts[iBlk-1]+psl->blockSizes[iBlk-1]);
    if (tGapSize != 0)
        {
        psl->tNumInsert++;
        psl->tBaseInsert += tGapSize;
        }
    }
}

static boolean getNextCigarOp(char *startPtr, boolean reverse, char **ptr, char *op, int *size)
/* gets one cigar op out of the CIGAR string.  Reverse the order if asked */
{
char *str = *ptr;

if (str == NULL)
    return FALSE;

if ((!reverse && (*str == 0)) || (reverse && (str == startPtr)))
    return FALSE;

// between each cigar op there could be nothing, or a space, or a plus
if (reverse)
    {
    char *end = str - 1;
    for(;*end ; end--)
	{
	if (isalpha(*end))
	    break;
	}
    str = end;
    *ptr = end;
    }
else
    {
    char *end = str + 1;
    for(;*end ; end++)
	{
	if (! (isdigit(*end)  || (*end == ' ') || (*end == '+')))
	    break;
	}

    *ptr = end;
    }

*op = *str++;
*size = atoi(str);
return TRUE;
}

struct psl* pslFromGff3Cigar(char *qName, int qSize, int qStart, int qEnd,
                             char *tName, int tSize, int tStart, int tEnd,
                             char* strand, char *cigar)
/* create a PSL from a GFF3-style cigar formatted alignment */
{
// this is currently tested by the gff3ToPsl
int blocksAlloced = 4;
struct psl *psl = pslNew(qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd, strand, blocksAlloced, 0);

char op;
int size;
int totalSize = 0;

int qNext = qStart, qBlkEnd = qEnd;
if (strand[0] == '-')
    reverseIntRange(&qNext, &qBlkEnd, qSize);
int tNext = tStart, tBlkEnd = tEnd;
if (strand[1] == '-')
    reverseIntRange(&tNext, &tBlkEnd, tSize);

if (cigar == NULL)
    {
    // no cigar means one block
    size = qEnd - qStart;
    totalSize += size;
    psl->blockSizes[psl->blockCount] = size;
    psl->qStarts[psl->blockCount] = qNext;
    psl->tStarts[psl->blockCount] = tNext;
    psl->blockCount++;
    tNext += size;
    qNext += size;
    }
else
    {
    if (strand[0] == '-' && strand[1] == '-')
        errAbort("GFF3 spec is vague about Gap when both strands are '-'; not implemented yet.");
    char cigarSpec[strlen(cigar)+1];  // copy since parsing is destructive
    safecpy(cigarSpec, sizeof cigarSpec, cigar);
    char *cigarNext = cigarSpec;
    while(getNextCigarOp(cigarSpec, FALSE, &cigarNext, &op, &size))
	{
	switch (op)
	    {
	    case 'M': // match or mismatch (gapless aligned block)
		if (psl->blockCount == blocksAlloced)
		    pslGrow(psl, &blocksAlloced);

		totalSize += size;
		psl->blockSizes[psl->blockCount] = size;
		psl->qStarts[psl->blockCount] = qNext;
		psl->tStarts[psl->blockCount] = tNext;
		psl->blockCount++;
		tNext += size;
		qNext += size;
		break;
	    case 'I': // inserted in target
		tNext += size;
		break;
	    case 'D': // deleted from target
		qNext += size;
		break;
	    
	    default:
		errAbort("unrecognized CIGAR op %c in %s", op, cigar);
	    }
	}
    }

/* CIGARs starting/ending with indels require adjusting of query/target ranges,
 * as PSL starts/ends with matches */
psl->qStart = psl->qStarts[0];
psl->qEnd = pslQEnd(psl, psl->blockCount-1);
if (strand[0] == '-')
    reverseIntRange(&psl->qStart, &psl->qEnd, qSize);
psl->tStart = psl->tStarts[0];
psl->tEnd = pslTEnd(psl, psl->blockCount-1);
if (strand[1] == '-')
    reverseIntRange(&psl->tStart, &psl->tEnd, tSize);

/* sanity check */
if (qNext != qBlkEnd)
    errAbort("CIGAR query length does not match specified query range %s:%d-%d", qName, qStart, qEnd);
if (tNext != tBlkEnd)
    errAbort("CIGAR target length does not match specified target range %s:%d-%d", tName, tStart, tEnd);
psl->match = totalSize;
pslComputeInsertCounts(psl);
return psl;
}

int pslRangeTreeOverlap(struct psl *psl, struct rbTree *rangeTree)
/* Return amount that psl overlaps (on target side) with rangeTree. */
{
int i;
int overlap = 0;
boolean isRc = (psl->strand[1] == '-');
for (i=0; i<psl->blockCount; ++i)
    {
    int start = psl->tStarts[i];
    int end = start + psl->blockSizes[i];
    if (isRc)
        reverseIntRange(&start, &end, psl->tSize);
    overlap += rangeTreeOverlapSize(rangeTree, start, end);
    }
return overlap;
}

float pslIdent(struct psl *psl)
/* computer fraction identity */
{
float aligned = psl->match + psl->misMatch + psl->repMatch;
if (aligned == 0.0)
    return 0.0;
else
    return ((float)(psl->match + psl->repMatch))/aligned;
}

float pslQueryAligned(struct psl *psl)
/* compute fraction of query that was aligned */
{
float aligned = psl->match + psl->misMatch + psl->repMatch;
return aligned/(float)psl->qSize;
}

struct psl* pslClone(struct psl *psl)
/* clone a psl */
{
struct psl* pslCp = pslNew(psl->qName, psl->qSize, psl->qStart, psl->qEnd,
                           psl->tName, psl->tSize, psl->tStart, psl->tEnd,
                           psl->strand, psl->blockCount,
                           ((psl->tSequence != NULL) ? PSL_XA_FORMAT : 0));
pslCp->match = psl->match;
pslCp->misMatch = psl->misMatch;
pslCp->repMatch = psl->repMatch;
pslCp->nCount = psl->nCount;
pslCp->qNumInsert = psl->qNumInsert;
pslCp->qBaseInsert = psl->qBaseInsert;
pslCp->tNumInsert = psl->tNumInsert;
pslCp->tBaseInsert = psl->tBaseInsert;
int iBlk;
for (iBlk = 0; iBlk < psl->blockCount; iBlk++)
    {
    pslCp->blockSizes[iBlk] = psl->blockSizes[iBlk];
    pslCp->qStarts[iBlk] = psl->qStarts[iBlk];
    pslCp->tStarts[iBlk] = psl->tStarts[iBlk];
    if (psl->qSequence != NULL)
        pslCp->qSequence[iBlk] = cloneString(psl->qSequence[iBlk]);
    if (psl->tSequence != NULL)
        pslCp->tSequence[iBlk] = cloneString(psl->tSequence[iBlk]);
    pslCp->blockCount++;
    }
return pslCp;
}

int cmpChrom(char *a, char *b)
/* Compare two chromosomes. */
{
return cmpStringsWithEmbeddedNumbers(a, b);
}


int pslCmpTargetScore(const void *va, const void *vb)
/* Compare to sort based on target then score. */
{
const struct psl *a = *((struct psl **)va);
const struct psl *b = *((struct psl **)vb);
int diff = cmpChrom(a->tName, b->tName);
if (diff == 0)
    diff = pslScore(b) - pslScore(a);
return diff;
}

int pslCmpTargetStart(const void *va, const void *vb)
/* Compare to sort based on target start. */
{
const struct psl *a = *((struct psl **)va);
const struct psl *b = *((struct psl **)vb);
int diff = cmpChrom(a->tName, b->tName);
if (diff == 0)
    diff = a->tStart - b->tStart;
return diff;
}

char *pslSortList[] = {"query,score", "query,start", "chrom,score", "chrom,start", "score"};

void pslSortListByVar(struct psl **pslList, char *sort)
/* Sort a list of psls using the method definied in the sort string. */
{
if (sameString(sort, "query,start"))
    {
    slSort(pslList, pslCmpQuery);
    }
else if (sameString(sort, "query,score"))
    {
    slSort(pslList, pslCmpQueryScore);
    }
else if (sameString(sort, "score"))
    {
    slSort(pslList, pslCmpScore);
    }
else if (sameString(sort, "chrom,start"))
    {
    slSort(pslList, pslCmpTargetStart);
    }
else if (sameString(sort, "chrom,score"))
    {
    slSort(pslList, pslCmpTargetScore);
    }
else
    {
    slSort(pslList, pslCmpQueryScore);
    }
}

void pslRemoveFrameShifts(struct psl *psl)
/* Remove any frameshits if present. Changes in place, doesn't update statistics in first nine fields. */
{
unsigned prevEnd = psl->tStarts[0];
int ii;

for(ii = 0; ii < psl->blockCount; ii++)
    {
    int diff = prevEnd - psl->tStarts[ii];
    if (diff > 0)
        {
        if (diff > psl->blockSizes[ii])
            errAbort("frame shift (%d) larger than block size (%d)", diff, psl->blockSizes[ii]);
        psl->blockSizes[ii] -= diff;
        psl->tStarts[ii] += diff;
        psl->qStarts[ii] += diff;
        }
    prevEnd = psl->tStarts[ii] + psl->blockSizes[ii];
    }
}
