/*
  File: pslAnal.c
  Author: Terry Furey
  Date: 5/2/2003
  Description: Calculates characteristics of a file of psl alignments
*/

/* Copyright (C) 2013 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */

#include "common.h"
#include "linefile.h"
#include "memalloc.h"
#include "hash.h"
#include "jksql.h"
#include "hdb.h"
#include "fuzzyFind.h"
#include "genePred.h"
#include "portable.h"
#include "dnautil.h"
#include "dnaseq.h"
#include "psl.h"
#include "snp125.h"
#include "fa.h"
#include "psl.h"
#include "options.h"
#include "hgConfig.h"
#include "genbank.h"

/* command line option specifications */
static struct optionSpec optionSpecs[] = {
    {"db", OPTION_STRING},
    {"der", OPTION_STRING},
    {"versions", OPTION_STRING},
    {"xeno", OPTION_BOOLEAN},
    {"indels", OPTION_BOOLEAN},
    {"unaligned", OPTION_BOOLEAN},
    {"mismatches", OPTION_BOOLEAN},
    {"codonsub", OPTION_BOOLEAN},
    {"noVersions", OPTION_BOOLEAN},
    {"genbankCds", OPTION_BOOLEAN},
    {NULL, 0}
};

boolean indelReport = FALSE;
boolean unaliReport = FALSE;
boolean mismatchReport = FALSE;
boolean codonSubReport = FALSE;
boolean genbankCdsFmt = FALSE;
boolean xeno = FALSE;
boolean noVersions = FALSE;

struct acc
{
  struct acc *next;
  char *name;
  char *version;
  char *organism;
};

struct clone
{
  struct clone *next;
  int id;
  int imageId;
};

struct refseq
{
  struct refseq *next;
  struct acc *accList;
};

/* indel object is now used for tracking several, these constants
 * defined the type */
#define INDEL    1
#define MISMATCH 2
#define CODONSUB 3
#define UNALIGNED 4

struct evid
{
  struct evid *next;
  int genMrna;
  struct acc *genMrnaAcc;
  int genEst;
  struct acc *genEstAcc;
  int mrnaMrna;
  struct acc *mrnaMrnaAcc;
  int mrnaEst;
  struct acc *mrnaEstAcc;
  int noMrna;
  struct acc *noMrnaAcc;
  int noEst;
  struct acc *noEstAcc;
  int unMrna;
  struct acc *unMrnaAcc;
  int unEst;
  struct acc *unEstAcc;
};

struct indel
{
  struct indel *next;
  int size;
  char *chrom;
  int chromStart;  /* note: [1..n] coordinates */
  int chromEnd;
  struct acc *mrna;
  int mrnaStart;
  int mrnaEnd;
  struct clone *clones;
  struct clone *libraries;
  struct acc *accs;
  struct evid *hs;
  struct evid *xe;
  char* mrnaSeq;
  char* genSeq;
  boolean insertion;
  boolean inCds;

  /* fields used if this is tracking codon substitutions*/
  int codonGenPos[3];  /* position of the codon bases */
  char genCodon[4];
  char mrnaCodon[4];
  boolean knownSnp;
  boolean validatedSnp;
};

struct pslInfo
{
  struct pslInfo *next;
  struct psl *psl;
  short splice[512];
  struct acc *mrna;
  float pctId;
  float coverage;
  int cdsStart;
  int cdsEnd;
  int cdsSize;
  float cdsPctId;
  float cdsCoverage;
  int introns;
  int stdSplice;
  int cdsMatch;
  int cdsMismatch;
  struct indel *mmList;
  int cdsGap;
  int unalignedCds;
  int singleIndel;
  int tripleIndel;
  int totalIndel;
  int indelCount;
  int indels[256];
  struct indel *indelList;
  struct indel *unaliList;
  int snp;
  int thirdPos;
  int synSub;
  int nonSynSub;
  int synSubSnp;
  int nonSynSubSnp;
  int loci;
  struct indel *codonSubList;
  struct clone *mrnaCloneId;
  struct refseq *refseq;
} *piList = NULL;

struct hash *cdsStarts = NULL;
struct hash *cdsEnds = NULL;
struct hash *loci = NULL;
int nextFakeLoci = 1;
struct hash *rnaSeqs = NULL;
struct hash *version = NULL;
struct hash *derived = NULL;

int indels[128];

void cloneFree(struct clone **clone)
/* Free a dynamically allocated acc */
{
struct clone *c;

if ((c = *clone) == NULL) return;
freez(clone);
}

void cloneFreeList(struct clone **cList)
/* Free a dynamically allocated list of acc's */
{
struct clone *c = NULL, *next = NULL;

for (c = *cList; c != NULL; c = next)
    {
    next = c->next;
    cloneFree(&c);
    }
*cList = NULL;
}

char* accFmt(struct acc* acc)
/* format acc with optional version */
{
static char accVer[64];

if (acc->version == NULL)
    return acc->name;
else
    {
    safef(accVer, sizeof(accVer), "%s.%s", acc->name, acc->version);
    return accVer;
    }
}

void accFree(struct acc **acc)
/* Free a dynamically allocated acc */
{
struct acc *a;

if ((a = *acc) == NULL) return;
freez(&(a->organism));
freez(&(a->name));
freez(&(a->version));
freez(acc);
}

void accFreeList(struct acc **aList)
/* Free a dynamically allocated list of acc's */
{
struct acc *a = NULL, *next = NULL;

for (a = *aList; a != NULL; a = next)
    {
    next = a->next;
    accFree(&a);
    }
*aList = NULL;
}

void evidFree(struct evid **ev)
/* Free a dynamically allocated evid */
{
struct evid *e;

if ((e = *ev) == NULL) return;
accFreeList(&(e->genMrnaAcc));
accFreeList(&(e->genEstAcc));
accFreeList(&(e->mrnaMrnaAcc));
accFreeList(&(e->mrnaEstAcc));
accFreeList(&(e->noMrnaAcc));
accFreeList(&(e->noEstAcc));
accFreeList(&(e->unMrnaAcc));
accFreeList(&(e->unEstAcc));
freez(ev);
}

void evidListFree(struct evid **iList)
/* Free a dynamically allocated list of evid's */
{
struct evid *i, *next;

for (i = *iList; i != NULL; i = next)
    {
    next = i->next;
    evidFree(&i);
    }
*iList = NULL;
}

void indelFree(struct indel **in)
/* Free a dynamically allocated indel */
{
struct indel *i;

if ((i = *in) == NULL) return;
/*accFree(&(i->mrna));*/
evidListFree(&(i->hs));
evidListFree(&(i->xe));
cloneFreeList(&(i->clones));
cloneFreeList(&(i->libraries));
/* accFreeList(&(i->accs)); - freed by evidFreeList */
free(i->mrnaSeq);
free(i->genSeq);
freez(in);
}

void indelListFree(struct indel **iList)
/* Free a dynamically allocated list of indel's */
{
struct indel *i, *next;

for (i = *iList; i != NULL; i = next)
    {
    next = i->next;
    indelFree(&i);
    }
*iList = NULL;
}

void pslInfoFree(struct pslInfo **pi)
/* Free a single dynamically allocated pslInfo element */
{
struct pslInfo *el;

if ((el = *pi) == NULL) return;
pslFree(&(el->psl));
accFree(&(el->mrna));
indelListFree(&(el->indelList));
indelListFree(&(el->mmList));
indelListFree(&(el->codonSubList));
cloneFree(&(el->mrnaCloneId));
freez(pi);
}

void parseCdsCols(struct lineFile *cf, char **words, int wordCnt)
/* parse CDS row in a column format */
{
if (wordCnt < 3)
    lineFileExpectWords(cf, 3, wordCnt);
else
    {
    char *name = words[0];
    int start = sqlUnsigned(words[1]) - 1;
    int end = sqlUnsigned(words[2]);
    hashAddInt(cdsStarts, name, start);
    hashAddInt(cdsEnds, name, end);
    }
}

void parseCdsGenbank(struct lineFile *cf, char **words, int wordCnt)
/* parse CDS row in genbank format */
{
if (wordCnt < 2)
    lineFileExpectWords(cf, 2, wordCnt);
else
    {
    char *name = words[0];
    struct genbankCds cds;
    if (!genbankCdsParse(words[1], &cds))
        errAbort("invalid cds for %s: %s", words[0], words[1]);
    hashAddInt(cdsStarts, name, cds.start);
    hashAddInt(cdsEnds, name, cds.end);
    }
}
void readCds(struct lineFile *cf)
/* Read in file of coding region starts and stops 
   Convert start to 0-based to make copmarison with psl easier */
{
int wordCnt;
char *words[4];

cdsStarts = newHash(16);
cdsEnds = newHash(16);

while ((wordCnt = lineFileChopNextTab(cf, words, ArraySize(words))) > 0)
    {
    if (genbankCdsFmt)
        parseCdsGenbank(cf, words, wordCnt);
    else
        parseCdsCols(cf, words, wordCnt);
    }
}

void readRnaSeq(char *filename)
/* Read in file of mRNA fa sequences */
{
struct dnaSeq *seqList, *oneSeq;

rnaSeqs = newHash(16);
seqList = faReadAllDna(filename);
for (oneSeq = seqList; oneSeq != NULL; oneSeq = oneSeq->next)
    hashAdd(rnaSeqs, oneSeq->name, oneSeq);
}

void readLoci(struct lineFile *lf)
/* Read in file of loci id's, primarily from LocusLink */
{
char *words[4];
char *name;
int thisLoci;
int numLoci = 0;

loci = newHash(16);

while (lineFileChopNext(lf, words, 2))
    {
    name = cloneString(words[0]);
    thisLoci = sqlUnsigned(words[1]);
    hashAddInt(loci, name, thisLoci);
    numLoci++;
    }

/* if loci files was empty, no loci will be used */
if (numLoci == 0)
    hashFree(&loci);
}

void readVersion(struct lineFile *lf)
/* Read in file of version numbers for mrnas */
{
char *words[4];
char *name;
char *v;

version = newHash(16);

while (lineFileChopNext(lf, words, 2))
    {
    name = cloneString(words[0]);
    v = cloneString(words[1]);
    hashAdd(version, name, v);
    }
}

char *findVersion(char *name)
/* Determine the version for an mrna/est accession */
{
struct sqlConnection *conn = hAllocConn();
char *ret = NULL;
char query[256];
struct sqlResult *sr;
char **row;

sqlSafef(query, sizeof(query), "select version from gbCdnaInfo where acc = '%s'", name); 
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    ret = cloneString(row[0]);
sqlFreeResult(&sr);
hFreeConn(&conn);

return(ret);
}


struct acc *createAcc(char *name)
{
struct acc *ret;
char *accs[4];
int wordCount;
  
AllocVar(ret);
if (noVersions)
    {
    ret->name = name;
    }
else
    {
    wordCount = chopByChar(name, '.', accs, ArraySize(accs)); 
    if (wordCount > 2) 
        errAbort("Accession not standard, %s\n", name);
    ret->name = accs[0];
    if (wordCount == 1)
        {
        if ((!version) || (!hashLookup(version, name)))
            ret->version = findVersion(name);
        else 
            ret->version = cloneString(hashFindVal(version, name));
        }
    else
        ret->version = accs[1];
    }
/* fprintf(stderr, "accession %s created\n", accFmt(ret));*/

return(ret);
}

void readRsDerived(struct lineFile *lf)
/* Read in file of derived accessions for refseq mrnas */
{
char *words[4];
char *rs, *acc;
struct refseq *r;
struct acc *a;

derived = newHash(16);

while (lineFileChopNext(lf, words, 2))
    {
    rs = cloneString(words[0]);
    acc = cloneString(words[1]);
    a = createAcc(acc);
    if (!hashLookup(derived, rs))
      {
	AllocVar(r);
	r->next = NULL;
	r->accList = NULL;
	hashAdd(derived, rs, r);
      }
    r = hashFindVal(derived, rs);
    slAddHead(&r->accList, a);
    }
}

int countIntrons(unsigned count, unsigned *sizes, unsigned *starts)
/* Count the number of introns in an alignment where an intron is
   defined as a gap greater than 30 bases */
{
int ret = 0, i;

for (i = 1; i < count; i++) 
    if (starts[i] - (starts[i-1] + sizes[i-1]) > 30)
	ret++;
return(ret);
}

int countStdSplice(struct psl *psl, DNA *seq, struct pslInfo *pi)
/* For each intron, determine whether it has a canonical splice site
   Return the number of introns that do */
{
int count=0, i;
int tStart = (psl->strand[1] == '-') ? (psl->tSize - psl->tEnd): psl->tStart;

for (i=1; i<psl->blockCount; ++i)
    {
    pi->splice[i] = 0;
   
    int iStart = psl->tStarts[i-1] + psl->blockSizes[i-1] - tStart;
    int iEnd = psl->tStarts[i] - tStart;
    if (abs(iEnd - iStart) > 15)
      {
            if ((seq[iStart] == 'g' && seq[iStart+1] == 't' && seq[iEnd-2] == 'a' && seq[iEnd-1] == 'g') ||
                (seq[iStart] == 'g' && seq[iStart+1] == 'c' && seq[iEnd-2] == 'a' && seq[iEnd-1] == 'g'))
              {
                count++;
                pi->splice[i] = 1;
                if  (abs(iEnd - iStart) <= 30)
                  pi->introns++;
              }
      }
    }
return(count);
}

int snpBase(struct sqlConnection *conn, char *chr, int position)
/* Determine whether a given position is known to be a SNP */ 
{
struct sqlResult *sr;
char **row;
int rowOff;
int ret = 0, i;
static boolean checked = FALSE;
static boolean haveSnp = FALSE;
static char *snpTable = NULL;
static char *snpTables[] = {"snp126", "snp125", NULL};

verbose(4, "\tchecking for snp\n");
if (!checked)
    {
    for (i = 0; (snpTables[i] != NULL) && !haveSnp; i++)
        {
        snpTable = snpTables[i];
        haveSnp = sqlTableExists(conn, snpTable);
        }
    checked = TRUE;
    if (!haveSnp)
        fprintf(stderr, "WARNING: no snp table in this databsae\n");
    }

/* the new table is snp, replacing snpTsc and snpNih+hgFixed.dsSnpRS */
if (haveSnp)
    {
    verbose(4, "\tquerying snp table\n");
    sr = hRangeQuery(conn, snpTable, chr, position, position+1, NULL, &rowOff);
    while ((row = sqlNextRow(sr)) != NULL) 
        {
        struct snp125 snp;
	verbose(4, "\tloading snp info\n");
        snp125StaticLoad(row+rowOff, &snp);
	/* Check if this is a snp, not a indel */
        if (sameString(snp.class, "snp"))
          {
	  /* Check if the snp has been validated*/
	  if (differentString(snp.valid, "no-information"))
	    ret = 2;
	  else
	    if (ret < 2)
	      ret = 1;	
          }
        }
    sqlFreeResult(&sr);
    }
return(ret);
}

void findOrganism(struct sqlConnection *conn, struct acc *acc)
/* Determine organism for each non-human mrna/est in the list */
{
char query[256];
struct sqlResult *sr;
char **row;
int id = -1;


/*a = cloneString(acc->name);
wordCount = chopByChar(a, '.', accs, ArraySize(accs)); 
if (wordCount > 2) 
errAbort("Accession not standard, %s\n", acc->name);*/
sqlSafef(query, sizeof(query), "select organism from gbCdnaInfo where acc = '%s'", acc->name); 
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    id = sqlUnsigned(row[0]);
sqlFreeResult(&sr);
if (id != -1)
    {
    sqlSafef(query, sizeof(query), "select name from organism where id = %d", id);   
    sr = sqlGetResult(conn, query);
    if ((row = sqlNextRow(sr)) != NULL)
      acc->organism = cloneString(row[0]);
    else
      printf("Could not find organism for id %d\n", id);
    sqlFreeResult(&sr);
    } 
else 
    printf("Could not find mrna record for %s\n", acc->name);
}

struct clone *getMrnaCloneId(struct sqlConnection *conn, char *acc)
/* Find the clone id for an mrna accession */
{
char query[256];
struct sqlResult *sr;
char **row;
struct clone *ret = NULL;

AllocVar(ret);
ret->next = NULL;

sqlSafef(query, sizeof(query), "select mrnaClone from gbCdnaInfo where acc = '%s'", acc); 
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    {
    ret->id = sqlUnsigned(row[0]);
    ret->imageId = 0;
    }
sqlFreeResult(&sr);
sqlSafef(query, sizeof(query), "select imageId from imageClone where acc = '%s'", acc); 
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    ret->imageId = sqlUnsigned(row[0]);
sqlFreeResult(&sr);
return(ret);
}

struct clone *getMrnaLibId(struct sqlConnection *conn, char *acc)
/* Find the library id for an mrna accession */
{
char query[256];
struct sqlResult *sr;
char **row;
struct clone *ret = NULL;

AllocVar(ret);
ret->next = NULL;

sqlSafef(query, sizeof(query), "select library from gbCdnaInfo where acc = '%s'", acc); 
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    {
    ret->id = sqlUnsigned(row[0]);
    ret->imageId = 0;
    }
sqlFreeResult(&sr);
return(ret);
}

boolean refseqAcc(struct refseq *r, char *name, char* rs)
/* Check if accession was used to create refseq sequence */
{
  /*struct refseq *r;*/
struct acc *a;
boolean ret = FALSE;

/*if (hashLookup(derived, rs))
  {
  r = hashFindVal(derived, rs);*/
  for (a = r->accList; a != NULL; a = a->next)
    if (sameString(a->name, name))
      ret = TRUE;
  /*}*/
return(ret);
}

boolean sameAcc(struct acc *a1, struct acc *a2)
/* Determine if two accessions are the same */
{
if ((a1 == NULL) || (a2 == NULL))
    return(0);
 if (sameString(a1->name,a2->name))
    return(1);
return(0);
}

boolean usedAcc(struct acc *a, struct acc *alist) 
/* Determine if acc is in list */
{
struct acc *el;

if ((a == NULL) || (alist == NULL))
    return(0);
for (el = alist; el != NULL; el = el->next)
    if (sameAcc(a, el))
       return(1);
return(0);
}

boolean sameClone(struct clone *c1, struct clone *c2)
/* Determine if two clones are the same */
{
if ((c1 == NULL) || (c2 == NULL))
    return(0);
 if (((c1->id) && (c2->id) && (c1->id == c2->id)) || ((c1->imageId) && (c2->imageId) && (c1->imageId == c2->imageId)))
    return(1);
return(0);
}

boolean usedClone(struct clone *c, struct clone *clist) 
/* Determine if clone/library is in list */
{
struct clone *el;

if ((c == NULL) || (clist == NULL))
    return(0);
for (el = clist; el != NULL; el = el->next)
    if (sameClone(c, el))
       return(1);
return(0);
}

int shiftIndel(char *indel, int size, int dir, int start, int end, struct dnaSeq *seq)
{
/* Determine how far insertion in seq can be shifted with changing an alignment */
  int ret = 0, offset = 0, i = 0;
  char *test;

  test = needMem(size+1);
  offset = start + (size*dir);
  if ((offset >= 0) && (offset < seq->size))
    {
      strncpy(test, seq->dna + offset, size);
      test[size] = '\0';
    }
  else
    test[0] = '\0';

  /* printf("Testing %s vs %s\n", test, indel); */
  while (sameString(indel, test)) {
    ret += size;
    offset = start + (ret*dir) + (size*dir);
    if ((offset >= 0) && (offset < seq->size))
      strncpy(test, seq->dna + offset, size);
    else
      test[0] = '\0';
    /* printf("Testing %s vs %s\n", test, indel); */
  }
  /* Check for pathological cases */
  for (i = 0; i < size; i++) 
    if (test[i] == indel[i]) 
      ret++;
    else
      i = size;
  freez(&test);
  /* printf("Shifting %d in direction %d\n", ret, dir); */
  return(ret);
}

void getCoords(struct psl *psl, int gstart, int gend, int *start, int *end, char *strand, boolean *nogap)
/* Get the genomic DNA that corresponds to an indel, and determine the corresponding \
   start and end positions for this sequence in the query sequence */ 
{
int i, bStart = -1, bEnd = -1;

/* Check that alignment covers the range */
if ((psl->tStart < gstart) && (psl->tEnd > gend))
  {
    /* Reverse complement xeno alignments if done on target - strand */
    if (psl->strand[1] == '-')
      pslRc(psl);
    
    /* Look in all blocks for the indel */
    for (i = 0; i < psl->blockCount; i++) 
      {
	/* If the block contains the indel */   
	if (((psl->tStarts[i] + psl->blockSizes[i]) >= gstart) && (psl->tStarts[i] < gend))
	  {
	    /* Determine the start position offset */
	    if (gstart >= psl->tStarts[i])
	      {
		*start = psl->qStarts[i] + (gstart - psl->tStarts[i]);
		/*gs = gstart;*/
		bStart = i;
	      }
	    /* Determine the end position offset */
	    if (gend <= (psl->tStarts[i]+psl->blockSizes[i]))
	      {
		*end = psl->qStarts[i] + gend - psl->tStarts[i];
		/*ge = gend;*/
		bEnd = i;
	      }
	  }
	if ((gstart < psl->tStarts[i]) && (bStart < 0))
	  {
	    *start = psl->qStarts[i];
	    bStart = i;
	  }
	if ((gend > (psl->tStarts[i] + psl->blockSizes[i])) && (!*end))
	  bEnd = i;
      }
    
    if ((bEnd >= 0) && (!*end))
      *end = psl->qStarts[bEnd] + psl->blockSizes[bEnd];
    
    /* If opposite strand alignment, reverse the start and end positions in the mRNA */
    if (((psl->strand[0] == '-')  && (psl->strand[1] != '-')) 
	|| ((psl->strand[0] == '+') && (psl->strand[1] == '-')))
      {
	int tmp = *start;
	*start = psl->qSize - *end;
	*end = psl->qSize - tmp;
	/* reverseComplement(ret->dna, ret->size); */
	sprintf(strand, "-");
      }
    else
      sprintf(strand, "+");
    
    /* Check if mrna aligns completely in this region */
    if (((*end - *start) == (gend - gstart)) && (bStart > 0))
      *nogap = TRUE;
    else if ((bStart < bEnd) && (bStart > 0))
      {
	/*nogap = TRUE;
	  for (i = bStart; i < bEnd; i++) 
	  if ((psl->qStarts[i] + psl->blockSizes[i]) < psl->qStarts[i+1]) */
	*nogap = FALSE;
      }
  }
 else 
   {
     *start = 0;
     *end = 0;
   }

}

void searchTransPsl(char *table, DNA *mdna, struct indel *ni, char *strand, unsigned type, struct psl* psl, struct dnaSeq *gseq, struct acc *acc, int left, int right)
/* process one mRNA or EST for searchTrans */
{
int start = 0, end = 0;
boolean nogap = FALSE;
char *dna = NULL, *dnaStart = NULL, *dnaEnd = NULL;
char thisStrand[2];
struct sqlConnection *conn2 = hAllocConn();
int mrnaSize = ni->mrnaEnd - ni->mrnaStart + left + right;
char accBuf[64];
char *p;

/* Get the start and end coordinates for the mRNA or EST sequence */
if ((type == INDEL) || (type == UNALIGNED))
    if (ni->chromStart == ni->chromEnd)
      getCoords(psl, ni->chromStart-left, ni->chromEnd+right, &start, &end, thisStrand, &nogap);
    else
      getCoords(psl, ni->chromStart-left, ni->chromEnd+right, &start, &end, thisStrand, &nogap);      
/*if (ni->chromStart == ni->chromEnd)
      getCoords(psl, ni->chromStart-3, ni->chromEnd+2, &start, &end, thisStrand, &nogap);
    else
    getCoords(psl, ni->chromStart-2, ni->chromEnd+2, &start, &end, thisStrand, &nogap);*/      
else
    getCoords(psl, ni->chromStart-1, ni->chromEnd, &start, &end, thisStrand, &nogap);

/* Get the corresponding mRNA or EST  sequence; db doesn't have versions,
 * so strip them. */
safef(accBuf, sizeof(accBuf),"%s", psl->qName);
p = strrchr(accBuf, '.');
if (p != NULL)
    *p = '\0';
struct dnaSeq *seq = hRnaSeq(accBuf);
if (thisStrand[0] != strand[0])
    {
    int temp = start;
    start = seq->size - end;
    end = seq->size - temp;
    reverseComplement(seq->dna, seq->size);
    }
if ((end-start) > 0)
    {
    dna = needMem((end-start)+1);
    strncpy(dna,seq->dna + start, end-start);
    dna[end-start] = '\0';
    }
else
    dna = cloneString("");
if ((type == INDEL) || (type == UNALIGNED))
  {

    if ((seq->size > (start + left + right)) && (start >= 0))
      {      
	dnaStart = needMem(mrnaSize+1);
	strncpy(dnaStart, seq->dna + start, mrnaSize);
	dnaStart[mrnaSize] = '\0';
      }
    if  ((end - mrnaSize) > 0)
      {
	dnaEnd = needMem(mrnaSize+1);
	strncpy(dnaEnd, seq->dna + end - mrnaSize, mrnaSize);
	dnaEnd[mrnaSize] = '\0';
      }    
    /*if ((seq->size > (start + 4)) && (start >= 0))
      {      
	dnaStart = needMem(mrnaSize+5);
	strncpy(dnaStart, seq->dna + start, mrnaSize + 4);
	dnaStart[mrnaSize+4] = '\0';
      }
    if  ((end - mrnaSize - 4) > 0)
      {
	dnaEnd = needMem(mrnaSize+5);
	strncpy(dnaEnd, seq->dna + end - mrnaSize - 4, mrnaSize + 4);
	dnaEnd[mrnaSize+4] = '\0';
	}*/    
  }
if (!dnaStart)
   dnaStart = cloneString("");
if (!dnaEnd)
   dnaEnd = cloneString("");


/* fprintf(stderr, "Comparing genomic %s at %d vs. %s %s vs. %s %s (%d-%d, %s vs. %s)\n", gseq->dna, ni->chromStart, ni->mrna->name, mdna, psl->qName, dna, start, end, thisStrand, strand);*/
/*fprintf(stderr, "Comparing genomic at %d-%d\n%s\n vs. %s\n%s\n vs. %s\n%s\n (%d-%d, %s vs. %s)\n", ni->chromStart, ni->chromEnd, gseq->dna, ni->mrna->name, mdna, psl->qName, dna, start, end, thisStrand, strand);*/
/*fprintf(stderr, "\tfrom start - %s\n\tfrom end - %s\n", dnaStart, dnaEnd);*/

/* If it doesn't align to this region */
if (start == end)
    {
    if (sameString(table, "mrna"))
        {
	  ni->hs->unMrna++;
	  slAddHead(&ni->hs->unMrnaAcc, acc);
        } 
    else if (sameString(table, "xenoMrna"))
        {
	  ni->xe->unMrna++;
	  findOrganism(conn2, acc);
	  slAddHead(&ni->xe->unMrnaAcc, acc);
	}
    else if (sameString(table, "est"))
        {
	  ni->hs->unEst++;
	  slAddHead(&ni->hs->unEstAcc, acc);
	}
     else if (sameString(table, "xenoEst"))
        {
	  ni->xe->unEst++;
	  findOrganism(conn2, acc);
	  slAddHead(&ni->xe->unEstAcc, acc);
	}
    }
/* If it agrees with the genomic sequence */
else if ((sameString(gseq->dna, dna)) || (((type == INDEL) || (type == UNALIGNED)) && (nogap))) 
    {
    if (sameString(table, "mrna"))
        {
	  ni->hs->genMrna++;
	  slAddHead(&ni->hs->genMrnaAcc, acc);
	} 
    else if (sameString(table, "xenoMrna"))
        {
	  ni->xe->genMrna++;
	  findOrganism(conn2, acc);
	  slAddHead(&ni->xe->genMrnaAcc, acc);
	}
    else if (sameString(table, "est"))
        {
	  ni->hs->genEst++;
	  slAddHead(&ni->hs->genEstAcc, acc);
	}
     else if (sameString(table, "xenoEst"))
        {
	  ni->xe->genEst++;
	  findOrganism(conn2, acc);
	  slAddHead(&ni->xe->genEstAcc, acc);
        }
    }
/* If it agrees with the mrna sequence */
else if ((sameString(mdna, dna)) || 
	 (((type == INDEL) || (type == UNALIGNED)) && 
	  ((strlen(dna) == strlen(mdna)) || (sameString(mdna, dnaStart)) || (sameString(mdna, dnaEnd)))))
  {
    if (sameString(table, "mrna"))
        {
	  ni->hs->mrnaMrna++;
	  slAddHead(&ni->hs->mrnaMrnaAcc, acc);
        } 
    else if (sameString(table, "xenoMrna"))
        {
	  ni->xe->mrnaMrna++;
	  findOrganism(conn2, acc);
	  slAddHead(&ni->xe->mrnaMrnaAcc, acc);
	}
    else if (sameString(table, "est"))
        {
	  ni->hs->mrnaEst++;
	  slAddHead(&ni->hs->mrnaEstAcc, acc);
	}
     else if (sameString(table, "xenoEst"))
        {
	  ni->xe->mrnaEst++;
	  findOrganism(conn2, acc);
	  slAddHead(&ni->xe->mrnaEstAcc, acc);
        }
    }
/* if it agrees with neither */
else 
    {
    if (sameString(table, "mrna"))
        {
	  ni->hs->noMrna++;
	  slAddHead(&ni->hs->noMrnaAcc, acc);
	}
    else if (sameString(table, "xenoMrna"))
        {
	  ni->xe->noMrna++;
	  findOrganism(conn2, acc);
	  slAddHead(&ni->xe->noMrnaAcc, acc);
	}
    else if (sameString(table, "est"))
        {
	  ni->hs->noEst++;
	  slAddHead(&ni->hs->noEstAcc, acc);
	}
     else if (sameString(table, "xenoEst"))
        {
	  ni->xe->noEst++;
	  findOrganism(conn2, acc);
	  slAddHead(&ni->xe->noEstAcc, acc);
	}
    }  

if (dnaStart)
   {
     freez(&dnaStart);
     freez(&dnaEnd);
   }
freez(&dna);
dnaSeqFree(&seq);
hFreeConn(&conn2);
}

void searchTrans(struct sqlConnection *conn, char *table, struct dnaSeq *rna, struct indel *ni, char *strand, unsigned type, struct clone *mrnaCloneId, int left, int right)
/* Find all mRNA's or EST's that align in the region of an indel, mismatch, or codon */
{
struct sqlResult *sr;
char **row;
int offset;
struct clone *cloneId, *libId;
struct psl *psl;
DNA mdna[10000];
struct sqlConnection *conn2 = hAllocConn();
struct dnaSeq *gseq;
struct acc *acc;
char *name;
struct refseq *rs = NULL;

if (type == CODONSUB)
    assert(((ni->mrnaEnd-ni->mrnaStart)+1) == 3);
if ((derived) && (hashLookup(derived, ni->mrna->name)))
     rs = hashFindVal(derived, ni->mrna->name);

/* Determine the sequence, If indel, add one base on each side */
/* if ((type == INDEL) || (type == UNALIGNED))
    {
	assert((ni->mrnaEnd-ni->mrnaStart+left+right+1) < sizeof(mdna));
        strncpy(mdna,rna->dna + ni->mrnaStart - left,ni->mrnaEnd-ni->mrnaStart+left+right);
        mdna[ni->mrnaEnd-ni->mrnaStart+left+right] = '\0';
      if (ni->mrnaStart == ni->mrnaEnd) 
	{
	assert((ni->mrnaEnd-ni->mrnaStart+5) < sizeof(mdna));
        strncpy(mdna,rna->dna + ni->mrnaStart - 2,ni->mrnaEnd-ni->mrnaStart+4);
        mdna[ni->mrnaEnd-ni->mrnaStart+4] = '\0';
	}
      else
      {
	assert((ni->mrnaEnd-ni->mrnaStart+5) < sizeof(mdna));
        strncpy(mdna,rna->dna + ni->mrnaStart - 2,ni->mrnaEnd-ni->mrnaStart+4);
        mdna[ni->mrnaEnd-ni->mrnaStart+4] = '\0';
      }
    }
else
    {*/
    int len = ni->mrnaEnd-ni->mrnaStart+left+right;
    assert((len+1) < sizeof(mdna));
    strncpy(mdna,rna->dna + ni->mrnaStart - left,len);
    mdna[len] = '\0';
    /* printf("Mismatch/Indel at %d-%d (%d) in %s:%d-%d bases %s, left=%d, right=%d\n", ni->mrnaStart, ni->mrnaEnd, rna->size, ni->chrom, ni->chromStart, ni->chromEnd, mdna, left, right); */
    /*printf("Mismatch/indel in %s at %d in %s:%d bases %s\n", ni->mrna->name, ni->mrnaStart, ni->chrom, ni->chromStart, mdna);*/
    /*    }*/

/* Get dna sequence */
 if ((type == INDEL) || (type == UNALIGNED))
   if (ni->chromStart == ni->chromEnd)
     gseq = hDnaFromSeq(ni->chrom, ni->chromStart-left, ni->chromEnd+right, dnaLower);
   else
     gseq = hDnaFromSeq(ni->chrom, ni->chromStart-left, ni->chromEnd+right, dnaLower);
 /*if (ni->chromStart == ni->chromEnd)
     gseq = hDnaFromSeq(ni->chrom, ni->chromStart-3, ni->chromEnd+2, dnaLower);
   else
   gseq = hDnaFromSeq(ni->chrom, ni->chromStart-2, ni->chromEnd+2, dnaLower);*/
else
  gseq = hDnaFromSeq(ni->chrom, ni->chromStart-1, ni->chromEnd, dnaLower);
if (strand[0] == '-')
  reverseComplement(gseq->dna, gseq->size);


/* Find all sequences that span this region */
 if ((type == INDEL) || (type == UNALIGNED))
   if (ni->chromStart == ni->chromEnd)
     sr = hRangeQuery(conn, table, ni->chrom, ni->chromStart-left, ni->chromEnd+right, NULL, &offset);
   else
     sr = hRangeQuery(conn, table, ni->chrom, ni->chromStart-left, ni->chromEnd+right, NULL, &offset);
 /*if (ni->chromStart == ni->chromEnd)
     sr = hRangeQuery(conn, table, ni->chrom, ni->chromStart-3, ni->chromEnd+2, NULL, &offset);
   else
   sr = hRangeQuery(conn, table, ni->chrom, ni->chromStart-2, ni->chromEnd+2, NULL, &offset);*/
else 
    sr = hRangeQuery(conn, table, ni->chrom, ni->chromStart-1, ni->chromEnd, NULL, &offset);
while ((row = sqlNextRow(sr)) != NULL) 
    {
    psl = pslLoad(row+offset);
    name = cloneString(psl->qName);
    acc = createAcc(name);
    cloneId = getMrnaCloneId(conn2, acc->name);
    libId = getMrnaLibId(conn2, acc->name);
    if ((!sameAcc(acc,ni->mrna)) && (!sameClone(cloneId,mrnaCloneId)) && 
	((!rs) || (!refseqAcc(rs, acc->name, ni->mrna->name))) &&
	(!usedClone(cloneId, ni->clones)) && (!usedAcc(acc, ni->accs)) &&
	(!usedClone(libId, ni->libraries)))
      {
	slAddHead(&(ni->clones), cloneId);
	slAddHead(&(ni->libraries), libId);
	slAddHead(&(ni->accs), acc);	
	searchTransPsl(table, mdna, ni, strand, type, psl, gseq, acc, left, right);
      }
    else 
      {
	accFree(&acc);
	cloneFree(&cloneId);
      }
    pslFree(&psl);
    }
dnaSeqFree(&gseq);
sqlFreeResult(&sr);
hFreeConn(&conn2);
}

struct evid *createEvid()
/* Create an instance of an evid struct */
{
struct evid *ev;

 AllocVar(ev);
 ev->next = NULL;
 ev->genMrna = 0;
 ev->genMrnaAcc = NULL;
 ev->genEst = 0;
 ev->genEstAcc = NULL;
 ev->mrnaMrna = 0;
 ev->mrnaMrnaAcc = NULL;
 ev->mrnaEst = 0;
 ev->mrnaEstAcc = NULL;
 ev->noMrna = 0;
 ev->noMrnaAcc = NULL;
 ev->noEst = 0;
 ev->noEstAcc = NULL;
 ev->unMrna = 0;
 ev->unMrnaAcc = NULL;
 ev->unEst = 0;
 ev->unEstAcc = NULL;
 return(ev);
}

struct indel *createMismatch(struct sqlConnection *conn, char *mrna, int mbase, char* chr, int gbase, 
			     struct dnaSeq *rna, char *strand, struct clone *cloneId, struct acc *acc, 
			     boolean snp, boolean valid, boolean inCds, char r, char g)
/* Create a record of a mismatch */
{
  struct indel *mi;

  AllocVar(mi);
  mi->next = NULL;
  mi->size = 1;
  mi->chrom = chr;
  mi->chromStart = mi->chromEnd = gbase;
  mi->mrna = acc;
  mi->mrnaStart = mi->mrnaEnd = mbase;
  mi->hs = createEvid();
  mi->xe = createEvid();
  mi->inCds = inCds;
  mi->knownSnp = snp;
  mi->validatedSnp = valid;
  mi->insertion = FALSE;
  mi->mrnaSeq = needMem(2);
  mi->mrnaSeq[0] = r;
  mi->mrnaSeq[1] = '\0';
  mi->genSeq = needMem(2);
  mi->genSeq[0] = g;
  mi->genSeq[1] = '\0';

 /* Determine whether mRNAs and ESTs support genomic or mRNA sequence in mismatch */
  searchTrans(conn, "mrna", rna, mi, strand, MISMATCH, cloneId, 0, 1);
  searchTrans(conn, "est", rna, mi, strand, MISMATCH, cloneId, 0, 1);
  if (xeno)
      {
      searchTrans(conn, "xenoMrna", rna, mi, strand, MISMATCH, cloneId, 0, 1);
      searchTrans(conn, "xenoEst", rna, mi, strand, MISMATCH, cloneId, 0, 1);
      }

  return(mi);
}

struct indel *createCodonSub(struct sqlConnection *conn, int mrnaStart,
                             char *mCodon, char* chr, int genPos[3], char* gCodon,
                             struct dnaSeq *rna, char *strand, struct clone *cloneId,
			     boolean knownSnp, boolean knownValid, struct acc *acc)
/* Create a record of a mismatch */
{
  struct indel *mi;
#if 0
  printf("codonSub: mrna=%s mrnaStart=%d  genPos=%d,%d,%d mCodon=%s,gCodon=%s, aa=%c %c\n",
         acc->name, mrnaStart, genPos[0], genPos[1], genPos[2], mCodon, gCodon,
         lookupCodon(mCodon), lookupCodon(gCodon));
#endif
 
  AllocVar(mi);
  mi->next = NULL;
  mi->size = 1;
  mi->chrom = chr;
  mi->chromStart = genPos[0];
  mi->chromEnd = genPos[2];
  mi->mrna = acc;
  mi->mrnaStart = mrnaStart-2;
  mi->mrnaEnd = mrnaStart;
  memcpy(mi->codonGenPos, genPos, sizeof(mi->codonGenPos));
  strcpy(mi->genCodon, gCodon);
  strcpy(mi->mrnaCodon, mCodon);
  mi->hs = createEvid();
  mi->xe = createEvid();
  mi->knownSnp = knownSnp;
  mi->validatedSnp = knownValid;
    
  /* Determine whether mRNAs and ESTs support genomic or mRNA sequence in mismatch */
  searchTrans(conn, "mrna", rna, mi, strand, CODONSUB, cloneId, 0, 1);
  searchTrans(conn, "est", rna, mi, strand, CODONSUB, cloneId, 0, 1);
  if (xeno)
      {
      searchTrans(conn, "xenoMrna", rna, mi, strand, CODONSUB, cloneId, 0, 1);
      searchTrans(conn, "xenoEst", rna, mi, strand, CODONSUB, cloneId, 0, 1);
      }
  return(mi);
}

void cdsCompare(struct sqlConnection *conn, struct pslInfo *pi, struct dnaSeq *rna, struct dnaSeq *dna)
/* Compare the alignment of the coding and UTR regions of an mRNA to the genomic sequence */
{
int i,j;
DNA *r, *d; 
DNA rCodon[4], dCodon[4];
int codonSnps = 0, codonMismatches = 0, codonValid = 0, valid = 0;
int codonGenPos[3];
int codonMrnaStart = 0, tPosition = 0;;
int nCodonBases = 0, iCodon = -1;   /* to deal with partial codons */
struct indel *mi, *miList=NULL;
struct indel *codonSub, *codonSubList=NULL;
ZeroVar(codonGenPos);
boolean knownSnp = FALSE, knownValid = FALSE, inCds = FALSE;

strcpy(rCodon, "---");
strcpy(dCodon, "---");
r = rna->dna;
d = dna->dna;
pi->cdsMatch = pi->cdsMismatch = 0;
/* Look at all alignment blocks */
for (i = 0; i < pi->psl->blockCount; i++) 
    {
    int qstart = pi->psl->qStarts[i];
    int tstart = pi->psl->tStarts[i] - pi->psl->tStarts[0];

    /* Compare each base */
    verbose(4, "\tcomparing block %d\n", i);
    for (j = 0; j < pi->psl->blockSizes[i]; j++) 
      {
	/* Determine genome base */
	tPosition = tstart + j + pi->psl->tStarts[0];
	if (pi->psl->strand[0] == '-')
	  tPosition = pi->psl->tSize - tPosition - 1;
        /* Check if it is in the coding region */
	if (((qstart+j) >= pi->cdsStart) && ((qstart+j) < pi->cdsEnd))
	  {
	    inCds = TRUE;
	    /* Determine codon position */
	    iCodon = ((qstart+j-pi->cdsStart)%3);
            if (iCodon == 0) {
	      codonSnps = 0;
	      codonValid = 0;
	      codonMismatches = 0;
	      codonMrnaStart = qstart+j;
            }
	    if (pi->psl->strand[0] == '-')
	      codonGenPos[2-iCodon] = tPosition + 1;
	    else
	      codonGenPos[iCodon] = tPosition + 1;
	    rCodon[iCodon] = r[qstart+j];
	    dCodon[iCodon] = d[tstart+j];
	    nCodonBases++;
	  } 
	else
	  {
	    inCds = FALSE;
	    iCodon = 0;
	  }
	/* Bases match */
	if ((char)r[qstart+j] == (char)d[tstart+j])
	  {
	    if (inCds)
	      pi->cdsMatch++;
	  }
	/* Check if mismatch is due to a SNP */
	else if ((valid = snpBase(conn,pi->psl->tName,tPosition)) > 0)
	  {
	    if (inCds)
	      {
		pi->cdsMatch++;
		codonSnps++;
	      }
	    pi->snp++;
	    valid--;
	    codonValid += valid;
	    if ((mismatchReport) && (inCds))
	      {
		verbose(4, "\tcreating mismatch - 1\n");
		mi = createMismatch(conn, pi->mrna->name, qstart+j, pi->psl->tName, tPosition+1, rna, pi->psl->strand, pi->mrnaCloneId, pi->mrna, TRUE, valid, inCds, r[qstart+j], d[tstart+j]);
		slAddHead(&miList,mi);
	      }
	  }
	else
	  {
	    if (inCds)
	      pi->cdsMismatch++;
	    if ((mismatchReport) && (inCds))
	      {
		verbose(4, "\tcreating mismatch - 2\n");
		mi = createMismatch(conn, pi->mrna->name, qstart+j, pi->psl->tName, tPosition+1, rna, pi->psl->strand, pi->mrnaCloneId, pi->mrna, FALSE, FALSE, inCds, r[qstart+j], d[tstart+j]);
		slAddHead(&miList,mi);
	      }
	    if (inCds)
	      {
		codonMismatches++;
		/* Check if mismatch is in a codon wobble position.*/
		if (iCodon==2)
		  pi->thirdPos++;
	      }
	  }
	/* If third base, check codon for mismatch */
	if ((iCodon==2) && (nCodonBases == 3) && !sameString(rCodon, dCodon) && (inCds))
	  {
	    if ((codonSnps) && (codonMismatches == 0))
	      {
		knownSnp = TRUE;
		if (codonSnps == codonValid)
		  knownValid = TRUE;
		else
		  knownValid = FALSE;
	      }
	    else
	      {
		knownSnp = FALSE;
		knownValid = FALSE;
	      }
	    if (lookupCodon(rCodon) == lookupCodon(dCodon))
	      {
		pi->synSub++;
		if ((codonSnps > 0) && (codonMismatches == 0))
		  pi->synSubSnp++;
	      }		
	    else
	      {
		pi->nonSynSub++;
		if ((codonSnps > 0) && (codonMismatches == 0))
		  pi->nonSynSubSnp++;
	      }
	    if (codonSubReport)
	      {
		verbose(4, "\tcreating codon sub\n");
		codonSub = createCodonSub(conn, qstart+j,
					  rCodon, pi->psl->tName, codonGenPos,
					  dCodon, rna, pi->psl->strand, 
					  pi->mrnaCloneId, knownSnp, knownValid, pi->mrna);
		slAddHead(&codonSubList, codonSub);
	      }
	  }
	if (iCodon == 2) 
	  nCodonBases = 0;
      }
    }
if (mismatchReport)
    { 
    slReverse(&miList);
    pi->mmList = miList;
    }
if (codonSubReport)
    {
    slReverse(&codonSubList);
    pi->codonSubList = codonSubList;
    }
}

struct indel *createUnali(struct sqlConnection *conn, int mstart, int mend, char* chr, int gstart, int gend, 
			  struct dnaSeq *rna, char *strand, struct clone *cloneId, struct acc *acc, 
			  int left, int right, char* insert, boolean inCds)
/* Create a record of an unaligned region of cds */
{
  struct indel *ni;
 
  AllocVar(ni);
  ni->next = NULL;
  ni->size = mend - mstart;
  ni->chrom = chr;
  ni->chromStart = gstart;
  ni->chromEnd = gend;
  ni->mrna = acc;
  ni->mrnaStart = mstart;
  ni->mrnaEnd = mend;
  ni->hs = createEvid();
  ni->xe = createEvid();
  ni->insertion = FALSE;
  ni->mrnaSeq = insert;
  ni->genSeq = NULL;
  ni->inCds = inCds;
 
  /* Determine whether mRNAs and ESTs support genomic or mRNA sequence in indel region */
  searchTrans(conn, "mrna", rna, ni, strand, UNALIGNED, cloneId, left, right);
  searchTrans(conn, "est", rna, ni, strand, UNALIGNED, cloneId, right, left);
  if (xeno)
      {
      searchTrans(conn, "xenoMrna", rna, ni, strand, UNALIGNED, cloneId, left, right);
      searchTrans(conn, "xenoEst", rna, ni, strand, UNALIGNED, cloneId, right, left);
      }
  
  return(ni);
}

struct indel *createIndel(struct sqlConnection *conn, int mstart, int mend, char* chr, int gstart, int gend, 
			  struct dnaSeq *rna, char *strand, struct clone *cloneId, struct acc *acc, 
			  int left, int right, char* seq, boolean insert, boolean inCds)
/* Create a record of an indel */
{
  struct indel *ni;
 
  AllocVar(ni);
  ni->next = NULL;
  if ((mend - mstart) > 0) 
    ni->size = mend - mstart;
  else
    ni->size = gend - gstart;
  ni->chrom = chr;
  ni->chromStart = gstart;
  ni->chromEnd = gend;
  ni->mrna = acc;
  ni->mrnaStart = mstart;
  ni->mrnaEnd = mend;
  ni->hs = createEvid();
  ni->xe = createEvid();
  ni->insertion = insert;
  ni->genSeq = NULL;
  ni->mrnaSeq = NULL;
  if (insert)
    ni->mrnaSeq = seq;
  else
    ni->genSeq = seq;
  ni->knownSnp = FALSE;
  ni->validatedSnp = FALSE;
  ni->inCds = inCds;
 
  /* Determine whether mRNAs and ESTs support genomic or mRNA sequence in indel region */
  searchTrans(conn, "mrna", rna, ni, strand, INDEL, cloneId, left, right);
  searchTrans(conn, "est", rna, ni, strand, INDEL, cloneId, left, right);
  if (xeno)
      {
      searchTrans(conn, "xenoMrna", rna, ni, strand, INDEL, cloneId, left, right);
      searchTrans(conn, "xenoEst", rna, ni, strand, INDEL, cloneId, left, right);
      }

  return(ni);
}

void cdsIndels(struct sqlConnection *conn, struct pslInfo *pi, struct dnaSeq *rna)
/* Find indels in coding regions and UTRs of mRNA alignment */
{
  int i;
  int unaligned=0, unalignedCds=0, prevqend=0, prevtend=0, qstart=0, tstart=0;
  int leftShift = 0, rightShift = 0, tLeft = 0, tRight = 0, startIndel = 0;
  struct indel *ni=NULL, *niList=NULL, *uiList=NULL;
  DNA *insert;
  int cdsS = pi->cdsStart, cdsE = pi->cdsEnd;
  boolean unali = FALSE, inCds = FALSE;
  struct dnaSeq *gseq;
  
  /* Check all blocks for indels */
  for (i = 1; i < pi->psl->blockCount; i++)
  {  
    /* Find insertions in the mRNA sequence */
    unaligned = 0;
    prevqend = 0;
    prevtend = 0;
    qstart = pi->psl->qStarts[i];
    tstart = pi->psl->tStarts[i];
    /* If block is in the cds region */
    if ((qstart >= cdsS) && (qstart <= cdsE))
      inCds = TRUE;
    else
      inCds = FALSE;
      
    /* Determine where previous block left off in the alignment */
    prevqend = pi->psl->qStarts[i-1] + pi->psl->blockSizes[i-1];
    prevtend = pi->psl->tStarts[i-1] + pi->psl->blockSizes[i-1];
    /* Adjust if previous end is not in coding region */
    /* if (prevqend < pi->cdsStart)
      {
	prevqend = cdsS;
	prevtend = tstart;
      }
    */
    unaligned = qstart - prevqend;
    if (inCds)
      unalignedCds = qstart - cdsS;
    else
      unalignedCds = 0;
   /*if (((tstart - prevtend) != 0) && (!pi->splice[i]) && ((tstart - prevtend) < 30))*/
    if (((tstart - prevtend) != 0) && (!pi->splice[i]))
      unali = TRUE;
    else
      unali = FALSE;
    /* Check if there is an indel */
    if (unaligned > 0)
      {
	/* Check if unaligned part is a gap in the mRNA alignment, not an insertion */
	if (unaligned > 30)
	  if (inCds)
	    pi->cdsGap += unalignedCds;
	if ((!unali) && (inCds))
	  {	    
	    if (unalignedCds == 1) 
	      pi->singleIndel++;
	    if ((unalignedCds%3) == 0) 
	      pi->tripleIndel += unalignedCds/3;
	    pi->totalIndel += unalignedCds;
	    pi->indels[pi->indelCount] = unalignedCds;
	    pi->indelCount++;
	  }
	if (inCds)
	  pi->unalignedCds += unaligned;
	/*if ((pi->cdsPctId >= 0.98) && (pi->cdsCoverage >= 0.80))
	  indels[unaligned]++;*/
	if (pi->indelCount > 256) 
	  errAbort("Indel count too high");
	/* Determine boundaries of the indel */
	if (pi->psl->strand[0] == '-') {
	  int temp = tstart;
	  tstart = pi->psl->tSize - prevtend;
	  prevtend = pi->psl->tSize - temp;
	  tLeft = pi->psl->tSize - pi->psl->tStarts[i] - pi->psl->blockSizes[i];
	  tRight = pi->psl->tSize - pi->psl->tStarts[i-1];
	  gseq = hDnaFromSeq(pi->psl->tName, tLeft, tRight, dnaLower);
	  reverseComplement(gseq->dna, gseq->size);
	  startIndel = tRight - tstart;
	} else {
	  tLeft = pi->psl->tStarts[i-1];
	  tRight = pi->psl->tStarts[i] + pi->psl->blockSizes[i];
	  gseq = hDnaFromSeq(pi->psl->tName, tLeft, tRight, dnaLower);
	  startIndel = prevtend-tLeft;
	}
	insert = needMem(unaligned+1);
	strncpy(insert, rna->dna + prevqend, unaligned);
	insert[unaligned] = '\0';
	leftShift = shiftIndel(insert, unaligned, -1, startIndel, tRight-tLeft, gseq);
	rightShift = shiftIndel(insert, unaligned, 1, startIndel-1, tRight-tLeft, gseq);
	/*free(insert);*/
	if (pi->psl->strand[0] == '-') 
	  {
	    int temp = leftShift;
	    prevqend += rightShift;
	    qstart += rightShift;
	    prevtend += rightShift;
	    tstart += rightShift;
	    leftShift = rightShift;
	    rightShift = temp;
	  }
	prevqend -= leftShift;
	qstart -= leftShift;
	prevtend -= leftShift;
	tstart -= leftShift;
	/* Create an unali record for this */	
	if ((unaliReport) && (unali)) 
	  {
	    ni = createUnali(conn, prevqend, qstart, pi->psl->tName, prevtend, tstart, rna, pi->psl->strand, pi->mrnaCloneId, pi->mrna, 1, leftShift + rightShift + 1, insert, inCds); 
	    slAddHead(&uiList, ni);
	  }
	/* Create an indel record for this */	
	if ((indelReport) && (!unali)) 
	  {
	    ni = createIndel(conn, prevqend, qstart, pi->psl->tName, prevtend, prevtend, rna, pi->psl->strand, pi->mrnaCloneId, pi->mrna, 1, leftShift + rightShift + 1, insert, TRUE, inCds); 
	    slAddHead(&niList,ni);
	  }
      }
    
    /* Find deletions in the mRNA sequence */
    unaligned = 0;
    qstart = pi->psl->qStarts[i];
    tstart = pi->psl->tStarts[i];
    prevqend = 0;
    prevtend = 0;
    /* Check if in the coding region */
    if ((qstart >= cdsS) && (qstart <= cdsE))
      inCds = TRUE;
    else 
      inCds = FALSE;
    /* Determine where previous block left off in the alignment */
    prevqend = pi->psl->qStarts[i-1] + pi->psl->blockSizes[i-1];
    prevtend = pi->psl->tStarts[i-1] + pi->psl->blockSizes[i-1];
    /* Adjust if previous end is not in coding region */
    /*if (prevqend < pi->cdsStart)
      {
      prevqend = qstart;
      prevtend = tstart;
      }
    */
    unaligned = tstart - prevtend;
    if (inCds)
      unalignedCds = tstart - qstart;
    else
      unalignedCds = 0;
    if ((qstart - prevqend) > 0)
      unali = TRUE;
    else
      unali = FALSE;
    /* Check if unaligned part is an intron */
    if ((unaligned > 30) || (pi->splice[i]))
      {
	/*pi->cdsGap += unaligned;*/
	if (inCds)
	  pi->cdsGap += 0;  
      }
    /* Check if there is an indel */
    else if ((unaligned != 0) && (!unali)) 
      {
	if (inCds)
	  {
	    if (unaligned == 1) 
	      pi->singleIndel++;
	    if ((unaligned%3) == 0) 
	      pi->tripleIndel++;
	    pi->totalIndel -= unaligned;
	    pi->indels[pi->indelCount] = unaligned;
	    pi->indelCount++;
	    if (pi->indelCount > 256) 
	      errAbort("Indel count too high");
	  }
	/*if ((pi->cdsPctId >= 0.98) && (pi->cdsCoverage >= 0.80))
	  indels[unaligned]++;*/
	/* Create an indel record for this */
	if (pi->psl->strand[0] == '-') {
	  int temp = tstart;
	  tstart = pi->psl->tSize - prevtend;
	  prevtend = pi->psl->tSize - temp;
	  tLeft = pi->psl->tSize - pi->psl->tStarts[i] - pi->psl->blockSizes[i];
	  tRight = pi->psl->tSize - pi->psl->tStarts[i-1];
	  gseq = hDnaFromSeq(pi->psl->tName, tLeft, tRight, dnaLower);
	  reverseComplement(gseq->dna, gseq->size);
	  startIndel = tRight - tstart;
	} else {
	  tLeft = pi->psl->tStarts[i-1];
	  tRight = pi->psl->tStarts[i] + pi->psl->blockSizes[i];
	  gseq = hDnaFromSeq(pi->psl->tName, tLeft, tRight, dnaLower);
	  startIndel = prevtend - tLeft;
	}
	insert = needMem(unaligned+1);
	strncpy(insert, gseq->dna + startIndel, unaligned);
	insert[unaligned] = '\0';
	leftShift = shiftIndel(insert, unaligned, -1, startIndel, tLeft+tRight, gseq);
	rightShift = shiftIndel(insert, unaligned, 1, startIndel, tLeft+tRight, gseq);
	/* free(insert); */
	if (pi->psl->strand[0] == '-') 
	  {
	    int temp = leftShift;
	    prevqend += rightShift;
	    qstart += rightShift;
	    prevtend += rightShift;
	    tstart += rightShift;
	    leftShift = rightShift;
	    rightShift = temp;
	  }
	prevqend -= leftShift;
	qstart -= leftShift;
	prevtend -= leftShift;
	tstart -= leftShift;
	if (indelReport)
	  {
	    ni = createIndel(conn, prevqend, qstart, pi->psl->tName, prevtend, tstart, rna, pi->psl->strand, pi->mrnaCloneId, pi->mrna, 1, rightShift + leftShift + 1, insert, FALSE, inCds); 
	    slAddHead(&niList,ni);
	  }
      }
  }
  if (indelReport)
    { 
      slReverse(&niList);
      pi->indelList = niList;
    }
  if (unaliReport)
    { 
      slReverse(&uiList);
      pi->unaliList = uiList;
    }
}

void processCds(struct sqlConnection *conn, struct pslInfo *pi, struct dnaSeq *rna, struct dnaSeq *dna)
/* Examine closely the alignment of the coding region */
{
struct acc *acc;
char *name = cloneString(pi->psl->qName);

verbose(2, "Processing %s\n", name);
/* Create the accession for the query */
acc = createAcc(name);
/* Compare the actual aligned parts */
verbose(3, "\tcomparing cds alignment\n");
cdsCompare(conn, pi, rna, dna);
pi->cdsPctId = (float)(pi->cdsMatch)/(pi->cdsMatch + pi->cdsMismatch);
pi->cdsCoverage = (float)(pi->cdsMatch + pi->cdsMismatch)/(pi->cdsSize);

/* Determine indels in the alignment */
verbose(3, "\tanalyzing indels\n");
cdsIndels(conn, pi, rna);
} 

struct pslInfo *processPsl(struct sqlConnection *conn, struct psl *psl)
/* Analyze an alignment of a mRNA to the genomic sequence */
{
struct pslInfo *pi;
struct dnaSeq *rnaSeq;
struct dnaSeq *dnaSeq;
char *name = cloneString(psl->qName);

verbose(3, "\tprocessing psl record\n");
AllocVar(pi);
pi->psl = psl;
pi->mrna = createAcc(name);
pi->pctId = (float)(psl->match + psl->repMatch)/(psl->match + psl->repMatch + psl->misMatch);
pi->coverage = (float)(psl->match + psl->misMatch + psl->repMatch)/(psl->qSize);
if (!hashLookup(cdsStarts, pi->mrna->name))
    pi->cdsStart = 0;  
else
    pi->cdsStart = hashIntVal(cdsStarts, pi->mrna->name);
if (!hashLookup(cdsEnds, pi->mrna->name))
    pi->cdsEnd = psl->qSize; 
else
    pi->cdsEnd = hashIntVal(cdsEnds, pi->mrna->name);
pi->cdsSize = pi->cdsEnd - pi->cdsStart;
pi->introns = countIntrons(psl->blockCount, psl->blockSizes, psl->tStarts);
if (loci != NULL)
    pi->loci = hashIntVal(loci, pi->mrna->name);
else
    pi->loci = nextFakeLoci++;
pi->indelCount = 0;
pi->totalIndel = 0;
pi->mrnaCloneId = getMrnaCloneId(conn, pi->mrna->name);
if (derived && hashLookup(derived, pi->mrna->name))
   pi->refseq = hashFindVal(derived, pi->mrna->name);
else 
  pi->refseq = NULL;

/* Get the corresponding sequences */
verbose(3, "\tretrieving rna and dna sequences\n");
rnaSeq = hashMustFindVal(rnaSeqs, pi->mrna->name);
dnaSeq = hDnaFromSeq(psl->tName, psl->tStart, psl->tEnd, dnaLower);

/* Reverse compliment genomic and psl record if aligned on opposite strand */
if (psl->strand[0] == '-') 
  {
   verbose(3, "\treverse complementing\n");
   reverseComplement(dnaSeq->dna, dnaSeq->size);
   pslRc(pi->psl);
  }

/* Analyze the coding region */
verbose(3, "\tcounting splice sites\n");
pi->stdSplice = countStdSplice(psl, dnaSeq->dna, pi);
verbose(3, "\tanalyzing cds region\n");
processCds(conn, pi, rnaSeq, dnaSeq);

/* Revert back to original psl record for printing */
if (psl->strand[0] == '-') 
  {
   verbose(3, "\treverse complementing back\n");
   pslRc(pi->psl);
  }

verbose(3, "\tdone with psl record\n");
freeDnaSeq(&dnaSeq);
return(pi);
}

void writeList(FILE *of, struct indel *iList, int type, struct psl *psl, struct acc *a)
/* Write out an list of indel/mismatches/codon subs*/
{
struct indel *indel;
struct acc *acc;

for (indel = iList; indel != NULL; indel=indel->next)
    {
      /*printf("Indel of size %d in %s:%d-%d vs. %s:%d-%d\n",
    indel->size, indel->mrna, indel->mrnaStart, indel->mrnaEnd,
    indel->chrom, indel->chromStart, indel->chromEnd);*/
    if (type == INDEL)
      {
        fprintf(of, "Indel of size %d in %s:%d-%d vs. %s:%d-%d, %s vs. %s",
		indel->size, accFmt(indel->mrna), indel->mrnaStart, indel->mrnaEnd,
		indel->chrom, indel->chromStart, indel->chromEnd, indel->mrnaSeq, 
		indel->genSeq);
	if (indel->inCds)
	  fprintf(of, ", CDS");
	if (indel->insertion)
	  fprintf(of, ", INSERTION\n");
	else
	  fprintf(of, ", DELETION\n");
      }
    else if (type == UNALIGNED)
      {
        fprintf(of, "Unaligned bases of size %d in %s:%d-%d vs. %s:%d-%d, %s vs. %s",
                indel->size, accFmt(indel->mrna), indel->mrnaStart, indel->mrnaEnd,
                indel->chrom, indel->chromStart, indel->chromEnd, indel->mrnaSeq,
		indel->genSeq);
	if (indel->inCds)
	  fprintf(of, ", CDS");
	fprintf(of, "\n");
      }
    else if (type == MISMATCH)
      {
      fprintf(of, "Mismatch at %s:%d vs. %s:%d, %s vs. %s",
              accFmt(indel->mrna), indel->mrnaStart, indel->chrom, indel->chromStart,
	      indel->mrnaSeq, indel->genSeq);
      if (indel->inCds)
	fprintf(of, ", CDS");
      if (indel->knownSnp)
	if (indel->validatedSnp)
	  fprintf(of, ", SNP-validated\n");
	else
          fprintf(of, ", SNP\n");
      else
          fprintf(of, "\n");	   
      }
    else if (type == CODONSUB)
       {
       char mrnaAA = lookupCodon(indel->mrnaCodon);
       char genAA = lookupCodon(indel->genCodon);
       bool isSyn = (mrnaAA == genAA);
       fprintf(of, "%s codon substitution at %s:%d vs. %s:%d,%d,%d, %s vs. %s, ",
	       (isSyn ? "synonymous" : "non-synonymous"),
	       accFmt(indel->mrna), indel->mrnaStart, indel->chrom, indel->codonGenPos[0],
	       indel->codonGenPos[1], indel->codonGenPos[2],
	       indel->mrnaCodon, indel->genCodon);
       if (mrnaAA == 0)
	   fprintf(of, "STOP vs. ");
       else
           fprintf(of, "%c vs. ", mrnaAA);
       if (genAA == 0)
           fprintf(of, "STOP");
       else
           fprintf(of, "%c", genAA);
       if (indel->knownSnp)
	 if (indel->validatedSnp)
	   fprintf(of, ", SNP-validated\n");
	 else
	   fprintf(of, ", SNP\n");
       else
	   fprintf(of, "\n");	   
       fprintf(of, "\tpsl %s %d %d\t%s %d %d\n",
	       a->name, psl->qStart, psl->qEnd,
	       psl->tName, psl->tStart, psl->tEnd);
       } 
    fprintf(of, "\t%d human mRNAs support genomic: ", indel->hs->genMrna);
    slReverse(&(indel->hs->genMrnaAcc));
    for (acc = indel->hs->genMrnaAcc; acc != NULL; acc = acc->next)
	fprintf(of, "%s ", accFmt(acc));
    fprintf(of, "\n\t%d human ESTs support genomic: ",indel->hs->genEst);
    slReverse(&(indel->hs->genEstAcc));
    for (acc = indel->hs->genEstAcc; acc != NULL; acc = acc->next)
	fprintf(of, "%s ", accFmt(acc));
    fprintf(of, "\n\t%d human mRNAs support %s: ", indel->hs->mrnaMrna, accFmt(indel->mrna));
    slReverse(&(indel->hs->mrnaMrnaAcc));
    for (acc = indel->hs->mrnaMrnaAcc; acc != NULL; acc = acc->next)
	fprintf(of, "%s ", accFmt(acc));
    fprintf(of, "\n\t%d human ESTs support %s: ",indel->hs->mrnaEst, accFmt(indel->mrna));
    slReverse(&(indel->hs->mrnaEstAcc));
    for (acc = indel->hs->mrnaEstAcc; acc != NULL; acc = acc->next)
	fprintf(of, "%s ", accFmt(acc));
    fprintf(of, "\n\t%d human mRNAs support neither: ", indel->hs->noMrna);
    slReverse(&(indel->hs->noMrnaAcc));
    for (acc = indel->hs->noMrnaAcc; acc != NULL; acc = acc->next)
	fprintf(of, "%s ", accFmt(acc));
    fprintf(of, "\n\t%d human ESTs support neither: ",indel->hs->noEst);
    slReverse(&(indel->hs->noEstAcc));
    for (acc = indel->hs->noEstAcc; acc != NULL; acc = acc->next)
	fprintf(of, "%s ", accFmt(acc));
    fprintf(of, "\n\t%d human mRNAs do not align: ", indel->hs->unMrna);
    slReverse(&(indel->hs->unMrnaAcc));
    for (acc = indel->hs->unMrnaAcc; acc != NULL; acc = acc->next)
	fprintf(of, "%s ", accFmt(acc));
    fprintf(of, "\n\t%d human ESTs do not align: ",indel->hs->unEst);
    slReverse(&(indel->hs->unEstAcc));
    for (acc = indel->hs->unEstAcc; acc != NULL; acc = acc->next)
	fprintf(of, "%s ", accFmt(acc));

    if (xeno)
        {
        fprintf(of, "\n\n\t%d xeno mRNAs support genomic: ", indel->xe->genMrna);
	slReverse(&(indel->xe->genMrnaAcc));
	for (acc = indel->xe->genMrnaAcc; acc != NULL; acc = acc->next)
	    fprintf(of, "%s(%s) ", accFmt(acc), acc->organism);
	fprintf(of, "\n\t%d xeno ESTs support genomic: ",indel->xe->genEst);
	slReverse(&(indel->xe->genEstAcc));
	for (acc = indel->xe->genEstAcc; acc != NULL; acc = acc->next)
	    fprintf(of, "%s(%s) ", acc->name, accFmt(acc));
	fprintf(of, "\n\t%d xeno mRNAs support %s: ", indel->xe->mrnaMrna, accFmt(indel->mrna));
	slReverse(&(indel->xe->mrnaMrnaAcc));
	for (acc = indel->xe->mrnaMrnaAcc; acc != NULL; acc = acc->next)
	    fprintf(of, "%s(%s) ", accFmt(acc), acc->organism);
	fprintf(of, "\n\t%d xeno ESTs support %s: ",indel->xe->mrnaEst, accFmt(indel->mrna));
	slReverse(&(indel->xe->mrnaEstAcc));
	for (acc = indel->xe->mrnaEstAcc; acc != NULL; acc = acc->next)
	    fprintf(of, "%s(%s) ", accFmt(acc), acc->organism);
	fprintf(of, "\n\t%d xeno mRNAs support neither: ", indel->xe->noMrna);
	slReverse(&(indel->xe->noMrnaAcc));
	for (acc = indel->xe->noMrnaAcc; acc != NULL; acc = acc->next)
	    fprintf(of, "%s(%s) ", accFmt(acc), acc->organism);
	fprintf(of, "\n\t%d xeno ESTs support neither: ",indel->xe->noEst);
	slReverse(&(indel->xe->noEstAcc));
	for (acc = indel->xe->noEstAcc; acc != NULL; acc = acc->next)
	    fprintf(of, "%s(%s) ", accFmt(acc), acc->organism);
	fprintf(of, "\n\t%d xeno mRNAs do not align: ", indel->xe->unMrna);
	slReverse(&(indel->xe->unMrnaAcc));
	for (acc = indel->xe->unMrnaAcc; acc != NULL; acc = acc->next)
	    fprintf(of, "%s(%s) ", accFmt(acc), acc->organism);
	fprintf(of, "\n\t%d xeno ESTs do not align: ",indel->xe->unEst);
	slReverse(&(indel->xe->unEstAcc));
	for (acc = indel->xe->unEstAcc; acc != NULL; acc = acc->next)
	    fprintf(of, "%s(%s) ", accFmt(acc), acc->organism);
	}
    fprintf(of, "\n\n");
    }
}

void writeOut(FILE *of, FILE *in, FILE *mm, FILE* cs, FILE* un, struct pslInfo *pi)
/* Output results of the mRNA alignment analysis */
{
int i;

fprintf(of, "%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t", accFmt(pi->mrna), pi->psl->tName, pi->psl->tStart, 
	pi->psl->tEnd,pi->psl->qStart,pi->psl->qEnd,pi->psl->qSize,pi->loci);
fprintf(of, "%.4f\t%.4f\t%d\t%d\t%.4f\t%.4f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t", 
	pi->coverage, pi->pctId, pi->cdsStart+1, 
	pi->cdsEnd, pi->cdsCoverage, pi->cdsPctId, pi->cdsMatch, 
	pi->cdsMismatch, pi->snp, pi->thirdPos, pi->synSub, pi->nonSynSub,
        pi->synSubSnp, pi->nonSynSubSnp, pi->introns, pi->stdSplice);
fprintf(of, "%d\t%d\t%d\t%d\t", pi->unalignedCds, pi->singleIndel, pi->tripleIndel, pi->totalIndel);
for (i = 0; i < pi->indelCount; i++)
    fprintf(of, "%d,", pi->indels[i]);
fprintf(of, "\t%d", pi->cdsGap);
fprintf(of, "\n");

/* Write out detailed records of indels, if requested */
if (indelReport) 
    {
    verbose(2, "Writing out indels\n");
    writeList(in, pi->indelList, INDEL, NULL, NULL);
    }

/* Write out detailed records of indels, if requested */
if (unaliReport) 
    {
    verbose(2, "Writing out unaligned regions\n");
    writeList(un, pi->unaliList, UNALIGNED, NULL, NULL);
    }

/* Write out detailed records of mismatches, if requested */
if (mismatchReport) 
    {
    verbose(2, "Writing out mismatches\n");
    writeList(mm, pi->mmList, MISMATCH, NULL, NULL);
    }
/* Write out detailed records of codon substitutions, if requested */
if (codonSubReport) 
    {
    verbose(2, "Writing out codon substitutions\n");
    writeList(cs, pi->codonSubList, CODONSUB, pi->psl, pi->mrna);
    }
}
 
void doFile(struct lineFile *pf, FILE *of, FILE *in, FILE *mm, FILE* cs, FILE* un)
/* Process all records in a psl file of mRNA alignments */
{
int lineSize;
char *line;
char *words[32];
int wordCount;
struct psl *psl;
struct pslInfo *pi;
struct sqlConnection *conn = hAllocConn();

while (lineFileNext(pf, &line, &lineSize))
    {
    wordCount = chopTabs(line, words);
    if (wordCount != 21)
	errAbort("Bad line %d of %s\n", pf->lineIx, pf->fileName);
    psl = pslLoad(words);
    pi = processPsl(conn, psl);
    slAddHead(&piList, pi);
    writeOut(of, in, mm, cs, un, pi);
    pslInfoFree(&pi);
    }
hFreeConn(&conn);
}

int main(int argc, char *argv[])
{
struct lineFile *pf, *cf, *lf, *vf=NULL, *df=NULL;
FILE *of, *in=NULL, *mm=NULL, *cs=NULL, *un=NULL;
char *faFile, *db, filename[PATH_LEN], *vfName = NULL, *dfName = NULL;

optionInit(&argc, argv, optionSpecs);
if (argc != 6)
    {
    fprintf(stderr, "USAGE: pslAnal  <psl file> <cds file> <loci file> <fa file> <out file prefix>\n");
    fprintf(stderr, "Options:\n");
    fprintf(stderr, "\t-db=db\n");
    fprintf(stderr, "\t-versions=<mrna versions>\n");
    fprintf(stderr, "\t-noVersions\n");
    fprintf(stderr, "\t-der=<refseq derived accs>\n");
    fprintf(stderr, "\t-verbose=<level>\n");
    fprintf(stderr, "\t-xeno\n");
    fprintf(stderr, "\t-indels\n");
    fprintf(stderr, "\t-unaligned\n");
    fprintf(stderr, "\t-mismatches\n");
    fprintf(stderr, "\t-codonsub\n");
    fprintf(stderr, "\t-genbankCds\n");
    return 1;
    }
db = optionVal("db", NULL);
if (db == NULL)
    errAbort("must specify -db");
vfName = optionVal("versions", NULL);
dfName = optionVal("der", NULL);
indelReport = optionExists("indels");
unaliReport = optionExists("unaligned");
mismatchReport = optionExists("mismatches");
codonSubReport = optionExists("codonsub");
genbankCdsFmt = optionExists("genbankCds");
xeno = optionExists("xeno");
noVersions = optionExists("noVersions");
pf = pslFileOpen(argv[1]);
cf = lineFileOpen(argv[2], TRUE);
lf = lineFileOpen(argv[3], TRUE);
faFile = argv[4];
safef(filename, sizeof(filename), "%s.anal", argv[5]);
of = mustOpen(filename, "w");
fprintf(of, "Acc\tChr\tStart\tEnd\tmStart\tmEnd\tSize\tLoci\tCov\tID\tCdsStart\tCdsEnd\tCdsCov\tCdsID\tCdsMatch\tCdsMismatch\tSnp\tThirdPos\tSyn\tNonSyn\tSynSnp\tNonSynSnp\tIntrons\tStdSplice\tUnCds\tSingle\tTriple\tTotal\tIndels\tGaps\n");
fprintf(of, "10\t10\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10N\t10\t10N\n");
if (vfName) 
  vf = lineFileOpen(vfName, TRUE);
if (dfName) 
  df = lineFileOpen(dfName, TRUE);
if (indelReport) 
    {
    safef(filename, sizeof(filename), "%s.indel", argv[5]);
    in = mustOpen(filename, "w");
    }
if (unaliReport) 
    {
    safef(filename, sizeof(filename), "%s.unali", argv[5]);
    un = mustOpen(filename, "w");
    }
if (mismatchReport)
    {
    safef(filename, sizeof(filename), "%s.mismatch", argv[5]);
    mm = mustOpen(filename, "w");
    }

if (codonSubReport)
    {
    safef(filename, sizeof(filename), "%s.codonsub", argv[5]);
    cs = mustOpen(filename, "w");
    }

verbose(2, "Reading CDS file\n");
readCds(cf);
verbose(2, "Reading FA file\n");
readRnaSeq(faFile);
verbose(2, "Reading loci file\n");
readLoci(lf);
if (vf) 
    {
    verbose(2, "Reading version file\n");
    readVersion(vf);
    }
if (df) 
    {
    verbose(2, "Reading refseq derived accessions file\n");
    readRsDerived(df);
    }
verbose(2, "Processing psl file\n");
doFile(pf, of, in, mm, cs, un);

if (indelReport)
    fclose(in);
if (unaliReport)
    fclose(un);
if (mismatchReport)
    fclose(mm);
if (codonSubReport)
    fclose(cs);
fclose(of);
lineFileClose(&pf);

return 0;
}
