/* orthologBySynteny - Find syntenic location for a list of locations */

/* Copyright (C) 2013 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "jksql.h"
#include "hdb.h"
#include "dnautil.h"
#include "chain.h"
#include "genePred.h"
#include "axt.h"
#include "axtInfo.h"
#include "gff.h"
#include "genbank.h"


#define INTRON 10 
#define CODINGA 11 
#define CODINGB 12 
#define UTR5 13 
#define UTR3 14
#define STARTCODON 15
#define STOPCODON 16
#define SPLICESITE 17
#define NONCONSPLICE 18
#define INFRAMESTOP 19
#define INTERGENIC 20
#define REGULATORY 21
#define LABEL 22

#define INSERT_MERGE_SIZE 50


char *db ; /* from database */
char *db2; /* database that we are mapping synteny to */
int filter; /* max gene prediction allowed */

void usage()
/* Explain usage and exit. */
{
errAbort(
  "orthologBySynteny - Find syntenic location for a list of gene predictions on a single chromosome\n"
  "usage:\n"
  "   orthologBySynteny from-db to-db geneTable netTable chrom chainFile\n"
  "options:\n"
  "   -name=field name in gene table (from-db) to get name from [default: name]\n"
  "   -tName=field name in gene table (to-db) to get name from [default: name]\n"
  "   -track=name of track in to-db - find gene overlapping with syntenic location\n"
  "   -psl=geneTable is psl instead of gene prediction\n"
  "   -gff=output gene prediction\n"
  "   -filter=max gene prediction allowed [default 2mb]\n"
  );
}

void gffWrite(struct genePred *gp, FILE *f)
/* write out a standard gff record from a gene pred record - should be moved to gff.c but include file problems */
{
    char start[25] = "start_codon";
    char stop[25] = "stop_codon";
    int i;

    assert(gp->cdsStart >= 0);
    assert(gp->cdsEnd >= 0);
    assert(gp->txStart >= 0);
    assert(gp->txEnd >= 0);
    if (gp->strand[0] == '+')
        fprintf(f, "%s\tucsc\t%s\t%d\t%d\t.\t%c\t.\tgene_id %s\ttranscript_id %s\texon_number 1\n", 
        gp->chrom, start, (gp->cdsStart)+1, (gp->cdsStart)+3,gp->strand[0], gp->name, gp->name);
    else
        fprintf(f, "%s\tucsc\t%s\t%d\t%d\t.\t%c\t.\tgene_id %s\ttranscript_id %s\texon_number 1\n", 
        gp->chrom, stop, (gp->cdsStart)+1, (gp->cdsStart)+3,gp->strand[0], gp->name, gp->name);
    for (i=0 ; i< gp->exonCount ; i++)
        {
        if (
            (((gp->strand[0] == '+') && ((gp->exonStarts[i])+1 >= gp->cdsStart)) || (((gp->strand[0] == '-') && ((gp->exonEnds[i]) >= gp->cdsEnd)))) &&
            (((gp->strand[0] == '+') && ((gp->exonEnds[i])+1 <= gp->cdsEnd)) || (((gp->strand[0] == '-') && ((gp->exonEnds[i]) <= gp->cdsStart))))
           )
            /* if coding region write out CDS */
            fprintf(f, "%s\tucsc\tCDS\t%d\t%d\t.\t%c\t.\tgene_id %s\ttranscript_id %s\texon_number %d\n", 
                gp->chrom, (gp->exonStarts[i])+1, gp->exonEnds[i],gp->strand[0], gp->name, gp->name, i+1
                );
        fprintf(f, "%s\tucsc\texon\t%d\t%d\t.\t%c\t.\tgene_id %s\ttranscript_id %s\texon_number %d\n", 
            gp->chrom, (gp->exonStarts[i])+1, gp->exonEnds[i],gp->strand[0], gp->name, gp->name, i+1
            );
        }

    if (gp->strand[0] == '+')
        fprintf(f, "%s\tucsc\t%s\t%d\t%d\t.\t%c\t.\tgene_id %s\ttranscript_id %s\texon_number %d\n", 
            gp->chrom, stop, (gp->cdsEnd)+1, (gp->cdsEnd)+2,gp->strand[0], gp->name, gp->name, i
            );
    else
        fprintf(f, "%s\tucsc\t%s\t%d\t%d\t.\t%c\t.\tgene_id %s\ttranscript_id %s\texon_number %d\n", 
            gp->chrom, start, gp->cdsEnd, (gp->cdsEnd)+2,gp->strand[0], gp->name, gp->name, i
            );
    return;
}
struct hash *allChains(char *fileName)
/* read all chains in a chain file */
{
    struct hash *hash = newHash(0);
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    struct chain *chain;
    char chainId[128];

    while ((chain = chainRead(lf)) != NULL)
        {
        safef(chainId, sizeof(chainId), "%d",chain->id);
        if (hashLookup(hash, chainId) == NULL)
                {
                hashAddUnique(hash, chainId, chain);
                }
        }
    lineFileClose(&lf);
    return hash;
}
struct axt *readAllAxt(char *chrom, char *alignment)
/* read axt records from the database table alignment for a chromosome */
{
char query[255];
struct lineFile *lf ;
struct axt *axt = NULL, *axtList = NULL;
struct axtInfo *ai = NULL;
struct sqlResult *sr;
struct sqlConnection *conn = hAllocConn(db);
char **row;

if (alignment != NULL)
    sqlSafef(query, sizeof(query),
	     "select * from axtInfo where chrom = '%s' and species = '%s' and alignment = '%s'",
	     chrom, db2, alignment);
else
    sqlSafef(query, sizeof(query),
	     "select * from axtInfo where chrom = '%s' and species = '%s'",
	     chrom, db2);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    {
    ai = axtInfoLoad(row );
    }
if (ai == NULL)
    {
    printf("\nNo axt alignments available for %s (database %s).\n\n",
	   hFreezeFromDb(db2), db2);
    /*return;*/
    }
lf = lineFileOpen(ai->fileName, TRUE);
while ((axt = axtRead(lf)) != NULL)
    {
    slAddHead(&axtList, axt);
    }
slReverse(&axtList);

axtInfoFree(&ai);
sqlFreeResult(&sr);
hFreeConn(&conn);
return(axtList);
}


void verifyAlist(struct axt *axtList)
/* very crude check for bad axt records */
{
    struct axt *axt;
for (axt = axtList; axt != NULL; axt = axt->next)
    {
    if (!startsWith("chr",axt->qName))
        printf("BAD axt record %s %s tStart = %d score=%d\n",axt->qName, axt->tName, axt->tStart, axt->score);
    }
}

void generateGff(struct chain *chain, FILE *f, char *ortholog, char *name, char *type, double score, int level, struct genePred *gp, struct axt *axtList)
/* output a gff gene prediction*/
{
struct axt *axt= NULL ;
struct axt *axtFound = NULL ;
struct genePred *gpSyn = NULL;
int cStart, cEnd, txEnd, nextEndIndex, posStrand, tPtr;
int oneSize = 1;
int tCoding = FALSE;
int qCoding = FALSE;
int tClass = INTERGENIC;
// int qClass = INTERGENIC;
// int prevTClass = INTERGENIC;
// int prevQClass = INTERGENIC;
int qStopCodon = FALSE;
int nextStart = gp->exonStarts[0];
int nextEnd = gp->exonEnds[0];
int tCodonPos = 1;
int qCodonPos = 1;
unsigned *eStarts, *eEnds;
int prevQptr = 0;
int lastChance = 0;

AllocVar(gpSyn);
for (axt = axtList; axt != NULL; axt = axt->next)
    { 
    if (sameString(gp->chrom , axt->tName) && ((axt->tStart <= gp->txEnd) && (axt->tEnd >= gp->txStart)) )
        {
    /*    if (gp->strand[0] == '-')
            {
            printf("swap axt t=%s q=%s name=%s\n",axt->tName, axt->qName, gp->name);
            reverseComplement(axt->qSym, axt->symCount);
            reverseComplement(axt->tSym, axt->symCount);
            tmp = hChromSize2(axt->qName) - axt->qStart;
            axt->qStart = hChromSize2(axt->qName) - axt->qEnd;
            axt->qEnd = tmp;
            }
            */
        axtFound = axt;
        break;
        }
    }
if (axtFound == NULL)
    /* no alignment skip this one */
    {
    fprintf(stderr,"No syntenic alignment %s %s:%d-%d %s:%d-%d\n",gp->name, gp->chrom, gp->txStart, gp->txEnd, axt->tName, axt->tStart,axt->tEnd);
    genePredFree(&gpSyn);
    return;
    }

                verifyAlist(axtList);
/* we found the first axt, map gene gp to syntenic gene gpSyn */
gpSyn->chrom = axtFound->qName;
gpSyn->name = gp->name;	/* Name of gene */
gpSyn->exonCount = 0;
gpSyn->exonStarts = AllocArray(eStarts, gp->exonCount);
gpSyn->exonEnds = AllocArray(eEnds, gp->exonCount);
if (axtFound->qStrand == '+' )
    gpSyn->strand[0] = gp->strand[0];	/* + or - for strand */
else
    {
    if (gp->strand[0] == '+')
        {
        gpSyn->strand[0] = '-';
        }
    else
        {
        gpSyn->strand[0] = '+';
        }
    }
nextEndIndex = 0;
assert(nextEndIndex < gp->exonCount);
assert(nextEndIndex >= 0);
nextStart = gp->exonStarts[nextEndIndex];
nextEnd = gp->exonEnds[nextEndIndex];
cStart = gp->cdsStart;
cEnd = gp->cdsEnd-3;
txEnd = gp->txEnd-3;
tPtr = axtFound->tStart;
if (gp->strand[0] == '+')
    {
    posStrand = TRUE;
    }
else
    {
    posStrand = FALSE;
    /*
    nextEndIndex = (gp->exonCount)-1;
    assert(nextEndIndex < gp->exonCount);
    assert(nextEndIndex >= 0);
    nextStart = gp->exonEnds[nextEndIndex];
    nextEnd = gp->exonStarts[nextEndIndex];
    cStart = gp->cdsEnd-1;
    cEnd = gp->cdsStart+1;
    txStart = gp->txEnd-1;
    txEnd = gp->txStart+1;
    tPtr = axtFound->tEnd;
    */
    }

    if (gp->strand[0] == '-')
        {
        verifyAlist(axtList);
        }
    if (sameString("NM_023781",gp->name))
        {
        verifyAlist(axtList);
        }
    if (sameString("NM_010001",gp->name))
        {
        verifyAlist(axtList);
        }
for (axt = axtFound; axt != NULL ; axt = axt->next)
    /* loop thru set of axt alignments for gene */
    {
    char *q = axt->qSym; /* dna sequence from query */
    char *t = axt->tSym; /* dna sequence from target */
    int i = 0; /* index into t array */
    int size = axt->symCount;
    int sizeLeft = size; /* remaining bases in the alignment */
    int qPtr = axt->qStart; /*index into axt file for syntenic gene */

    /* skip alignments if on different chromosome UNLESS we haven't found any exons yet */
    if (!sameString(axt->qName,axtFound->qName))
	{
        if (gpSyn->exonCount != 0)
            continue;
        else
            {
            /* start over with this new (presumably better alignment) */
            axtFound = axt;
            gpSyn->chrom = axtFound->qName;
            gpSyn->cdsStart = 0;
            gpSyn->txStart = 0;
            tCoding = FALSE;
            qCoding = FALSE;
            tClass = INTERGENIC;
//            qClass = INTERGENIC;
//            prevTClass = INTERGENIC;
//            prevQClass = INTERGENIC;
            qStopCodon = FALSE;
            }
	}

    if (sameString(gp->name , "NM_008386"))
        {
        verifyAlist(axtList);
        }
    tPtr = axt->tStart; /*index into axt file for gene */
    while ((tPtr > nextEnd ) && (nextEndIndex < (gp->exonCount)-1))
        { /* missed exons - find next block if we had regions that didn't align*/
        if(nextEndIndex < (gp->exonCount)-1)
            {
            nextEndIndex++;
            nextStart = (gp->exonStarts[nextEndIndex]);
            nextEnd = (gp->exonEnds[nextEndIndex]);
            gpSyn->txStart = qPtr;	/* Transcription start position */
            tClass = INTRON;
//            qClass = INTRON;
            }
        }
    while ((tPtr > nextStart ) && (nextEndIndex <= (gp->exonCount)-1) && tPtr < txEnd)
        { /* starting in the middle of an exon - yuck*/
        if((nextEndIndex+1) < (gp->exonCount))
            {
            nextStart = (gp->exonStarts[nextEndIndex+1]);
            gpSyn->txStart = qPtr;	/* Transcription start position */
            if (tPtr > cStart)
                {
                gpSyn->cdsStart = qPtr;
                tCoding = TRUE;
                }
            tClass = INTRON;
//             qClass = INTRON;
            }
        else
            {
            gpSyn->txStart = qPtr;	/* Transcription start position */
            break;
            }
        } 
    prevQptr = qPtr;
    /*
    if (!posStrand)
        {
        qPtr = axt->qEnd;
        tPtr = axt->tEnd; *index into axt file for gene *
        if (tPtr < nextEnd)
            {
            nextEndIndex--;
            if(nextEndIndex >= gp->exonCount)
                printf("nextEndIndex = %d gp exons %d\n", nextEndIndex , gp->exonCount);
            assert(nextEndIndex < gp->exonCount);
            assert(nextEndIndex >= 0);
            nextStart = (gp->exonEnds[nextEndIndex]);
            nextEnd = (gp->exonStarts[nextEndIndex]);
            }
        }
*/
    while (sizeLeft > 0)
        {
        if (posStrand || !posStrand)
            { /* this next section sets the transcription start */
            if ((tClass == INTRON) && (tPtr >= nextStart)  && (tPtr >= cStart) && (tPtr < cEnd))        
                {
                tCoding = TRUE;
                if (qStopCodon == FALSE)
                    {
                    qCoding = TRUE;
                    qCodonPos = tCodonPos; /* put trans back in sync */
                    }
                }
            else if ((tPtr >= nextStart ) && (tPtr <= cStart ))
                /* start of UTR 5'*/
                {
                if (tClass == INTRON || tClass == INTERGENIC)
                    //assert(nextEndIndex+1 < gp->exonCount);
                    //nextStart = gp->exonStarts[nextEndIndex+1];
                    {
                    gpSyn->txStart = qPtr;	/* Transcription start position */
                    //assert(nextEndIndex+1 < gp->exonCount);
                    //nextStart = gp->exonStarts[nextEndIndex+1];
                    }
                tClass = UTR5;
//                 qClass = UTR5;
                }
            }
        else
            /* gene on neg strand */
            {
            if (((tClass == UTR5) || (tClass == INTRON)) && (tPtr >= nextStart) && (tPtr >= cStart) && (tPtr < cEnd))        
                {
                tCoding = TRUE;
                if (qStopCodon == FALSE)
                    {
                    qCoding = TRUE;
                    qCodonPos = tCodonPos; /* put trans back in sync */
                    }
                }
            else if ((tPtr >= nextStart-1 ) && (tPtr > cStart ))
                /* start of UTR 5'*/
                {
                if (tClass == INTRON)
                    {
                    //gpSyn->txEnd = qPtr;	/* Transcription start position */
                    }
                tClass = UTR5;
//                qClass = UTR5;
                }
            }
            if (posStrand || !posStrand)
                {/* this is crucial, maps exons (regardless of coding or not) to syntenic region */
                if (tPtr == nextStart)
                    {
                    prevQptr = qPtr; /* remember this for when you hit the end of exon, prevents 1/2 exon */
                    }
                if (tPtr == nextEnd)
                    {
                    tCoding=FALSE;
                    qCoding=FALSE;
                    if (tClass != UTR3)
                        {
                        tClass=INTRON;
//                         qClass=INTRON;
                        }
                    if(gpSyn->exonCount < gp->exonCount)
                        {
                        assert(gpSyn->exonCount < gp->exonCount);
                        eStarts[gpSyn->exonCount] = prevQptr;
                        if (qPtr-prevQptr > filter)
                            {
                            fprintf(stderr,"exon %d larger than %d %d-%d %s from %s:%d-%d axt %c to %s:%d-%d %d-%d %s.\n",gp->exonCount,filter, prevQptr, qPtr, gp->name,gp->chrom, gp->txStart,gp->txEnd, axtFound->qStrand, gpSyn->chrom, gpSyn->txStart,gp->txEnd, gpSyn->cdsStart, gpSyn->cdsEnd, gpSyn->strand);
                            return;
                            }
                        eEnds[gpSyn->exonCount] = qPtr;
                        nextEndIndex++;
                        gpSyn->exonCount++;	/* Number of exons */
                        assert(nextEndIndex >= 0);
                        if(nextEndIndex < gp->exonCount)
                            {
                            nextStart = gp->exonStarts[nextEndIndex];
                            nextEnd = gp->exonEnds[nextEndIndex];
                            }
                        }
                    }
                }
            else
                {
                if (tPtr == nextStart-1)
                    {
                    prevQptr = qPtr;
                    }
                if (tPtr == nextEnd-1)
                    {
                    tCoding=FALSE;
                    qCoding=FALSE;
                    tClass=INTRON;
//                    qClass=INTRON;
                    nextEndIndex--;
                    assert(nextEndIndex < gp->exonCount);
                    assert(nextEndIndex >= 0);
                    nextStart = (gp->exonEnds[nextEndIndex]);
                    nextEnd = (gp->exonStarts[nextEndIndex]);
                    assert(gpSyn->exonCount < gp->exonCount);
	                eStarts[gpSyn->exonCount] = qPtr;
	                eEnds[gpSyn->exonCount] = prevQptr;
                    gpSyn->exonCount++;	/* Number of exons */
                    }
                }
            if (posStrand || !posStrand)
                { /* handle start/stop coding */
                if ((tPtr >= (cStart)) && (tPtr <=(cStart+2)))
                    {
                    tClass=STARTCODON;
//                     qClass=STARTCODON;
                    tCoding=TRUE;
                    qCoding=TRUE;
                    gpSyn->cdsStart = qPtr-2;	/* coding start position */
                    assert(gpSyn->cdsStart > 0);
                    if (tPtr == cStart) 
                        {
                        tCodonPos=1;
                        qCodonPos=1;
                        }
                    }
                if ((tPtr >= cEnd) && (tPtr <= (cEnd+2)))
                    {
                    tClass=STOPCODON;
//                     qClass=STOPCODON;
                    gpSyn->cdsEnd = qPtr-1;	/* coding start position */
                    tCoding=FALSE;
                    qCoding=FALSE;
                    }
                }
            else
                {
                if ((tPtr <= (cStart)) && (tPtr >=(cStart-2)))
                    {
                    tClass=STARTCODON;
//                     qClass=STARTCODON;
                    gpSyn->cdsStart = qPtr;	/* coding start position */
                    tCoding=TRUE;
                    qCoding=TRUE;
                    if (tPtr == cStart-3) 
                        {
                        tCodonPos=1;
                        qCodonPos=1;
                        }
                    }
                if ((tPtr <= cEnd+1) && (tPtr >= (cEnd-1)))
                    {
                    tClass=STOPCODON;
                    tCoding=FALSE;
                    }
                }
            if (posStrand || !posStrand)
                { /* handle end of transcription */
                if (tPtr == (cEnd +3) )
                    {
                    tClass = UTR3;
//                     qClass = UTR3;
                    }
                if ((tPtr >= txEnd) && (tClass == UTR3))
                    {
                    gpSyn->txEnd = qPtr-3;	/* Transcription end position */
                    tClass = INTERGENIC;
//                     qClass = INTERGENIC;
                    }
                }
            else 
                {
                if (tPtr == (cEnd -2) )
                    {
                    tClass = UTR3;
//                     qClass = UTR3;
                    }
                if ((tPtr > txEnd) && (tClass == UTR3))
                    {
                    gpSyn->txStart = qPtr;	/* Transcription end position */
                    tClass = INTERGENIC;
//                     qClass = INTERGENIC;
                    }
                }

            if (t[i] != '-')
                {/* don't count gaps */
                if (posStrand || !posStrand)
                    {
                    tPtr++;
                    }
                else
                    {
                    tPtr--;
                    }
                if (tCoding) 
                    {
                    tCodonPos++;
                    }
                if (tCodonPos>3) tCodonPos=1;
                }
            if (q[i] != '-')
                {
                if (posStrand || !posStrand)
                    qPtr++;
                else
                    qPtr--;
                if (qCoding) 
                    {
                    qCodonPos++;
                    }
                if (qCodonPos>3) qCodonPos=1;
                }

            sizeLeft -= oneSize; /* this is now 1, left over from parsing lines */
            i++;

            }
        lastChance = qPtr ; /*save this value in case there are no more alignments */
        } /* end of axt file */
if (gpSyn->exonCount == 0)
    {
    fprintf(stderr,"No alignment found for %s %s:%d-%d.\n",gp->name,gp->chrom, gp->txStart,gp->txEnd);
    return;
    }

if (posStrand || !posStrand)
    {
    if (gpSyn->exonStarts[0] <= 0 ||  (gpSyn->exonEnds[0] <= 0) )
        fprintf(stderr,"%s is wrong\n",gpSyn->name);
    assert(gpSyn->exonStarts[0] > 0);
    assert(gpSyn->exonEnds[0] > 0);
    if (gpSyn->cdsStart == 0) gpSyn->cdsStart = gpSyn->exonStarts[0];
    if (gpSyn->cdsEnd == 0) gpSyn->cdsEnd = gpSyn->exonEnds[(gpSyn->exonCount)-1];
    if (gpSyn->txEnd != gpSyn->exonEnds[(gpSyn->exonCount)-1]) 
        gpSyn->txEnd = gpSyn->exonEnds[(gpSyn->exonCount)-1] ;
    }
    if (gpSyn->txEnd == 0) gpSyn->txEnd = lastChance; //gpSyn->exonEnds[(gpSyn->exonCount)-1];
    if (gpSyn->txStart == 0)
        gpSyn->txStart = gpSyn->exonStarts[0] ;
if (gpSyn->txStart <= 0 ||  (gpSyn->txEnd <= 0) || (gpSyn->cdsStart <= 0) || (gpSyn->cdsEnd <= 0) )
    fprintf(stderr,"%s is wrong\n",gpSyn->name);
assert(gpSyn->txStart > 0);
assert(gpSyn->txEnd > 0);
assert(gpSyn->cdsStart > 0);
assert(gpSyn->cdsEnd > 0);
if (gpSyn->txEnd < gpSyn->txStart || gpSyn->cdsEnd < gpSyn->cdsStart)
    {
    fprintf(stderr,"Gene Prediction backwards %d %s from %s:%d-%d axt %c to %s:%d-%d %d-%d %s.\n",filter, gp->name,gp->chrom, gp->txStart,gp->txEnd, axtFound->qStrand, gpSyn->chrom, gpSyn->txStart,gp->txEnd, gpSyn->cdsStart, gpSyn->cdsEnd, gpSyn->strand);
    }
else if (gpSyn->txEnd - gpSyn->txStart < filter && gpSyn->cdsEnd - gpSyn->cdsStart < filter) 
    {
    fprintf(stderr,"OK %s from %s:%d-%d %s axt %c to %s:%d-%d %d-%d %s.\n", gp->name,gp->chrom, gp->txStart,gp->txEnd, gp->strand, axtFound->qStrand, gpSyn->chrom, gpSyn->txStart,gpSyn->txEnd,gpSyn->cdsStart, gpSyn->cdsEnd, gpSyn->strand); 
    gffWrite(gpSyn, stdout);
    }
else
    {
    fprintf(stderr,"Gene Prediction larger than %d %s from %s:%d-%d axt %c to %s:%d-%d %d-%d %s.\n",filter, gp->name,gp->chrom, gp->txStart,gp->txEnd, axtFound->qStrand, gpSyn->chrom, gpSyn->txStart,gp->txEnd, gpSyn->cdsStart, gpSyn->cdsEnd, gpSyn->strand);
    }
/*genePredFree(&gpSyn);*/
}

void chainPrintHead(struct chain *chain, FILE *f, char *ortholog, char *name, char *type, double score, int level)
/* output a map file similar to psl */
{
if (chain->qStrand == '+')
    fprintf(f, "%s\t%s\t%d\t%s\t%d\t+\t%d\t%d\t%s\t%d\t%c\t%d\t%d\t%d\t%1.0f\t%s\n", 
        name, type, level, chain->tName, chain->tSize, chain->tStart, chain->tEnd,
        chain->qName, chain->qSize, chain->qStrand, chain->qStart, chain->qEnd,
        chain->id,score, ortholog);
else
    fprintf(f, "%s\t%s\t%d\t%s\t%d\t+\t%d\t%d\t%s\t%d\t%c\t%d\t%d\t%d\t%1.0f\t%s\n", 
        name, type, level, chain->tName, chain->tSize, chain->tStart, chain->tEnd,
        chain->qName, chain->qSize, chain->qStrand, chain->qSize - chain->qEnd, chain->qSize - chain->qStart,
        chain->id,score, ortholog);
}

char *mapOrtholog(struct sqlConnection *conn,  char *geneTable, char *chrom, int gStart, int gEnd)
/* finds gene that overlaps syntenic projection */
{
struct sqlResult *sr;
char **row;
char query[512];
static char retName[255];
char *tFieldName = optionVal("tName", "name");

    sqlSafef(query, sizeof query, "select %s, txStart, txEnd from %s where chrom = '%s' and txStart <= %d and txEnd >= %d",tFieldName, geneTable,chrom,gEnd, gStart);
    /*printf("%s\n",query);*/
    sr = sqlGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
        {
        char *name = (row[0]);
        sprintf(retName, "%s",name);
        sqlFreeResult(&sr);
        return(retName);
        }
    sqlFreeResult(&sr);
    return(NULL);
}

void mustGetMrnaStartStop( char *acc, unsigned *cdsStart, unsigned *cdsEnd )
{
	char query[256];
	struct sqlConnection *conn;
	struct sqlResult *sr;
	char **row;

	/* Get cds start and stop, if available */
	conn = hAllocConn(db);
	sqlSafef(query, sizeof query, "select cds from mrna where acc = '%s'", acc);
	sr = sqlGetResult(conn, query);
	assert((row = sqlNextRow(sr)) != NULL);
       	sqlSafef(query, sizeof query, "select name from cds where id = '%d'", atoi(row[0]));
    	sqlFreeResult(&sr);
       	sr = sqlGetResult(conn, query);
    	assert((row = sqlNextRow(sr)) != NULL);
        genbankParseCds(row[0], cdsStart, cdsEnd);
	sqlFreeResult(&sr);
    hFreeConn(&conn);
}

void subsetChain(char *geneTable, char *netTable, char *chrom, struct hash *chainHash, struct axt *axtList)
/* Return subset ofchain at syntenic location. */
{
struct genePred *gp = NULL;
char *fieldName = optionVal("name", "name");
char *syntenicGene = optionVal("track", "knownGene");
boolean psl = optionExists("psl");
boolean gff = optionExists("gff");
struct chain *aChain;
struct sqlConnection *conn = hAllocConn(db);
struct sqlConnection *conn2 = hAllocConn(db2);
struct sqlResult *sr;
char **row;
char query[512];
char prevName[255] = "x";
char *ortholog = NULL;
struct chain *retSubChain;
struct chain *retChainToFree;
/*int chainArr[MAXCHAINARRAY];*/
filter = optionInt("filter",2000000);

                verifyAlist(axtList);
if (psl)
    sqlSafef(query, sizeof query, "select g.qName, g.tName, g.strand, g.tStart, g.tEnd, g.qStart, g.qEnd, g.blockCount, g.tStarts, g.blockSizes, g.%s,n.chainId, n.type, n.level, n.qName, n.qStart, n.qEnd, g.qSize, g.qStarts, g.tSize from %s g, %s n where n.tName = '%s' and n.tStart <= g.tEnd and n.tEnd >=  g.tStart and g.tName = '%s' and type <> 'gap' order by g.%s, g.tStart, score desc",fieldName, geneTable,netTable, chrom,chrom, fieldName );
else
    sqlSafef(query, sizeof query, "select g.name, g.chrom, g.strand, g.txStart, g.txEnd, g.cdsStart, g.cdsEnd, g.exonCount, g.exonStarts, g.exonEnds,  g.%s, n.chainId, n.type, n.level, n.qName, n.qStart, n.qEnd from %s g, %s n where n.tName = '%s' and n.tStart <= g.txEnd and n.tEnd >=  g.txStart and chrom = '%s' and n.type <> 'gap' order by g.%s, g.txStart, score desc",fieldName, geneTable,netTable, chrom,chrom, fieldName );
//fprintf(stderr,"query\n%s\n",query);
sr = sqlGetResult(conn, query);

                verifyAlist(axtList);
while ((row = sqlNextRow(sr)) != NULL)
    {
    int geneStart = sqlUnsigned(row[3]);
    int geneEnd = sqlUnsigned(row[4]);
    char *name = row[10];
    int chainId = sqlUnsigned(row[11]);
    int level = sqlUnsigned(row[13]);
    int sizeOne;
    char chainIdStr[128];
    unsigned int cdsStart, cdsEnd;

    struct psl *p = NULL;

    verifyAlist(axtList);
    safef(chainIdStr,sizeof(chainIdStr),"%d",chainId);
    /*gp = genePredLoad(row + hasBin);*/
    AllocVar(gp);
    AllocVar(p);

    if(psl)
    {
        p->qName = cloneString(row[0]);
	    p->tName = cloneString(row[1]);
	    strcpy(p->strand, cloneString(row[2]));
	    p->tStart = sqlUnsigned(row[3]);
	    p->tEnd = sqlUnsigned(row[4]);
	    p->qStart = sqlUnsigned(row[5]);
	    p->qEnd = sqlUnsigned(row[6]);
    	p->blockCount = sqlUnsigned(row[7]);
	    sqlUnsignedDynamicArray(row[8], &p->tStarts, &sizeOne );
	    assert( sizeOne == p->blockCount );
	    sqlUnsignedDynamicArray(row[9], &p->blockSizes, &sizeOne );
	    assert( sizeOne == p->blockCount );
	    sqlUnsignedDynamicArray(row[18], &p->qStarts, &sizeOne );
	    assert( sizeOne == p->blockCount );
        p->qSize = sqlUnsigned(row[17]);
        p->tSize = sqlUnsigned(row[19]);

	    //get mRNA start and stop positions and fail otherwise.
	    fprintf(stderr,"\n### %s\t", p->qName);
	    mustGetMrnaStartStop(p->qName, &cdsStart, &cdsEnd);
	    fprintf(stderr, "%d\t%d ###\n", cdsStart, cdsEnd);

	    //convert to genePred
	    gp = genePredFromPsl( p, cdsStart, cdsEnd, INSERT_MERGE_SIZE );
    }
    else //input gene table is a gff not a psl
    {
    	gp->exonCount = sqlUnsigned(row[7]);
    	gp->name = cloneString(row[0]);
    	gp->chrom = cloneString(row[1]);
    	strcpy(gp->strand, row[2]);
    	gp->txStart = sqlUnsigned(row[3]);
    	gp->txEnd = sqlUnsigned(row[4]);
    	gp->cdsStart = sqlUnsigned(row[5]);
    	gp->cdsEnd = sqlUnsigned(row[6]);
    	sqlUnsignedDynamicArray(row[8], &gp->exonStarts, &sizeOne);
    	assert(sizeOne == gp->exonCount);
    	sqlUnsignedDynamicArray(row[9], &gp->exonEnds, &sizeOne);
    	assert(sizeOne == gp->exonCount);
    }

    verifyAlist(axtList);

    if ((atoi(row[11]) != 0) && strcmp(prevName,name))
        {
        /*printf("name %s prev %s\n",name, prevName);*/
        aChain = hashMustFindVal(chainHash, chainIdStr);
        fprintf(stderr," chain %d ",aChain->id);
        chainSubsetOnT(aChain, geneStart, geneEnd, &retSubChain,  &retChainToFree);
        if(retSubChain != NULL)
            {
            assert(atoi(row[11]) > 0);
            assert(atoi(row[3]) > 0);
            assert(atoi(row[4]) > 0);
            /* if -track option on then look for an ortholog */
            if (syntenicGene)
                {
                if (retSubChain->qStrand == '+')
                    ortholog = mapOrtholog(conn2, syntenicGene, retSubChain->qName, retSubChain->qStart, retSubChain->qEnd );
                else
                    ortholog = mapOrtholog(conn2, syntenicGene, retSubChain->qName, retSubChain->qSize - retSubChain->qEnd, retSubChain->qSize - retSubChain->qStart );
                }
            if (gff)
                {
                assert(gp->cdsStart > 0);
                assert(gp->cdsEnd > 0);
                assert(gp->txStart > 0);
                assert(gp->txEnd > 0);
                verifyAlist(axtList);
                if (gp->strand[0] == '+')
                    generateGff(retSubChain, stdout, ortholog, name, row[12], aChain->score, level, gp, axtList);
                else
                    generateGff(retSubChain, stdout, ortholog, name, row[12], aChain->score, level, gp, axtList);
                verifyAlist(axtList);
                }
            else
                chainPrintHead(retSubChain, stdout, ortholog, name, row[12], aChain->score, level);
            }
        else
            {
            /* gap in net/chain skip to next level */
            chainFree(&retChainToFree);
            continue;
            }
        chainFree(&retChainToFree);
        }
    sprintf(prevName , "%s",name);
    /*genePredFree(&gp);
    gp = NULL;*/
                verifyAlist(axtList);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
hFreeConn(&conn2);
}

void orthologBySyntenyChrom(char *geneTable, char *netTable, char *chrom, struct hash *chainHash)
{
    struct axt *axtList = readAllAxt(chrom,"Blastz Chained");
    subsetChain(geneTable, netTable, chrom, chainHash, axtList);

}

int main(int argc, char *argv[])
/* Process command line. */
{
struct hash *chainHash;

optionHash(&argc, argv);
if (argc != 7)
    usage();
db = argv[1];
db2 = argv[2];
chainHash = allChains(argv[6]);
orthologBySyntenyChrom(argv[3], argv[4], argv[5], chainHash);
return 0;
}
