/* netChainSubset - Create chain file with subset of chains that appear in the net. */

/* Copyright (C) 2011 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "chain.h"
#include "chainNet.h"


/* Command-line settings.  main() fills these in from the parsed options
 * before any conversion work starts. */
char *type = NULL;		/* Value of -type; when non-NULL, convertFill() skips fills whose type string differs. */
boolean splitOnInsert = FALSE;	/* Set when -splitOnInsert is given; convertFill() then uses splitWrite(). */
boolean wholeChains = FALSE;	/* Set when -wholeChains is given; only consulted when splitOnInsert is FALSE. */
boolean skipMissing = FALSE;	/* Set when -skipMissing is given; rConvert() then uses chainFind() instead of chainLookup(). */

void usage()
/* Explain usage and exit.  The whole help text is handed to errAbort(),
 * which reports it as the error message. */
{
errAbort(
  "netChainSubset - Create chain file with subset of chains that appear in the net\n"
  "usage:\n"
  "   netChainSubset in.net in.chain out.chain\n"
  "options:\n"
  "   -gapOut=gap.tab - Output gap sizes to file\n"
  "   -type=XXX - Restrict output to particular type in net file\n"
  "   -splitOnInsert - Split chain when get an insertion of another chain\n"
  "   -wholeChains - Write entire chain referenced by net, don't split\n"
  "    when a high-level net is encountered.  This is useful when nets\n"
  "    have been filtered.\n"
  "   -skipMissing - skip chains that are not found instead of generating\n"
  "    an error.  Useful if chains have been filtered.\n"
  );
}

/* Table of recognized command-line options, passed to optionInit() in
 * main().  The list ends with a {NULL, 0} sentinel entry. */
struct optionSpec options[] = {
   {"gapOut", OPTION_STRING},		/* file name for gap size output */
   {"type", OPTION_STRING},		/* net fill type to restrict output to */
   {"splitOnInsert", OPTION_BOOLEAN},	/* split chains at gaps that have inserts */
   {"wholeChains", OPTION_BOOLEAN},	/* write whole chains instead of parts */
   {"skipMissing", OPTION_BOOLEAN},	/* tolerate chain ids missing from the chain file */
   {NULL, 0},
};

void gapWrite(struct chain *chain, FILE *f)
/* Write gaps to simple two column file.  For each pair of consecutive
 * blocks in the chain one line is written: the target gap size
 * (next tStart - previous tEnd), a tab, then the query gap size
 * (next qStart - previous qEnd).  A chain with fewer than two blocks
 * produces no output. */
{
struct cBlock *prev, *cur;

/* An empty block list has no gaps; bail out rather than dereference NULL. */
if (chain->blockList == NULL)
    return;
prev = chain->blockList;
for (cur = prev->next; cur != NULL; cur = cur->next)
    {
    fprintf(f, "%d\t%d\n", cur->tStart - prev->tEnd, cur->qStart - prev->qEnd);
    prev = cur;
    }
}

void writeChainPart(struct chain *chain, int tStart, int tEnd, FILE *f,
	FILE *gapFile)
/* Write out part of a chain.  The part is obtained by asking
 * chainSubsetOnT() for the portion of chain between target coordinates
 * tStart and tEnd.  The sub-chain is written to f and, when gapFile is
 * not NULL, its gaps are written to gapFile as well.  Aborts with an
 * error message if chainSubsetOnT() yields no sub-chain. */
{
struct chain *subChain = NULL, *chainToFree = NULL;

chainSubsetOnT(chain, tStart, tEnd, &subChain, &chainToFree);
/* Explicit check rather than assert(): an assert disappears under NDEBUG
 * and the NULL would then be handed straight to chainWrite(). */
if (subChain == NULL)
    errAbort("writeChainPart: no part of chain falls in target range %d-%d",
	tStart, tEnd);
chainWrite(subChain, f);
if (gapFile != NULL)
    gapWrite(subChain, gapFile);
/* chainSubsetOnT() reports through chainToFree what has to be released
 * once we are done with subChain. */
chainFree(&chainToFree);
}

void writeChainWhole(struct chain *chain, FILE *f, FILE *gapFile)
/* Emit the complete chain to f without any trimming.  When a gap file
 * is supplied (gapFile not NULL) the chain's gaps go there too. */
{
chainWrite(chain, f);
if (gapFile == NULL)
    return;
gapWrite(chain, gapFile);
}

struct cnFill *nextGapWithInsert(struct cnFill *gapList)
/* Walk gapList from its head and return the first element whose
 * children list is non-empty.  Returns NULL when the list is empty
 * or no element has children. */
{
struct cnFill *cur = gapList;

while (cur != NULL && cur->children == NULL)
    cur = cur->next;
return cur;
}

void splitWrite(struct cnFill *fill, struct chain *chain,
    FILE *f, FILE *gapFile)
/* Write the chain as a series of pieces, cutting it at every child of
 * fill that itself has children (a gap containing an insert).  Each
 * piece runs from the end of the previous such gap (or the start of
 * fill) to the start of the next one; the final piece runs to the end
 * of fill.  With no such gaps the whole fill range is written as one
 * piece. */
{
int pieceStart = fill->tStart;
struct cnFill *gap;

for (gap = nextGapWithInsert(fill->children); gap != NULL;
	gap = nextGapWithInsert(gap->next))
    {
    /* Piece before this gap, then resume just past the gap. */
    writeChainPart(chain, pieceStart, gap->tStart, f, gapFile);
    pieceStart = gap->tStart + gap->tSize;
    }
/* Trailing piece up to the end of the fill. */
writeChainPart(chain, pieceStart, fill->tStart + fill->tSize, f, gapFile);
}

void convertFill(struct cnFill *fill,
	struct chain *chain, FILE *f, FILE *gapFile)
/* Write the portion of chain selected by fill to the chain output.
 * Fills whose type does not match the global 'type' restriction (when
 * one is set) are skipped.  Otherwise the output mode is chosen from
 * the global flags, splitOnInsert taking precedence over wholeChains;
 * with neither set, just the fill's target range is written. */
{
if (type != NULL && !sameString(type, fill->type))
    return;

if (splitOnInsert)
    {
    splitWrite(fill, chain, f, gapFile);
    return;
    }
if (wholeChains)
    {
    writeChainWhole(chain, f, gapFile);
    return;
    }
writeChainPart(chain, fill->tStart, fill->tStart + fill->tSize, f, gapFile);
}

void rConvert(struct cnFill *fillList,
	struct hash *chainHash, FILE *f, FILE *gapFile)
/* Recursively walk the fill list of a net.  Every fill with a non-zero
 * chainId has its chain fetched from chainHash and handed to
 * convertFill(); children of each fill are then processed the same way.
 * With skipMissing set the chain is fetched via chainFind() and a NULL
 * result is silently skipped; otherwise chainLookup() is used. */
{
struct cnFill *node = fillList;

while (node != NULL)
    {
    if (node->chainId)
        {
        struct chain *chain;
        if (skipMissing)
            chain = chainFind(chainHash, node->chainId);
        else
            chain = chainLookup(chainHash, node->chainId);
        if (chain != NULL)
            convertFill(node, chain, f, gapFile);
        }
    if (node->children)
        rConvert(node->children, chainHash, f, gapFile);
    node = node->next;
    }
}

void netChainSubset(char *netIn, char *chainIn, char *chainOut)
/* netChainSubset - Create chain file with subset of
 * chains that appear in the net.
 *   netIn    - net file to read
 *   chainIn  - chain file holding the chains the net refers to
 *   chainOut - chain file to create
 * If the -gapOut option is given, gap sizes are also written to that
 * file.  All files opened here are closed before returning. */
{
struct hash *chainHash;
struct chainNet *net;
struct lineFile *lf = lineFileOpen(netIn, TRUE);
FILE *f = mustOpen(chainOut, "w");
char *gapFileName = optionVal("gapOut", NULL);
FILE *gapFile = NULL;

if (gapFileName != NULL)
    gapFile = mustOpen(gapFileName, "w");
/* The output file is passed along while reading the chains; presumably
 * chainReadAllWithMeta() copies metadata lines to it - see chain.h. */
chainHash = chainReadAllWithMeta(chainIn, f);
while ((net = chainNetRead(lf)) != NULL)
    {
    verbose(1, "Processing %s\n", net->name);
    rConvert(net->fillList, chainHash, f, gapFile);
    chainNetFree(&net);
    }

/* Close everything explicitly so that a failed flush of the output is
 * reported instead of being lost at process exit.  carefulClose()
 * tolerates the NULL gapFile when -gapOut was not given.  The chain
 * hash itself is left for process exit to reclaim. */
lineFileClose(&lf);
carefulClose(&gapFile);
carefulClose(&f);
}

int main(int argc, char *argv[])
/* Program entry point: parse options into the global settings, check
 * that exactly three positional arguments remain (in.net, in.chain,
 * out.chain), then run the conversion. */
{
optionInit(&argc, argv, options);

/* Pull option values into the globals read by the conversion code. */
type = optionVal("type", type);
splitOnInsert = optionExists("splitOnInsert");
wholeChains = optionExists("wholeChains");
skipMissing = optionExists("skipMissing");

/* argv[0] plus three file names. */
if (argc != 4)
    {
    usage();
    }
netChainSubset(argv[1], argv[2], argv[3]);
return 0;
}
