/* bptForTwoBit - Create a b+ tree index for a .2bit file.  Key is the sequence name. 
 * Value is the position of the start of the compressed DNA in the .2bit file. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "twoBit.h"
#include "bPlusTree.h"


/* Number of children per node in the b+ tree.  This is the default; main()
 * replaces it with the -blockSize=N command line option when one is given. */
int blockSize = 256;

void usage()
/* Print the command line synopsis, including the current default block size,
 * and abort via errAbort. */
{
errAbort("bptForTwoBit - Create a b+ tree index for a .2bit file.  "
         "Key is the sequence name. Value \n"
         "is the position of the start of the compressed DNA in the .2bit file.\n"
         "usage:\n"
         "   bptForTwoBit in.2bit out.bpt\n"
         "options:\n"
         "   -blockSize=N - number of children per node in b+ tree. Default %d\n",
         blockSize);
}

/* Command line options this program accepts; handed to optionInit() in main().
 * The list is terminated by an entry with a NULL name. */
static struct optionSpec options[] = {
   {"blockSize", OPTION_INT},
   {NULL, 0},
};

int twoBitIndexCmp(const void *va, const void *vb)
/* qsort-style comparator for an array of twoBitIndex pointers.  Each argument
 * is the address of an array slot; ordering is strcmp on the sequence names. */
{
const struct twoBitIndex *const *pLeft = va;
const struct twoBitIndex *const *pRight = vb;
return strcmp((*pLeft)->name, (*pRight)->name);
}

void twoBitIndexKey(const void *va, char *keyBuf)
/* Get key field: copy the sequence name of the twoBitIndex whose pointer is
 * stored at va into keyBuf.  The copy is unbounded, so the caller must supply
 * a buffer large enough for the name and its terminating zero. */
{
const struct twoBitIndex *const *pEntry = va;
strcpy(keyBuf, (*pEntry)->name);
}

void *twoBitIndexVal(const void *va)
/* Get value field: return the address of the offset member of the twoBitIndex
 * whose pointer is stored at va. */
{
const struct twoBitIndex *const *pEntry = va;
const struct twoBitIndex *entry = *pEntry;
return (void *)&entry->offset;
}

void bptForTwoBit(char *twoBitIn, char *indexOut)
/* bptForTwoBit - Create a b+ tree index for a .2bit file.  Key is the sequence name. 
 * Value is the position of the start of the compressed DNA in the .2bit file.
 *   twoBitIn - name of the existing .2bit file to read
 *   indexOut - name of the b+ tree index file to create
 * The key size is the length of the longest sequence name; the fan-out comes from
 * the file-level blockSize. */
{
/* Read two bit file, and convert linked list index to array.  The longest name
 * is tracked in the same pass so the key size covers exactly the items indexed. */
struct twoBitFile *tbf = twoBitOpen(twoBitIn);
struct twoBitIndex *tbi, **tbiArray;
int elCount = tbf->hash->elCount;
int maxSize = 0;
int i;
AllocArray(tbiArray, elCount);
for (i=0, tbi=tbf->indexList; i < elCount; ++i, tbi=tbi->next)
    {
    int size = strlen(tbi->name);
    if (maxSize < size)
        maxSize = size;
    tbiArray[i] = tbi;
    }

/* The array MUST be sorted by name before it is handed to bptFileCreate. */
qsort(tbiArray, elCount, sizeof(tbiArray[0]), twoBitIndexCmp);

/* Create index.  sizeof(tbi->offset) is not evaluated, so it is fine that tbi
 * no longer points at a list element here. */
bptFileCreate(tbiArray, sizeof(tbiArray[0]), elCount, blockSize,
    twoBitIndexKey, maxSize, twoBitIndexVal, sizeof(tbi->offset), indexOut);
verbose(1, "Created index of %d sequences in %s\n", elCount, indexOut);

/* Done with the input: release the pointer array and close the .2bit file
 * instead of leaking both. */
freez(&tbiArray);
twoBitClose(&tbf);
}

int main(int argc, char *argv[])
/* Process command line: parse options, require exactly two positional
 * arguments (in.2bit out.bpt), validate -blockSize and build the index. */
{
optionInit(&argc, argv, options);
if (argc != 3)
    usage();
blockSize = optionInt("blockSize", blockSize);
/* A b+ tree node needs at least two children to branch, so zero, one or a
 * negative count is not a usable fan-out.  Reject it here with a clear message
 * rather than handing it to the tree builder. */
if (blockSize < 2)
    errAbort("blockSize must be at least 2, got %d", blockSize);
bptForTwoBit(argv[1], argv[2]);
return 0;
}
