/* fullalte - Extracts full 3' end of intron based on a fragment.
 * Used to fill out alt-splicing intron before skipped exon data
 * set, which originally only had last 50 bp of intron. */
#include "common.h"
#include "dnautil.h"
#include "dnaseq.h"
#include "fa.h"

struct faSeq
    {
    struct faSeq *next;
    struct dnaSeq *seq;
    char *commentLine;
    };

struct faSeq *faReadAll(char *fileName)
/* Read all sequences in FA file. */
{
FILE *f = mustOpen(fileName, "r");
struct faSeq *list = NULL, *el;
struct dnaSeq *seq;
char *comment;

while (faReadNext(f, NULL, TRUE, &comment, &seq) )
    {
    AllocVar(el);
    el->seq = seq;
    el->commentLine = comment;
    slAddHead(&list, el);
    }
fclose(f);
slReverse(&list);
return list;
}

void writeLines50(FILE *f, DNA *dna)
/* Write DNA 50 bp to a line. */
{
int sizeLeft = strlen(dna);
int lineSize;
while (sizeLeft > 0)
    {
    lineSize = 50;
    if (lineSize > sizeLeft)
        lineSize = sizeLeft;
    mustWrite(f, dna, lineSize);
    fputc('\n', f);
    dna += lineSize;
    sizeLeft -= lineSize;
    }
}

int main(int argc, char *argv[])
{
char *fragName, *fullName, *outName;
FILE *out;
struct faSeq *fragList, *fullList;
struct faSeq *fragEl, *fullEl;
struct dnaSeq *fragSeq, *fullSeq;

if (argc != 4)
    {
    errAbort("fullAltE - extracts full intron from fragments\n"
             "usage\n"
             "   fullAltE frags.fa introns.fa output.fa\n");
    }
dnaUtilOpen();
fragName = argv[1];
fullName = argv[2];
outName = argv[3];

printf("Loading %s\n", fragName);
fragList = faReadAll(fragName);
printf("Loading %s\n", fullName);
fullList = faReadAll(fullName);
printf("Creating %s\n", outName);
out = mustOpen(outName, "w");
for (fragEl = fragList; fragEl != NULL; fragEl = fragEl->next)
    {
    boolean foundMatch = FALSE;
    fragSeq = fragEl->seq;
    for (fullEl = fullList; fullEl != NULL; fullEl = fullEl->next)
        {
        fullSeq = fullEl->seq;
        if (strstr(fullSeq->dna, fragSeq->dna) != NULL)
            {
            fprintf(out, "%s", fragEl->commentLine);
            writeLines50(out, fullSeq->dna+5);
            foundMatch = TRUE;
            break;
            }
        }
    if (!foundMatch)
        warn("Couldn't find match for %s", fragSeq->name);
    }

fclose(out);
return 0;
}