/* HGAP - Human Genome Annotation Project database. 
 *
 * This file is copyright 2000 Jim Kent, but license is hereby
 * granted for all use - public, private or commercial. */

#ifndef HGAP_H
#define HGAP_H

#ifndef DNASEQ_H
#include "dnaseq.h"
#endif 

#ifndef UNFIN_H
#include "unfin.h"
#endif

#ifndef JKSQL_H
#include "jksql.h"
#endif

typedef unsigned HGID;    /* Numeric ID used within the database. */

void hgSetDb(char *dbName);
/* Set the database name to dbName.  (hgGetDb returns the current
 * database name.)
 * NOTE(review): whether dbName is copied or merely referenced is not
 * shown here - confirm before passing a temporary buffer. */

char *hgGetDb(void);
/* Return the current database name.  Takes no arguments; it is
 * declared with (void) so that the compiler rejects calls that
 * pass any.
 * NOTE(review): ownership of the returned string is not stated
 * here - presumably the caller should not free it; confirm. */

struct sqlConnection *hgAllocConn(void);
/* Get free connection if possible. If not allocate a new one.
 * Takes no arguments; declared with (void) so that the compiler
 * checks call sites.  hgFreeConn puts a connection back for reuse. */

struct sqlConnection *hgFreeConn(struct sqlConnection **pConn);
/* Put back connection for reuse.  pConn is the address of the
 * caller's connection pointer.
 * NOTE(review): the meaning of the returned pointer is not documented
 * here, nor whether *pConn is cleared - confirm against the
 * implementation before relying on either. */


HGID hgIdQuery(struct sqlConnection *conn, char *query);
/* Return first field of first table as HGID. 0 return ok.
 * Use hgRealIdQuery instead when a 0 result should abort.
 * NOTE(review): "first table" presumably means the first row of the
 * query result - confirm. */

HGID hgRealIdQuery(struct sqlConnection *conn, char *query);
/* Return first field of first table as HGID - abort if 0.
 * Same as hgIdQuery except that a 0 result is not accepted.
 * NOTE(review): "first table" presumably means the first row of the
 * query result - confirm. */


struct sqlConnection *hgStartUpdate(void);
/* Get a connection for an update.  (Starts allocating ids.)
 * Takes no arguments; declared with (void) so that the compiler
 * checks call sites.  hgEndUpdate finishes up the connection. */

void hgEndUpdate(struct sqlConnection **pConn, char *comment, ...)
/* Finish up connection with a printf format comment.
 * pConn is the address of the caller's connection pointer.  comment
 * is a printf style format string and the variable arguments are its
 * values; when compiling with GCC the attribute below makes the
 * compiler check them against the format.
 * NOTE(review): presumably pairs with hgStartUpdate - confirm. */
#if defined(__GNUC__)
__attribute__((format(printf, 2, 3)))
#endif
;


HGID hgNextId(void);
/* Get next unique id.  (Should only be called after hgStartUpdate.)
 * Takes no arguments; declared with (void) so that the compiler
 * rejects calls that pass any. */

FILE *hgCreateTabFile(char *tableName);
/* Open a tab file with name corresponding to tableName.  The
 * returned file may simply be fclosed when done.  (Currently just
 * makes tableName.tab in the current directory.) */

void hgLoadTabFile(struct sqlConnection *conn, char *tableName);
/* Load tab delimited file corresponding to tableName.
 * Should only be used after hgCreateTabFile, and only after
 * that file has been closed. */


enum
/* Miscellaneous numeric constants. */
    {
    hgContigPad = 800     /* How many N's go between contigs. */
    };

struct hgBac
/* A clone that has been sequenced - BAC, PAC or cosmid. */
    {
    struct hgBac *next;         /* Following element of list. */
    struct hgNest *nest;        /* Coordinate space of this clone. */
    HGID id;                    /* ID within HGAP. */
    char name[16];              /* Accession in GenBank. */
    int contigCount;            /* How many contigs there are. */
    struct hgContig *contigs;   /* List of the contigs. */
    };

struct hgContig
/* One contig belonging to a BAC. */
    {
    struct hgContig *next;      /* Following element of list. */
    struct hgNest *nest;        /* Coordinate space of this contig. */
    HGID id;                    /* ID within HGAP. */
    char name[20];              /* Name, for example AC000007.24 */
    struct hgBac *bac;          /* The BAC that holds this contig. */
    int ix;                     /* Index of contig. */
    int submitOffset;           /* Where it sits in the GenBank submission. */
    int size;                   /* Length in bases. */
    };

struct hgNest
/* A node of the contig tree, which runs
 * chromosomes -> chromosome contigs -> bacs -> bac contigs. */
    {
    struct hgNest *next;        /* Next sibling in list. */
    struct hgNest *children;    /* List of child nodes. */
    struct hgNest *parent;      /* Parent node, if there is one. */
    HGID id;                    /* ID within HGAP. */
    int orientation;            /* +1 or -1 with respect to parent. */
    int offset;                 /* Position with respect to parent. */
    int size;                   /* Length in bases. */
    struct hgContig *contig;    /* Contig tied to this node, if there is one. */
    };

struct hgBac *hgGetBac(char *acc);
/* Load BAC with given accession into memory. Don't free the result,
 * it's managed by the system. */

struct hgContig *hgGetContig(char *acc, int contigIx);
/* Get contig of the BAC with accession acc.  contigIx is the contig's
 * position in the submission, not its position in the ordering.
 * NOTE(review): ownership of the returned contig is not stated here;
 * presumably system managed like hgGetBac's result - confirm. */

struct dnaSeq *hgContigSeq(struct hgContig *contig);
/* Return DNA associated with contig.
 * NOTE(review): ownership of the returned dnaSeq is not stated here -
 * confirm whether the caller must free it. */

struct dnaSeq *hgRnaSeq(char *acc);
/* Return sequence for RNA with accession acc. */

void hgRnaSeqAndId(char *acc, struct dnaSeq **retSeq, HGID *retId);
/* Return sequence for RNA and its database ID.  Results are passed
 * back through retSeq and retId. */

struct dnaSeq *hgBacOrderedSeq(struct hgBac *bac);
/* Return DNA associated with BAC including NNN's between
 * contigs, in ordered coordinates.
 * NOTE(review): the pad between contigs is presumably hgContigPad
 * N's - confirm against the implementation. */

struct dnaSeq *hgBacSubmittedSeq(char *acc);
/* Return DNA associated with BAC in submitted ordering
 * and coordinates. */

struct dnaSeq *hgBacContigSeq(char *acc);
/* Return list of sequences, one for each contig in BAC. */

int hgOffset(struct hgNest *source, int offset, struct hgNest *dest);
/* Translate offset from source coordinate space to dest coordinate
 * space and return the result.  dest has to be an ancestor of source
 * (or the same as source).
 * NOTE(review): behavior when dest is not an ancestor is not stated
 * here - confirm. */

/* The following is a series of nested structures for
 * describing a range of DNA.  Each later structure begins with
 * the same fields, in the same order, as the earlier ones, so
 * routines that work on an earlier structure will also work on
 * a later one.  This is a crude but effective form of single
 * inheritance. */

struct hgRange
/* A bare interval - start and end positions and nothing more. */
    {
    struct hgRange *next;   /* Following element of list. */
    int tStart;             /* Start in target or only sequence. */
    int tEnd;               /* End; covered positions are tStart <= x < tEnd */
    };

int hgCmpTStart(const void *va, const void *vb);
/* Compare function to sort by tStart, then by tEnd.
 * NOTE(review): whether va/vb point at the structures themselves or
 * at pointers to them is not shown here - confirm before passing
 * this to a sort routine. */

struct hgHit
/* Basic hit - a range of some sequence that is of interest. */
    {
    struct hgHit *next;     /* Following element of list. */
    int tStart;             /* Start in target or only sequence. */
    int tEnd;               /* End; covered positions are tStart <= x < tEnd */
    int tOrientation;       /* Orientation, +1 or -1. */
    char *target;           /* Target sequence name. (Not allocated here.) */
    };

int hgCmpTarget(const void *va, const void *vb);
/* Compare function to sort by target, orientation, tStart, then tEnd.
 * NOTE(review): whether va/vb point at the structures themselves or
 * at pointers to them is not shown here - confirm before passing
 * this to a sort routine. */

struct hgScoredHit
/* Hit that also carries a log odds score. */
    {
    struct hgScoredHit *next;   /* Following element of list. */
    int tStart;                 /* Start in target or only sequence. */
    int tEnd;                   /* End; covered positions are tStart <= x < tEnd */
    int tOrientation;           /* Orientation, +1 or -1. */
    char *target;               /* Target sequence name. (Not allocated here.) */
    int logOdds;                /* Score in log odds style, scaled x 1000. */
    };

int hgCmpScore(const void *va, const void *vb);
/* Compare function to sort by logOdds score.
 * NOTE(review): sort direction (ascending or descending score) and
 * whether va/vb point at the structures or at pointers to them are
 * not shown here - confirm. */

struct hgAliHit
/* Hit describing an insert-free alignment between two sequences. */
    {
    struct hgAliHit *next;  /* Following element of list. */
    int tStart;             /* Start in target or only sequence. */
    int tEnd;               /* End; covered positions are tStart <= x < tEnd */
    int tOrientation;       /* Orientation, +1 or -1. */
    char *target;           /* Target sequence name. (Not allocated here.) */
    int logOdds;            /* Score in log odds style, scaled x 1000. */
    int qStart;             /* Start in query sequence. */
    int qEnd;               /* End in query sequence. */
    int qOrientation;       /* Query orientation, +1 or -1. */
    char *query;            /* Query sequence name. (Not allocated here.) */
    };

int hgCmpQStart(const void *va, const void *vb);
/* Compare function to sort by qStart, then by qEnd.  Only meaningful
 * for structures that have the query fields (hgAliHit and later).
 * NOTE(review): whether va/vb point at the structures themselves or
 * at pointers to them is not shown here - confirm. */

int hgCmpQuery(const void *va, const void *vb);
/* Compare function to sort by query, orientation, qStart, then qEnd.
 * Only meaningful for structures that have the query fields
 * (hgAliHit and later).
 * NOTE(review): whether va/vb point at the structures themselves or
 * at pointers to them is not shown here - confirm. */

struct hgBoundedHit
/* Alignment hit whose edges may each be hard or soft. */
    {
    struct hgBoundedHit *next;  /* Following element of list. */
    int tStart;                 /* Start in target or only sequence. */
    int tEnd;                   /* End; covered positions are tStart <= x < tEnd */
    int tOrientation;           /* Orientation, +1 or -1. */
    char *target;               /* Target sequence name. (Not allocated here.) */
    int logOdds;                /* Score in log odds style, scaled x 1000. */
    int qStart;                 /* Start in query sequence. */
    int qEnd;                   /* End in query sequence. */
    int qOrientation;           /* Query orientation, +1 or -1. */
    char *query;                /* Query sequence name. (Not allocated here.) */
    bool hardStart;             /* Set when start position is known. */
    bool hardEnd;               /* Set when end position is known. */
    };

struct hgAlignment
/* An alignment with gaps.  The leading fields, up to and including
 * hardEnd, are laid out the same as in hgBoundedHit so that routines
 * written for the earlier range structures (including the query
 * comparison functions) read the fields they expect. */
    {
    struct hgAlignment *next;
    int tStart, tEnd;	  /* Position in target or only sequence tStart <= x < tEnd */
    int tOrientation;	  /* +1 or -1 orientation. */
    char *target;	  /* Name of target seq. (Not allocated here.) */
    int logOdds;          /* Log odds style score - scaled x 1000. */
    int qStart, qEnd;     /* Position in query sequence. */
    int qOrientation;     /* +1 or -1 query orientation. */
    char *query;	  /* Name of query seq. (Not allocated here.)  Kept in
                           * the same position as in hgAliHit/hgBoundedHit;
                           * without it the fields below would sit where
                           * those structures have query. */
    bool hardStart;       /* Start position known */
    bool hardEnd;         /* End position known */
    struct hgBoundedHit *hitList;  /*  Subalignments. */
    };

#endif /* HGAP_H */

