/* WormDNA - stuff that finds C. elegans sequence data. 
 *
 * This file is copyright 2000 Jim Kent, but license is hereby
 * granted for all use - public, private or commercial. */

#ifndef WORMDNA_H
#define WORMDNA_H

#ifndef DNAUTIL_H
#include "dnautil.h"
#endif

#ifndef DNASEQ_H
#include "dnaseq.h"
#endif 

#ifndef NT4_H
#include "nt4.h"
#endif 

#ifndef GDF_H
#include "gdf.h"
#endif


struct wormCdnaInfo 
/* Annotation kept for a cDNA in the database in addition to its sequence.
 * Note that a function named wormCdnaInfo() is also declared in this header;
 * struct tags live in their own namespace in C, so the type must always be
 * written as 'struct wormCdnaInfo'. */
    {
    char *motherString;      /* Holds memory for the other strings; released by wormFreeCdnaInfo(). */
    char *name;              /* Name of cDNA. */
    char *gene;              /* Gene name, something like unc-1. */
    char orientation;        /* '+' or '-' (relative to gene transcription direction). */
    char *product;           /* Gene product, something like "cyclin-dependent kinase". */
    int cdsStart;            /* Start of coding region within cDNA. */
    int cdsEnd;              /* Stop of coding region within cDNA. */
    boolean knowStart;       /* Is the start known? */
    boolean knowEnd;         /* Is the end known? */
    boolean isEmbryonic;     /* True if derived from embryo culture. */
    boolean isAdult;         /* True if derived from adult culture. */
    boolean isMale;          /* True if males only. */
    boolean isHermaphrodite; /* True if hermaphrodites only. */
    char *description;       /* One line description. */
    };

boolean wormCdnaInfo(char *name, struct wormCdnaInfo *retInfo);
/* Get info about the cDNA sequence called name; the info is returned
 * through retInfo.
 * NOTE(review): the meaning of the boolean result is not stated here -
 * presumably TRUE when info was found; confirm against the implementation. */

void wormFaCommentIntoInfo(char *faComment, struct wormCdnaInfo *retInfo);
/* Process the line from a .fa file that contains information about a cDNA
 * (faComment) into the binary structure retInfo. */

void wormFreeCdnaInfo(struct wormCdnaInfo *ci);
/* Free the mother string in the cdnaInfo.  (The info structure itself
 * normally isn't dynamically allocated.) */

boolean wormInfoForGene(char *orfName, struct wormCdnaInfo *retInfo);
/* Get info, if any, on gene/ORF orfName; the info is returned through
 * retInfo.  The result is a boolean, not a pointer: FALSE when no info
 * exists.  Call wormFreeCdnaInfo() on retInfo when done. */


boolean wormCdnaSeq(char *name, struct dnaSeq **retDna, struct wormCdnaInfo *retInfo);
/* Get a single worm cDNA sequence by name, returned through retDna.
 * Optionally (if retInfo is non-null) get additional info about the
 * sequence as well.
 * NOTE(review): the meaning of the boolean result is not stated here -
 * presumably TRUE when the sequence was found; confirm. */

void wormCdnaUncache(void);
/* Get rid of any resources used caching or quickly accessing
 * worm cDNA.  Takes no arguments; declared with (void) so that it is a
 * real prototype and calls passing arguments are rejected. */


/* Stuff for searching entire database of worm cDNA */

struct wormCdnaIterator
/* State for stepping through the entire database of worm cDNA.  Set up by
 * wormSearchAllCdna(), advanced with nextWormCdna() or nextWormCdnaAndInfo(),
 * and released with freeWormCdnaIterator(). */
    {
    FILE *faFile;   /* File being read - presumably the cDNA .fa file; confirm against the implementation. */
    };

boolean wormSearchAllCdna(struct wormCdnaIterator **retSi);
/* Set up to search entire database of worm cDNA.  The iterator is returned
 * through retSi; free it with freeWormCdnaIterator(). */

void freeWormCdnaIterator(struct wormCdnaIterator **pIt);
/* Free up iterator returned by wormSearchAllCdna().  pIt is the address of
 * the caller's iterator pointer. */

struct dnaSeq *nextWormCdna(struct wormCdnaIterator *it);
/* Return next sequence in database.
 * NOTE(review): end-of-database behavior is not stated here - presumably
 * NULL is returned; confirm against the implementation. */

boolean nextWormCdnaAndInfo(struct wormCdnaIterator *it, struct dnaSeq **retSeq, struct wormCdnaInfo *retInfo);
/* Return next sequence (through retSeq) and associated info (through
 * retInfo) from database.
 * NOTE(review): presumably returns FALSE once the database is exhausted;
 * confirm against the implementation. */

char *wormFeaturesDir(void);
/* Return the features directory. (Includes trailing slash.)
 * Takes no arguments. */

char *wormChromDir(void);
/* Return the directory with the chromosomes.  Takes no arguments. */

char *wormCdnaDir(void);
/* Return directory with cDNA data.  Takes no arguments. */

char *wormSangerDir(void);
/* Return directory with Sanger specific gene predictions.
 * Takes no arguments. */

char *wormGenieDir(void);
/* Return directory with Genie specific gene predictions.
 * Takes no arguments. */

char *wormXenoDir(void);
/* Return directory with cross-species alignments.  Takes no arguments. */

boolean wormIsGeneName(char *name);
/* See if name looks like a worm gene name - that is
 *   abc-12
 * letters followed by a dash followed by a number. */

boolean wormIsOrfName(char *in);
/* Check to see if the input string is formatted correctly to be
 * an ORF name (elsewhere in this header ORF names are described as
 * cosmid.N type names). */

struct slName *wormGeneToOrfNames(char *name);
/* Returns list of cosmid.N type ORF names that are known by the abc-12
 * type gene name passed in.
 * NOTE(review): ownership of the returned list is not stated here -
 * presumably the caller frees it; confirm. */

char *wormGeneFirstOrfName(char *geneName);
/* Return first ORF synonym for gene geneName.
 * NOTE(review): the result when the gene has no ORF synonym, and who owns
 * the returned string, are not stated here; confirm before relying on them. */

boolean wormGeneForOrf(char *orfName, char *geneNameBuf, int bufSize);
/* Look for gene type (unc-12 or something) synonym for the cosmid.N name
 * orfName.
 * NOTE(review): presumably the synonym is written into geneNameBuf (of
 * bufSize bytes) and TRUE is returned when one is found; confirm. */

boolean getWormGeneExonDna(char *name, DNA **retDna);
/* Get the exon sequence for the gene called name; the DNA is returned
 * through retDna.
 * NOTE(review): meaning of the boolean result and ownership of the DNA are
 * not stated here; confirm against the implementation. */

boolean getWormGeneDna(char *name, DNA **retDna, boolean upcExons);
/* Get the DNA associated with the gene called name; the DNA is returned
 * through retDna.  Optionally (upcExons) upper case exons. */

void wormLoadNt4Genome(struct nt4Seq ***retNt4Seq, int *retNt4Count);
/* Load up entire packed worm genome into memory.  The sequences come back
 * through retNt4Seq and their number through retNt4Count (going by the
 * parameter names).  Release with wormFreeNt4Genome(). */

void wormFreeNt4Genome(struct nt4Seq ***pNt4Seq);
/* Free up packed worm genome (as loaded by wormLoadNt4Genome()).  pNt4Seq
 * is the address of the caller's genome pointer. */

int wormChromSize(char *chrom);
/* Return size of the worm chromosome named chrom. */

DNA *wormChromPart(char *chromId, int start, int size);
/* Return part of worm chromosome chromId, selected by start and size.
 * NOTE(review): ownership of the returned DNA is not stated here -
 * presumably the caller frees it; confirm. */

DNA *wormChromPartExonsUpper(char *chromId, int start, int size);
/* Return part of worm chromosome chromId, selected by start and size,
 * with exons in upper case. */

void wormChromNames(char ***retNames, int *retNameCount);
/* Get list of worm chromosome names.  The array of names is returned
 * through retNames and its length through retNameCount. */

int wormChromIx(char *name);
/* Return index of the worm chromosome called name.
 * NOTE(review): the result for an unknown name is not stated here; confirm. */

char *wormChromForIx(int ix);
/* Given index ix, return worm chromosome official name. */

char *wormOfficialChromName(char *name);
/* This returns a pointer to our official string for the chromosome name.
 * (This allows some routines to do direct pointer comparisons rather
 * than string comparisons.) */

boolean wormGeneRange(char *name, char **retChromId, char *retStrand, int *retStart, int *retEnd);
/* Return chromosome position of a gene, ORF, nameless cluster, or cosmid
 * called name.  The position comes back through retChromId, retStrand,
 * retStart and retEnd.  Returns FALSE if no such gene/cluster. */

boolean wormParseChromRange(char *in, char **retChromId, int *retStart, int *retEnd);
/* Chop up a string representation of a range within a chromosome (in) and
 * put the pieces into the return variables retChromId, retStart and retEnd.
 * Return FALSE if it isn't formatted right. */

boolean wormIsChromRange(char *in);
/* Check to see if the input string is formatted correctly to be
 * a range of a chromosome. */

void wormClipRangeToChrom(char *chrom, int *pStart, int *pEnd);
/* Make sure that the range *pStart..*pEnd stays inside chromosome chrom;
 * the range is passed by address so it can be adjusted in place. */

boolean wormIsNamelessCluster(char *name);
/* Returns true if name is of the correct format to be a nameless cluster. */

DNA *wormGetNamelessClusterDna(char *name);
/* Get DNA associated with the nameless cluster called name.
 * NOTE(review): ownership of the returned DNA is not stated here; confirm. */

struct wormFeature
/* Records where something (a gene, cDNA, cosmid ...) sits in the genome.
 * The *InRange() routines in this header hand these back linked through
 * the next field. */
    {
    struct wormFeature *next;   /* Next feature in list. */
    char *chrom;                /* Chromosome name.  The original note here was cut off after
                                 * "One of names returned by" - presumably wormChromNames(). */
    int start;                  /* Start of feature on chrom. */
    int end;                    /* End of feature on chrom. */
    char typeByte;              /* Type code; the meaning of its values is not documented in this header. */
    char name[1];               /* Feature name; the struct is allocated large enough to fit it.
                                 * Kept as [1] (not a flexible array member) so sizeof is unchanged. */
    };

struct wormFeature *newWormFeature(char *name, char *chrom, int start, int end, char typeByte);
/* Allocate a new feature; the arguments correspond to the like-named
 * fields of struct wormFeature. */

struct wormFeature *wormCdnasInRange(char *chromId, int start, int end);
/* Get info on all cDNAs that overlap the range start..end of chromosome
 * chromId. */

struct wormFeature *wormGenesInRange(char *chromId, int start, int end);
/* Get info on all genes that overlap the range start..end of chromosome
 * chromId. */

struct wormFeature *wormSomeGenesInRange(char *chromId, int start, int end, char *gdfDir);
/* Get info on genes that overlap range start..end of chromosome chromId in
 * a particular set of gene predictions, selected by gdfDir (presumably one
 * of the directories returned by wormSangerDir()/wormGenieDir(); confirm). */

struct wormFeature *wormCosmidsInRange(char *chromId, int start, int end);
/* Get info on all cosmids that overlap the range start..end of chromosome
 * chromId. */

struct cdaAli *wormCdaAlisInRange(char *chromId, int start, int end);
/* Return list of cDNA alignments that overlap the range start..end of
 * chromosome chromId. */

FILE *wormOpenGoodAli(void);
/* Opens good alignment file and reads signature.
 * (You can then cdaLoadOne() it.)  Takes no arguments. */

struct wormGdfCache
/* Helps manage fast indexed access to gene predictions. */
    {
    char **pDir;        /* Presumably the address of the directory name for this prediction set - confirm. */
    struct snof *snof;  /* Presumably the index used to look genes up by name - confirm. */
    FILE *file;         /* Presumably the open gene prediction file that goes with the index - confirm. */
    };
extern struct wormGdfCache wormSangerGdfCache;
extern struct wormGdfCache wormGenieGdfCache;
/* Caches for the Sanger and Genie gene prediction sets.  These are
 * declarations only; the objects are defined elsewhere. */

struct gdfGene *wormGetGdfGene(char *name);
/* Get the gdfGene called name.
 * NOTE(review): which prediction set is consulted is not stated here; use
 * wormGetSomeGdfGene() to choose the cache explicitly. */

struct gdfGene *wormGetSomeGdfGene(char *name, struct wormGdfCache *cache);
/* Get a single gdfGene of given name from the prediction set behind cache. */

struct gdfGene *wormGetGdfGeneList(char *baseName, int baseNameSize);
/* Get all gdfGenes whose names start with baseName (baseNameSize is
 * presumably the number of characters of baseName to match; confirm). */

struct gdfGene *wormGetSomeGdfGeneList(char *baseName, int baseNameSize, struct wormGdfCache *cache);
/* Get all gdfGenes whose names start with baseName from the prediction set
 * behind cache (baseNameSize is presumably the number of characters of
 * baseName to match; confirm). */

struct gdfGene *wormGdfGenesInRange(char *chrom, int start, int end, 
    struct wormGdfCache *geneFinder);
/* Get list of genes in range start..end of chromosome chrom according to
 * the given gene finder's cache. */

void wormUncacheGdf(void);
/* Free up resources associated with fast GDF access.  Takes no arguments. */

void wormUncacheSomeGdf(struct wormGdfCache *cache);
/* Uncache the gene prediction set behind cache. */

#endif /* WORMDNA_H */

