/* edwLib - routines shared by various encodeDataWarehouse programs.    See also encodeDataWarehouse
 * module for tables and routines to access structs built on tables. */

/* Copyright (C) 2014 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */

#include "common.h"
#include "hex.h"
#include "dystring.h"
#include "jksql.h"
#include "errAbort.h"
#include "openssl/sha.h"
#include "base64.h"
#include "basicBed.h"
#include "bigBed.h"
#include "portable.h"
#include "cheapcgi.h"
#include "genomeRangeTree.h"
#include "md5.h"
#include "htmshell.h"
#include "obscure.h"
#include "bamFile.h"
#include "raToStruct.h"
#include "web.h"
#include "encode3/encode3Valid.h"
#include "encodeDataWarehouse.h"
#include "edwLib.h"
#include "edwFastqFileFromRa.h"
#include "edwBamFileFromRa.h"
#include "edwQaWigSpotFromRa.h"


/* System globals - just a few ... for now.  Please seriously not too many more. */
/* Name of the database that edwConnect and edwConnectReadWrite open. */
char *edwDatabase = "encodeDataWarehouse";
int edwSingleFileTimeout = 4*60*60;   // How many seconds we give ourselves to fetch a single file

/* File system locations.  Each ends with a slash; code in this file builds full paths
 * by appending a relative name directly to edwRootDir. */
char *edwRootDir = "/data/encode3/encodeDataWarehouse/";
char *eapRootDir = "/data/encode3/encodeAnalysisPipeline/";
char *edwValDataDir = "/data/encode3/encValData/";
char *edwDaemonEmail = "edw@encodedcc.sdsc.edu";

struct sqlConnection *edwConnect()
/* Open a connection to edwDatabase via sqlConnect.  Callers are expected to treat it
 * as read only; use edwConnectReadWrite when updates are needed. */
{
struct sqlConnection *conn = sqlConnect(edwDatabase);
return conn;
}

struct sqlConnection *edwConnectReadWrite()
/* Open a connection to edwDatabase through the "encodeDataWarehouse" connection
 * profile, which is the one used for read/write access. */
{
struct sqlConnection *conn = sqlConnectProfile("encodeDataWarehouse", edwDatabase);
return conn;
}

char *edwPathForFileId(struct sqlConnection *conn, long long fileId)
/* Look up edwFileName for fileId and return it prefixed with edwRootDir.  The result
 * is a freshly cloned string that the caller should eventually freeMem. */
{
/* Fetch the warehouse-relative file name; sqlNeedQuickQuery is the "must succeed" flavor. */
char sql[256];
sqlSafef(sql, sizeof(sql), "select edwFileName from edwFile where id=%lld", fileId);
char relativeName[PATH_LEN];
sqlNeedQuickQuery(conn, sql, relativeName, sizeof(relativeName));

/* Glue root directory and relative name together. */
char fullPath[512];
safef(fullPath, sizeof(fullPath), "%s%s", edwRootDir, relativeName);
return cloneString(fullPath);
}

char *edwTempDir()
/* Return the warehouse temp directory (edwRootDir + "tmp/"), creating it on first use.
 * The returned pointer refers to a shared static buffer, so please don't modify it. */
{
static char tempDir[PATH_LEN];

/* Already computed on an earlier call - just hand back the cached value. */
if (tempDir[0] != 0)
    return tempDir;

/* Note code elsewhere depends on tmp dir being inside of edwRootDir - also good
 * to have it there so move to a permanent file is quick and unlikely to fail. */
safef(tempDir, sizeof(tempDir), "%s%s", edwRootDir, "tmp");
makeDirsOnPath(tempDir);
strcat(tempDir, "/");	// Trailing slash is added only after the directory is made
return tempDir;
}

char *edwTempDirForToday(char dir[PATH_LEN])
/* Fill dir with today's temp directory - edwRootDir, then the directory for the
 * current day, then "tmp/" - and return dir.  The directory is created if this is the
 * first call for that day in this process. */
{
char today[PATH_LEN];
edwDirForTime(edwNow(), today);
safef(dir, PATH_LEN, "%s%stmp/", edwRootDir, today);

/* makeDirsOnPath is time consuming, so remember the last day we did it for and
 * skip the call when the day has not changed. */
static char cachedDay[PATH_LEN] = "";
if (sameString(today, cachedDay))
    return dir;
strcpy(cachedDay, today);

/* Temporarily chop the trailing slash while making the directories. */
int lastIx = strlen(dir) - 1;
dir[lastIx] = 0;
makeDirsOnPath(dir);
dir[lastIx] = '/';
return dir;
}


long long edwGettingFile(struct sqlConnection *conn, char *submitDir, char *submitFileName)
/* Check whether a fetch of submitFileName from submitDir is in progress or done.
 * Returns the id of the most recent matching edwFile record, even if incomplete,
 * or -1 if there is no such record. */
{
char sql[PATH_LEN+512];

/* Without a record for the directory there can be no file record either. */
sqlSafef(sql, sizeof(sql), "select id from edwSubmitDir where url='%s'", submitDir);
int dirId = sqlQuickNum(conn, sql);
if (dirId <= 0)
    return -1;

/* Look for a non-errored, non-deprecated file that either finished uploading or
 * started uploading before the cutoff of one single-file timeout ago. */
long long cutoff = (long long)edwNow() - edwSingleFileTimeout;
sqlSafef(sql, sizeof(sql), 
    "select id from edwFile "
    "where submitFileName='%s' and submitDirId = %d and errorMessage = '' and deprecated=''"
    " and (endUploadTime > startUploadTime or startUploadTime < %lld) "
    "order by submitId desc limit 1"
    , submitFileName, dirId, cutoff);
long long fileId = sqlQuickLongLong(conn, sql);
return (fileId == 0) ? -1 : fileId;
}

long long edwGotFile(struct sqlConnection *conn, char *submitDir, char *submitFileName, 
    char *md5, long long size)
/* See if we already got file.  Return fileId if we do,  otherwise -1.  The decision
 * is based mostly on the MD5sum.  For short files (100000 bytes or less) we also require
 * the submitDir and submitFileName to match.  This is to cover the case where you might
 * have legitimate empty files duplicated even though they were computed based on different
 * things. For instance coming up with no peaks is a legitimate result for many chip-seq
 * experiments. */
{
/* For large files just rely on MD5. */
char query[PATH_LEN+512];
if (size > 100000)
    {
    sqlSafef(query, sizeof(query),
        "select id from edwFile where md5='%s' order by submitId desc limit 1" , md5);
    long long result = sqlQuickLongLong(conn, query);
    if (result == 0)
        result = -1;
    return result;
    }

/* Rest of the routine deals with smaller files,  which we are less worried about
 * duplicating,  and indeed expect a little duplication of the empty file if none
 * other. */

/* First see if we have even got the directory. */
sqlSafef(query, sizeof(query), "select id from edwSubmitDir where url='%s'", submitDir);
int submitDirId = sqlQuickNum(conn, query);
if (submitDirId <= 0)
    return -1;

/* The complex truth is that we may have gotten this file multiple times. 
 * We return the most recent version where it got uploaded and passed the post-upload
 * MD5 sum, and thus where the MD5 field is filled in the database. */
sqlSafef(query, sizeof(query), 
    "select md5,id from edwFile "
    "where submitFileName='%s' and submitDirId = %d and md5 != '' "
    "order by submitId desc limit 1"
    , submitFileName, submitDirId);
struct sqlResult *sr = sqlGetResult(conn, query);
char **row;
/* Must be long long to match sqlLongLong and the return type; a plain long would
 * truncate large ids on platforms where long is 32 bits. */
long long fileId = -1;
if ((row = sqlNextRow(sr)) != NULL)
    {
    char *dbMd5 = row[0];
    /* Only the most recent record counts; if its MD5 differs we report no match. */
    if (sameWord(md5, dbMd5))
	fileId = sqlLongLong(row[1]);
    }
sqlFreeResult(&sr);

return fileId;
}

long long edwNow()
/* Return the current time, in seconds since the Epoch, as reported by time(). */
{
time_t now = time(NULL);
return now;
}

/* Size of a base64 encoded SHA384 hash, plus 1 for the terminating zero. */
#define EDW_SID_SIZE 65   

static void makeShaBase64(unsigned char *inputBuf, int inputSize, char out[EDW_SID_SIZE])
/* Hash inputSize bytes of inputBuf with SHA384, base64 encode the digest, and store
 * the zero terminated printable result in out. */
{
unsigned char digest[48];
SHA384(inputBuf, inputSize, digest);

char *encoded = base64Encode((char*)digest, sizeof(digest));
memcpy(out, encoded, EDW_SID_SIZE);
out[EDW_SID_SIZE-1] = 0; 	// Guarantee termination whatever the encoder produced
freeMem(encoded);
}

void edwMakeSid(char *user, char sid[EDW_SID_SIZE])
/* Convert user to sid: a printable hash of a fixed-size buffer that holds a salt
 * string with the user name embedded in it.  The result is written to sid, which is
 * declared with room for EDW_SID_SIZE bytes.  The same user always yields the same sid. */
{
/* Salt it well with stuff that is reproducible but hard to guess */
unsigned char inputBuf[512];
memset(inputBuf, 0, sizeof(inputBuf));
int i;
/* Fill the whole buffer with a fixed byte pattern.  The loop steps by two and the
 * buffer size is even, so the write to i+1 stays in bounds. */
for (i=0; i<ArraySize(inputBuf); i += 2)
    {
    inputBuf[i] = i ^ 0x29;
    inputBuf[i+1] = ~i;
    }
/* Write the salted user string over the start of the buffer.  NOTE(review): presumably
 * safef leaves the bytes past the string's terminator untouched, so the tail of the
 * buffer keeps the pattern from the loop above - confirm before changing anything here. */
safef((char*)inputBuf, sizeof(inputBuf), 
	"186ED79BAEXzeusdioIsdklnw88e86cd73%s<*#$*(#)!DSDFOUIHLjksdf", user);
/* The entire buffer is hashed, not just the string portion. */
makeShaBase64(inputBuf, sizeof(inputBuf), sid);
}

static void edwVerifySid(char *user, char *sidToCheck)
/* Make sure sid/user combo is good. */
{
char sid[EDW_SID_SIZE];
edwMakeSid(user, sid);
if (sidToCheck == NULL || memcmp(sidToCheck, sid, EDW_SID_SIZE) != 0)
    errAbort("Authentication failed, sid %s", (sidToCheck ? "fail" : "miss"));
}

char *edwGetEmailAndVerify()
/* Read the "email" cookie and, if present, check it against the "sid" cookie.
 * Returns the email address when the user is logged in and the sid checks out.
 * Returns NULL when there is no email cookie (user not logged in).  If there is an
 * email but the sid is missing or wrong, edwVerifySid aborts. */
{
char *email = findCookieData("email");
if (email == NULL)
    return NULL;

char *sid = findCookieData("sid");
edwVerifySid(email, sid);
return email;
}


struct edwUser *edwUserFromEmail(struct sqlConnection *conn, char *email)
/* Load the edwUser record whose email column equals email.  Returns NULL if no such
 * user is found. */
{
char sql[256];
sqlSafef(sql, sizeof(sql), "select * from edwUser where email='%s'", email);
return edwUserLoadByQuery(conn, sql);
}

struct edwUser *edwUserFromId(struct sqlConnection *conn, int id)
/* Load the edwUser record with the given id.  Returns NULL if no such user is found. */
{
char sql[256];
sqlSafef(sql, sizeof(sql), "select * from edwUser where id='%d'", id);
return edwUserLoadByQuery(conn, sql);
}

int edwUserIdFromFileId(struct sqlConnection *conn, int fId)
/* Return id of the user who originally submitted file fId.  Returns 0 if the file,
 * its submission, or the submitting user can't be found. */
{
/* Follow file -> submission -> user in a single join.  Joining on s.userId matters:
 * the previous two-step version fed the user id back in as if it were a submission
 * id (u.id=s.id), which only gave an answer when a submission with that same id
 * happened to exist. */
char query[256];
sqlSafef(query, sizeof(query), 
    "select u.id from edwFile f, edwSubmit s, edwUser u "
    "where f.id=%d and s.id=f.submitId and u.id=s.userId", fId);
return sqlQuickNum(conn, query);
}

struct edwUser *edwFindUserFromFileId(struct sqlConnection *conn, int fId)
/* Return the user record for whoever originally submitted file fId.  The result is
 * whatever edwUserFromId returns for the id found by edwUserIdFromFileId. */
{
int userId = edwUserIdFromFileId(conn, fId);
return edwUserFromId(conn, userId);
}

char *edwFindOwnerNameFromFileId(struct sqlConnection *conn, int fId)
/* Return email address of the submitter of file fId, or "an unknown user" if the
 * submitter can't be found.  Either way the result is a freshly cloned string, so the
 * caller can freeMem it without having to know which case occurred. */
{
struct edwUser *owner = edwFindUserFromFileId(conn, fId);
if (owner == NULL)
    return cloneString("an unknown user");
/* NOTE(review): the owner record itself is not freed here; the free routine for
 * struct edwUser is not visible in this file. */
return cloneString(owner->email);
}

int edwFindUserIdFromEmail(struct sqlConnection *conn, char *userEmail)
/* Return the edwUser id for the user with this email, as found by sqlQuickNum. */
{
char sql[256];
sqlSafef(sql, sizeof(sql), "select id from edwUser where email = '%s'", userEmail);
int userId = sqlQuickNum(conn, sql);
return userId;
}

boolean edwUserIsAdmin(struct sqlConnection *conn, char *userEmail)
/* Return TRUE if the isAdmin column for the user with this email is exactly 1,
 * FALSE otherwise. */
{
char sql[256];
sqlSafef(sql, sizeof(sql), "select isAdmin from edwUser where email = '%s'", userEmail);
int adminFlag = sqlQuickNum(conn, sql);
return (adminFlag == 1) ? TRUE : FALSE;
}

void edwWarnUnregisteredUser(char *email)
/* Issue a warning that no account exists for email, along with instructions on how
 * to get one. */
{
warn(
    "No user exists with email %s. If you need an account please contact your "
    "ENCODE DCC data wrangler and have them create an account for you.",
    email);
}


struct edwUser *edwMustGetUserFromEmail(struct sqlConnection *conn, char *email)
/* Return the user associated with email.  If there is none, put up the unregistered
 * user warning and call noWarnAbort. */
{
struct edwUser *user = edwUserFromEmail(conn, email);
if (user != NULL)
    return user;

/* Unknown email: explain how to register, then abort without a further message. */
edwWarnUnregisteredUser(email);
noWarnAbort();
return user;
}

int edwGetHost(struct sqlConnection *conn, char *hostName)
/* Return the edwHost id for hostName.  If the host is not in the table yet, insert
 * a new row for it (with paraFetchStreams set to 10) and return the new id. */
{
char sql[512];

/* Existing entry? */
sqlSafef(sql, sizeof(sql), "select id from edwHost where name='%s'", hostName);
int existingId = sqlQuickNum(conn, sql);
if (existingId > 0)
    return existingId;

/* Nope - make one stamped with the current time. */
sqlSafef(sql, sizeof(sql), "insert edwHost (name, firstAdded, paraFetchStreams) values('%s', %lld, 10)", 
       hostName, edwNow());
sqlUpdate(conn, sql);
return sqlLastAutoId(conn);
}

int edwGetSubmitDir(struct sqlConnection *conn, int hostId, char *submitDir)
/* Return the edwSubmitDir id for the url submitDir.  If there is no such row yet,
 * insert one tied to hostId and return the new id.  Note the lookup is by url only;
 * hostId is used just for the insert. */
{
char sql[512];

/* Existing entry? */
sqlSafef(sql, sizeof(sql), "select id from edwSubmitDir where url='%s'", submitDir);
int existingId = sqlQuickNum(conn, sql);
if (existingId > 0)
    return existingId;

/* Nope - make one stamped with the current time. */
sqlSafef(sql, sizeof(sql), 
   "insert edwSubmitDir (url, firstAdded, hostId) values('%s', %lld, %d)", 
   submitDir, edwNow(), hostId);
sqlUpdate(conn, sql);
return sqlLastAutoId(conn);
}

void edwMakeLicensePlate(char *prefix, int ix, char *out, int outSize)
/* Make a license-plate type string composed of prefix + funky coding of ix
 * and put result in out.  The coded part is three digits followed by three capital
 * letters, so ix 1 gives 000AAA.  ix must be between 1 and 10*10*10*26*26*26 inclusive
 * and outSize must be at least strlen(prefix) + 7; otherwise this aborts. */
{
int maxIx = 10*10*10*26*26*26;
/* Zero has to be rejected along with negatives: the encoding below works on ix-1,
 * and -1 % 26 is -1 in C, which would put a '@' in the plate. */
if (ix <= 0)
    errAbort("ix must be positive in edwMakeLicensePlate");
if (ix > maxIx)
    errAbort("ix exceeds max in edwMakeLicensePlate.  ix %d, max %d\n", ix, maxIx);
int prefixSize = strlen(prefix);
int minSize = prefixSize + 6 + 1;   // prefix + 123ABC + terminating zero
if (outSize < minSize)
    errAbort("outSize (%d) not big enough in edwMakeLicensePlate", outSize);

/* Copy in prefix. */
strcpy(out, prefix);

/* Generate the 123ABC part of license plate backwards. */
char *s = out+minSize;
int x = ix - 1;	// -1 so start with AAA not AAB
*(--s) = 0;	// zero tag at end;
int i;
for (i=0; i<3; ++i)
    {
    int remainder = x%26;
    *(--s) = 'A' + remainder;
    x /= 26;
    }
for (i=0; i<3; ++i)
    {
    int remainder = x%10;
    *(--s) = '0' + remainder;
    x /= 10;
    }
}

void edwDirForTime(time_t sinceEpoch, char dir[PATH_LEN])
/* Fill dir with the relative directory for the given time, in the form
 * year/month/day/ using the UTC date and no zero padding. */
{
/* Break the time down into calendar fields. */
struct tm utc;
gmtime_r(&sinceEpoch, &utc);

/* struct tm counts years from 1900 and months from 0. */
int year = utc.tm_year + 1900;
int month = utc.tm_mon + 1;
int day = utc.tm_mday;
safef(dir, PATH_LEN, "%d/%d/%d/", year, month, day);
}

char *lastMatchCharExcept(char *start, char *end, char match, char except)
/* Return last char between start up to but not including end that is match.
 * However if except occurs between end and this match, return NULL instead.
 * Also return NULL if there is no match, including when the range is empty. */
{
char *e = end;
/* Test before stepping back so that e never moves below start; forming a pointer
 * before the beginning of a buffer is undefined behavior even if never dereferenced. */
while (e > start)
    {
    char c = *(--e);
    if (c == except)
       return NULL;
    if (c == match)
       return e;
    }
return NULL;
}

void edwMakeBabyName(unsigned long id, char *baseName, int baseNameSize)
/* Given a numerical ID of 1 or more, make an easy to pronounce file name out of
 * alternating consonants and vowels and store it, zero terminated, in baseName.
 * Aborts if baseNameSize is too small for the name. */
{
char *consonants = "bdfghjklmnprstvwxyz";   // Avoid c and q because make sound ambiguous
char *vowels = "aeiou";
int consonantCount = strlen(consonants);
int vowelCount = strlen(vowels);
assert(id >= 1);

/* Names are numbered from zero internally.  Each round peels off one vowel digit and
 * one consonant digit and emits them as a consonant-vowel syllable. */
unsigned long rest = id - 1;
int used = 0;
for (;;)
    {
    char vowel = vowels[rest % vowelCount];
    rest /= vowelCount;
    char consonant = consonants[rest % consonantCount];
    rest /= consonantCount;
    if (used + 2 >= baseNameSize)
        errAbort("Not enough room for %lu in %d letters in edwMakeBabyName", id, baseNameSize);
    baseName[used++] = consonant;
    baseName[used++] = vowel;
    if (rest == 0)
        break;
    }
baseName[used] = 0;
}

char *edwFindDoubleFileSuffix(char *path)
/* Return pointer to the suffix of the file name part of path (the part after the
 * last '/').  Normally that is the last '.'.  When the last suffix is .gz or .bigBed
 * and there is another '.' before it in the file name, return that earlier dot so
 * the result covers both suffixes.  If there is no dot at all, return a pointer to
 * the end of the string. */
{
char *pathEnd = path + strlen(path);
char *dot = lastMatchCharExcept(path, pathEnd, '.', '/');
if (dot == NULL)
    return pathEnd;

if (sameString(dot, ".gz") || sameString(dot, ".bigBed"))
    {
    /* Reach back for one more suffix, staying within the file name. */
    char *earlierDot = lastMatchCharExcept(path, dot, '.', '/');
    if (earlierDot != NULL)
	dot = earlierDot;
    }
return dot;
}

void edwMakeFileNameAndPath(int edwFileId, char *submitFileName, char edwFile[PATH_LEN], char serverPath[PATH_LEN])
/* Convert file id to local file name, and full file path. Make any directories needed
 * along serverPath. */
{
/* Preserve suffix.  Give ourselves up to two suffixes. */
char *suffix = edwFindDoubleFileSuffix(submitFileName);

/* Figure out edw file name, starting with baseName. */
char baseName[32];
edwMakeBabyName(edwFileId, baseName, sizeof(baseName));

/* Figure out directory and make any components not already there. */
char edwDir[PATH_LEN];
edwDirForTime(edwNow(), edwDir);
char uploadDir[PATH_LEN];
safef(uploadDir, sizeof(uploadDir), "%s%s", edwRootDir, edwDir);
makeDirsOnPath(uploadDir);

/* Figure out full file names */
safef(edwFile, PATH_LEN, "%s%s%s", edwDir, baseName, suffix);
safef(serverPath, PATH_LEN, "%s%s", edwRootDir, edwFile);
}

char *edwSetting(struct sqlConnection *conn, char *name)
/* Look up name in the edwSettings table and return its val, or NULL if the setting
 * doesn't exist.  FreeMem the result when done. */
{
char sql[256];
sqlSafef(sql, sizeof(sql), "select val from edwSettings where name='%s'", name);
char *val = sqlQuickString(conn, sql);
return val;
}

char *edwRequiredSetting(struct sqlConnection *conn, char *name)
/* Like edwSetting, but aborts with an error message if the setting is not defined.
 * FreeMem the result when done. */
{
char *val = edwSetting(conn, name);
if (val != NULL)
    return val;
errAbort("Required %s setting is not defined in edwSettings table", name);
return val;
}

char *edwLicensePlateHead(struct sqlConnection *conn)
/* Return license plate prefix for current database - something like TSTFF or DEVFF or ENCFF.
 * The value comes from the required "prefix" setting.  It is looked up on the first
 * call only and kept in a static buffer, so don't modify or free the result. */
{
static char head[32];
if (head[0] == 0)
     {
     char *prefix = edwRequiredSetting(conn, "prefix");
     safef(head, sizeof(head), "%s", prefix);
     /* The setting is an allocated string that we own; now that it's copied into
      * the static buffer release it rather than leak it. */
     freeMem(prefix);
     }
return head;
}


/* Host name and submit directory url used for the edwHost and edwSubmitDir records
 * that stand for files added from the local file system. */
static char *localHostName = "localhost";
static char *localHostDir = "";  

static int getLocalHost(struct sqlConnection *conn)
/* Return id of the edwHost record for the local host, inserting the record first if
 * it is not there already. */
{
char sql[256];
sqlSafef(sql, sizeof(sql), "select id from edwHost where name = '%s'", localHostName);
int id = sqlQuickNum(conn, sql);
if (id != 0)
    return id;

/* Not there yet, so add it. */
sqlSafef(sql, sizeof(sql), "insert edwHost(name, firstAdded) values('%s', %lld)",
    localHostName,  edwNow());
sqlUpdate(conn, sql);
return sqlLastAutoId(conn);
}

static int getLocalSubmitDir(struct sqlConnection *conn)
/* Get submit dir for local submissions, making it up if it does not exist. */
{
int hostId = getLocalHost(conn);
char query[256];
sqlSafef(query, sizeof(query), "select id from edwSubmitDir where url='%s' and hostId=%d", 
    localHostDir, hostId);
int dirId = sqlQuickNum(conn, query);
if (dirId == 0)
    {
    sqlSafef(query, sizeof(query), "insert edwSubmitDir(url,hostId,firstAdded) values('%s',%d,%lld)",
	localHostDir, hostId, edwNow());
    sqlUpdate(conn, query);
    dirId = sqlLastAutoId(conn);
    }
return dirId;
}

static int getLocalSubmit(struct sqlConnection *conn)
/* Return id of the single edwSubmit record that covers all of our local additions,
 * inserting it if it does not exist yet. */
{
int localDirId = getLocalSubmitDir(conn);

char sql[256];
sqlSafef(sql, sizeof(sql), "select id from edwSubmit where submitDirId='%d'", localDirId);
int id = sqlQuickNum(conn, sql);
if (id != 0)
    return id;

/* Not there yet, so add it. */
sqlSafef(sql, sizeof(sql), "insert edwSubmit (submitDirId,startUploadTime) values(%d,%lld)",
    localDirId, edwNow());
sqlUpdate(conn, sql);
return sqlLastAutoId(conn);
}

char **sqlNeedNextRow(struct sqlResult *sr)
/* Return the next row of sr, aborting if there isn't one.  The abort message gives
 * no context, so only use this where a missing row would be unusual. */
{
char **row = sqlNextRow(sr);
if (row != NULL)
    return row;
errAbort("Unexpected empty result from database.");
return row;
}

void edwUpdateFileTags(struct sqlConnection *conn, long long fileId, struct dyString *tags)
/* Set the tags field of the edwFile record with id fileId to the contents of tags. */
{
/* Use a dyString for the update since tags can be any length. */
struct dyString *update = dyStringNew(0);
sqlDyStringPrintf(update, "update edwFile set tags='%s'", tags->string);
sqlDyStringPrintf(update, " where id=%lld", fileId);
sqlUpdate(conn, update->string);
dyStringFree(&update);
}

struct edwFile *edwGetLocalFile(struct sqlConnection *conn, char *localAbsolutePath, 
    char *symLinkMd5Sum)
/* Get record of local file from database, adding it if it doesn't already exist.
 * If symLinkMd5Sum is non-NULL the file is symbolically linked into the warehouse
 * rather than copied, and symLinkMd5Sum is recorded as its MD5 sum, so it must be
 * valid.  If it is NULL the file is copied and the MD5 sum is computed here.
 * A file already in the database with the same size and modification time is
 * returned as is; otherwise a new record is made.  Aborts if localAbsolutePath is
 * not absolute or the file does not exist. */
{
/* First do a reality check on the local absolute path.  Is there a file there? */
if (localAbsolutePath[0] != '/')
    errAbort("Using relative path in edwGetLocalFile.");
long long size = fileSize(localAbsolutePath);
if (size == -1)
    errAbort("%s does not exist", localAbsolutePath);
long long updateTime = fileModTime(localAbsolutePath);

/* Get file if it's in database already. */
int submitDirId = getLocalSubmitDir(conn);
int submitId = getLocalSubmit(conn);
char query[256+PATH_LEN];
sqlSafef(query, sizeof(query), "select * from edwFile where submitId=%d and submitFileName='%s'",
    submitId, localAbsolutePath);
struct edwFile *ef = edwFileLoadByQuery(conn, query);

/* If we got something in database, check update time and size, and if it's no change just 
 * return existing database id. */
if (ef != NULL && ef->updateTime == updateTime && ef->size == size)
    return ef;

/* If we got here, then we need to make a new file record. Start with pretty empty record
 * that just has file ID, submitted file name and a few things*/
sqlSafef(query, sizeof(query), 
    "insert edwFile (submitId,submitDirId,submitFileName,startUploadTime) "
            " values(%d, %d, '%s', %lld)"
	    , submitId, submitDirId, localAbsolutePath, edwNow());
sqlUpdate(conn, query);
long long fileId = sqlLastAutoId(conn);

/* Create big data warehouse file/path name. */
char edwFile[PATH_LEN], edwPath[PATH_LEN];
edwMakeFileNameAndPath(fileId, localAbsolutePath, edwFile, edwPath);

/* md5 is what goes in the database.  computedMd5 is non-NULL only when we allocated
 * the sum ourselves, so we know whether it is ours to free. */
char *md5;
char *computedMd5 = NULL;

/* Do copy or symbolic linking of file into warehouse managed dir. */
if (symLinkMd5Sum)
    {
    md5 = symLinkMd5Sum;
    makeSymLink(localAbsolutePath, edwPath);  
    }
else
    {
    /* We're a little paranoid so md5 it */
    copyFile(localAbsolutePath, edwPath);
    md5 = computedMd5 = md5HexForFile(localAbsolutePath);
    }

/* Update file record. */
sqlSafef(query, sizeof(query), 
    "update edwFile set edwFileName='%s', endUploadTime=%lld,"
                       "updateTime=%lld, size=%lld, md5='%s' where id=%lld"
			, edwFile, edwNow(), updateTime, size, md5, fileId);
sqlUpdate(conn, query);
freeMem(computedMd5);   // No-op in the symlink case, where the caller owns the sum

/* Now, it's a bit of a time waste, but cheap in code, to just load it back from DB. */
sqlSafef(query, sizeof(query), "select * from edwFile where id=%lld", fileId);
return edwFileLoadByQuery(conn, query);
}

struct edwFile *edwFileAllIntactBetween(struct sqlConnection *conn, int startId, int endId)
/* Return list of all intact files with ids from startId through endId, both ends
 * included.  Intact here means endUploadTime and updateTime are set, and both
 * errorMessage and deprecated are empty. */
{
char sql[256];
sqlSafef(sql, sizeof(sql), 
    "select * from edwFile where id>=%d and id<=%d and endUploadTime != 0 "
    "and updateTime != 0 and errorMessage = '' and deprecated = ''", 
    startId, endId);
struct edwFile *fileList = edwFileLoadByQuery(conn, sql);
return fileList;
}

struct edwFile *edwFileFromId(struct sqlConnection *conn, long long fileId)
/* Return the edwFile record with id fileId - returns NULL if not found. */
{
char sql[128];
sqlSafef(sql, sizeof(sql), "select * from edwFile where id=%lld", fileId);
struct edwFile *file = edwFileLoadByQuery(conn, sql);
return file;
}

struct edwFile *edwFileFromIdOrDie(struct sqlConnection *conn, long long fileId)
/* Return the edwFile record with id fileId - aborts if not found. */
{
struct edwFile *file = edwFileFromId(conn, fileId);
if (file != NULL)
    return file;
errAbort("Couldn't find file for id %lld\n", fileId);
return file;
}

struct edwValidFile *edwValidFileFromFileId(struct sqlConnection *conn, long long fileId)
/* Return the edwValidFile record whose fileId column is fileId - returns NULL if
 * there is none, meaning the file has not been validated. */
{
char sql[128];
sqlSafef(sql, sizeof(sql), "select * from edwValidFile where fileId=%lld", fileId);
struct edwValidFile *validFile = edwValidFileLoadByQuery(conn, sql);
return validFile;
}

struct edwExperiment *edwExperimentFromAccession(struct sqlConnection *conn, char *acc)
/* Given an accession, something like 'ENCSR123ABC', return the edwExperiment record
 * loaded for it. */
{
char sql[128];
sqlSafef(sql, sizeof(sql), "select * from edwExperiment where accession='%s'", acc);
struct edwExperiment *experiment = edwExperimentLoadByQuery(conn, sql);
return experiment;
}

struct genomeRangeTree *edwMakeGrtFromBed3List(struct bed3 *bedList)
/* Build a new genomeRangeTree holding the chrom/chromStart/chromEnd range of every
 * item in bedList. */
{
struct genomeRangeTree *tree = genomeRangeTreeNew();
struct bed3 *item = bedList;
while (item != NULL)
    {
    genomeRangeTreeAdd(tree, item->chrom, item->chromStart, item->chromEnd);
    item = item->next;
    }
return tree;
}

struct edwAssembly *edwAssemblyForUcscDb(struct sqlConnection *conn, char *ucscDb)
/* Return the edwAssembly record whose ucscDb column is ucscDb.  Aborts if there is
 * no such assembly. */
{
char sql[256];
sqlSafef(sql, sizeof(sql), "select * from edwAssembly where ucscDb='%s'", ucscDb);
struct edwAssembly *found = edwAssemblyLoadByQuery(conn, sql);
if (found != NULL)
    return found;
errAbort("Can't find assembly for %s", ucscDb);
return found;
}

struct edwAssembly *edwAssemblyForId(struct sqlConnection *conn, long long id)
/* Return the edwAssembly record with the given id.  Aborts if there is no such
 * assembly. */
{
char sql[128];
sqlSafef(sql, sizeof(sql), "select * from edwAssembly where id=%lld", id);
struct edwAssembly *found = edwAssemblyLoadByQuery(conn, sql);
if (found != NULL)
    return found;
errAbort("Can't find assembly for %lld", id);
return found;
}

char *edwSimpleAssemblyName(char *assembly)
/* Given compound name like male.hg19 return just hg19.  This is the name of the
 * assembly where we want to do enrichment calcs.  Names that don't end in one of the
 * known assembly suffixes are returned unchanged.  The result is either the input
 * pointer or a pointer into a static table, so don't free it. */
{
/* Suffixes that mark a subspace (male, female, etc.) of a common assembly.  The
 * leading dot is skipped when returning the simple name. */
static char *knownSuffixes[] = {".hg19",".hg38",".mm9",".mm10",".dm3",".ce10",".dm6"};
int suffixIx = 0;
while (suffixIx < ArraySize(knownSuffixes))
    {
    char *dotName = knownSuffixes[suffixIx];
    if (endsWith(assembly, dotName))
        return dotName + 1;
    ++suffixIx;
    }
return assembly;
}


struct genomeRangeTree *edwGrtFromBigBed(char *fileName)
/* Read the bigBed file fileName and return a genome range tree with the start-end
 * range of every interval on every chromosome.  Only start and end are used, so this
 * suits simple (unblocked) beds. */
{
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct bbiChromInfo *chromList = bbiChromList(bbi);
struct genomeRangeTree *grt = genomeRangeTreeNew();

struct bbiChromInfo *chrom = chromList;
while (chrom != NULL)
    {
    struct rbTree *chromTree = genomeRangeTreeFindOrAddRangeTree(grt, chrom->name);

    /* Intervals are allocated out of a local memory pool that only lives for the
     * duration of this chromosome. */
    struct lm *lm = lmInit(0);
    struct bigBedInterval *iv = bigBedIntervalQuery(bbi, chrom->name, 0, chrom->size, 0, lm);
    for ( ; iv != NULL; iv = iv->next)
        rangeTreeAdd(chromTree, iv->start, iv->end);
    lmCleanup(&lm);

    chrom = chrom->next;
    }

bigBedFileClose(&bbi);
bbiChromInfoFreeList(&chromList);
return grt;
}

boolean edwIsSupportedBigBedFormat(char *format)
/* Return TRUE if format exactly matches the name of one of the entries in
 * encode3BedTypeTable, FALSE otherwise. */
{
int typeIx = 0;
while (typeIx < encode3BedTypeCount)
    {
    if (sameString(format, encode3BedTypeTable[typeIx].name))
        return TRUE;
    ++typeIx;
    }
return FALSE;
}

void edwWriteErrToTable(struct sqlConnection *conn, char *table, int id, char *err)
/* Store err, with surrounding white space trimmed, in the errorMessage field of the
 * row of table that has the given id.  Note err is passed through trimSpaces. */
{
char *message = trimSpaces(err);

/* Use a dyString for the update since the message can be any length. */
struct dyString *update = dyStringNew(0);
sqlDyStringPrintf(update, "update %s set errorMessage='%s' where id=%d", 
    table, message, id);
sqlUpdate(conn, update->string);
dyStringFree(&update);
}

void edwWriteErrToStderrAndTable(struct sqlConnection *conn, char *table, int id, char *err)
/* Report err, trimmed of surrounding white space, through warn, and also store it in
 * the errorMessage field of table for the row with the given id. */
{
char *message = trimSpaces(err);
warn("%s", message);
edwWriteErrToTable(conn, table, id, err);
}


void edwAddJob(struct sqlConnection *conn, char *command)
/* Add job to queue to run by inserting command as the commandLine of a new row in
 * the edwJob table. */
{
/* SQL escaping can grow command, at worst doubling it if every character needs an
 * escape, so allow for twice its length plus the fixed part of the statement.
 * This matches the sizing used in edwMostRecentSubmission. */
char query[256 + 2*strlen(command)];
sqlSafef(query, sizeof(query), "insert into edwJob (commandLine) values('%s')", command);
sqlUpdate(conn, query);
}

void edwAddQaJob(struct sqlConnection *conn, long long fileId)
/* Queue up an "edwQaAgent <fileId>" job so that QA gets run on this file. */
{
char qaCommand[64];
safef(qaCommand, sizeof(qaCommand), "edwQaAgent %lld", fileId);
edwAddJob(conn, qaCommand);
}

int edwSubmitPositionInQueue(struct sqlConnection *conn, char *url, unsigned *retJobId)
/* Return position of our URL among the edwSubmitJob rows that have not started
 * (startTime = 0): 0 if it is first, 1 if one job is ahead, and so on.  Returns -1
 * if no such job is found.  If retJobId is non-NULL it is set to the id of the last
 * row looked at - the matching job when there is one, otherwise the final row
 * scanned, or 0 if there were no rows. */
{
char sql[256];
sqlSafef(sql, sizeof(sql), "select id,commandLine from edwSubmitJob where startTime = 0");
struct sqlResult *sr = sqlGetResult(conn, sql);

int position = -1;
unsigned lastJobId = 0;
int scanned;
char **row;
for (scanned = 0; (row = sqlNextRow(sr)) != NULL; ++scanned)
    {
    lastJobId = sqlUnsigned(row[0]);

    /* Command line is the program name followed by the url as the second word. */
    char *rest = row[1];
    char *program = nextQuotedWord(&rest);
    char *jobUrl = nextQuotedWord(&rest);
    if (sameOk(program, "edwSubmit") && sameOk(url, jobUrl))
        {
	position = scanned;
	break;
	}
    }
sqlFreeResult(&sr);

if (retJobId != NULL)
    *retJobId = lastJobId;
return position;
}

struct edwSubmit *edwSubmitFromId(struct sqlConnection *conn, long long id)
/* Return the edwSubmit record with the given id, or NULL if no such submission. */
{
char sql[256];
sqlSafef(sql, sizeof(sql), "select * from edwSubmit where id=%lld", id);
struct edwSubmit *submit = edwSubmitLoadByQuery(conn, sql);
return submit;
}


struct edwSubmit *edwMostRecentSubmission(struct sqlConnection *conn, char *url)
/* Return the submission from this url that has the highest id, which is the most
 * recent one and may still be in progress. */
{
/* Size the buffer for the url at twice its length, leaving room for escaping. */
int escapedUrlMax = 2*strlen(url);
char sql[128 + escapedUrlMax + 1];
sqlSafef(sql, sizeof(sql), 
    "select * from edwSubmit where url='%s' order by id desc limit 1", url);
struct edwSubmit *submit = edwSubmitLoadByQuery(conn, sql);
return submit;
}

long long edwSubmitMaxStartTime(struct edwSubmit *submit, struct sqlConnection *conn)
/* Return the latest startUploadTime of any file in the submission.  If that comes
 * back as zero, as when no files have started yet, return the start time of the
 * submission itself. */
{
char sql[256];
sqlSafef(sql, sizeof(sql), 
    "select max(startUploadTime) from edwFile where submitId=%u", submit->id);
long long latestFileStart = sqlQuickLongLong(conn, sql);
if (latestFileStart != 0)
    return latestFileStart;
return submit->startUploadTime;
}

int edwSubmitCountNewValid(struct edwSubmit *submit, struct sqlConnection *conn)
/* Count the files in the submission that have a matching edwValidFile record, that
 * is the ones that have been validated. */
{
char sql[256];
sqlSafef(sql, sizeof(sql), 
    "select count(*) from edwFile e,edwValidFile v where e.id = v.fileId and e.submitId=%u",
    submit->id);
int validCount = sqlQuickNum(conn, sql);
return validCount;
}

int edwSubmitCountErrors(struct edwSubmit *submit, struct sqlConnection *conn)
/* Count the files in the submission that have a non-empty, non-null errorMessage. */
{
char sql[256];
sqlSafef(sql, sizeof(sql), 
    "select count(*) from edwFile where submitId=%u and errorMessage != '' and errorMessage is not null",
    submit->id);
int errorCount = sqlQuickNum(conn, sql);
return errorCount;
}

boolean edwSubmitIsValidated(struct edwSubmit *submit, struct sqlConnection *conn)
/* Return TRUE if validation has run.  This does not mean that every file passed.
 * It means the number of files with errors plus the number of validated files adds
 * up to the number of new files in the submission, so a decision has been made on
 * each of them. */
{
/* Is this off by one because of the validated.txt being in the submission but never validated? */
int errorCount = edwSubmitCountErrors(submit, conn);
int validCount = edwSubmitCountNewValid(submit, conn);
return errorCount + validCount == submit->newFiles;
}

void edwAddSubmitJob(struct sqlConnection *conn, char *userEmail, char *url, boolean update)
/* Add submission job to table and wake up daemon.  The job's command line is
 * edwSubmit, then -update if update is set, then the url in single quotes, then
 * userEmail. */
{
/* Create command and add it to edwSubmitJob table. */
char command[strlen(url) + strlen(userEmail) + 256];
safef(command, sizeof(command), "edwSubmit %s'%s' %s", (update ? "-update " : ""), url, userEmail);

/* The command always contains quote characters that get escaped in the SQL, and the
 * url may contain more, so size for the worst case of every character doubling. */
char query[2*strlen(command)+128];
sqlSafef(query, sizeof(query), "insert edwSubmitJob (commandLine) values('%s')", command);
sqlUpdate(conn, query);

/* Write sync signal (any string ending with newline) to fifo to wake up daemon. */
FILE *fifo = mustOpen("../userdata/edwSubmit.fifo", "w");
fputc('\n', fifo);
carefulClose(&fifo);
}


struct edwValidFile *edwFindElderReplicates(struct sqlConnection *conn, struct edwValidFile *vf)
/* Find all replicates of same output and format type for experiment that are elder
 * (fileId less than your file Id).  Younger replicates are responsible for taking care 
 * of correlations with older ones.  Sorry younguns, it's like social security. */
{
if (sameString(vf->format, "unknown"))
    return NULL;
char query[256];
sqlSafef(query, sizeof(query), 
    "select * from edwValidFile where id<%d and experiment='%s' and format='%s'"
    " and outputType='%s'"
    , vf->id, vf->experiment, vf->format, vf->outputType);
return edwValidFileLoadByQuery(conn, query);
}

void edwWebHeaderWithPersona(char *title)
/* Print out HTTP header and HTML page through the <BODY> tag, including the style
 * sheets and the scripts for persona, then start the navigation bar.  Note the page
 * title is always "ENCODE Data Warehouse"; the title parameter is not used. */
{
/* Everything in the page head after the title, plus the body tag, one line each. */
static char *restOfHead[] =
    {
    "<meta http-equiv='X-UA-Compatible' content='IE=Edge'>",
    /* Use Stanford ENCODE3 CSS for common look */
    "<link rel='stylesheet' href='/style/encode3.css' type='text/css'>",
    "<link rel='stylesheet' href='/style/encode3Ucsc.css' type='text/css'>",
    /* external link icon (box with arrow) is from FontAwesome (fa-external-link) */
    "<link href='//netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css' rel='stylesheet'>",
    "<script type='text/javascript' SRC='/js/jquery.js'></script>",
    "<script type='text/javascript' SRC='/js/jquery.cookie.js'></script>",
    "<script type='text/javascript' src='https://login.persona.org/include.js'></script>",
    "<script type='text/javascript' src='/js/edwPersona.js'></script>",
    "</HEAD>",
    /* layout with navigation bar */
    "<BODY>\n",
    };
int lineCount = sizeof(restOfHead) / sizeof(restOfHead[0]);

/* HTTP header */
printf("Content-Type:text/html\r\n");
printf("\r\n\r\n");

/* Doctype and title */
puts("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
	      "\"http://www.w3.org/TR/html4/loose.dtd\">");
printf("<HTML><HEAD><TITLE>%s</TITLE>\n", "ENCODE Data Warehouse");

int lineIx;
for (lineIx = 0; lineIx < lineCount; ++lineIx)
    puts(restOfHead[lineIx]);

edwWebNavBarStart();
}


void edwWebFooterWithPersona()
/* Finish off a page: close the navigation bar layout with edwWebNavBarEnd and then
 * call htmlEnd.  Nothing Persona specific is written here. */
{
edwWebNavBarEnd();
htmlEnd();
}


void edwCreateNewUser(char *email)
/* Create new user, checking that user does not already exist.  Opens its own read/write
 * connection, aborts with an error if email is already in the edwUser table, and
 * otherwise inserts a new edwUser row holding just the email. */
{
struct sqlConnection *conn = edwConnectReadWrite();
struct dyString *query = dyStringNew(0);

/* Make sure user is not already in user table. */
sqlDyStringPrintf(query, "select count(*) from edwUser where email = '%s'", email);
if (sqlQuickNum(conn, query->string) > 0)
    errAbort("User %s already exists", email);

/* Do database insert. */
dyStringClear(query);
sqlDyStringPrintf(query, "insert into edwUser (email) values('%s')", email);
sqlUpdate(conn, query->string);

/* Release the query buffer as well as the connection. */
dyStringFree(&query);
sqlDisconnect(&conn);
}

void edwPrintLogOutButton()
/* Print log out button: an HTML button input named "signOut" with id "signout" and the
 * label "sign out".  No trailing newline is written. */
{
printf("<INPUT TYPE=button NAME=\"signOut\" VALUE=\"sign out\" id=\"signout\">");
}

struct dyString *edwFormatDuration(long long seconds)
/* Convert seconds to days/hours/minutes. Return result in a dyString you can free.
 * Only the two most significant units are shown:
 *    a day or more       - "D days, H hours"
 *    an hour or more     - "H hours" followed by ", M minutes" when M is not zero
 *    less than an hour   - "M minutes, S seconds", or just "S seconds" when M is zero */
{
struct dyString *dy = dyStringNew(0);
int days = seconds/(3600*24);
if (days > 0)
    dyStringPrintf(dy, "%d days, ", days);
/* Multiply in long long: as an int product days*3600*24 overflows once days passes 24855. */
seconds -= (long long)days*3600*24;

int hours = seconds/3600;
if (hours > 0 || days > 0)
    dyStringPrintf(dy, "%d hours", hours);
seconds -= (long long)hours*3600;

if (days == 0)
    {
    int minutes = seconds/60;
    if (minutes > 0)
	{
	if (hours > 0)
	   dyStringPrintf(dy, ", ");
	dyStringPrintf(dy, "%d minutes", minutes);
	}

    /* Seconds are only worth showing when there are no hours. */
    if (hours == 0)
	{
	if (minutes > 0)
	   dyStringPrintf(dy, ", ");
	seconds -= (long long)minutes*60;
	dyStringPrintf(dy, "%d seconds", (int)seconds);
	}
    }
return dy;
}

struct edwFile *edwFileInProgress(struct sqlConnection *conn, int submitId)
/* Look up fileIdInTransit for the submission and return the edwFile record with that id.
 * Returns NULL when fileIdInTransit comes back as 0. */
{
char query[256];
sqlSafef(query, sizeof(query), "select fileIdInTransit from edwSubmit where id=%u", submitId);
long long inTransitId = sqlQuickLongLong(conn, query);

struct edwFile *ef = NULL;
if (inTransitId != 0)
    {
    sqlSafef(query, sizeof(query), "select * from edwFile where id=%lld", inTransitId);
    ef = edwFileLoadByQuery(conn, query);
    }
return ef;
}


static void accessDenied()
/* Sleep for five seconds and then abort with "Access denied!".  The pause happens before
 * the error is reported - presumably to slow down repeated guessing. */
{
sleep(5);
errAbort("Access denied!");
}

struct edwScriptRegistry *edwScriptRegistryFromCgi()
/* Get script registry from cgi variables.  Does authentication too.
 * Reads the "user" and "password" cgi variables, loads the edwScriptRegistry row whose
 * name matches user, and compares the row's secretHash with the result of edwMakeSid on
 * the password.  Calls accessDenied (which aborts) if there is no such row or the hashes
 * differ.  Note that the hash is computed over the SQL-escaped form of the password. */
{
struct sqlConnection *conn = edwConnect();
char *user = sqlEscapeString(cgiString("user"));
char *password = sqlEscapeString(cgiString("password"));
char query[256];
sqlSafef(query, sizeof(query), "select * from edwScriptRegistry where name='%s'", user);
struct edwScriptRegistry *reg = edwScriptRegistryLoadByQuery(conn, query);
if (reg == NULL)
    accessDenied();
char key[EDW_SID_SIZE];
edwMakeSid(password, key);
if (!sameString(reg->secretHash, key))
    accessDenied();

/* The escaped copies are allocated by sqlEscapeString, so release them when done. */
freeMem(user);
freeMem(password);
sqlDisconnect(&conn);
return reg;
}

void edwValidFileUpdateDb(struct sqlConnection *conn, struct edwValidFile *el, long long id)
/* Overwrite the edwValidFile row whose id column equals id with the values held in el.
 * The id, licensePlate and fileId columns are not touched, and both singleQaStatus and
 * replicateQaStatus are reset to 0 rather than copied from el. */
{
struct dyString *update = dyStringNew(512);
sqlDyStringPrintf(update, "update edwValidFile set ");

/* Columns left out on purpose: id autoupdates, licensePlate depends on id, and fileId
 * really can't change. */

/* What the file is. */
sqlDyStringPrintf(update, " format='%s',", el->format);
sqlDyStringPrintf(update, " outputType='%s',", el->outputType);
sqlDyStringPrintf(update, " experiment='%s',", el->experiment);
sqlDyStringPrintf(update, " replicate='%s',", el->replicate);
sqlDyStringPrintf(update, " validKey='%s',", el->validKey);
sqlDyStringPrintf(update, " enrichedIn='%s',", el->enrichedIn);
sqlDyStringPrintf(update, " ucscDb='%s',", el->ucscDb);

/* Counts and sample. */
sqlDyStringPrintf(update, " itemCount=%lld,", (long long)el->itemCount);
sqlDyStringPrintf(update, " basesInItems=%lld,", (long long)el->basesInItems);
sqlDyStringPrintf(update, " sampleCount=%lld,", (long long)el->sampleCount);
sqlDyStringPrintf(update, " basesInSample=%lld,", (long long)el->basesInSample);
sqlDyStringPrintf(update, " sampleBed='%s',", el->sampleBed);

/* Mapping statistics. */
sqlDyStringPrintf(update, " mapRatio=%g,", el->mapRatio);
sqlDyStringPrintf(update, " sampleCoverage=%g,", el->sampleCoverage);
sqlDyStringPrintf(update, " depth=%g,", el->depth);

/* QA status always goes back to 0 here. */
sqlDyStringPrintf(update, " singleQaStatus=0,");
sqlDyStringPrintf(update, " replicateQaStatus=0,");

sqlDyStringPrintf(update, " technicalReplicate='%s',", el->technicalReplicate);
sqlDyStringPrintf(update, " pairedEnd='%s',", el->pairedEnd);
sqlDyStringPrintf(update, " qaVersion='%d',", el->qaVersion);
sqlDyStringPrintf(update, " uniqueMapRatio=%g", el->uniqueMapRatio);
#if (EDWVALIDFILE_NUM_COLS != 24)
   #error "Please update this routine with new column"
#endif

sqlDyStringPrintf(update, " where id=%lld\n", (long long)id);
sqlUpdate(conn, update->string);
dyStringFree(&update);
}

static char *findTagOrEmpty(struct cgiParsedVars *tags, char *key)
/* Find key in tags.  If it is not there, or empty, or 'n/a' valued return empty string
 * otherwise return val */
{
char *val = hashFindVal(tags->hash, key);
if (val == NULL || sameString(val, "n/a"))
   return "";
else
   return val;
}

void edwValidFileFieldsFromTags(struct edwValidFile *vf, struct cgiParsedVars *tags)
/* Set the tag-derived string fields of vf to clones of the matching tag values.
 * format and valid_key are looked up directly in the tag hash; the other fields go
 * through findTagOrEmpty.  Whatever the fields pointed to before is overwritten, 
 * not freed. */
{
struct hash *tagHash = tags->hash;

/* Looked up as is. */
vf->format = cloneString(hashFindVal(tagHash, "format"));
vf->validKey = cloneString(hashFindVal(tagHash, "valid_key"));

/* Looked up through findTagOrEmpty. */
vf->outputType = cloneString(findTagOrEmpty(tags, "output_type"));
vf->experiment = cloneString(findTagOrEmpty(tags, "experiment"));
vf->replicate = cloneString(findTagOrEmpty(tags, "replicate"));
vf->enrichedIn = cloneString(findTagOrEmpty(tags, "enriched_in"));
vf->ucscDb = cloneString(findTagOrEmpty(tags, "ucsc_db"));
vf->technicalReplicate = cloneString(findTagOrEmpty(tags, "technical_replicate"));
vf->pairedEnd = cloneString(findTagOrEmpty(tags, "paired_end"));
#if (EDWVALIDFILE_NUM_COLS != 24)
   #error "Please update this routine with new column"
#endif
}

void edwFileResetTags(struct sqlConnection *conn, struct edwFile *ef, char *newTags, 
    boolean revalidate)
/* Store newTags as the tags of ef in the edwFile table.  With revalidate set, also flag
 * the file as being revalidated, delete the rows that refer to it in the fastq and QA
 * tables, and schedule a new QA job.  Without revalidate, just refresh the tag-derived
 * fields of the file's edwValidFile row. */
{
long long fileId = ef->id;
char query[4*1024];

/* Update tags for file in edwFile table. */
sqlSafef(query, sizeof(query), "update edwFile set tags='%s' where id=%lld", newTags, fileId);
sqlUpdate(conn, query);

if (!revalidate)
    {
    /* The revalidation case relies on edwMakeValidFile to update the edwValidFile table.
     * Here we must do it ourselves. */
    struct edwValidFile *vf = edwValidFileFromFileId(conn, fileId);
    struct cgiParsedVars *tags = cgiParsedVarsNew(newTags);
    edwValidFileFieldsFromTags(vf, tags);
    edwValidFileUpdateDb(conn, vf, vf->id);
    cgiParsedVarsFree(&tags);
    edwValidFileFree(&vf);
    return;
    }

/* Update database to let people know format revalidation is in progress. */
sqlSafef(query, sizeof(query), "update edwFile set errorMessage = '%s' where id=%lld",
     "Revalidation in progress.", fileId); 
sqlUpdate(conn, query);

/* Get rid of records referring to file in other validation and qa tables. 
 * First the tables keyed by a single fileId ... */
sqlSafef(query, sizeof(query), "delete from edwFastqFile where fileId=%lld", fileId);
sqlUpdate(conn, query);

/* ... then the pair tables, where the file can be on either side ... */
sqlSafef(query, sizeof(query),
    "delete from edwQaPairSampleOverlap where elderFileId=%lld or youngerFileId=%lld",
    fileId, fileId);
sqlUpdate(conn, query);
sqlSafef(query, sizeof(query),
    "delete from edwQaPairCorrelation where elderFileId=%lld or youngerFileId=%lld",
    fileId, fileId);
sqlUpdate(conn, query);

/* ... and the remaining QA tables. */
sqlSafef(query, sizeof(query), "delete from edwQaEnrich where fileId=%lld", fileId);
sqlUpdate(conn, query);
sqlSafef(query, sizeof(query), "delete from edwQaContam where fileId=%lld", fileId);
sqlUpdate(conn, query);
sqlSafef(query, sizeof(query), "delete from edwQaRepeat where fileId=%lld", fileId);
sqlUpdate(conn, query);
sqlSafef(query, sizeof(query), 
    "delete from edwQaPairedEndFastq where fileId1=%lld or fileId2=%lld",
    fileId, fileId);
sqlUpdate(conn, query);

/* schedule validator */
edwAddQaJob(conn, fileId);
}

static void scanSam(char *samIn, FILE *f, struct genomeRangeTree *grt, long long *retHit, 
    long long *retMiss,  long long *retTotalBasesInHits, long long *retUniqueHitCount)
/* Scan through sam file doing several things:counting how many reads hit and how many 
 * miss target during mapping phase, copying those that hit to a little bed file, and 
 * also defining regions covered in a genomeRangeTree. */
{
samfile_t *sf = samopen(samIn, "r", NULL);
bam_hdr_t *bamHeader = sam_hdr_read(sf);
bam1_t one;
ZeroVar(&one);
int err;
long long hit = 0, miss = 0, unique = 0, totalBasesInHits = 0;
while ((err = sam_read1(sf, bamHeader, &one)) >= 0)
    {
    int32_t tid = one.core.tid;
    if (tid < 0)
	{
	++miss;
        continue;
	}
    ++hit;
    if (one.core.qual > edwMinMapQual)
        ++unique;
    char *chrom = bamHeader->target_name[tid];
    // Approximate here... can do better if parse cigar.
    int start = one.core.pos;
    int size = one.core.l_qseq;
    int end = start + size;	
    totalBasesInHits += size;
    boolean isRc = (one.core.flag & BAM_FREVERSE);
    char strand = (isRc ? '-' : '+');
    if (start < 0) start=0;
    if (f != NULL)
	fprintf(f, "%s\t%d\t%d\t.\t0\t%c\n", chrom, start, end, strand);
    genomeRangeTreeAdd(grt, chrom, start, end);
    }
if (err < 0 && err != -1)
    errnoAbort("samread err %d", err);
samclose(sf);
*retHit = hit;
*retMiss = miss;
*retTotalBasesInHits = totalBasesInHits;
*retUniqueHitCount = unique;
}

void edwReserveTempFile(char *path)
/* Turn path, which must end in XXXXXX, into the name of a newly created empty file by
 * calling mkstemp on it.  The file is closed again right away, so all the caller gets is
 * the reserved name.  Aborts if the file can't be created. */
{
int tempFd = mkstemp(path);
if (tempFd == -1)
     errnoAbort("Couldn't create temp file %s", path);

/* Only the name is wanted, not the open descriptor. */
mustCloseFd(&tempFd);
}

void edwBwaIndexPath(struct edwAssembly *assembly, char indexPath[PATH_LEN])
/* Fill in path to BWA index.  The result has the form
 * <edwValDataDir><ucscDb>/bwaData/<ucscDb>.fa where ucscDb comes from assembly. */
{
safef(indexPath, PATH_LEN, "%s%s/bwaData/%s.fa", 
    edwValDataDir, assembly->ucscDb, assembly->ucscDb);
}

void edwAsPath(char *format, char path[PATH_LEN])
/* Convert something like "narrowPeak" in format to the full path of its .as file,
 * which has the form <edwValDataDir>as/<format>.as.  The result is written into path. */
{
safef(path, PATH_LEN, "%sas/%s.as", edwValDataDir, format);
}

void edwAlignFastqMakeBed(struct edwFile *ef, struct edwAssembly *assembly,
    char *fastqPath, struct edwValidFile *vf, FILE *bedF,
    double *retMapRatio,  double *retDepth,  double *retSampleCoverage, 
    double *retUniqueMapRatio)
/* Take a sample fastq and run bwa on it, and then convert that file to a bed. 
 * bedF and all the ret parameters can be NULL. 
 *    ef - not referenced in this routine
 *    assembly - supplies the BWA index location and the baseCount used for scaling
 *    fastqPath - fastq file to align
 *    vf - its itemCount and sampleCount are read, but only when retDepth is not NULL
 *    bedF - if not NULL gets one bed line for each read that hits
 *    retMapRatio - hits divided by all reads
 *    retDepth - bases in hits over assembly size, scaled up by itemCount/sampleCount
 *    retSampleCoverage - bases covered by the hits divided by assembly size
 *    retUniqueMapRatio - unique hits divided by all reads */
{
/* Hmm, tried doing this with Mark's pipeline code, but somehow it would be flaky the
 * second time it was run in same app.  Resorting therefore to temp files. */
char genoFile[PATH_LEN];
edwBwaIndexPath(assembly, genoFile);

char cmd[3*PATH_LEN];
char *saiName = cloneString(rTempName(edwTempDir(), "edwSample1", ".sai"));
safef(cmd, sizeof(cmd), "bwa aln -t 3 %s %s > %s", genoFile, fastqPath, saiName);
mustSystem(cmd);

char *samName = cloneString(rTempName(edwTempDir(), "ewdSample1", ".sam"));
safef(cmd, sizeof(cmd), "bwa samse %s %s %s > %s", genoFile, saiName, fastqPath, samName);
mustSystem(cmd);
remove(saiName);

/* Scan sam file to calculate vf->mapRatio, vf->sampleCoverage and vf->depth. 
 * and also to produce little bed file for enrichment step. */
struct genomeRangeTree *grt = genomeRangeTreeNew();
long long hitCount=0, missCount=0, uniqueHitCount=0, totalBasesInHits=0;
scanSam(samName, bedF, grt, &hitCount, &missCount, &totalBasesInHits, &uniqueHitCount);
verbose(1, "hitCount=%lld, missCount=%lld, totalBasesInHits=%lld, grt=%p\n", 
    hitCount, missCount, totalBasesInHits, grt);
if (retMapRatio)
    *retMapRatio = (double)hitCount/(hitCount+missCount);
if (retDepth)
    *retDepth = (double)totalBasesInHits/assembly->baseCount 
	    * (double)vf->itemCount/vf->sampleCount;
long long basesHitBySample = genomeRangeTreeSumRanges(grt);
if (retSampleCoverage)
    *retSampleCoverage = (double)basesHitBySample/assembly->baseCount;
if (retUniqueMapRatio)
    *retUniqueMapRatio = (double)uniqueHitCount/(hitCount+missCount);
genomeRangeTreeFree(&grt);
remove(samName);

/* The temp file names were cloned, so free them now the files are gone. */
freez(&saiName);
freez(&samName);
}

struct edwFastqFile *edwFastqFileFromFileId(struct sqlConnection *conn, long long fileId)
/* Get edwFastqFile with given fileId or NULL if none such.  The result is whatever 
 * edwFastqFileLoadByQuery returns for a select on the fileId column. */
{
char query[256];
sqlSafef(query, sizeof(query), "select * from edwFastqFile where fileId=%lld", fileId);
return edwFastqFileLoadByQuery(conn, query);
}

static int mustMkstemp(char *template)
/* Call mkstemp to make a temp file with name based on template, which is altered
 * by the call to be the file name.   Return unix file descriptor, which is left open
 * for the caller to close.  Aborts if mkstemp fails. */
{
int fd = mkstemp(template);
if (fd == -1)
    errnoAbort("Couldn't make temp file based on %s", template);
return fd;
}

void edwMakeTempFastqSample(char *source, int size, char dest[PATH_LEN])
/* Run fastqStatsAndSubsample to put a sample of size records from source into a newly
 * made temp file.  dest is filled in with the name of that temp file. */
{
/* Reserve a unique name in the temp dir.  mustMkstemp rewrites the XXXXXX in dest, and
 * the descriptor it returns is not needed past that. */
safef(dest, PATH_LEN, "%sedwSampleFastqXXXXXX", edwTempDir());
int tempFd = mustMkstemp(dest);
close(tempFd);

/* The argument between source and dest is pointed at /dev/null. */
char cmd[3*PATH_LEN];
safef(cmd, sizeof(cmd), 
    "fastqStatsAndSubsample %s /dev/null %s -smallOk -sampleSize=%d", source, dest, size);
verbose(2, "command: %s\n", cmd);
mustSystem(cmd);
}

void edwMakeFastqStatsAndSample(struct sqlConnection *conn, long long fileId)
/* Run fastqStatsAndSubsample, and put results into edwFastqFile table. 
 * Does nothing if the file already has an edwFastqFile record.  Otherwise the stats are
 * written to a temp ra file, which is removed again after loading, and the sample goes
 * into today's temp dir, where it is gzipped and its name saved in sampleFileName. */
{
struct edwFastqFile *fqf = edwFastqFileFromFileId(conn, fileId);
if (fqf == NULL)
    {
    /* Reserve names for the stats and sample files. */
    char *path = edwPathForFileId(conn, fileId);
    char statsFile[PATH_LEN], sampleFile[PATH_LEN];
    safef(statsFile, PATH_LEN, "%sedwFastqStatsXXXXXX", edwTempDir());
    edwReserveTempFile(statsFile);
    char dayTempDir[PATH_LEN];
    safef(sampleFile, PATH_LEN, "%sedwFastqSampleXXXXXX", edwTempDirForToday(dayTempDir));
    edwReserveTempFile(sampleFile);

    /* Make stats and sample, then compress the sample. */
    char command[3*PATH_LEN];
    safef(command, sizeof(command), "fastqStatsAndSubsample -sampleSize=%d -smallOk %s %s %s",
	edwSampleTargetSize, path, statsFile, sampleFile);
    mustSystem(command);
    safef(command, sizeof(command), "gzip %s", sampleFile);
    mustSystem(command);

    /* Follow the rename gzip did.  sampleFile may already be close to PATH_LEN long,
     * so append with a bounds check rather than a bare strcat. */
    safecat(sampleFile, sizeof(sampleFile), ".gz");

    /* Load stats, add in the bits only we know, and save to database. */
    fqf = edwFastqFileOneFromRa(statsFile);
    fqf->fileId = fileId;
    fqf->sampleFileName = cloneString(sampleFile);
    edwFastqFileSaveToDb(conn, fqf, "edwFastqFile", 1024);
    remove(statsFile);
    freez(&path);
    }
edwFastqFileFree(&fqf);
}

struct edwQaWigSpot *edwMakeWigSpot(struct sqlConnection *conn, long long wigId, long long spotId)
/* Create a new edwQaWigSpot record in database based on comparing wig file to spot file
 * (specified by id's in edwFile table).  Any edwQaWigSpot row already there for the pair
 * is deleted first.  Aborts if wigId is not a bigWig, or spotId is not a narrowPeak, 
 * broadPeak or bigBed.  Returns the record that was saved, with its id filled in. 
 * Free it with edwQaWigSpotFree when done. */
{
/* Get valid files from fileIds and check format */
struct edwValidFile *wigVf = edwValidFileFromFileId(conn, wigId);
if (!sameString(wigVf->format, "bigWig"))
    errAbort("%lld is not a bigWig file, is %s instead", wigId, wigVf->format);
struct edwValidFile *spotVf = edwValidFileFromFileId(conn, spotId);
if (!sameString(spotVf->format, "narrowPeak") && !sameString(spotVf->format, "broadPeak") &&
    !sameString(spotVf->format, "bigBed"))
    errAbort("%lld is not a recognized peak type format, is %s", spotId, spotVf->format);

/* Remove any old record for files. */
char query[256];
sqlSafef(query, sizeof(query), 
    "delete from edwQaWigSpot where wigId=%lld and spotId=%lld", wigId, spotId);
sqlUpdate(conn, query);

/* Figure out file names */
char *wigPath = edwPathForFileId(conn, wigId);
char *spotPath = edwPathForFileId(conn, spotId);
char statsFile[PATH_LEN];
safef(statsFile, PATH_LEN, "%sedwQaWigSpotXXXXXX", edwTempDir());
edwReserveTempFile(statsFile);
char peakFile[PATH_LEN];
safef(peakFile, PATH_LEN, "%sedwQaWigSpotXXXXXX", edwTempDir());
edwReserveTempFile(peakFile);

/* Convert peak input into a temporary bed4 file */
char command[3*PATH_LEN];
safef(command, sizeof(command), "bigBedToBed %s stdout | cut -f 1-4 > %s", spotPath, peakFile);
mustSystem(command);

/* Call on bigWigAverageOverBed on peaks */
safef(command, sizeof(command), 
    "bigWigAverageOverBed %s %s /dev/null -stats=%s", wigPath, peakFile, statsFile);
mustSystem(command);
remove(peakFile);

/* Parse out ra file,  save it to database, and remove ra file. */
struct edwQaWigSpot *spot = edwQaWigSpotOneFromRa(statsFile);
spot->wigId = wigId;
spot->spotId = spotId;
edwQaWigSpotSaveToDb(conn, spot, "edwQaWigSpot", 1024);
spot->id = sqlLastAutoId(conn);
remove(statsFile);

/* Clean up and go home.  The spot record itself is not freed here since it is the
 * return value; freeing it first would mean always returning NULL. */
edwValidFileFree(&wigVf);
edwValidFileFree(&spotVf);
freez(&wigPath);
freez(&spotPath);
return spot;
}

struct edwQaWigSpot *edwQaWigSpotFor(struct sqlConnection *conn, 
    long long wigFileId, long long spotFileId) 
/* Return wigSpot relationship if any we have in database for these two files. 
 * The result is whatever edwQaWigSpotLoadByQuery returns for a select on the wigId
 * and spotId columns. */
{
char query[256];
sqlSafef(query, sizeof(query), 
    "select * from edwQaWigSpot where wigId=%lld and spotId=%lld", wigFileId, spotFileId);
return edwQaWigSpotLoadByQuery(conn, query);
}




struct edwBamFile *edwBamFileFromFileId(struct sqlConnection *conn, long long fileId)
/* Get edwBamFile with given fileId or NULL if none such.  The result is whatever
 * edwBamFileLoadByQuery returns for a select on the fileId column. */
{
char query[256];
sqlSafef(query, sizeof(query), "select * from edwBamFile where fileId=%lld", fileId);
return edwBamFileLoadByQuery(conn, query);
}

struct edwBamFile * edwMakeBamStatsAndSample(struct sqlConnection *conn, long long fileId, 
    char sampleBed[PATH_LEN])
/* Run edwBamStats and put results into edwBamFile table, and also a sample bed.
 * The sampleBed will be filled in by this routine with the name of the gzipped sample,
 * which lives in today's temp dir.  Any edwBamFile row already there for fileId is 
 * deleted first.  Returns the record that was saved. */
{
/* Remove any old record for file. */
char query[256];
sqlSafef(query, sizeof(query), "delete from edwBamFile where fileId=%lld", fileId);
sqlUpdate(conn, query);

/* Figure out file names */
char *path = edwPathForFileId(conn, fileId);
char statsFile[PATH_LEN];
safef(statsFile, PATH_LEN, "%sedwBamStatsXXXXXX", edwTempDir());
edwReserveTempFile(statsFile);
char dayTempDir[PATH_LEN];
safef(sampleBed, PATH_LEN, "%sedwBamSampleXXXXXX", edwTempDirForToday(dayTempDir));
edwReserveTempFile(sampleBed);

/* Make system call to make ra and bed, and then another system call to zip bed.*/
char command[3*PATH_LEN];
safef(command, sizeof(command), "edwBamStats -sampleBed=%s -sampleBedSize=%d %s %s",
    sampleBed, edwSampleTargetSize, path, statsFile);
mustSystem(command);
safef(command, sizeof(command), "gzip %s", sampleBed);
mustSystem(command);

/* Follow the rename gzip did.  sampleBed is the caller's PATH_LEN buffer and may be
 * nearly full already, so append with a bounds check rather than a bare strcat. */
safecat(sampleBed, PATH_LEN, ".gz");

/* Parse out ra file,  save it to database, and remove ra file. */
struct edwBamFile *ebf = edwBamFileOneFromRa(statsFile);
ebf->fileId = fileId;
edwBamFileSaveToDb(conn, ebf, "edwBamFile", 1024);
remove(statsFile);

/* Clean up and go home. */
freez(&path);
return ebf;
}


char *edwOppositePairedEndString(char *end)
/* Map "1" to "2" and "2" to "1".  The result is a string constant, don't free it.
 * Anything else for end is an error and aborts. */
{
char *opposite = NULL;
if (sameString(end, "1"))
    opposite = "2";
else if (sameString(end, "2"))
    opposite = "1";
else
    errAbort("Expecting 1 or 2, got %s in oppositeEnd", end);
return opposite;
}

struct edwValidFile *edwOppositePairedEnd(struct sqlConnection *conn, struct edwValidFile *vf)
/* Given one file of a paired end set of fastqs, find its mate.  The mate is the valid
 * file with the same experiment, outputType, replicate, technicalReplicate and itemCount
 * as vf but the other pairedEnd value, and an empty deprecated field.
 * Returns NULL if there is no such file, and aborts if there is more than one. */
{
char *mateEnd = edwOppositePairedEndString(vf->pairedEnd);
char query[1024];
sqlSafef(query, sizeof(query), 
    "select edwValidFile.* from edwValidFile join edwFile on edwValidFile.fileId=edwFile.id"
    " where experiment='%s' and outputType='%s' and replicate='%s' "
    " and technicalReplicate='%s' and pairedEnd='%s' and itemCount=%lld and deprecated=''"
    , vf->experiment, vf->outputType, vf->replicate, vf->technicalReplicate, mateEnd
    , vf->itemCount);
struct edwValidFile *mate = edwValidFileLoadByQuery(conn, query);

/* A mate should be the only one of its kind. */
if (mate != NULL && mate->next != NULL)
    errAbort("Multiple results from pairedEnd query %s", query);
return mate;
}

struct edwQaPairedEndFastq *edwQaPairedEndFastqFromVfs(struct sqlConnection *conn,
    struct edwValidFile *vfA, struct edwValidFile *vfB,
    struct edwValidFile **retVf1,  struct edwValidFile **retVf2)
/* Return pair record if any for the two fastq files. */
{
/* Sort the two ends. */
struct edwValidFile *vf1 = NULL, *vf2 = NULL;
if (sameString(vfA->pairedEnd, "1"))
    {
    vf1 = vfA;
    vf2 = vfB;
    }
else
    {
    vf1 = vfB;
    vf2 = vfA;
    }
if (retVf1 != NULL)
   *retVf1 = vf1;
if (retVf2 != NULL)
   *retVf2 = vf2;

/* See if we already have a record for these two. */
/* Return record for these two. */
char query[1024];
sqlSafef(query, sizeof(query), 
    "select * from edwQaPairedEndFastq where fileId1=%u and fileId2=%u",
    vf1->fileId, vf2->fileId);
return edwQaPairedEndFastqLoadByQuery(conn, query);
}

FILE *edwPopen(char *command, char *mode)
/* Wrapper around popen that aborts with an error message if popen returns NULL.
 * Returns the open stream. */
{
/* Mark's pipeline library would probably be the better choice, given the bugs with
 * popen(...,"r") and programs that also use stdin, but it is ever so much harder
 * to use. */
FILE *stream = popen(command,  mode);
if (stream == NULL)
    errnoAbort("Can't popen(%s, %s)", command, mode);
return stream;
}

boolean edwOneLineSystemAttempt(char *command, char *line, int maxLineSize)
/* Run command through popen and read the first line of its output into line, which has
 * room for maxLineSize bytes.  Returns TRUE if a line was read.  Returns FALSE if the
 * command gave no output, and also, after a warning, if popen itself failed. */
{
FILE *f = popen(command, "r");
if (f == NULL)
    {
    errnoWarn("failed popen %s", command);
    return FALSE;
    }
boolean gotLine = (fgets(line, maxLineSize, f) != NULL);
pclose(f);
return gotLine;
}

void edwOneLineSystemResult(char *command, char *line, int maxLineSize)
/* Execute system command and return one line result from it in line, which has room for
 * maxLineSize bytes.  Aborts if edwOneLineSystemAttempt can't get a line. */
{
if (!edwOneLineSystemAttempt(command, line, maxLineSize) )
    errAbort("Can't get line from %s", command);
}

void edwMd5File(char *fileName, char md5Hex[33])
/* Get md5 sum of file by running the md5sum utility on it, and put the result, as 32
 * characters plus a terminating zero, in md5Hex.  
 * This is about 30% faster than the library routine md5HexForFile.  However it goes 
 * through popen(), which has weird interactions with stdin, so it doesn't belong in a
 * general purpose library.  The environment inside edw is controlled enough that it 
 * should be ok. */
{
char cmd[PATH_LEN + 16];
char output[2*PATH_LEN];
safef(cmd, sizeof(cmd), "md5sum %s", fileName);
edwOneLineSystemResult(cmd, output, sizeof(output));

/* The first 32 characters of the output line are taken as the checksum. */
memcpy(md5Hex, output, 32);
md5Hex[32] = 0;
}


void edwPokeFifo(char *fifoName)
/* Wake up the daemon listening on fifoName by writing a newline to it.  The fifo is
 * looked for in each of the known userdata directories in turn, and only the first one
 * found gets written to.  Nothing happens if it is in none of them. */
{
/* Where the fifo lives varies from machine to machine; the constraint is that it has 
 * to be somewhere the web CGIs can poke it. */
char *userDataDirs[] = {"/data/www/userdata/", "/usr/local/apache/userdata/"};
char *wakeUp = "\n";
int dirIx;
for (dirIx = 0; dirIx < ArraySize(userDataDirs); ++dirIx)
    {
    char fifoPath[PATH_LEN];
    safef(fifoPath, sizeof(fifoPath), "%s%s", userDataDirs[dirIx], fifoName);
    if (!fileExists(fifoPath))
        continue;
    writeGulp(fifoPath, wakeUp, strlen(wakeUp));
    return;
    }
}

/***/
/* Shared functions for EDW web CGI's.
   Mostly wrappers for javascript tweaks */

void edwWebAutoRefresh(int msec)
/* Refresh page after msec.  Use 0 to cancel autorefresh.  Negative msec is ignored.
 * This writes script elements to stdout.  The refresh works by submitting the form on
 * the page, and keeps the scroll position across the reload in the edwWeb.scrollTop
 * cookie.  The scripts rely on jQuery and its cookie plugin. */
{
if (msec > 0)
    {
    // set timeout to refresh page (saving/restoring scroll position via cookie)
    printf("<script type='text/javascript'>var edwRefresh = setTimeout(function() { $.cookie('edwWeb.scrollTop', $(window).scrollTop()); $('form').submit(); }, %d);</script>", msec);
    puts("<script type='text/javascript'>$(document).ready(function() {$(document).scrollTop($.cookie('edwWeb.scrollTop'))});</script>");

    // disable autorefresh when user is changing page settings
    puts("<script type='text/javascript'>$('form').click(function() {clearTimeout(edwRefresh); $.cookie('edwWeb.scrollTop', null);});</script>");
    }
else if (msec == 0)
    {
    // The opening script tag is needed here too, or the javascript ends up as page text
    puts("<script type='text/javascript'>clearTimeout(edwRefresh);</script>");
    }

// Negative msec ignored
}

/***/
/* Navigation bar */

void edwWebNavBarStart()
/* Layout navigation bar.  Opens the 'layout' div, writes the 'navbar' div with the
 * contents of /inc/edwNavBar.html inside it, and then opens the 'content' div plus one
 * more div inside that.  Three divs are left open when this returns, the same number
 * that edwWebNavBarEnd closes. */
{
puts("<div id='layout'>");
puts("<div id='navbar' class='navbar navbar-fixed-top navbar-inverse'>");
webIncludeFile("/inc/edwNavBar.html");
puts("</div>");
puts("<div id='content' class='container'><div>");
}

void edwWebNavBarEnd()
/* Close layout after navigation bar.  Writes three closing div tags, matching the three
 * divs that edwWebNavBarStart leaves open. */
{
puts("</div></div></div>");
}

void edwWebBrowseMenuItem(boolean on)
/* Toggle visibility of 'Browse submissions' link on navigation menu.  Writes a script 
 * element that calls jQuery show() on the #edw-browse element if on is set, and
 * hide() otherwise. */
{
printf("<script type='text/javascript'>$('#edw-browse').%s();</script>", on ? "show" : "hide");
}

void edwWebSubmitMenuItem(boolean on)
/* Toggle visibility of 'Submit data' link on navigation menu.  Writes a script element 
 * that calls jQuery show() on the #edw-submit element if on is set, and hide() 
 * otherwise. */
{
printf("<script type='text/javascript'>$('#edw-submit').%s();</script>", on ? "show" : "hide");
}

