/* joinTwoInfo - Look at two columns in two tables in mySQL and see how joinable they look.. */

/* Copyright (C) 2013 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "localmem.h"
#include "jksql.h"


void usage()
/* Explain usage and exit. */
{
errAbort(
  "joinTwoInfo - Look at two columns in two tables in mySQL and see how joinable they look.\n"
  "usage:\n"
  "   joinTwoInfo db1.table1.column1 db2.table2.column2\n"
  "options:\n"
  "   -xxx=XXX\n"
  );
}

static struct optionSpec options[] = {
   {NULL, 0},
};

struct slName *getColumn(char *db, char *table, char *column, struct lm *lm)
/* Get list of all items in column allocated in lm. */
{
struct slName *list = NULL, *el;
struct sqlConnection *conn = sqlConnect(db);
char query[512];
sqlSafef(query, sizeof(query), "select %s from %s", column, table);
struct sqlResult *sr = sqlGetResult(conn, query);
char **row;
while ((row = sqlNextRow(sr)) != NULL)
    {
    el = lmSlName(lm, row[0]);
    slAddHead(&list, el);
    }
sqlFreeResult(&sr);
struct slName *lmSlName(struct lm *lm, char *name);
sqlDisconnect(&conn);
slReverse(&list);
return list;
}

struct hash *uniqHash(struct slName *list)
/* Return hash of unique items. */
{
struct hash *hash = hashNew(0);
struct slName *el;
for (el = list; el != NULL; el = el->next)
    hashStore(hash, el->name);
return hash;
}

int countInHash(struct slName *list, struct hash *hash)
/* Count number of items in list that are also in hash */
{
int count = 0;
struct slName *el;
for (el = list; el != NULL; el = el->next)
    {
    if (hashLookup(hash, el->name))
        ++count;
    }
return count;
}

int countUniqInHash(struct slName *list, struct hash *hash)
/* Count number of items in list that are also in hash */
{
struct hash *uniqHash = hashNew(0);
struct slName *el;
for (el = list; el != NULL; el = el->next)
    {
    if (hashLookup(hash, el->name))
	{
	hashStore(uniqHash, el->name);
	}
    }
int count = uniqHash->elCount;
hashFree(&uniqHash);
return count;
}


void joinTwoInfo(char *spec1, char *spec2)
/* joinTwoInfo - Look at two columns in two tables in mySQL and see how joinable they look.. */
{
char *s1[4], *s2[4];
struct lm *lm = lmInit(0);
int partCount = chopByChar(lmCloneString(lm, spec1), '.', s1, ArraySize(s1));
if (partCount != 3)
    usage();
partCount = chopByChar(lmCloneString(lm, spec2), '.', s2, ArraySize(s2));
if (partCount != 3)
    usage();

struct slName *list1 = getColumn(s1[0], s1[1], s1[2], lm);
struct hash *uniq1 = uniqHash(list1);
struct slName *list2 = getColumn(s2[0], s2[1], s2[2], lm);
struct hash *uniq2 = uniqHash(list2);
int countOneInTwo = countInHash(list1, uniq2);
int countTwoInOne = countInHash(list2, uniq1);
int countUniqOneInTwo = countUniqInHash(list1, uniq2);
int countUniqTwoInOne = countUniqInHash(list2, uniq1);
printf("%s: %d items, %d unique items, %d items (%d unique) in %s\n",
	spec1, slCount(list1), uniq1->elCount, countOneInTwo, countUniqOneInTwo, spec2);
printf("%s: %d items, %d unique items, %d items (%d unique) in %s\n",
	spec2, slCount(list2), uniq2->elCount, countTwoInOne, countUniqTwoInOne, spec1);

lmCleanup(&lm);
}

int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, options);
if (argc != 3)
    usage();
joinTwoInfo(argv[1], argv[2]);
return 0;
}
