/*
 * library.cpp
 * This file is part of isoLasso library; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; version 2.1 of the License.
 *  Created on: 2011-10-14
 *      Author: daniel
 */
#include "library.h"
#include <vector>
#include <string.h>
#include <sstream>
#include <stdio.h>
#include <math.h>
#include <algorithm>
#include <iostream>

using namespace std;

void writeoneinstance(FILE* fid, NewInstance &oneins, string index, double expcutoff, long mininsrcount){
  long max_predsub;
  if (oneins.PredSubInst.size() != 0) max_predsub = *max_element(oneins.PredSubInst.begin(), oneins.PredSubInst.end());
  else max_predsub = -1;
  for (long isub = 0; isub <= max_predsub; ++isub){
      //clustering
     
      vector<long> equaltoisub; //store all index belonging to this subinstance
      vector<long> imap(oneins.Pred[0].size(), 0); //mark which segment is used
      vector<double> currentpredexp; //store the expression level of all isoforms belonging to this subinstance
      for (long j = 0; j < oneins.PredSubInst.size(); ++j)
        if (oneins.PredSubInst[j] == isub){
          currentpredexp.push_back(oneins.PredExp[j]);
          equaltoisub.push_back(j);
          for (long k = 0; k < imap.size(); ++k) imap[k] += oneins.Pred[j][k];
        }
      vector<long> isvalid(oneins.SGTypes.size(),0);
      long ncount = 0;
      for (long j = 0; j < oneins.SGTypes.size(); ++j)
        for (long k = 0; k < oneins.SGTypes[0].size(); ++k){
          if (oneins.SGTypes[j][k] * imap[k] > 0){
            isvalid[j] = 1;
            ncount += oneins.SGCounts[j];
            break;
          }
      }
      if (ncount < mininsrcount) continue;
      for (long j = 0; j < equaltoisub.size(); ++j){
        if (currentpredexp[j] < expcutoff) continue;
        //fprintf(fid, "%ld\tPred%ld_%ld_%ld\t%s\t+\t", index, index, isub + 1, j + 1, oneins.chromosome.c_str());
        int cpredid=equaltoisub[j];
        fprintf(fid, "%s\t%s\t%s\t%c\t",index.c_str(), oneins.PredID[cpredid].c_str(),oneins.chromosome.c_str(),oneins.PredDir[cpredid]);
        vector<long> startpos, endpos;
        for (long k = 0; k < oneins.Pred[equaltoisub[j]].size(); ++k)
          if (oneins.Pred[equaltoisub[j]][k] > 0){
            startpos.push_back(oneins.exonBoundary[k][0] - 1);
            endpos.push_back(oneins.exonBoundary[k][1]);
          }
        fprintf(fid, "%ld\t%ld\t", startpos[0], endpos[endpos.size() - 1]);
        for (long k = 0; k < startpos.size() - 1; ++k) fprintf(fid, "%ld,", startpos[k]);
        fprintf(fid, "%ld\t", startpos[startpos.size()-1]);
        for (long k = 0; k < endpos.size() - 1; ++k) fprintf(fid, "%ld,", endpos[k]);
        fprintf(fid, "%ld\t", endpos[endpos.size()-1]);
        double rpkm;
        if (oneins.PETypes.size() != 0)
          rpkm = currentpredexp[j] * pow(double(10), 9) / oneins.totalReadCnt;
        else
          rpkm = currentpredexp[j] * pow(double(10), 9) / oneins.totalReadCnt;
        fprintf(fid, "%lf\n", rpkm);
      }
  }
}


vector<string> parseVaragin(string varagin) {

  istringstream iss(varagin);
  vector<string> parameters;
  do
  {
      string sub;
      iss >> sub;
      parameters.push_back(sub);
  } while (iss);
  return parameters;
}

bool verifyFieldName(char *fieldname, char *expect_name, int level /*1 means error, 2 means warning*/) {
  if (strcmp(fieldname, expect_name) != 0) {
    if (level == 1) printf("Error: Invalid Field Name %s, it should be %s\n", fieldname, expect_name);
    else if (level == 2) printf("Warning: Invalid Field Name %s, it should be %s\n", fieldname, expect_name);
    return false;
  }
  else return true;
}

NewInstance readoneinstance(FILE *fid, long &readcnt, int &success) {
  NewInstance thisitem;
  success = 1;
  readcnt = 0;
  char stvalue[MAXSTRLEN], stvalue2[MAXSTRLEN];
  long value, value2, value3;
  double fvalue;
  char cvalue;

  //read first line
  fscanf(fid, "%s\t%s\n", &stvalue, &stvalue2);
        if(feof(fid)){success=-1;return thisitem;}
  if (!verifyFieldName(stvalue, "Instance", 1)) {
                success = 0;  return thisitem;
  }
  thisitem.id = string(stvalue2);
  //second line
  fscanf(fid, "%s\t%s\t%ld\t%ld\t%c\n", &stvalue, &stvalue2, &value, &value2,&cvalue);
  verifyFieldName(stvalue, "Boundary", 2); //This should return instead of warning?
  thisitem.chromosome = string(stvalue2);
  switch(cvalue){
    case '+': thisitem.dir=1;break;
    case '-': thisitem.dir=-1;break;
    default: thisitem.dir=0;
  }
  thisitem.boundary.push_back(value);
  thisitem.boundary.push_back(value2);

  fscanf(fid, "%s\t%ld\n", &stvalue, &value);
  verifyFieldName(stvalue, "ReadLen", 2); //This should return instead of warning?
  thisitem.readLen = value;
  thisitem.peReadLen = value;

  fscanf(fid, "%s\t%ld\n", &stvalue, &value);
  verifyFieldName(stvalue, "Segs", 2); //This should return instead of warning?
  thisitem.nExons = value;

  for (int i = 0; i < thisitem.nExons; ++i) {
    fscanf(fid, "%ld\t%ld\t", &value, &value2);
    vector<long> exonBoundary;
    exonBoundary.push_back(value);
    exonBoundary.push_back(value2);
    thisitem.exonBoundary.push_back(exonBoundary);
    fscanf(fid, "%ld\t", &value);
    thisitem.exonLen.push_back(value);
    fscanf(fid, "%lf", &fvalue); //skip the 4th value...
    fgets(stvalue, MAXSTRLEN, fid);
    istringstream iss(stvalue);

    vector<double> exonStat;
    double sub;
    while (iss >> sub)
    {
        exonStat.push_back(sub);
    };
    thisitem.exonStat.push_back(exonStat);
  }

  fscanf(fid, "%s\t%ld\n", &stvalue, &value);
  if (strcmp(stvalue, "Refs") == 0){
    long nrefs = value;
  char refdir;
  string refid;
    for (int i = 0; i < nrefs; ++i) {
      vector<long> refs;
      for (int j = 0; j < thisitem.nExons; ++j) {
        fscanf(fid, "%ld", &value);
        refs.push_back(value);
      }
    fscanf(fid,"\t%c\t%s",&refdir, &stvalue);
    refid=string(stvalue);
      fgets(stvalue2, MAXSTRLEN, fid); //Read the \n in the end
      thisitem.Refs.push_back(refs);
    thisitem.RefDirs.push_back(refdir);
    thisitem.RefIDs.push_back(refid);
    }
    fscanf(fid, "%s\t%ld\n", &stvalue, &value);
  }

  verifyFieldName(stvalue, "Reads", 2);
  thisitem.readCnt = value;
  readcnt = value;


  fscanf(fid, "%s\t%ld\n", &stvalue, &value);
  verifyFieldName(stvalue, "SGTypes", 2);
  long nSGTypes = value;
  bool isDir = false;
  for (int i = 0; i < nSGTypes; ++i) {
    fgets(stvalue, MAXSTRLEN, fid);
    istringstream iss(stvalue);
    vector<long> tSGType;
    for (int j = 0; j < thisitem.nExons; ++j) {
      iss >> value;
      tSGType.push_back(value);
    }
    thisitem.SGTypes.push_back(tSGType);
    iss >> value;
    thisitem.SGCounts.push_back(value);
  while (iss >> value) {
      isDir = true;
      thisitem.SGDirs.push_back(value);
    }

  }
  long nPETypes = 0;
  fscanf(fid, "%s\t%ld%ld\n", &stvalue, &value, &nPETypes);
  verifyFieldName(stvalue, "PETypes", 2);
  thisitem.peReadCnt = value;
  if (nPETypes > 0) {
    zeros(thisitem.PETypes, nPETypes, 3);
    thisitem.PETypesDistance.assign(nPETypes, vector<long>());
    thisitem.PETypesDistanceCount.assign(nPETypes, vector<long>());
    for (long i = 0; i < nPETypes; ++i){
      fscanf(fid, "%ld\t%ld\t%ld\n", &value, &value2, &value3);
      thisitem.PETypes[i][0] = value;
      thisitem.PETypes[i][1] = value2;
      thisitem.PETypes[i][2] = value3;
      for (long j = 0; j < value3; ++j){
        fscanf(fid, "%ld:%ld", &value, &value2);
        thisitem.PETypesDistance[i].push_back(value);
        thisitem.PETypesDistanceCount[i].push_back(value2);
      }
    }
  }


  // Coverage
  fscanf(fid, "%s\t%ld\t%ld\n", &stvalue, &value, &value2);
  verifyFieldName(stvalue, "Coverage", 2);
  if (nSGTypes != value) {
    printf("warning: Incorrect numbers in the coverage field\n");
  }
  for (int i = 0; i < nSGTypes; ++i) {
    fscanf(fid, "%ld\t%ld\n", &value, &value2);
    long nrec = value2;
    vector<vector<long> > coverage;
    for (int j = 0; j < nrec; ++j){
      fscanf(fid, "%ld,%ld", &value, &value2);
      vector<long> sub;
      sub.push_back(value);
      sub.push_back(value2);
      coverage.push_back(sub);
    }

    //fgets(stvalue, MAXSTRLEN, fid);
    thisitem.Coverage.push_back(coverage);
  }
  fillrcount(thisitem);
  thisitem.rcountColSum.assign(thisitem.rcount[0].size(),0);
  thisitem.rcountRowSum.assign(thisitem.rcount.size(),0);
  for (long i = 0; i < thisitem.rcount.size(); ++i)
    for (long j = 0; j < thisitem.rcount[0].size(); ++j){
      thisitem.rcountRowSum[i] += thisitem.rcount[i][j];
      thisitem.rcountColSum[j] += thisitem.rcount[i][j];
    }

  return thisitem;
}

bool getBoolenValue(string &a){
  for (long i = 0; i < a.size(); ++i) 
    if (islower(a[i])) a[i] = tolower(a[i]);
  if (a == "true") return true;
  else return false;
}


void fillrcount(NewInstance &thisitem){
  for (long i = 0; i < thisitem.nExons; ++i){
    thisitem.rcount.push_back(vector<long>());
    for (long j = 0; j < thisitem.nExons; ++j)
      thisitem.rcount[i].push_back(0);
  }



  for (long i = 0; i < thisitem.SGTypes.size(); ++i){
    long sum = 0;
    vector<long> eids;
    for (long j = 0; j < thisitem.SGTypes[i].size(); ++j){
      sum += thisitem.SGTypes[i][j];
      if (thisitem.SGTypes[i][j] != 0) eids.push_back(j);
    }
    if (sum == 1) {
      thisitem.rcount[eids[0]][eids[0]] = thisitem.SGCounts[i];
    }
    else if (sum > 1){
      for (int j = 0; j < eids.size() - 1; ++j){
        thisitem.rcount[eids[j]][eids[j + 1]] = thisitem.SGCounts[i];
      }
    }
  }
}

void matrixTransposeMulSelf(vector<vector<double> > &matrix, double **result){
  for (long i = 0; i < matrix[0].size(); ++i)
    for (long j = 0; j <= i; ++j){
      result[i][j] = 0;
      for (long k = 0; k < matrix.size(); ++k)
        result[i][j] += matrix[k][i] * matrix[k][j];
    }
}








