
#include <iostream>
#include <fstream>
#include <sstream>
#include <map>
#include <vector>
#include <algorithm>
#include <climits>
#include "samio.h"
#include "auxiliaryio.h"
#include "cvganalysis.h"
#include "instanceio.h"
#include "common.h"
#include "rangeset.h"
#include "parseopt.h"
#include "readgroup.h"


inline long abs2(long a){
  return a>0?a:-a;
}


void getcoverage(map<long,int>&cvg, vector<vector<long> >&rpoolstart, vector<vector<long> >&rpoolend);
/**
 * Split large instances into smaller one
 * (FIXED) ATTENTION: the range is re-calculated based on the coverage 
  * ATTENTION: minjcount is no longer used.
 */
int splitlargeinstances2(range_t&currentrange, vector<range_t >& newrange,
    vector<vector<long> >&rpoolstart, vector<vector<long> >&rpoolend,
    int appearpereads
    ){
  if(appearpereads==0){
//    newrange.push_back(currentrange);
//    return 0;
  }
  if(REFONLY){
    //don't break
    newrange.push_back(currentrange);
    return 0;
  }
  //map<long,int> allbound;
  //getallbound(allbound,currentrange,rpoolstart,rpoolend,minjcount,-1);
  //int nExons=allbound.size()-1;

  //check coverage
  
  map<long,int>cvg;
  getconnectedcoverage(cvg,rpoolstart,rpoolend,appearpereads);

  //go through the coverage, if the coverage is smaller than minjcount, cut
  map<long,int>::iterator mitr,mitr2,mitr3;

  //get maximum cvg
  int maxcvg=1;
  for(mitr=cvg.begin();mitr!=cvg.end();mitr++){
    if(maxcvg<mitr->second)maxcvg=mitr->second;
  }
  int chosenminj=(int)(maxcvg*MIN_CVG_FRACTION);
  if(MINEXONCVG<chosenminj)chosenminj=MINEXONCVG;

  mitr=cvg.begin();;
  while(true){
    while(mitr!=cvg.end()&&mitr->second<chosenminj)mitr++;
    if(mitr==cvg.end())break;
    else{
      mitr2=mitr;
      while(mitr2!=cvg.end()&& mitr2->second>=chosenminj)mitr3=mitr2++;
      if(mitr3!=mitr)newrange.push_back(range_t(mitr->first,mitr3->first-1));
      if(mitr2==cvg.end())break;
      mitr=mitr2;

    }
  }
  
  return 0;

}

/**
 * Merge two ranges such that overlapping ranges are merged
 *
 * Notice that range is inclusive, i.e., range=[range.first,range.second]
 */
void mergeranges(vector<range_t >&inrange1, vector<range_t >& inrange2, 
    vector<range_t >&outrange){
  map<long,int>counter;
  for(int i=0;i<inrange1.size();i++){
    counter[inrange1[i].first]++;
    counter[inrange1[i].second+1]--;
  }
  for(int i=0;i<inrange2.size();i++){
    counter[inrange2[i].first]++;
    counter[inrange2[i].second+1]--;
  }
  long nc=0;
  long startsite=-1;
  //iterate through all possible markers
  for(map<long,int>::iterator itr=counter.begin();itr!=counter.end();itr++){
    if(nc==0){
      if(itr->second>0)startsite=itr->first;
      else{
        cerr<<"Error: incorrect range overlap!\n";
      }
    }
    nc=nc+itr->second;
    if(nc==0){
      outrange.push_back(range_t(startsite,itr->first-1));
      startsite=-1;
    }
  }

}


/*
analyze sam file.
Return value: 0 if all reads are single-ended, 1 if some are paired-ended.
*/
int readSamFile(string inSamFile, //input file
    vector<string> args // we need to print the command line into the instance file
    ){

  //Parsing arguments and set up parameters
  if (parseopt(args)==-1){
    return -1;
  }

  vector<string> rangechrs;
  vector<range_t > rangerange;
  bool outinstance=true;
  //Preparing output files
  prepareAuxFile( args,inSamFile,outinstance);

  ifstream fifs;
  if(inSamFile!="-"){
    fifs.open(inSamFile.c_str());
    if(!fifs.is_open()){
      cerr<<"Error opening input file "<<inSamFile<<endl;
      return -1;
    }
  }
  istream &ifs= (inSamFile=="-")?cin:fifs;
  cerr<<"Input file: "<<inSamFile<<endl;

  int appearpereads=-1;
  if(USE_SINGLE_ONLY==1)appearpereads=0;
  //saving flag and cigar for pair-end 
  map<range_t, int> flagmap;
  map<range_t, string> cigarmap;
  map<range_t, int> paircount;

  string prevrname="";
  range_t dp=make_range_t(0,0);
  range_t dp2=dp;
  
  range_t currentrange=dp;
  long currentrcount=0;
  int rangecounter=1;


  int instanceid=0; //instance id
  int currentreadlen=0;
  long linecount=0;

  Align calign;
  ReadGroup rd;

  //----------------The main loop of parsing SAM file-------------------------
  while(true){

    string oneline;
    getline(ifs,oneline);
    linecount++;
    if(ifs.eof())break;
    if(oneline[0]=='@') continue;
    if(calign.parse(oneline)==-1){
      cerr<<"Error parsing SAM records at line "<<linecount<<endl;
    }
    bool usesingle=USE_SINGLE_ONLY;
    //if the paired-end distance is too large, treat them as single-end reads
    if(calign.isPairedEnd()) {
       if( abs2(calign.plen)>MAX_PE_DISTANCE)
         usesingle=true;
       //do not support mapping to different chromosomes now
       if(calign.rnext!="*" && calign.rnext!="=")
         usesingle=true;
    }
    if(!calign.isValid())
      continue;
  
    //write to read info
    writeoneline2readinfo(calign);
    if(linecount==1){
      ReadGroup::setStatReadLen(calign.getReadLen());
      ReadGroup::setStatChr(calign.rname);
    }
    //write to instance, if possilbe
    //update and write generange file
    if(calign.rname!=prevrname || calign.pos>currentrange.second+MIN_GRANGE_DISTANCE){
  
      rd.clearPairInfo();   
      if(currentrange.second!=0)
        write2rangeandinstance(rd,currentrange);
      rd.clear();
      //If gene ranges are fixed, get the next gene range
      if(FIXRANGE){
        string crname=calign.rname;
        int getnextret=0;
        
        while((getnextret=C_GENE_RANGE.getNext(calign.rname,calign.pos,crname,currentrange))==2){
          // if annotation's gene range < read's gene range, output annotation gene range
          ReadGroup::setStatChr(crname);
          rd.setRange(currentrange);
          write2rangeandinstance(rd,currentrange);
          cout<<"\nJumping range, read: "<<calign.rname<<", range: "
            <<calign.rname<<":"<<currentrange.first<<"-"<<currentrange.second<<endl;
        }
        if(getnextret==-1)
           break;
        ReadGroup::setStatChr(crname);
        rd.setRange(currentrange);
        if(currentrange.first>0)
          cout<<"\nFix range, read: "<<calign.rname<<", range: "
            <<calign.rname<<":"<<currentrange.first<<"-"<<currentrange.second<<endl;
      }else{
        //update current range and current r count
        currentrange=calign.getRange(usesingle);
      }
      //update current range w.r.t. existing ref isoforms
      //this works no matter whether FIXRANGE is true
      //C_ANNO.checkOverlapRange(currentrange,rname);
    }//end if(pos>currentrange.second+MIN_GRANGE_DISTANCE)
      
    //update the current range
    if(FIXRANGE){
      range_t rrt=calign.getRange(usesingle);
      if(rrt.first>currentrange.first && rrt.second<currentrange.second)
        rd.add(calign);
    }else{
      if(calign.pos<currentrange.first){
          cerr<<"Error: the SAM file MUST be sorted!\n";
      }
      else{
        currentrcount++;
        long antd=calign.getRange(usesingle).second;
        if(antd>currentrange.second){
          currentrange.second=antd;
        }
        //this works no matter whether FIXRANGE is true
        C_ANNO.checkOverlapRange(currentrange,calign.rname);
      }
      rd.add(calign);
    }
    prevrname=calign.rname;
    if(calign.rname!=prevrname)
      ReadGroup::setStatChr(calign.rname);
    ReadGroup::setStatChr(calign.rname);
  
    if(MAX_INSTANCE!=-1 && instanceid>MAX_INSTANCE)
      break;
  
    if(rd.size()>10000 && rd.size()%10000==1){
      cout<<"--Reads: "<<rd.size()<<", range:["<<currentrange.first<<","<<currentrange.second<<"]"
          <<", current read: "<<calign.pos<<endl;
    }
  }//end of iteration of ifs
  //write the last one
  write2rangeandinstance(rd,currentrange);
  //write empty ref seq ranges, if possible
  currentrange.first=LONG_MAX;
  rd.clear();  
  if(FIXRANGE){
    cout<<"\nEnd of read input, writing remaining instances...\n";
    while(!C_GENE_RANGE.isEnd()){
      currentrange=C_GENE_RANGE.getRange();
      ReadGroup::setStatChr(C_GENE_RANGE.getChr());
      rd.setRange(currentrange);
      write2rangeandinstance(rd,currentrange);
      C_GENE_RANGE.inc();
    }
  }
  fifs.close();
  closeAuxFile();


  return appearpereads;
}


