/**
 * Last update: 
 *   2011.09.27  Add -o option
 *
 */
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <sstream>
#include <map>
//#include <multimap>

using namespace std;

/*
One group of gtf records that are emitted together:
the raw text of every line in the group, in the order the lines were added.
*/
struct onetrans {
  std::vector<std::string> data;
};

/*
Parse the last (attribute) field of a gtf line into a <key,value> set.

ss  : stream positioned at the start of the attribute text; everything
      that is still unread is consumed.
ret : receives one entry per attribute; an existing entry with the same
      key is overwritten.

Attributes look like `key "value";` or `key value;`:
  - a quoted value is taken verbatim up to the closing quote, so it may
    contain blanks; if the closing quote is missing, the rest of the
    text is taken;
  - an unquoted value ends at the next blank or ';' and never includes
    the ';';
  - an attribute whose value is empty (e.g. `note "";`) is not stored,
    so the map never holds an empty value; skipping it does not disturb
    the pairing of the attributes that follow;
  - a trailing key without a value is ignored.

Always returns 0. If the stream is already in a failed state nothing can
be read and ret is left untouched.
*/
int parsegtflastfield(stringstream& ss, map<string,string>&ret){
  // Work on the raw remaining text: whitespace-delimited extraction cannot
  // tell a blank inside a quoted value from a separator.
  string text;
  char ch;
  while(ss.get(ch))
    text+=ch;

  const string blanks=" \t\r\n";
  const string separators=blanks+";";
  const string::size_type len=text.size();

  string::size_type pos=0;
  while(pos<len){
    // Skip blanks and the ';' that terminated the previous attribute.
    pos=text.find_first_not_of(separators,pos);
    if(pos==string::npos)break;

    // The key runs up to the next blank.
    const string::size_type keyend=text.find_first_of(blanks,pos);
    if(keyend==string::npos)break;//key without value
    const string key=text.substr(pos,keyend-pos);

    pos=text.find_first_not_of(blanks,keyend);
    if(pos==string::npos)break;//key followed by blanks only

    string value;
    if(text[pos]=='"'){
      const string::size_type close=text.find('"',pos+1);
      if(close==string::npos){
        value=text.substr(pos+1);
        pos=len;
      }else{
        value=text.substr(pos+1,close-pos-1);
        pos=close+1;
      }
    }else{
      string::size_type valend=text.find_first_of(separators,pos);
      if(valend==string::npos)valend=len;
      value=text.substr(pos,valend-pos);
      pos=valend;
    }

    if(!value.empty())
      ret[key]=value;
  }
  return 0;
}



int main(int argc, char* argv[]){
  if(argc<2){
    cerr<<"This program is used to sort gtf file.\n\n";
    cerr<<"Usage: sortgtf {OPTIONS} <gtf file | ->\n\n";
    cerr<<"Options:\n\n";
    cerr<<"  -o OUTPUTFILE \tSpecify the output file name. In default, the sorted gtf is located at the same directory of original gtf file, and with suffix .sorted.gtf.\n";
    return -1;
  }

  bool inputfromfile;
  bool outputfromfile;
  string infile(argv[argc-1]);
  string outfile=infile;
  bool specifyout=false;
  for(int i=1;i<argc;i++){
    if(string(argv[i])=="-o"){
      outfile=string(argv[i+1]);
      specifyout=true;
    }
  }
  if(outfile!="-"){
    if(specifyout==false)
      outfile+=".sorted.gtf";
    outputfromfile=true;
  }else{
    outputfromfile=false;
  }
  if(infile!="-") inputfromfile=true;
  else inputfromfile=false;


  ifstream ifs;
  ofstream ofs;
  if(inputfromfile){
    ifs.open(argv[argc-1]);
    if(!ifs.is_open()){
      cerr<<"Error opening "<<argv[argc-1]<<endl;
      return -1;
    }
  }
  if(outputfromfile){
    ofs.open(outfile.c_str());
    if(!ofs.is_open()){
      cerr<<"Error opening output file "<<outfile<<endl;
      return -1;
    }
  }
  

  map<string, multimap<long, onetrans*> >alldata;

  onetrans* currenttrans=NULL;
  int nline=0;
  string prevtransid="";

  bool isinserted=false;//check if this record is already in the list

  while(true){
    string oneline;
    if(inputfromfile){
      getline(ifs,oneline);
      if(ifs.eof())break;
    }
    else{
      getline(cin,oneline);
      if(cin.eof())break;
    }
    stringstream ss(oneline);
    string chrname;
    string tmp;
    string type; //transcript or exon
    long start;
    ss>>chrname>>tmp>>type>>start;//1-4th field
    long endp;ss>>endp; //5th
    string fscore;ss>>fscore;//6th, score
    char f78;
    ss>>f78; ss>>f78; //7th, 8th:     orientation and shift

                map<string,string> lfd;
                parsegtflastfield(ss,lfd);
                if(lfd.count("gene_id")==0 || lfd.count("transcript_id")==0){
                  cerr<<"Error: missing gene_id and transcript_id field.\n";
                  continue;
                }
                string gene_id,trans_id;
                gene_id=lfd["gene_id"];
                trans_id=lfd["transcript_id"];
    

    multimap<long,onetrans*> & chrmap= alldata[chrname];

    if(type=="transcript" || trans_id!=prevtransid){

      if(isinserted==false && prevtransid!=""){
        cerr<<"Warning: discard transcript "<<prevtransid<<" because it has no exon field.\n";
        delete currenttrans;
        currenttrans=NULL;
      }

      currenttrans=new onetrans();
      currenttrans->data.push_back(oneline);
      //if(type=="transcript" || type=="exon"){
        chrmap.insert(pair<long,onetrans*>(start,currenttrans));
        isinserted=true;
      //}
      //else{
      //  isinserted=false;
      //}
      prevtransid=trans_id;
    }
    else {
      if(currenttrans!=NULL){
        currenttrans->data.push_back(oneline);
      }
      else{
        //cerr<<"Error: appear exon before transcript.\n";
      }
      //if(isinserted==false && type=="exon"){//appear the 1st exon field, insert into map
      if(isinserted==false ){
        isinserted=true;
        chrmap.insert(pair<long,onetrans*>(start,currenttrans));
      }
    }
    nline++;
  }
  cerr<<"Total number of lines: "<<nline<<endl;

  //output
  int ntrans=0;
  map<string, multimap<long, onetrans*> >::iterator mit;
  for(mit=alldata.begin();mit!=alldata.end();mit++){
    multimap<long, onetrans*> & curmap=mit->second;
    ntrans+=curmap.size();
    multimap<long,onetrans*>::iterator cit;
    for(cit=curmap.begin();cit!=curmap.end();cit++){
      vector<string> & cvec=cit->second->data;
      for(int i=0;i<cvec.size();i++){
        if(outputfromfile)
          ofs<<cvec[i]<<endl;
        else
          cout<<cvec[i]<<endl;
      }
    }
  }
  
  cerr<<"Total number of transcripts: "<<ntrans<<endl;
  if(inputfromfile)
    ifs.close();
  if(outputfromfile)
    ofs.close();
  

  return 0;
}
