/**
convert .bed files (e.g., scripture) to .gtf files.
Notice that in .bed file, the start coordinates are 0-based, and the end coordinates are 1-based. However, in .gtf files, all coordinates are 1-based. 
*/
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <vector>

using namespace std;

int minscore=0;

//process one line of input
//If success, return 0;
//else return -1
int processoneline(ostream & out, string oneline,long isoformid){
		stringstream ss(oneline);
		string chrname;
		ss>>chrname; //1st, chromosome name
		long lstart,lend;
		ss>>lstart; //0 base
		ss>>lend; //1 base.  --2nd, 3rd, mapping start and end
		string readname;
		ss>>readname; //4th, read id
		int score;
		double iscore;
		ss>>iscore; //5th, the score of this transcript
		score=(int)iscore;
		
		if(score<minscore)return -1;

		char ori;
		ss>>ori; //6th, orientation, should be + or -
		string tmps;
		ss>>tmps; //jump 7th, 8th, and 9th field
		ss>>tmps; //jump 7th, 8th, and 9th field
		ss>>tmps; //jump 7th, 8th, and 9th field
		int nblock;
		ss>>nblock; //10th, the number of blocks one read maps to
		string blocksizes;
		ss>>blocksizes; //11th, block sizes
		string blockstarts;
		ss>>blockstarts; //12th, block starts

		//parse blocksizes and blockstarts
		char tmpc;
		vector<long> bsizes(nblock,-1),bstarts(nblock,-1);
		stringstream ssi(blocksizes),ssa(blockstarts);
		for(int i=0;i<nblock;i++){
			ssi>>bsizes[i];
			ssi>>tmpc;
			ssa>>bstarts[i];
			ssa>>tmpc;
		}

		//now, write to .gtf file
		//writing the "transcript line"
		out<<chrname<<"\t"; //1st field, chromosome name
		out<<"Scripture\ttranscript\t";//2nd and 3rd field, "Scripture transcript"
		out<<lstart+1<<"\t"; //4rd field, start, 1-base, so +1
		out<<lend<<"\t"; //5th field, end, 1-base,  
		out<<score<<"\t"; //6th field, score, maximum 1000
		out<<ori<<"\t"; //7th, orientation
		out<<"."<<"\t"; //8th, should be .
		//attributes
		out<<"gene_id \"Inst"<<isoformid<<"\"; transcript_id \""<<readname<<"\"; FPKM \""<<score<<"\"; frac \"1.0000000\"; conf_lo \""<<score<<"\"; conf_hi \""<<score<<"\"; cov \"0.1\";"<<endl;


		for(int i=0;i<nblock;i++){
			//writing the "transcript line"
			out<<chrname<<"\t"; //1st field, chromosome name
			out<<"Scripture\texon\t";//2nd and 3rd field, "Scripture transcript"
			out<<lstart+bstarts[i]+1<<"\t"; //4rd field, start, 1-base, so +1
			out<<lstart+bstarts[i]+bsizes[i]<<"\t"; //5th field, end, 1-base
			out<<score<<"\t"; //6th field, score, maximum 1000
			out<<ori<<"\t"; //7th, orientation
			out<<"."<<"\t"; //8th, should be .
			//attributes
			out<<"gene_id \"Inst"<<isoformid<<"\"; transcript_id \""<<readname<<"\"; ";
			out<<"exon_number \""<<i+1<<"\"; ";
			out<<"FPKM \""<<score<<"\"; frac \"1.0000000\"; conf_lo \""<<score<<"\"; conf_hi \""<<score<<"\"; cov \"0.1\";"<<endl;
		}
		
		return 0;

}

void printhelp(){
	cout<<"This program is used to convert .bed file (of scripture output) into .gtf file.\n";
	cout<<"Usage: bed2gtf [option] BEDFILE\n";
	cout<<"Options:\n";
	cout<<"-f minscore\tOnly convert records with score greater than minscore (integer). Default is 0.\n";
	cout<<"-o outputfilename\tSpecify output file name. Default is BEDFILE.convert.gtf.\n";
}

int main(int argc, char* argv[]){
	if(argc<2){
		printhelp();
		return -1;
	}
	string ofname(argv[argc-1]);
	ofname+=".convert.gtf";
	for(int i=0;i<argc-1;i++){
		if(string(argv[i])=="-f"){
			string nxt(argv[i+1]);
			stringstream ss(nxt);
			ss>>minscore;
			cout<<"Minimum score: "<<minscore<<endl;
		}
		if(string(argv[i])=="-o"){
			ofname=string(argv[i+1]);
			cout<<"Output file name: "<<ofname<<endl;
		}
	}

	ifstream ifs(argv[argc-1]);
	if(!ifs.is_open()){
		cerr<<"Error opening input file "<<argv[1]<<endl;
		return -1;
	}

	ofstream ofs(ofname.c_str());
	if(!ofs.is_open()){
		cerr<<"Error opening output file "<<ofname<<endl;
		return -1;
	}


	string oneline;
	long nline=0;
	long npline=0;
	while(true){
		getline(ifs,oneline);
		if(ifs.eof())break;
		nline++;
		if(nline%10000==0){
			cout<<"Processing "<<nline<<" lines. "<<npline<<" lines written..."<<endl;
		}
		int ret=processoneline(ofs,oneline,nline);
		if(ret==0)npline++;
	}

	cout<<"Write "<<npline<<" out of "<<nline<<" ("<<npline*1.0/nline<<") lines.\n";
	ofs.close();
	ifs.close();


	return 0;
}
