#include #include #include #include #include "auxiliaryio.h" #include "cvganalysis.h" #include "instanceio.h" #include "structdef.h" #include "bedio.h" /** FILE IOS **/ ofstream wigfs; //wigfs, the connected coverage ofstream realwigfs;//realwigfs, the real coverage ofstream bedfs; //bedfs, all junctions ofstream juncfs;//juncfs, summary of junctions ofstream boundfs;//boundfs, the boundary of an instance ofstream iofs; //out instance file ofstream ofs; //read_info file ofstream bofs; //bound file string ofstream gofs; //gene range file /* Instance ID */ int n_INST=0; /* Prepare the aux output file, including parsing the input arguments and open corresponding files */ int prepareAuxFile(vector args, string prefix, bool& outinstance ){ string wigfile="/dev/null"; string bedfile="/dev/null"; string realwigfile="/dev/null"; string juncfile="/dev/null"; string boundfile="/dev/null"; //The prefix of the generated files for(int i=0;i& startpos, vector& endpos, bool writeendl=true){ ofs<&startpos, vector& endpos){ for(int i=1;icvg; map::iterator mitr,mitr2; string chrname=rg.getChr(); if(rg.size()==0)return; //getcoverage(cvg,rpoolstart,rpoolend); if(wigfs.is_open()){ rg.getConnectedCoverage(cvg); //write to wigfs mitr=cvg.begin();mitr2=mitr;mitr2++; while(mitr2!=cvg.end()){ wigfs<first-1<<"\t"<first-1<<"\t"<second<first-1<<"\t"<first-1<<"\t"<second<1){//junction reads rec.segstart=rpoolstart[i]; rec.segend=rpoolend[i]; rec.start=rpoolstart[i][0]; rec.end=rpoolend[i].back(); rec.nsegs=rec.segstart.size(); stringstream ss; ss<<(nj++);string snj; ss>>snj; stringstream ss2; ss2<>sid; rec.name=string("Inst")+sid+string("_Junc")+snj;; if(dir[i]>=0)rec.dir='+'; else rec.dir='-'; writeBed(bedfs,rec); } }//end for }//end if mapsumjunc; map dirjunc; map::iterator jmit; if(juncfs.is_open()){ for(int i=0;i1){//junction reads //get stat for(int j=0;j=0?1:-1); } } } //write int width=20; nj=0; for(jmit=sumjunc.begin();jmit!=sumjunc.end();jmit++){ long b=(jmit->first).first; //end of 1st exon, 1-base inclusive long c=(jmit->first).second;//beginning of 2nd exon, 1-base inclusive long a=b-width+1;// beginning of 1st exon, 1-base inclusive long d=c+width-1;// end of 2nd exon, 1-base inclusive int djdr=dirjunc[jmit->first]; juncfs<second<<"\t";//4th, name juncfs<second<<"\t";//5th, score if(djdr>0) juncfs<<"+\t";//6th,orientation else juncfs<<"-\t";//6th,orientation juncfs<0) juncfs<<"255,0,255\t";//9th, color else juncfs<<"0,255,255\t";//9th, color juncfs<<2<<"\t";//10th, blockcounts //11th, sizes juncfs<&allbound, vector& validsegs vector& allrange, int score, int dir ){ bool plotexons=true; char bd='.'; if(dir>0)bd='+'; if(dir<0)bd='-'; if(boundfs.is_open()){ //map::iterator mitr=allbound.begin(); //map::reverse_iterator rmitr=allbound.rbegin(); long startr=allrange.front().first; long endr=allrange.back().second; int nbks=allrange.size(); //for(int i=0;i::iterator mitr2; //mitr=allbound.begin();mitr2=mitr;mitr2++; //int nt=0; vector startcod,lencod; for(int i=0;ifirst); // lencod.push_back(mitr2->first-mitr->first); // } // mitr++; // mitr2++; // nt++; //} boundfs<1){ //plot exons boundary for(int n=0;n0 && startcod[n-1]+lencod[n-1]==startcod[n])needplot=true; if(needplot==false)continue; boundfs< subg; cout<<"Coming in, Reads: "< annors(crn.size()); vpos_t & ans=crn.s(); vpos_t & ane=crn.e(); for(int i=0;i jrange; C_JUNCTION.getAllRanges(subg[i].getChr(),subg[i].getRange(),jrange); subg[i].setupBound(jrange); subg[i].calculateType(); subg[i].getCvgStatistics(); }else{ if(subg[i].size() segrange=subg[i].getSegs(); writeboundfs(subg[i].getRange(),n_INST,subg[i].getChr(),segrange,subg[i].size(),subg[i].getDir()); iofs<<"Instance\t"< >&rpoolstart, vector >&rpoolend, int appearpereads, int instanceid, string chrname, int readlen, vector >& annostart, vector >& annoend, //annotations within currentrange bool wantcoverage, bool refonly ){ ofstream &out=iofs; //cout<<"write2Inst, rpoolsize: "< allbound; //the type of every boundary; 0 for junction, 1 for low coverage map boundtype; //use the length of the 1st read as the length of the read //int readlen=0; //for(int i=0;i::iterator it, it2; //this variable is used to collect statistics of different segments. The following values in vector are: 0, max cvg, 1, leftmost cvg, 2, rightmost cvg, 3, fraction of 0 coverage map >cvgstat; //incorporate boundaries from annotation //notice that start and end boundaries are also added here //use this at caution! //if(false) for(int i=0;i0){ for(int j=0;jsecond=0; int nExons=allbound.size()-1; //consider ref annotations as special reads, and get their type map,int> reftype, reforder; vector > refseriestype; vector allreftype; //get all types of the annotations maprefbound=allbound; //njuncs is used to store the number of junction reads falling onto one segment //emptyjuncs is a dummy variable map njuncs,emptyjuncs; getalltypeorders(reftype,reforder,refseriestype,allreftype,refbound,annostart,annoend,emptyjuncs); //after that, count the number of reads falling into each segments, and calculate the fingerprint for each read map,int> contenttype; //and, assign a type number for it. We will write these types in the order of this number map,int> typeorder; //save the type according to its order, from 0 to n vector > seriestype; //save the type of all the reads vector allreadtype; //get all types and their orders getalltypeorders(contenttype,typeorder,seriestype,allreadtype,allbound,rpoolstart,rpoolend,njuncs); //get seg lengths vector seglengths; it=allbound.begin();it2=it;it2++; while(it2!=allbound.end()){ seglengths.push_back(it2->first-it->first); it++;it2++; } //check segs with non-zero reads vector validsegs(nExons,0); int nvalidsegs=0; int validid=0; it=allbound.begin();it2=it;it2++; //also, iterate refbound too, retain segments which appear in ref isoforms map::iterator rit=refbound.begin(); while(it2!=allbound.end()){ if(it->second!=0 || rit->second!=0){ if(seglengths[validid]first]==1 || boundtype[it2->first]==1 ) && rit->second==0 // a new condition that this segment does not appear in ref isoforms && njuncs[it->first]==0 //a new condition that this segment contains no junction reads ){ //encounter a short range, check if the range is due to the coverage cut } else{ validsegs[validid]=1; nvalidsegs++; } } validid++; it++; it2++; rit++; } if(nvalidsegs==0){ cout<<"0 segs, return..."<first<<"\t"<first-1<<"\t"<<(it2->first-it->first)<<"\t"<second;//range and length vector& currentstat=cvgstat[it->first]; if(currentstat.size()==0){ cerr<<"Error: not enough fields in cvg stat.\n"; } else{ //statistics, including: max cvg, leftmost cvg, rightmost cvg, zero fraction cvg, and average cvg for(int i=0;ifirst, it2->first-1] } it++; it2++; validid++; } //ref out<<"Refs\t"< exonbin(nExons,0); for(int j=0;j. mapreshuforder; int reshufid=0; vector validsgtypes(seriestype.size(),0); for(int i=0;i exonbin(nExons,0); for(int j=0;j, vector > allpetypes; pair thispair=make_pair(0,0); for(int i=0;i, vector >::iterator apitr=allpetypes.begin(); while(apitr!=allpetypes.end()){ out<<(apitr->first).first<<"\t"<<(apitr->first).second<<"\t"<<(apitr->second).size()<second).size();i++){ out<<(apitr->second)[i]<<" "; } out< > allstartrange(seriestype.size()); int step=1; int pst=0; if(appearpereads==1){step=2;pst=1;} for(int i=pst;i tcounts; for(int i=0;i::iterator tit=tcounts.begin();tit!=tcounts.end();tit++){ out<first<<","<second<<"\t"; } out<>id>>chr>>ori>>starti>>endi; ss>>chr>>starti>>endi>>ori; gr.push_back(chr,make_range_t(starti,endi)); nread++; } cout<<"Reading "<fields; int current=-1,next=-1; while((next=oneline.find('\t',current+1))!=-1){ fields.push_back(oneline.substr(current+1,next-current-1)); current=next; } fields.push_back(oneline.substr(current+1)); //it should be 12 fields if(fields.size()!=12){ cerr<<"Error: not enough field (12) at line "< alln=anno.getChrom(); for(int i=0;i