// $Id: recognizeFormat.cpp 6780 2009-09-15 00:55:05Z itaymay $

#include "recognizeFormat.h"
#include "maseFormat.h"
#include "sequenceContainer.h"
#include "molphyFormat.h"
#include "phylipFormat.h"
#include "nexusFormat.h"
#include "fastaFormat.h"
#include "clustalFormat.h"
#include "nexusFormat.h"
#include "phylipSequentialFormat.h"


// Reads sequences from `infile` via readUnAligned() (which picks the file
// format automatically) and then calls
// makeSureAllSeqAreSameLengthAndGetLen() on the resulting container before
// returning it. The value returned by that call is not used here.
//
//   infile - input stream holding the sequence data
//   alph   - alphabet forwarded unchanged to readUnAligned()
sequenceContainer recognizeFormat::read(istream &infile, const alphabet* alph) {
	sequenceContainer seqs(readUnAligned(infile, alph));
	seqs.makeSureAllSeqAreSameLengthAndGetLen();
	return seqs;
}

// Recognizes the format of the sequence data in `infile` and returns the
// sequence container produced by the matching format reader.
//
// The decision is made on the first non-blank character of the stream:
//   '#'   -> nexusFormat
//   '>'   -> fastaFormat
//   'C'   -> clustalFormat
//   ';'   -> maseFormat
//   digit -> molphyFormat, phylipFormat or phylipSequentialFormat, chosen by
//            probing the second and third lines (the stream is then rewound
//            to the beginning of the file before the reader is called)
// Any other first character (including an empty stream) is reported through
// errorMsg::reportError together with the first line of the input.
//
//   infile - input stream holding the sequence data; a stream in a failed
//            state is reported through errorMsg::reportError
//   alph   - alphabet forwarded unchanged to the selected format reader
//
// No sequence-length check is performed here; see read() for that.
sequenceContainer recognizeFormat::readUnAligned(istream &infile, const alphabet* alph) {
	sequenceContainer sc;
	if (!infile){
		string tmp = "error unable to open sequence input file ";
		errorMsg::reportError(tmp);
	}

	// Skip leading blanks (spaces, tabs, line breaks).
	// peek() returns an int that is either end-of-file or the next byte as an
	// unsigned char value. Keeping it as an int (rather than narrowing to char)
	// keeps end-of-file distinct from byte 0xFF and makes the isdigit() call
	// below well defined for bytes >= 0x80.
	int check = infile.peek();
	while ((check == ' ') || (check == '\n') || (check == '\t') || (check == '\r')) {
		infile.get();
		check = infile.peek();
	}

	switch (check){
	case '#':
		sc=nexusFormat::readUnAligned(infile,alph);
		break;
	case '>':
		sc=fastaFormat::readUnAligned(infile,alph);
		break;
	case 'C':
		sc=clustalFormat::readUnAligned(infile,alph);
		break;
	case ';':
		sc=maseFormat::readUnAligned(infile,alph);
		break;

	default:
		if (isdigit(check)){
			// Here it can be either MOLPHY format or one of the PHYLIP type
			// formats (interleaved, sequential). In PHYLIP format there are
			// lines that are not empty, but the first 10 characters are space.
			string s;
			getline(infile,s, '\n' ); // the first line holds numbers in both formats
			getline(infile,s, '\n' ); // the second line decides MOLPHY vs. PHYLIP

			// Ignore the carriage return of a CRLF line ending so that it does
			// not hide a trailing space from the test below.
			if (!s.empty() && s[s.size()-1] == '\r') s.erase(s.size()-1);

			// A space anywhere except as the very last character of the second
			// line means PHYLIP; no space, or only a trailing one, means MOLPHY.
			const string::size_type firstSpace = s.find(' ');
			const bool isPhylip = (firstSpace != string::npos) && (firstSpace + 1 != s.size());

			if (!isPhylip) {
				// The probing reads may have hit end-of-file on a short input;
				// reset the state flags so the rewind is not a no-op.
				infile.clear();
				infile.seekg(0, ios::beg); // file return to the beginning
				sc=molphyFormat::readUnAligned(infile,alph);
			} else {
				// The third line selects the PHYLIP flavour: a line starting
				// with a space goes to phylipSequentialFormat, anything else
				// (including a missing third line) to phylipFormat.
				// NOTE(review): the previous comment here stated the opposite
				// mapping ("interleaved will begin with a space, sequential
				// not"); the code's mapping is kept as is - confirm intent.
				string third; // fresh string: stays empty if the read fails
				getline(infile,third, '\n' );
				infile.clear();
				infile.seekg(0, ios::beg); // file return to the beginning
				if (!third.empty() && third[0] == ' ')
					sc = phylipSequentialFormat::readUnAligned(infile, alph);
				else
					sc = phylipFormat::readUnAligned(infile,alph);
			}
		}
		else{
			string line;
			getline(infile, line, '\n');
			string tmp2 = "The program can't recognise your format!";
			tmp2+="\nThis is the first line in your format:\n";
			tmp2+=line;
			errorMsg::reportError(tmp2);
		}
		break;
	}
	return sc;
}
