// ***************************************************************************
// BamReader_p.cpp (c) 2009 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
// Last modified: 18 November 2012 (DB)
// ---------------------------------------------------------------------------
// Provides the basic functionality for reading BAM files
// ***************************************************************************

#include "bamtools/api/BamConstants.h"
#include "bamtools/api/BamReader.h"
#include "bamtools/api/IBamIODevice.h"
#include "api/internal/bam/BamHeader_p.h"
#include "api/internal/bam/BamRandomAccessController_p.h"
#include "api/internal/bam/BamReader_p.h"
#include "api/internal/index/BamStandardIndex_p.h"
#include "api/internal/index/BamToolsIndex_p.h"
#include "api/internal/io/BamDeviceFactory_p.h"
#include "api/internal/io/BgzfStream_p.h"
#include "api/internal/io/SerialBgzfStream_p.h"
#include "api/internal/io/ParallelBgzfStream_p.h"
#include "api/internal/utils/BamException_p.h"
using namespace BamTools;
using namespace BamTools::Internal;

#include <algorithm>
#include <cassert>
#include <iostream>
#include <iterator>
#include <vector>
using namespace std;

// constructor
BamReaderPrivate::BamReaderPrivate(BamReader* parent)
    : m_alignmentsBeginOffset(0)
    , m_parent(parent)
    , m_stream(NULL)
    , m_numThreads(0)
{
    m_isBigEndian = BamTools::SystemIsBigEndian();
}

// destructor
BamReaderPrivate::~BamReaderPrivate(void) {
    Close();
}

// closes the BAM file
bool BamReaderPrivate::Close(void) {

    // clear BAM metadata
    m_references.clear();
    m_header.Clear();

    // clear filename
    m_filename.clear();

    // close random access controller
    m_randomAccessController.Close();

    // if stream is open, attempt close
    if ( IsOpen() ) {
        try {
            m_stream->Close();
        } catch ( BamException& e ) {
            const string streamError = e.what();
            const string message = string("encountered error closing BAM file: \n\t") + streamError;
            SetErrorString("BamReader::Close", message);
            return false;
        }
        delete m_stream;
        m_stream = NULL;
    }

    // return success
    return true;
}

// creates an index file of requested type on current BAM file
bool BamReaderPrivate::CreateIndex(const BamIndex::IndexType& type) {

    // skip if BAM file not open
    if ( !IsOpen() ) {
        SetErrorString("BamReader::CreateIndex", "cannot create index on unopened BAM file");
        return false;
    }

    // attempt to create index
    if ( m_randomAccessController.CreateIndex(this, type) )
        return true;
    else {
        const string bracError = m_randomAccessController.GetErrorString();
        const string message = string("could not create index: \n\t") + bracError;
        SetErrorString("BamReader::CreateIndex", message);
        return false;
    }
}

// return path & filename of current BAM file
const string BamReaderPrivate::Filename(void) const {
    return m_filename;
}

const SamHeader& BamReaderPrivate::GetConstSamHeader(void) const {
    return m_header.ToConstSamHeader();
}

string BamReaderPrivate::GetErrorString(void) const {
    return m_errorString;
}

// return header data as std::string
string BamReaderPrivate::GetHeaderText(void) const {
    return m_header.ToString();
}

// return header data as SamHeader object
SamHeader BamReaderPrivate::GetSamHeader(void) const {
    return m_header.ToSamHeader();
}

// get next alignment (with character data fully parsed)
bool BamReaderPrivate::GetNextAlignment(BamAlignment& alignment) {

    // if valid alignment found
    if ( GetNextAlignmentCore(alignment) ) {

        // store alignment's "source" filename
        alignment.Filename = m_filename;

        // return success/failure of parsing char data
        if ( alignment.BuildCharData() )
            return true;
        else {
            const string alError = alignment.GetErrorString();
            const string message = string("could not populate alignment data: \n\t") + alError;
            SetErrorString("BamReader::GetNextAlignment", message);
            return false;
        }
    }

    // no valid alignment found
    return false;
}

// retrieves next available alignment core data (returns success/fail)
// ** DOES NOT populate any character data fields (read name, bases, qualities, tag data, filename)
//    these can be accessed, if necessary, from the supportData
// useful for operations requiring ONLY positional or other alignment-related information
bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& alignment) {

    // skip if stream not opened
    if ( NULL == m_stream || !m_stream->IsOpen() )
        return false;

    try {

        // skip if region is set but has no alignments
        if ( m_randomAccessController.HasRegion() &&
             !m_randomAccessController.RegionHasAlignments() )
        {
            return false;
        }

        // if can't read next alignment
        if ( !LoadNextAlignment(alignment) )
            return false;

        // check alignment's region-overlap state
        BamRandomAccessController::RegionState state = m_randomAccessController.AlignmentState(alignment);

        // if alignment starts after region, no need to keep reading
        if ( state == BamRandomAccessController::AfterRegion )
            return false;

        // read until overlap is found
        while ( state != BamRandomAccessController::OverlapsRegion ) {

            // if can't read next alignment
            if ( !LoadNextAlignment(alignment) )
                return false;

            // check alignment's region-overlap state
            state = m_randomAccessController.AlignmentState(alignment);

            // if alignment starts after region, no need to keep reading
            if ( state == BamRandomAccessController::AfterRegion )
                return false;
        }

        // if we get here, we found the next 'valid' alignment
        // (e.g. overlaps current region if one was set, simply the next alignment if not)
        alignment.SupportData.HasCoreOnly = true;
        return true;

    } catch ( BamException& e ) {
        const string streamError = e.what();
        const string message = string("encountered error reading BAM alignment: \n\t") + streamError;
        SetErrorString("BamReader::GetNextAlignmentCore", message);
        return false;
    }
}

int BamReaderPrivate::GetReferenceCount(void) const {
    return m_references.size();
}

const RefVector& BamReaderPrivate::GetReferenceData(void) const {
    return m_references;
}

// returns RefID for given RefName (returns References.size() if not found)
int BamReaderPrivate::GetReferenceID(const string& refName) const {

    // retrieve names from reference data
    vector<string> refNames;
    RefVector::const_iterator refIter = m_references.begin();
    RefVector::const_iterator refEnd  = m_references.end();
    for ( ; refIter != refEnd; ++refIter)
        refNames.push_back( (*refIter).RefName );

    // return 'index-of' refName (or -1 if not found)
    int index = distance(refNames.begin(), find(refNames.begin(), refNames.end(), refName));
    if ( index == (int)m_references.size() ) return -1;
    else return index;
}

bool BamReaderPrivate::HasIndex(void) const {
    return m_randomAccessController.HasIndex();
}

bool BamReaderPrivate::IsOpen(void) const {
    return ( NULL != m_stream && m_stream->IsOpen() );
}

// load BAM header data
void BamReaderPrivate::LoadHeaderData(void) {
    m_header.Load(m_stream);
}

// populates BamAlignment with alignment data under file pointer, returns success/fail
bool BamReaderPrivate::LoadNextAlignment(BamAlignment& alignment) {

    // read in the 'block length' value, make sure it's not zero
    char buffer[sizeof(uint32_t)];
    fill_n(buffer, sizeof(uint32_t), 0);
    m_stream->Read(buffer, sizeof(uint32_t));
    alignment.SupportData.BlockLength = BamTools::UnpackUnsignedInt(buffer);
    if ( m_isBigEndian ) BamTools::SwapEndian_32(alignment.SupportData.BlockLength);
    if ( alignment.SupportData.BlockLength == 0 )
        return false;

    // read in core alignment data, make sure the right size of data was read
    char x[Constants::BAM_CORE_SIZE];
    if ( m_stream->Read(x, Constants::BAM_CORE_SIZE) != Constants::BAM_CORE_SIZE )
        return false;

    // swap core endian-ness if necessary
    if ( m_isBigEndian ) {
        for ( unsigned int i = 0; i < Constants::BAM_CORE_SIZE; i+=sizeof(uint32_t) )
            BamTools::SwapEndian_32p(&x[i]);
    }

    // set BamAlignment 'core' and 'support' data
    alignment.RefID    = BamTools::UnpackSignedInt(&x[0]);
    alignment.Position = BamTools::UnpackSignedInt(&x[4]);

    unsigned int tempValue = BamTools::UnpackUnsignedInt(&x[8]);
    alignment.Bin        = tempValue >> 16;
    alignment.MapQuality = tempValue >> 8 & 0xff;
    alignment.SupportData.QueryNameLength = tempValue & 0xff;

    tempValue = BamTools::UnpackUnsignedInt(&x[12]);
    alignment.AlignmentFlag = tempValue >> 16;
    alignment.SupportData.NumCigarOperations = tempValue & 0xffff;

    alignment.SupportData.QuerySequenceLength = BamTools::UnpackUnsignedInt(&x[16]);
    alignment.MateRefID    = BamTools::UnpackSignedInt(&x[20]);
    alignment.MatePosition = BamTools::UnpackSignedInt(&x[24]);
    alignment.InsertSize   = BamTools::UnpackSignedInt(&x[28]);

    // set BamAlignment length
    alignment.Length = alignment.SupportData.QuerySequenceLength;

    // read in character data - make sure proper data size was read
    bool readCharDataOK = false;
    const unsigned int dataLength = alignment.SupportData.BlockLength - Constants::BAM_CORE_SIZE;
    RaiiBuffer allCharData(dataLength);

    if ( m_stream->Read(allCharData.Buffer, dataLength) == dataLength ) {

        // store 'allCharData' in supportData structure
        alignment.SupportData.AllCharData.assign((const char*)allCharData.Buffer, dataLength);

        // set success flag
        readCharDataOK = true;

        // save CIGAR ops
        // need to calculate this here so that  BamAlignment::GetEndPosition() performs correctly,
        // even when GetNextAlignmentCore() is called
        const unsigned int cigarDataOffset = alignment.SupportData.QueryNameLength;
        uint32_t* cigarData = (uint32_t*)(allCharData.Buffer + cigarDataOffset);
        CigarOp op;
        alignment.CigarData.clear();
        alignment.CigarData.reserve(alignment.SupportData.NumCigarOperations);
        for ( unsigned int i = 0; i < alignment.SupportData.NumCigarOperations; ++i ) {

            // swap endian-ness if necessary
            if ( m_isBigEndian ) BamTools::SwapEndian_32(cigarData[i]);

            // build CigarOp structure
            op.Length = (cigarData[i] >> Constants::BAM_CIGAR_SHIFT);
            op.Type   = Constants::BAM_CIGAR_LOOKUP[ (cigarData[i] & Constants::BAM_CIGAR_MASK) ];

            // save CigarOp
            alignment.CigarData.push_back(op);
        }
    }

    // return success/failure
    return readCharDataOK;
}

// loads reference data from BAM file
bool BamReaderPrivate::LoadReferenceData(void) {

    // get number of reference sequences
    char buffer[sizeof(uint32_t)];
    m_stream->Read(buffer, sizeof(uint32_t));
    uint32_t numberRefSeqs = BamTools::UnpackUnsignedInt(buffer);
    if ( m_isBigEndian ) BamTools::SwapEndian_32(numberRefSeqs);
    m_references.reserve((int)numberRefSeqs);

    // iterate over all references in header
    for ( unsigned int i = 0; i != numberRefSeqs; ++i ) {

        // get length of reference name
        m_stream->Read(buffer, sizeof(uint32_t));
        uint32_t refNameLength = BamTools::UnpackUnsignedInt(buffer);
        if ( m_isBigEndian ) BamTools::SwapEndian_32(refNameLength);
        RaiiBuffer refName(refNameLength);

        // get reference name and reference sequence length
        m_stream->Read(refName.Buffer, refNameLength);
        m_stream->Read(buffer, sizeof(int32_t));
        int32_t refLength = BamTools::UnpackSignedInt(buffer);
        if ( m_isBigEndian ) BamTools::SwapEndian_32(refLength);

        // store data for reference
        RefData aReference;
        aReference.RefName   = (string)((const char*)refName.Buffer);
        aReference.RefLength = refLength;
        m_references.push_back(aReference);
    }

    // return success
    return true;
}

bool BamReaderPrivate::LocateIndex(const BamIndex::IndexType& preferredType) {

    if ( m_randomAccessController.LocateIndex(this, preferredType) )
        return true;
    else {
        const string bracError = m_randomAccessController.GetErrorString();
        const string message = string("could not locate index: \n\t") + bracError;
        SetErrorString("BamReader::LocateIndex", message);
        return false;
    }
}

// opens BAM file (and index)
bool BamReaderPrivate::Open(const string& filename) {

    try {

        // make sure we're starting with fresh state
        Close();

        if ( 1 <= m_numThreads )
            m_stream = new ParallelBgzfStream(m_numThreads);
        else
            m_stream = new SerialBgzfStream();

        // open BgzfStream
        m_stream->Open(filename, IBamIODevice::ReadOnly);

        // load BAM metadata
        LoadHeaderData();
        LoadReferenceData();

        // store filename & offset of first alignment
        m_filename = filename;
        m_alignmentsBeginOffset = m_stream->Tell();

        // return success
        return true;

    } catch ( BamException& e ) {
        const string error = e.what();
        const string message = string("could not open file: ") + filename +
                               "\n\t" + error;
        SetErrorString("BamReader::Open", message);
        return false;
    }
}

bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) {

    if ( m_randomAccessController.OpenIndex(indexFilename, this) )
        return true;
    else {
        const string bracError = m_randomAccessController.GetErrorString();
        const string message = string("could not open index: \n\t") + bracError;
        SetErrorString("BamReader::OpenIndex", message);
        return false;
    }
}

// returns BAM file pointer to beginning of alignment data
bool BamReaderPrivate::Rewind(void) {

    // reset region
    m_randomAccessController.ClearRegion();

    // return status of seeking back to first alignment
    if ( Seek(m_alignmentsBeginOffset) )
        return true;
    else {
        const string currentError = m_errorString;
        const string message = string("could not rewind: \n\t") + currentError;
        SetErrorString("BamReader::Rewind", message);
        return false;
    }
}

bool BamReaderPrivate::Seek(const int64_t& position) {

    // skip if BAM file not open
    if ( !IsOpen() ) {
        SetErrorString("BamReader::Seek", "cannot seek on unopened BAM file");
        return false;
    }

    try {
        m_stream->Seek(position);
        return true;
    }
    catch ( BamException& e ) {
        const string streamError = e.what();
        const string message = string("could not seek in BAM file: \n\t") + streamError;
        SetErrorString("BamReader::Seek", message);
        return false;
    }
}

void BamReaderPrivate::SetErrorString(const string& where, const string& what) {
    static const string SEPARATOR = ": ";
    m_errorString = where + SEPARATOR + what;
}

void BamReaderPrivate::SetIndex(BamIndex* index) {
    m_randomAccessController.SetIndex(index);
}

// sets current region & attempts to jump to it
// returns success/failure
bool BamReaderPrivate::SetRegion(const BamRegion& region) {

    if ( m_randomAccessController.SetRegion(region, m_references.size()) )
        return true;
    else {
        const string bracError = m_randomAccessController.GetErrorString();
        const string message = string("could not set region: \n\t") + bracError;
        SetErrorString("BamReader::SetRegion", message);
        return false;
    }
}

int64_t BamReaderPrivate::Tell(void) const {
    return m_stream->Tell();
}

void BamReaderPrivate::SetParallel(int numThreads) {
    m_numThreads = numThreads;
}
