# Copyright (c) 2013, Ian Reid, Concordia University Centre for Structural and Functional Genomics
# All rights reserved.


__author__ = 'ian'

import sys, os
this_dir = os.path.dirname(__file__)
src = os.path.dirname(this_dir)
sys.path.append(src)
import argparse
from pysam import Tabixfile
from lib.gff3Iterator import GFF3Iterator
from calcTranscriptFragmentationProbabilities import get_exon_origin_counts_list, origin_count_line

# Text passed to argparse as the program description (shown by --help).
DESCRIPTION = 'Extract the read origin counts for specific transcripts from a genomic read origins file'
# Script version, appended to the program name for the --version option.
VERSION = '0.1'


def get_args():
    """Parse the command line (``sys.argv``) and return the argparse namespace.

    The returned namespace has these attributes:

    * ``verbose`` -- number of ``-v`` flags given (0 when omitted)
    * ``input``   -- readable file object for the transcript input
                     (``--in``/``-i``); ``sys.stdin`` when the option is
                     omitted, given without a value, or given as ``-``
    * ``out``     -- writable file object for the output (``--out``/``-o``);
                     ``sys.stdout`` when the option is omitted, given without
                     a value, or given as ``-``
    * ``origins`` -- path string of the counted-origins tabix file
                     (``--origins``/``-c``, required)

    ``--version`` prints "<prog> <VERSION>" and exits. argparse exits the
    process (SystemExit) on ``--help``, ``--version`` or invalid arguments.
    """
    argparser = argparse.ArgumentParser(description=DESCRIPTION)
    # standard options
    # The space keeps the program name and version number apart ("prog 0.1").
    argparser.add_argument('--version', action='version', version='%(prog)s ' + VERSION)
    argparser.add_argument('--verbose', '-v', action='count', default=0,
                           help='Omit to see only fatal error messages; -v to see warnings; -vv to see warnings and '
                                'progress messages')
    # options to customize
    # With nargs='?', argparse stores `const` when the flag is present without
    # a value; without it a bare "-i"/"-o" would yield None instead of the
    # standard stream promised by the help text.
    argparser.add_argument('--in', '-i', dest='input', type=argparse.FileType('r'), nargs='?',
                           const=sys.stdin, default=sys.stdin,
                           help='Path to the transcript input file; if omitted or -, input is read from stdin')
    argparser.add_argument('--out', '-o', type=argparse.FileType('w'), nargs='?',
                           const=sys.stdout, default=sys.stdout,
                           help='Path to the output file; if omitted or -, output is written to stdout')
    argparser.add_argument('--origins', '-c', required=True, help="Path to the genomic counted origins tabix file")
    return argparser.parse_args()


def get_counts_for_transcript(transcript, origins):
    """Build the per-position read origin count records for one transcript.

    The transcript's exons are visited in the order ``get_exons()`` returns
    them, and the records produced by ``get_exon_origin_counts_list`` for
    each exon are collected. The summed length of the preceding exons is
    passed as that helper's third argument (presumably the exon's offset in
    transcript coordinates -- confirm against
    calcTranscriptFragmentationProbabilities).

    :param transcript: object providing ``get_exons()`` and ``get_ID()``
    :param origins: counted-origins source, handed through unchanged to
        ``get_exon_origin_counts_list``
    :return: an empty list when no records were collected; otherwise a list
        with one entry per transcript position, where each collected record
        sits at index ``record.position`` (with its ``seq_id`` rewritten to
        the transcript ID) and every other index holds a placeholder
        ``origin_count_line(transcript_id, index, 0, 0)``.
    """
    observed = []
    offset = 0
    for exon in transcript.get_exons():
        observed += get_exon_origin_counts_list(exon, origins, offset)
        offset += len(exon)

    # Nothing was counted on any exon: report no lines for this transcript.
    if not observed:
        return []

    transcript_id = transcript.get_ID()

    # Start from an all-zero placeholder for every position in the transcript.
    per_position = []
    for position in range(offset):
        per_position.append(origin_count_line(transcript_id, position, 0, 0))

    # Overwrite placeholders with the collected records, relabelled so that
    # they carry the transcript ID.
    for record in observed:
        record.seq_id = transcript_id
        per_position[record.position] = record
    return per_position


if __name__ == '__main__':
    # Script entry point: for every transcript of every gene in the GFF3
    # input, write one line per origin count record to the output file,
    # preceded by a single tab-separated header line.
    args = get_args()

    origins = Tabixfile(args.origins)
    try:
        genes = GFF3Iterator(args.input).genes()
        # write() is used instead of the Python 2-only "print >> file"
        # statement; the bytes emitted are the same (text plus a newline).
        args.out.write('chrom\tposn\tleft\tright\n')
        for gene in genes:
            for transcript in gene.get_transcripts():
                for ocl in get_counts_for_transcript(transcript, origins):
                    args.out.write(repr(ocl) + '\n')
    finally:
        # Release the tabix handle and flush/close the output even when an
        # error interrupts processing part-way through.
        origins.close()
        args.out.close()

    # Completion notice goes to stderr so it never mixes with the data output.
    sys.stderr.write('%s done.\n' % sys.argv[0])
