##################################
#                                #
# Last modified 2020/03/03       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import gc
import pysam
import string
from sets import Set
import os

# FLAG field meaning (SAM format); columns: hex mask, decimal value, description
# 0x0001 1 the read is paired in sequencing, no matter whether it is mapped in a pair
# 0x0002 2 the read is mapped in a proper pair (depends on the protocol, normally inferred during alignment)
# 0x0004 4 the query sequence itself is unmapped
# 0x0008 8 the mate is unmapped
# 0x0010 16 strand of the query (0 for forward; 1 for reverse strand)
# 0x0020 32 strand of the mate
# 0x0040 64 the read is the first read in a pair
# 0x0080 128 the read is the second read in a pair
# 0x0100 256 the alignment is not primary (a read having split hits may have multiple primary alignment records)
# 0x0200 512 the read fails platform/vendor quality checks
# 0x0400 1024 the read is either a PCR duplicate or an optical duplicate
# 0x0800 2048 supplementary alignment

def FLAG(FLAG):
    """Split an integer FLAG value into the individual bit values it contains.

    FLAG -- integer bit field (e.g. the FLAG column of a SAM/BAM record).

    Returns a list of the powers of two that are set in FLAG, largest first
    (e.g. 99 -> [64, 32, 2, 1]). Zero and negative values yield an empty
    list. Bits above 2048 are reported as well rather than being mangled,
    so the result always sums back to FLAG for non-negative input.
    """

    FLAGList = []

    # Nothing to decompose; also keeps negative input from looping forever
    # under two's-complement bit tests.
    if FLAG <= 0:
        return FLAGList

    # Walk the bits from least to most significant, collecting the set ones.
    bit = 1
    while bit <= FLAG:
        if FLAG & bit:
            FLAGList.append(bit)
        bit <<= 1

    # Callers receive the components in descending order.
    FLAGList.reverse()

    return FLAGList

def run():
    """Write the soft-clipped base counts of every alignment in a BAM file.

    Command line: BAMfilename chrom.sizes outputfilename

    chrom.sizes is a tab-separated file whose first column is a chromosome
    name and whose second column is its length. For each chromosome listed,
    all alignments are fetched from the (indexed) BAM file and one line is
    written per alignment: the number of soft-clipped bases in the first
    CIGAR operation, a tab, and the number in the last CIGAR operation
    (0 when that operation is not a soft clip).

    Prints a usage message and exits with status 1 when arguments are missing.
    """

    # Three positional arguments are read below (argv[1..3]), so argv must
    # contain at least four entries.
    if len(sys.argv) < 4:
        print('usage: python %s BAMfilename chrom.sizes outputfilename ' % sys.argv[0])
        print('       BAM file has to be indexed')
        sys.exit(1)

    SAM = sys.argv[1]
    chrominfo = sys.argv[2]
    outputfilename = sys.argv[3]

    # Read (name, 0, length) for every chromosome, in file order.
    chromInfoList = []
    with open(chrominfo) as linelist:
        for line in linelist:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            fields = stripped.split('\t')
            chromInfoList.append((fields[0], 0, int(fields[1])))

    samfile = pysam.Samfile(SAM, "rb")
    try:
        with open(outputfilename, 'w') as outfile:
            i = 0
            for (chrom, start, end) in chromInfoList:
                for alignedread in samfile.fetch(chrom, start, end):
                    i += 1
                    if i % 100000 == 0:
                        print('%sM alignments processed in multiplicity assessment %s %s %s %s'
                              % (i/1000000., chrom, start, alignedread.pos, end))

                    cigar = alignedread.cigar
                    if not cigar:
                        # Records without a CIGAR (e.g. unmapped reads placed
                        # at their mate's position) have no clipping to report.
                        continue

                    # CIGAR operation code 4 is a soft clip ('S').
                    # NOTE(review): only the outermost operations are
                    # inspected, so a soft clip sitting inside a hard clip
                    # (e.g. 5H10S...) is reported as 0 -- confirm intended.
                    (m1, bp1) = cigar[0]
                    (m2, bp2) = cigar[-1]
                    left = bp1 if m1 == 4 else 0
                    right = bp2 if m2 == 4 else 0

                    outfile.write(str(left) + '\t' + str(right) + '\n')
    finally:
        samfile.close()

# Run the analysis only when this file is executed as a script, so that
# importing it (e.g. to reuse FLAG) does not parse sys.argv or exit.
if __name__ == '__main__':
    run()
