##################################
#                                #
# Last modified 2023/09/16       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import os
import math
import random
import string

def run():

    if len(sys.argv) < 4:
        print 'usage: python %s PE-BAM samtools chrom.sizes outfile(SAM)' % sys.argv[0]
        sys.exit(1)

    BAM = sys.argv[1]
    samtools = sys.argv[2]
    chrominfo = sys.argv[3]
    newBAM = sys.argv[4]

    chromInfoList = []
    chromInfoDict = {}
    linelist = open(chrominfo)
    for line in linelist:
        fields = line.strip().split('\t')
        chr = fields[0]
        start = 0
        end = int(fields[1])
        chromInfoList.append((chr,start,end))
        chromInfoDict[chr] = (start,end)

    outfile = open(newBAM, 'w')

    for (chr,start,end) in chromInfoList:
        ReadDict = {}
        cmd = samtools + ' view ' + BAM + ' ' + chr + ':' + str(start) + '-' + str(end)
#        print cmd
        print (chr,start,end)
        p = os.popen(cmd, "r")
        line = 'line'
        while line != '':
            line = p.readline().strip()
            if line == '':
                break
            fields = line.strip().split('\t')
            pos = int(fields[3])
            nextposdist = int(math.fabs(int(fields[7]) - pos))
            RL = len(fields[9])
            newpos = random.randint(start,end)
            if fields[1] == '99':
               newR1 = fields[0] + '\t99\t' + fields[2] + '\t' + str(newpos) + '\t' + fields[4] + '\t' + fields[5] + '\t' + fields[6]
               newR1 = newR1 + '\t' + str(newpos + nextposdist) + '\t' + fields[8] + '\t' + RL*'N' + '\t' + RL*'A'
               for i in range(11,len(fields)):
                   newR1 = newR1 + '\t' + fields[i]
               newR2 = fields[0] + '\t147\t' + fields[2] + '\t' + str(newpos + nextposdist) + '\t' + fields[4] + '\t' + fields[5] + '\t' + fields[6]
               newR2 = newR2 + '\t' + str(newpos) + '\t-' + fields[8] + '\t' + RL*'N' + '\t' + RL*'A'
               for i in range(11,len(fields)):
                   newR2 = newR2 + '\t' + fields[i]
            if fields[1] == '163':
               newR1 = fields[0] + '\t163\t' + fields[2] + '\t' + str(newpos) + '\t' + fields[4] + '\t' + fields[5] + '\t' + fields[6]
               newR1 = newR1 + '\t' + str(newpos + nextposdist) + '\t' + fields[8] + '\t' + RL*'N' + '\t' + RL*'A'
               for i in range(11,len(fields)):
                   newR1 = newR1 + '\t' + fields[i]
               newR2 = fields[0] + '\t83\t' + fields[2] + '\t' + str(newpos + nextposdist) + '\t' + fields[4] + '\t' + fields[5] + '\t' + fields[6]
               newR2 = newR2 + '\t' + str(newpos) + '\t-' + fields[8] + '\t' + RL*'N' + '\t' + RL*'A'
               for i in range(11,len(fields)):
                   newR2 = newR2 + '\t' + fields[i]
            outfile.write(newR1 + '\n')
            outfile.write(newR2 + '\n')

    outfile.close()

#    cmd = samtools + ' view -bT ' + genome + ' ' + outfile + ' | samtools sort - ' + newBAM
#    os.system(cmd)

#    cmd = 'rm ' + outfile
#    os.system(cmd)

run()

