#!/usr/bin/python

import re
inputfile_name = "accepted_hits.sam"  # The output of TopHat storing the junction information
outputfile_name = "Output_read_position.txt"

# One CIGAR operation: a decimal length followed by a one-letter operator.
CIGAR_OP = re.compile(r'(\d+)([MIDNSHP=X])')


def cigar_to_blocks(mapping_start_pos, cigar):
    """Return the reference intervals of an alignment, split at N gaps.

    Args:
        mapping_start_pos: value of the SAM POS column (an int).
        cigar: value of the SAM CIGAR column (a string such as "10M100N20M").

    Returns:
        A ``(starts, ends)`` pair of equally long integer lists.  The first
        start is ``mapping_start_pos - 1`` and every end is the start plus the
        reference length covered by that block.  Both lists are empty when the
        string contains no CIGAR operation (e.g. "*").
    """
    starts = []
    ends = []
    operations = CIGAR_OP.findall(cigar)
    if not operations:
        return starts, ends

    block_start = mapping_start_pos - 1
    ref_pos = block_start
    for length, op in operations:
        length = int(length)
        if op == 'N':
            # A skipped region closes the current block and opens the next one.
            starts.append(block_start)
            ends.append(ref_pos)
            ref_pos += length
            block_start = ref_pos
        elif op in 'MD=X':
            # These operators advance along the reference inside a block.
            ref_pos += length
        # I, S, H and P do not advance along the reference, so they are
        # ignored; treating every number as alternating M/N lengths would
        # misplace the blocks of reads containing insertions or deletions.
    starts.append(block_start)
    ends.append(ref_pos)
    return starts, ends


def sam_line_to_record(line):
    """Convert one tab-separated SAM alignment line to an output record.

    The record is ``name<TAB>strand<TAB>starts<TAB>ends`` (no trailing
    newline), where the name is the third column, starts/ends are the
    comma-joined results of ``cigar_to_blocks`` on the fourth and sixth
    columns, and the strand is always written as '+' (the FLAG column is
    not consulted).

    Raises:
        IndexError: if the line has fewer than six columns.
        ValueError: if the fourth column is not an integer.
    """
    fields = line.split("\t")
    chr_name = fields[2]
    strand = '+'
    starts, ends = cigar_to_blocks(int(fields[3]), fields[5])
    return "\t".join([
        chr_name,
        strand,
        ",".join(str(pos) for pos in starts),
        ",".join(str(pos) for pos in ends),
    ])


def main(input_path=inputfile_name, output_path=outputfile_name):
    """Write one block record per alignment line of ``input_path``.

    Lines starting with '@' (SAM header lines) and blank lines are skipped,
    instead of assuming the header is exactly two lines long.  The output
    file is only created once the input file has been opened successfully.

    Raises:
        SystemExit: if the input file cannot be opened.
    """
    try:
        file_read = open(input_path, 'r')
    except IOError:
        # Stop here; continuing would only fail later on the missing handle.
        raise SystemExit("The input file does not exist, exiting!")

    # Nested ``with`` blocks close both files even if a line fails to parse.
    with file_read:
        with open(output_path, 'w') as myfile:
            for line in file_read:
                if line.startswith('@') or not line.strip():
                    continue
                myfile.write(sam_line_to_record(line) + '\n')


if __name__ == "__main__":
    main()

