##################################
#                                #
# Last modified 08/27/2015       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def run():

    if len(sys.argv) < 4:
        print 'usage: python %s fasta bed chrFieldID outputfilename' % sys.argv[0]
        sys.exit(1)

    fasta = sys.argv[1]
    bed = sys.argv[2]
    chrFieldID = int(sys.argv[3])
    outfilename = sys.argv[4]

    GenomeDict={}
    sequence=''
    inputdatafile = open(fasta)
    for line in inputdatafile:
        if line[0]=='>':
            if sequence != '':
                GenomeDict[chr] = ''.join(sequence).upper()
            chr = line.strip().split('>')[1]
            sequence=[]
            Keep=False
            continue
        else:
            sequence.append(line.strip())
    GenomeDict[chr] = ''.join(sequence).upper()

    outfile = open(outfilename, 'w')

    inputdatafile = open(bed)
    for line in inputdatafile:
        if line.startswith('#'):
            outline = line.strip() + '\t' + '%GC'
            outfile.write(outline + '\n')
            continue
        fields = line.strip().split('\t')
        chr = fields[chrFieldID]
        left = int(fields[chrFieldID + 1])
        right = int(fields[chrFieldID + 2])
        G = 0.0
        C = 0.0
        T = 0.0
        A = 0.0
        for i in range(left,min(right,len(GenomeDict[chr]))):
            if GenomeDict[chr][i] == 'A':
                A += 1
            if GenomeDict[chr][i] == 'C':
                C += 1
            if GenomeDict[chr][i] == 'G':
                G += 1
            if GenomeDict[chr][i] == 'T':
                T += 1
        if (G + C + A + T) == 0:
            GC = 'nan'
        else:
            GC = (G + C)/(G + C + A + T)
        outfile.write(line.strip() + '\t' + str(GC) + '\n')

    outfile.close()
   
run()
