##################################
#                                #
# Last modified 2024/08/09       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math

def run():

    if len(sys.argv) < 4:
        print 'usage: python %s fasta window step outfilename [-chr chrN1(,chrN2....)]'
        sys.exit(1)

    doSingleBP=False
    if '-singlebasepair' in sys.argv:
        doSingleBP=True

    fasta = sys.argv[1]
    window = int(sys.argv[2])
    step = int(sys.argv[3])
    outfilename = sys.argv[4]

    doChrSubset=False
    if '-chr' in sys.argv:
        doChrSubset=True
        WantedChrDict={}
        for chr in sys.argv[sys.argv.index('-chr')+1].split(','):
            WantedChrDict[chr]=''

    SequenceDict={}
    linelist=open(fasta)
    Keep=False
    sequence = ''
    chr = ''
    for line in linelist:
        if line.startswith('>'):
            if sequence != '' and chr != '':
                SequenceDict[chr] = ''.join(sequence)
            chr = line.strip().split('>')[1]
            if doChrSubset:
                if WantedChrDict.has_key(chr):
                    Keep=False
                else:
                    Keep=True
            else:
                Keep=True
            sequence = []
            continue
        else:
            sequence.append(line.strip())
    SequenceDict[chr] = ''.join(sequence)

    chrList = SequenceDict.keys()
    chrList.sort()

    outfile = open(outfilename, 'w')

    for chr in chrList:
        print chr, len(SequenceDict[chr])
        for i in range(0,len(SequenceDict[chr]) - window,step):
            windowseq = SequenceDict[chr][i:i + window].upper()
            if windowseq.count('N') == len(windowseq):
                continue
            GC = (windowseq.count('G') + windowseq.count('C'))/(window + 0.0 - windowseq.count('N'))
            outline = chr + '\t' + str(i) + '\t' + str(i + window) + '\t' + str(GC)
            outfile.write(outline+'\n')

    outfile.close()
            
run()
