##################################
#                                #
# Last modified 2017/12/14       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def _read_fasta(path):
    """Return a dict mapping each FASTA header (text after '>') to its sequence.

    Sequence lines belonging to one record are concatenated with surrounding
    whitespace stripped. Lines appearing before the first header are ignored.
    If a header occurs more than once, the last record wins.
    """
    records = {}
    name = None
    chunks = []
    with open(path) as handle:
        for line in handle:
            if line.startswith('>'):
                # A new record starts: store the one collected so far.
                if name is not None:
                    records[name] = ''.join(chunks)
                name = line.strip().split('>')[1]
                chunks = []
            elif name is not None:
                chunks.append(line.strip())
    if name is not None:
        records[name] = ''.join(chunks)
    return records


def run():
    """Write a per-sequence length / GC-fraction table for a FASTA file.

    Command line: ``python <script> fasta outputfilename``.

    The output file is tab-separated with a ``#chr  length  GC%`` header, one
    row per sequence (sorted by name) and a final ``#total`` row. ``length``
    counts every character of the sequence, while the ``GC%`` column is the
    fraction (0-1) ``GC / (A + C + G + T)``, computed case-insensitively, so
    other symbols such as N do not enter the ratio. Sequences without any
    A/C/G/T are reported as ``nan`` and do not contribute to the genome-wide
    value, which is the ACGT-weighted mean of the per-sequence fractions.
    The genome-wide value is also printed to stdout.

    Exits with status 1 after printing a usage message when fewer than two
    arguments are given.
    """
    # Both the input and the output path are required.
    if len(sys.argv) < 3:
        print('usage: python %s fasta outputfilename' % sys.argv[0])
        sys.exit(1)

    fasta = sys.argv[1]
    outfilename = sys.argv[2]

    records = _read_fasta(fasta)

    per_sequence = []     # (ACGT count, GC fraction) for each sequence
    total_acgt = 0.0      # float so the weights below use true division
    total_length = 0

    with open(outfilename, 'w') as outfile:
        outfile.write('#chr\tlength\tGC%\n')

        for name in sorted(records):
            seq = records[name].upper()
            length = len(seq)
            total_length += length
            gc = seq.count('G') + seq.count('C')
            at = seq.count('A') + seq.count('T')
            acgt = gc + at
            # Guard against sequences made only of N / other symbols.
            if acgt:
                gc_fraction = gc / (0.0 + acgt)
            else:
                gc_fraction = float('nan')
            outfile.write(name + '\t' + str(length) + '\t' + str(gc_fraction) + '\n')
            per_sequence.append((acgt, gc_fraction))
            total_acgt += acgt

        if total_acgt:
            genome_gc = 0.0
            for acgt, gc_fraction in per_sequence:
                # Skip empty entries: their fraction is nan and weight is 0.
                if acgt:
                    genome_gc += (acgt / total_acgt) * gc_fraction
        else:
            genome_gc = float('nan')

        print(outfilename + '\tgenome-wide GC\t' + str(genome_gc))
        # The summary row is written without a trailing newline.
        outfile.write('#total\t' + str(total_length) + '\t' + str(genome_gc))

   
# Run only when executed as a script, so importing this module has no side effects.
if __name__ == '__main__':
    run()
