##################################
#                                #
# Last modified 8/12/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def _read_fasta(filename):
    """Return the records of a FASTA file as a list of (name, sequence).

    The name is the header text after '>' up to the first space.  All
    sequence lines that follow a header are concatenated, so wrapped
    (multi-line) records are read as one sequence.  Blank lines are ignored
    and headers with no sequence are skipped.

    Raises ValueError if sequence data appears before the first header.
    """
    records = []
    name = None
    chunks = []
    with open(filename) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue
            if line[0] == '>':
                if name is not None and chunks:
                    records.append((name, ''.join(chunks)))
                name = line.split('>')[1].split(' ')[0]
                chunks = []
            elif name is None:
                raise ValueError('%s: sequence data found before the first '
                                 'FASTA header' % filename)
            else:
                chunks.append(line)
    if name is not None and chunks:
        records.append((name, ''.join(chunks)))
    return records


def _seed_site(seed):
    """Return the reverse complement of a miRNA seed, in the DNA alphabet.

    'T' is accepted as a synonym for 'U' so DNA-alphabet miRNA files work.
    Raises KeyError for any base outside A, C, G, U, T, N.
    """
    complement = {'A': 'T', 'G': 'C', 'U': 'A', 'T': 'A', 'C': 'G', 'N': 'N'}
    return ''.join(complement[base] for base in reversed(seed))


def run():
    """Report, for every miRNA, the targets containing its seed site.

    Command line arguments (read from sys.argv):
        1. FASTA file of target sequences
        2. FASTA file of miRNA sequences
        3. seed length (positive integer)
        4. output file name

    The seed of a miRNA is the slice [2:2+seedlength] of its sequence.  Each
    miRNA name and seed is echoed to stdout.  For every target containing
    the reverse complement of the seed, one line
    "<target>, <miRNA>, <count> matches" is written to the output file,
    where count is the number of non-overlapping occurrences.

    Exits with status 1 when arguments are missing or the seed length is
    not positive.
    """
    # Four arguments are required, so argv needs five entries: the old
    # "< 4" check let a missing output file name through to an IndexError.
    if len(sys.argv) < 5:
        print('usage: python %s targetsfilename(fasta) miRNAfilename(fasta) minimalseedmatch outputfilename' % sys.argv[0])
        sys.exit(1)

    targetfilename = sys.argv[1]
    miRNAfilename = sys.argv[2]
    seedlength = int(sys.argv[3])
    outfilename = sys.argv[4]

    if seedlength < 1:
        print('minimalseedmatch must be a positive integer')
        sys.exit(1)

    # Read both inputs before opening the output file, so a bad input does
    # not leave behind a truncated result file.
    targets = _read_fasta(targetfilename)
    miRNAs = []
    for name, sequence in _read_fasta(miRNAfilename):
        seed = sequence[2:2 + seedlength]
        # A sequence too short to yield any seed cannot be searched for.
        if seed:
            miRNAs.append((name, seed))

    with open(outfilename, 'w') as outfile:
        for miRNA, seed in miRNAs:
            # Single-argument print(...) gives the same output on Python 2
            # and Python 3.
            print('%s %s' % (miRNA, seed))
            site = _seed_site(seed)
            for name, sequence in targets:
                matches = sequence.count(site)
                if matches:
                    outfile.write('%s, %s, %d matches\n' % (name, miRNA, matches))
   
# Script entry point: run() is called unconditionally, so it also executes
# (and reads sys.argv) whenever this file is imported as a module.
run()
