##################################
#                                #
# Last modified 01/13/2015       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from sets import Set

def _read_unique_ids(inputfilename, fieldID):
    """Return the sorted, de-duplicated IDs from one column of a table.

    inputfilename -- path to a tab-delimited text file; blank lines are skipped
    fieldID       -- 0-based index of the column holding the ID

    Raises IndexError if a non-blank line has fewer than fieldID + 1 columns.
    """
    unique_ids = set()
    # The context manager closes the input file even if a line fails to parse.
    with open(inputfilename) as infile:
        for line in infile:
            # NOTE: strip() also removes leading tabs, so a row whose first
            # column is empty has its remaining columns shifted left.
            stripped = line.strip()
            if not stripped:
                continue
            unique_ids.add(stripped.split('\t')[fieldID])
    return sorted(unique_ids)


def _efetch_urls(IDList, N, db, rettype):
    """Return one efetch URL per batch of at most N IDs (N must be >= 1).

    An empty IDList yields an empty list, so no ID-less URL is produced.
    """
    prefix = ('http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=' +
              db + '&rettype=' + rettype + '&retmode=text&id=')
    return [prefix + ','.join(IDList[start:start + N])
            for start in range(0, len(IDList), N)]


def run():
    """Write batched NCBI efetch URLs for the IDs listed in a table.

    Reads six positional command-line arguments from sys.argv:
    inputfilename, fieldID, N, db, rettype, outfilename.  The IDs in column
    fieldID of the input file are de-duplicated, sorted, and written to
    outfilename as efetch URLs, one URL per line, each carrying at most N
    comma-separated IDs.

    Exits with status 1 (after printing a message) when arguments are
    missing or N is not a positive integer.
    """
    # Six arguments follow the script name, so argv needs seven entries;
    # sys.argv[6] is read below.
    if len(sys.argv) < 7:
        print('usage: python %s inputfilename fieldID N db rettype outfilename' % sys.argv[0])
        print('\tInput parameters:')
        print('\t\tinputfilename - list/table of accession IDs')
        print('\t\tfieldID - column position of ID (0-based)')
        print('\t\tN - maximum number of IDs in each command')
        print('\t\tdb - name of database')
        print('\t\trettype - return type format')
        print('\t\toutfilename - file to write the efetch URLs to')
        sys.exit(1)

    inputfilename = sys.argv[1]
    fieldID = int(sys.argv[2])
    N = int(sys.argv[3])
    db = sys.argv[4]
    rettype = sys.argv[5]
    outputfilename = sys.argv[6]

    # A batch size below 1 cannot be used to split the ID list.
    if N < 1:
        sys.exit('N must be a positive integer, got %d' % N)

    IDList = _read_unique_ids(inputfilename, fieldID)

    with open(outputfilename, 'w') as outfile:
        for url in _efetch_urls(IDList, N, db, rettype):
            outfile.write(url + '\n')

# Execute only when invoked as a script, so that importing this module
# does not parse sys.argv or write any files.
if __name__ == '__main__':
    run()

