##################################
#                                #
# Last modified 2018/07/16       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import numpy as np
from sklearn.neighbors import KernelDensity
from scipy.stats import gaussian_kde
from statsmodels.nonparametric.kde import KDEUnivariate
from statsmodels.nonparametric.kernel_density import KDEMultivariate

def kde_scipy(x, x_grid, bandwidth=0.2, **kwargs):
    """Kernel Density Estimation with Scipy"""
    # Note that scipy weights its bandwidth by the covariance of the
    # input data.  To make the results comparable to the other methods,
    # we divide the bandwidth by the sample standard deviation here.
    kde = gaussian_kde(x, bw_method=bandwidth / x.std(ddof=1), **kwargs)
    return kde.evaluate(x_grid)

def run():

    if len(sys.argv) < 2:
        print 'usage: python %s input fieldID' % sys.argv[0]
        print '\tthe script will print to stdout by default'
        sys.exit(1)
    
    input = sys.argv[1]
    fieldID = int(sys.argv[2])

    X = []

    DataDict={}

    if input == '-':
        linelist = sys.stdin
    else:
        linelist = open(input)
    for line in linelist:
        fields = line.strip().split('\t')
        X.append(float(fields[fieldID]))

    x_grid = np.linspace(min(X), max(X), 1000)

    X = np.array(X)

    KDE = kde_scipy(X,x_grid,bandwidth=0.2)

    for i in range(len(KDE)):
        print str(x_grid[i]) + '\t' + str(KDE[i])

   
run()
