import pytest

from bkcharts.stats import Bins, Histogram
from bokeh.models import ColumnDataSource

import pandas as pd


@pytest.fixture
def ds(test_data):
    """Provide a ``ColumnDataSource`` built from ``test_data.auto_data``.

    ``test_data`` is another fixture supplied outside this file.
    """
    auto_source = ColumnDataSource(test_data.auto_data)
    return auto_source


def test_explicit_bin_count(ds):
    """Requesting an explicit number of bins yields exactly that many."""
    requested = 2
    binned = Bins(source=ds, column='mpg', bins=requested)
    assert len(binned.bins) == requested


def test_auto_bin_count(ds):
    """Check the bin count chosen when no explicit count is given."""
    # No ``bins`` argument: the 'mpg' column is expected to give 12 bins.
    auto_binned = Bins(source=ds, column='mpg')
    assert len(auto_binned.bins) == 12

    # The automatic choice must also work when every value in the input is
    # identical; 3 bins are expected. Background on the bin-width rule:
    # http://stats.stackexchange.com/questions/114490/optimal-bin-width-for-two-dimensional-histogram
    constant_values = [5, 5, 5, 5, 5]
    constant_binned = Bins(values=constant_values, bins=None)
    assert len(constant_binned.bins) == 3


def test_bin_labeling(ds):
    """Binning 'cyl' into 2 bins leaves 2 distinct labels in 'cyl_bin'."""
    # The Bins object itself is not needed; the test inspects the
    # 'cyl_bin' column found on the data source afterwards.
    Bins(source=ds, column='cyl', bins=2)

    labels = pd.Series(ds.data['cyl_bin'])
    distinct_labels = labels.drop_duplicates()
    assert len(distinct_labels) == 2


def test_histogram_wo_density():
    """Histogram of ``range(10)`` in 3 bins: check labels and bin values."""
    expected_labels = [
        '[0.000000, 3.000000]',
        '(3.000000, 6.000000]',
        '(6.000000, 9.000000]',
    ]
    expected_counts = [3, 3, 4]

    hist = Histogram(values=list(range(10)), bins=3)

    assert len(hist.bins) == 3

    labels = [bin_.label[0] for bin_ in hist.bins]
    assert labels == expected_labels

    counts = [bin_.values[0] for bin_ in hist.bins]
    assert counts == expected_counts


def test_histogram_w_density():
    """Histogram of ``range(10)`` in 3 bins with ``density=True``.

    Checks the number of bins, their labels, and the first value of each
    bin. The expected values are floating-point numbers, so they are
    compared with a tolerance rather than exact equality.
    """
    values = list(range(10))
    h = Histogram(values=values, bins=3, density=True)

    assert len(h.bins) == 3
    assert [b.label[0] for b in h.bins] == [
        '[0.000000, 3.000000]',
        '(3.000000, 6.000000]',
        '(6.000000, 9.000000]',
    ]
    # Exact ``==`` on computed floats is brittle (last-digit rounding may
    # differ between platforms or numeric library versions), so use
    # pytest.approx for the comparison.
    assert [b.values[0] for b in h.bins] == pytest.approx(
        [0.1, 0.1, 0.13333333333333333]
    )


def test_histogram_ill_defined_data():
    """Histogram of two identical values still produces sensible bins.

    For each sample value, the histogram must have between 1 and 3 bins
    and the bin values must add up to the 2 input points.
    """
    # See e.g. #3660
    samples = (-21, -0.001, 0, 0.001, 21)
    for x in samples:
        hist = Histogram(values=[x, x])

        bin_count = len(hist.bins)
        assert 1 <= bin_count <= 3

        total = sum(bin_.value for bin_ in hist.bins)
        assert total == 2
