from __future__ import print_function, division, absolute_import

import sys

import pytest

from dask.sizeof import sizeof, getsizeof
from dask.utils import funcname


def test_base():
    """``sizeof`` of a plain integer equals what ``getsizeof`` reports."""
    value = 1
    measured = sizeof(value)
    assert measured == getsizeof(value)


def test_name():
    """``funcname`` reports ``'sizeof'`` for the ``sizeof`` callable."""
    reported = funcname(sizeof)
    assert reported == 'sizeof'


def test_containers():
    """A nested list is sized above three ints plus one empty list."""
    nested = [1, 2, [3]]
    measured = sizeof(nested)
    # Lower bound: three integer payloads and a single empty-list header.
    lower_bound = getsizeof(3) * 3 + getsizeof([])
    assert measured > lower_bound


def test_numpy():
    """Check ``sizeof`` on a 1000-element ``f8`` array and on a dtype object."""
    np = pytest.importorskip('numpy')

    # 1000 eight-byte floats: the reported size must fall in [8000, 9000].
    array_size = sizeof(np.empty(1000, dtype='f8'))
    assert array_size >= 8000
    assert array_size <= 9000

    # For a dtype object the result must match ``sys.getsizeof``.
    float_dtype = np.dtype('f8')
    assert sizeof(float_dtype) == sys.getsizeof(float_dtype)


def test_pandas():
    """Sanity-check ``sizeof`` on a small DataFrame, its columns and index."""
    pd = pytest.importorskip('pandas')
    columns = {
        'x': [1, 2, 3],
        'y': ['a' * 100, 'b' * 100, 'c' * 100],
    }
    df = pd.DataFrame(columns, index=[10, 20, 30])

    # The frame is at least as large as its columns, with one index's worth
    # of size subtracted from the column total.
    frame_size = sizeof(df)
    column_total = sizeof(df.x) + sizeof(df.y)
    assert frame_size >= column_total - sizeof(df.index)

    x_size, index_size = sizeof(df.x), sizeof(df.index)
    assert x_size >= index_size
    # Three strings of 100 characters each.
    assert sizeof(df.y) >= 3 * 100
    assert sizeof(df.index) >= 20

    # Results must be plain ints.
    for obj in (df, df.x, df.index):
        assert isinstance(sizeof(obj), int)


def test_pandas_repeated_column():
    """Selecting the same column three times yields a larger ``sizeof``."""
    pd = pytest.importorskip('pandas')
    single = pd.DataFrame({'x': [1, 2, 3]})
    tripled = single[['x', 'x', 'x']]

    assert sizeof(tripled) > sizeof(single)


def test_sparse_matrix():
    """Each sparse format of a 10x10 identity meets a minimum ``sizeof``."""
    sparse = pytest.importorskip('scipy.sparse')
    sp = sparse.eye(10)

    # Minimum sizes per conversion method; these are the 32-bit
    # Python 2.7 values.
    minimum_sizes = [
        ('todia', 152),
        ('tobsr', 232),
        ('tocoo', 240),
        ('tocsc', 232),
        ('tocsr', 232),
        ('todok', 192),
        ('tolil', 204),
    ]
    for converter, floor in minimum_sizes:
        converted = getattr(sp, converter)()
        assert sizeof(converted) >= floor


def test_serires_object_dtype():
    """``sizeof`` on an object-dtype Series reflects the strings it holds."""
    pd = pytest.importorskip('pandas')

    # 1000 one-character strings: strictly between 1x and 2x of
    # 1000 * sizeof('a').
    short_strings = pd.Series(['a'] * 1000)
    lower = sizeof('a') * 1000
    measured = sizeof(short_strings)
    upper = 2 * sizeof('a') * 1000
    assert lower < measured
    assert measured < upper

    # 1000 strings of 1000 characters each: more than a million bytes.
    long_strings = pd.Series(['a' * 1000] * 1000)
    assert sizeof(long_strings) > 1000000


def test_dataframe_object_dtype():
    """``sizeof`` on an object-dtype DataFrame reflects the strings it holds.

    Both the short-string and the long-string cases measure a DataFrame, so
    the DataFrame code path is exercised in each.
    """
    pd = pytest.importorskip('pandas')

    # 1000 one-character strings: strictly between 1x and 2x of
    # 1000 * sizeof('a').
    df = pd.DataFrame({'x': ['a'] * 1000})
    assert sizeof('a') * 1000 < sizeof(df) < 2 * sizeof('a') * 1000

    # 1000 strings of 1000 characters each: more than a million bytes.
    # This must be a DataFrame (not a Series) to test what the name promises.
    df = pd.DataFrame({'x': ['a' * 1000] * 1000})
    assert sizeof(df) > 1000000


def test_empty():
    """A zero-row frame, its columns and its index all have positive size."""
    pd = pytest.importorskip('pandas')
    columns = {
        'x': [1, 2, 3],
        'y': ['a' * 100, 'b' * 100, 'c' * 100],
    }
    df = pd.DataFrame(columns, index=[10, 20, 30])
    empty = df.head(0)

    for obj in (empty, empty.x, empty.y, empty.index):
        assert sizeof(obj) > 0
