import numpy as np
import scipy.sparse as sp

from scipy import linalg
from numpy.testing import (assert_array_almost_equal,
                           assert_array_equal,
                           assert_equal)

from sklearn.datasets import make_classification
from sklearn.utils.sparsefuncs import (mean_variance_axis,
                                       incr_mean_variance_axis,
                                       inplace_column_scale,
                                       inplace_row_scale,
                                       inplace_swap_row, inplace_swap_column,
                                       min_max_axis,
                                       count_nonzero, csc_median_axis_0)
from sklearn.utils.sparsefuncs_fast import assign_rows_csr
from sklearn.utils.testing import assert_raises


def test_mean_variance_axis0():
    X, _ = make_classification(5, 4, random_state=0)
    # Sparsify the array a little bit
    X[0, 0] = 0
    X[2, 1] = 0
    X[4, 3] = 0
    X_lil = sp.lil_matrix(X)
    X_lil[1, 0] = 0
    X[1, 0] = 0
    X_csr = sp.csr_matrix(X_lil)

    X_means, X_vars = mean_variance_axis(X_csr, axis=0)
    assert_array_almost_equal(X_means, np.mean(X, axis=0))
    assert_array_almost_equal(X_vars, np.var(X, axis=0))

    X_csc = sp.csc_matrix(X_lil)
    X_means, X_vars = mean_variance_axis(X_csc, axis=0)

    assert_array_almost_equal(X_means, np.mean(X, axis=0))
    assert_array_almost_equal(X_vars, np.var(X, axis=0))
    assert_raises(TypeError, mean_variance_axis, X_lil, axis=0)

    X = X.astype(np.float32)
    X_csr = X_csr.astype(np.float32)
    X_csc = X_csr.astype(np.float32)
    X_means, X_vars = mean_variance_axis(X_csr, axis=0)
    assert_array_almost_equal(X_means, np.mean(X, axis=0))
    assert_array_almost_equal(X_vars, np.var(X, axis=0))
    X_means, X_vars = mean_variance_axis(X_csc, axis=0)
    assert_array_almost_equal(X_means, np.mean(X, axis=0))
    assert_array_almost_equal(X_vars, np.var(X, axis=0))
    assert_raises(TypeError, mean_variance_axis, X_lil, axis=0)


def test_mean_variance_axis1():
    X, _ = make_classification(5, 4, random_state=0)
    # Sparsify the array a little bit
    X[0, 0] = 0
    X[2, 1] = 0
    X[4, 3] = 0
    X_lil = sp.lil_matrix(X)
    X_lil[1, 0] = 0
    X[1, 0] = 0
    X_csr = sp.csr_matrix(X_lil)

    X_means, X_vars = mean_variance_axis(X_csr, axis=1)
    assert_array_almost_equal(X_means, np.mean(X, axis=1))
    assert_array_almost_equal(X_vars, np.var(X, axis=1))

    X_csc = sp.csc_matrix(X_lil)
    X_means, X_vars = mean_variance_axis(X_csc, axis=1)

    assert_array_almost_equal(X_means, np.mean(X, axis=1))
    assert_array_almost_equal(X_vars, np.var(X, axis=1))
    assert_raises(TypeError, mean_variance_axis, X_lil, axis=1)

    X = X.astype(np.float32)
    X_csr = X_csr.astype(np.float32)
    X_csc = X_csr.astype(np.float32)
    X_means, X_vars = mean_variance_axis(X_csr, axis=1)
    assert_array_almost_equal(X_means, np.mean(X, axis=1))
    assert_array_almost_equal(X_vars, np.var(X, axis=1))
    X_means, X_vars = mean_variance_axis(X_csc, axis=1)
    assert_array_almost_equal(X_means, np.mean(X, axis=1))
    assert_array_almost_equal(X_vars, np.var(X, axis=1))
    assert_raises(TypeError, mean_variance_axis, X_lil, axis=1)


def test_incr_mean_variance_axis():
    for axis in [0, 1]:
        rng = np.random.RandomState(0)
        n_features = 50
        n_samples = 10
        data_chunks = [rng.random_integers(0, 1, size=n_features)
                       for i in range(n_samples)]

        # default params for incr_mean_variance
        last_mean = np.zeros(n_features)
        last_var = np.zeros_like(last_mean)
        last_n = 0

        # Test errors
        X = np.array(data_chunks[0])
        X = np.atleast_2d(X)
        X_lil = sp.lil_matrix(X)
        X_csr = sp.csr_matrix(X_lil)
        assert_raises(TypeError, incr_mean_variance_axis, axis,
                      last_mean, last_var, last_n)
        assert_raises(TypeError, incr_mean_variance_axis, axis,
                      last_mean, last_var, last_n)
        assert_raises(TypeError, incr_mean_variance_axis, X_lil, axis,
                      last_mean, last_var, last_n)

        # Test _incr_mean_and_var with a 1 row input
        X_means, X_vars = mean_variance_axis(X_csr, axis)
        X_means_incr, X_vars_incr, n_incr = \
            incr_mean_variance_axis(X_csr, axis, last_mean, last_var, last_n)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)  # X.shape[axis] picks # samples

        X_csc = sp.csc_matrix(X_lil)
        X_means, X_vars = mean_variance_axis(X_csc, axis)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)

        # Test _incremantal_mean_and_var with whole data
        X = np.vstack(data_chunks)
        X_lil = sp.lil_matrix(X)
        X_csr = sp.csr_matrix(X_lil)
        X_means, X_vars = mean_variance_axis(X_csr, axis)
        X_means_incr, X_vars_incr, n_incr = \
            incr_mean_variance_axis(X_csr, axis, last_mean, last_var, last_n)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)

        X_csc = sp.csc_matrix(X_lil)
        X_means, X_vars = mean_variance_axis(X_csc, axis)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)

        # All data but as float
        X = X.astype(np.float32)
        X_csr = X_csr.astype(np.float32)
        X_means, X_vars = mean_variance_axis(X_csr, axis)
        X_means_incr, X_vars_incr, n_incr = \
            incr_mean_variance_axis(X_csr, axis, last_mean, last_var, last_n)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)

        X_csc = X_csr.astype(np.float32)
        X_means, X_vars = mean_variance_axis(X_csc, axis)
        assert_array_almost_equal(X_means, X_means_incr)
        assert_array_almost_equal(X_vars, X_vars_incr)
        assert_equal(X.shape[axis], n_incr)


def test_mean_variance_illegal_axis():
    X, _ = make_classification(5, 4, random_state=0)
    # Sparsify the array a little bit
    X[0, 0] = 0
    X[2, 1] = 0
    X[4, 3] = 0
    X_csr = sp.csr_matrix(X)
    assert_raises(ValueError, mean_variance_axis, X_csr, axis=-3)
    assert_raises(ValueError, mean_variance_axis, X_csr, axis=2)
    assert_raises(ValueError, mean_variance_axis, X_csr, axis=-1)

    assert_raises(ValueError, incr_mean_variance_axis, X_csr, axis=-3,
                  last_mean=None, last_var=None, last_n=None)
    assert_raises(ValueError, incr_mean_variance_axis, X_csr, axis=2,
                  last_mean=None, last_var=None, last_n=None)
    assert_raises(ValueError, incr_mean_variance_axis, X_csr, axis=-1,
                  last_mean=None, last_var=None, last_n=None)


def test_densify_rows():
    X = sp.csr_matrix([[0, 3, 0],
                       [2, 4, 0],
                       [0, 0, 0],
                       [9, 8, 7],
                       [4, 0, 5]], dtype=np.float64)
    X_rows = np.array([0, 2, 3], dtype=np.intp)
    out = np.ones((6, X.shape[1]), dtype=np.float64)
    out_rows = np.array([1, 3, 4], dtype=np.intp)

    expect = np.ones_like(out)
    expect[out_rows] = X[X_rows, :].toarray()

    assign_rows_csr(X, X_rows, out_rows, out)
    assert_array_equal(out, expect)


def test_inplace_column_scale():
    rng = np.random.RandomState(0)
    X = sp.rand(100, 200, 0.05)
    Xr = X.tocsr()
    Xc = X.tocsc()
    XA = X.toarray()
    scale = rng.rand(200)
    XA *= scale

    inplace_column_scale(Xc, scale)
    inplace_column_scale(Xr, scale)
    assert_array_almost_equal(Xr.toarray(), Xc.toarray())
    assert_array_almost_equal(XA, Xc.toarray())
    assert_array_almost_equal(XA, Xr.toarray())
    assert_raises(TypeError, inplace_column_scale, X.tolil(), scale)

    X = X.astype(np.float32)
    scale = scale.astype(np.float32)
    Xr = X.tocsr()
    Xc = X.tocsc()
    XA = X.toarray()
    XA *= scale
    inplace_column_scale(Xc, scale)
    inplace_column_scale(Xr, scale)
    assert_array_almost_equal(Xr.toarray(), Xc.toarray())
    assert_array_almost_equal(XA, Xc.toarray())
    assert_array_almost_equal(XA, Xr.toarray())
    assert_raises(TypeError, inplace_column_scale, X.tolil(), scale)


def test_inplace_row_scale():
    rng = np.random.RandomState(0)
    X = sp.rand(100, 200, 0.05)
    Xr = X.tocsr()
    Xc = X.tocsc()
    XA = X.toarray()
    scale = rng.rand(100)
    XA *= scale.reshape(-1, 1)

    inplace_row_scale(Xc, scale)
    inplace_row_scale(Xr, scale)
    assert_array_almost_equal(Xr.toarray(), Xc.toarray())
    assert_array_almost_equal(XA, Xc.toarray())
    assert_array_almost_equal(XA, Xr.toarray())
    assert_raises(TypeError, inplace_column_scale, X.tolil(), scale)

    X = X.astype(np.float32)
    scale = scale.astype(np.float32)
    Xr = X.tocsr()
    Xc = X.tocsc()
    XA = X.toarray()
    XA *= scale.reshape(-1, 1)
    inplace_row_scale(Xc, scale)
    inplace_row_scale(Xr, scale)
    assert_array_almost_equal(Xr.toarray(), Xc.toarray())
    assert_array_almost_equal(XA, Xc.toarray())
    assert_array_almost_equal(XA, Xr.toarray())
    assert_raises(TypeError, inplace_column_scale, X.tolil(), scale)


def test_inplace_swap_row():
    X = np.array([[0, 3, 0],
                  [2, 4, 0],
                  [0, 0, 0],
                  [9, 8, 7],
                  [4, 0, 5]], dtype=np.float64)
    X_csr = sp.csr_matrix(X)
    X_csc = sp.csc_matrix(X)

    swap = linalg.get_blas_funcs(('swap',), (X,))
    swap = swap[0]
    X[0], X[-1] = swap(X[0], X[-1])
    inplace_swap_row(X_csr, 0, -1)
    inplace_swap_row(X_csc, 0, -1)
    assert_array_equal(X_csr.toarray(), X_csc.toarray())
    assert_array_equal(X, X_csc.toarray())
    assert_array_equal(X, X_csr.toarray())

    X[2], X[3] = swap(X[2], X[3])
    inplace_swap_row(X_csr, 2, 3)
    inplace_swap_row(X_csc, 2, 3)
    assert_array_equal(X_csr.toarray(), X_csc.toarray())
    assert_array_equal(X, X_csc.toarray())
    assert_array_equal(X, X_csr.toarray())
    assert_raises(TypeError, inplace_swap_row, X_csr.tolil())

    X = np.array([[0, 3, 0],
                  [2, 4, 0],
                  [0, 0, 0],
                  [9, 8, 7],
                  [4, 0, 5]], dtype=np.float32)
    X_csr = sp.csr_matrix(X)
    X_csc = sp.csc_matrix(X)
    swap = linalg.get_blas_funcs(('swap',), (X,))
    swap = swap[0]
    X[0], X[-1] = swap(X[0], X[-1])
    inplace_swap_row(X_csr, 0, -1)
    inplace_swap_row(X_csc, 0, -1)
    assert_array_equal(X_csr.toarray(), X_csc.toarray())
    assert_array_equal(X, X_csc.toarray())
    assert_array_equal(X, X_csr.toarray())
    X[2], X[3] = swap(X[2], X[3])
    inplace_swap_row(X_csr, 2, 3)
    inplace_swap_row(X_csc, 2, 3)
    assert_array_equal(X_csr.toarray(), X_csc.toarray())
    assert_array_equal(X, X_csc.toarray())
    assert_array_equal(X, X_csr.toarray())
    assert_raises(TypeError, inplace_swap_row, X_csr.tolil())


def test_inplace_swap_column():
    X = np.array([[0, 3, 0],
                  [2, 4, 0],
                  [0, 0, 0],
                  [9, 8, 7],
                  [4, 0, 5]], dtype=np.float64)
    X_csr = sp.csr_matrix(X)
    X_csc = sp.csc_matrix(X)

    swap = linalg.get_blas_funcs(('swap',), (X,))
    swap = swap[0]
    X[:, 0], X[:, -1] = swap(X[:, 0], X[:, -1])
    inplace_swap_column(X_csr, 0, -1)
    inplace_swap_column(X_csc, 0, -1)
    assert_array_equal(X_csr.toarray(), X_csc.toarray())
    assert_array_equal(X, X_csc.toarray())
    assert_array_equal(X, X_csr.toarray())

    X[:, 0], X[:, 1] = swap(X[:, 0], X[:, 1])
    inplace_swap_column(X_csr, 0, 1)
    inplace_swap_column(X_csc, 0, 1)
    assert_array_equal(X_csr.toarray(), X_csc.toarray())
    assert_array_equal(X, X_csc.toarray())
    assert_array_equal(X, X_csr.toarray())
    assert_raises(TypeError, inplace_swap_column, X_csr.tolil())

    X = np.array([[0, 3, 0],
                  [2, 4, 0],
                  [0, 0, 0],
                  [9, 8, 7],
                  [4, 0, 5]], dtype=np.float32)
    X_csr = sp.csr_matrix(X)
    X_csc = sp.csc_matrix(X)
    swap = linalg.get_blas_funcs(('swap',), (X,))
    swap = swap[0]
    X[:, 0], X[:, -1] = swap(X[:, 0], X[:, -1])
    inplace_swap_column(X_csr, 0, -1)
    inplace_swap_column(X_csc, 0, -1)
    assert_array_equal(X_csr.toarray(), X_csc.toarray())
    assert_array_equal(X, X_csc.toarray())
    assert_array_equal(X, X_csr.toarray())
    X[:, 0], X[:, 1] = swap(X[:, 0], X[:, 1])
    inplace_swap_column(X_csr, 0, 1)
    inplace_swap_column(X_csc, 0, 1)
    assert_array_equal(X_csr.toarray(), X_csc.toarray())
    assert_array_equal(X, X_csc.toarray())
    assert_array_equal(X, X_csr.toarray())
    assert_raises(TypeError, inplace_swap_column, X_csr.tolil())


def test_min_max_axis0():
    X = np.array([[0, 3, 0],
                  [2, -1, 0],
                  [0, 0, 0],
                  [9, 8, 7],
                  [4, 0, 5]], dtype=np.float64)
    X_csr = sp.csr_matrix(X)
    X_csc = sp.csc_matrix(X)

    mins_csr, maxs_csr = min_max_axis(X_csr, axis=0)
    assert_array_equal(mins_csr, X.min(axis=0))
    assert_array_equal(maxs_csr, X.max(axis=0))

    mins_csc, maxs_csc = min_max_axis(X_csc, axis=0)
    assert_array_equal(mins_csc, X.min(axis=0))
    assert_array_equal(maxs_csc, X.max(axis=0))

    X = X.astype(np.float32)
    X_csr = sp.csr_matrix(X)
    X_csc = sp.csc_matrix(X)
    mins_csr, maxs_csr = min_max_axis(X_csr, axis=0)
    assert_array_equal(mins_csr, X.min(axis=0))
    assert_array_equal(maxs_csr, X.max(axis=0))
    mins_csc, maxs_csc = min_max_axis(X_csc, axis=0)
    assert_array_equal(mins_csc, X.min(axis=0))
    assert_array_equal(maxs_csc, X.max(axis=0))


def test_min_max_axis1():
    X = np.array([[0, 3, 0],
                  [2, -1, 0],
                  [0, 0, 0],
                  [9, 8, 7],
                  [4, 0, 5]], dtype=np.float64)
    X_csr = sp.csr_matrix(X)
    X_csc = sp.csc_matrix(X)

    mins_csr, maxs_csr = min_max_axis(X_csr, axis=1)
    assert_array_equal(mins_csr, X.min(axis=1))
    assert_array_equal(maxs_csr, X.max(axis=1))

    mins_csc, maxs_csc = min_max_axis(X_csc, axis=1)
    assert_array_equal(mins_csc, X.min(axis=1))
    assert_array_equal(maxs_csc, X.max(axis=1))

    X = X.astype(np.float32)
    X_csr = sp.csr_matrix(X)
    X_csc = sp.csc_matrix(X)
    mins_csr, maxs_csr = min_max_axis(X_csr, axis=1)
    assert_array_equal(mins_csr, X.min(axis=1))
    assert_array_equal(maxs_csr, X.max(axis=1))
    mins_csc, maxs_csc = min_max_axis(X_csc, axis=1)
    assert_array_equal(mins_csc, X.min(axis=1))
    assert_array_equal(maxs_csc, X.max(axis=1))


def test_min_max_axis_errors():
    X = np.array([[0, 3, 0],
                  [2, -1, 0],
                  [0, 0, 0],
                  [9, 8, 7],
                  [4, 0, 5]], dtype=np.float64)
    X_csr = sp.csr_matrix(X)
    X_csc = sp.csc_matrix(X)
    assert_raises(TypeError, min_max_axis, X_csr.tolil(), axis=0)
    assert_raises(ValueError, min_max_axis, X_csr, axis=2)
    assert_raises(ValueError, min_max_axis, X_csc, axis=-3)


def test_count_nonzero():
    X = np.array([[0, 3, 0],
                  [2, -1, 0],
                  [0, 0, 0],
                  [9, 8, 7],
                  [4, 0, 5]], dtype=np.float64)
    X_csr = sp.csr_matrix(X)
    X_csc = sp.csc_matrix(X)
    X_nonzero = X != 0
    sample_weight = [.5, .2, .3, .1, .1]
    X_nonzero_weighted = X_nonzero * np.array(sample_weight)[:, None]

    for axis in [0, 1, -1, -2, None]:
        assert_array_almost_equal(count_nonzero(X_csr, axis=axis),
                                  X_nonzero.sum(axis=axis))
        assert_array_almost_equal(count_nonzero(X_csr, axis=axis,
                                                sample_weight=sample_weight),
                                  X_nonzero_weighted.sum(axis=axis))

    assert_raises(TypeError, count_nonzero, X_csc)
    assert_raises(ValueError, count_nonzero, X_csr, axis=2)


def test_csc_row_median():
    # Test csc_row_median actually calculates the median.

    # Test that it gives the same output when X is dense.
    rng = np.random.RandomState(0)
    X = rng.rand(100, 50)
    dense_median = np.median(X, axis=0)
    csc = sp.csc_matrix(X)
    sparse_median = csc_median_axis_0(csc)
    assert_array_equal(sparse_median, dense_median)

    # Test that it gives the same output when X is sparse
    X = rng.rand(51, 100)
    X[X < 0.7] = 0.0
    ind = rng.randint(0, 50, 10)
    X[ind] = -X[ind]
    csc = sp.csc_matrix(X)
    dense_median = np.median(X, axis=0)
    sparse_median = csc_median_axis_0(csc)
    assert_array_equal(sparse_median, dense_median)

    # Test for toy data.
    X = [[0, -2], [-1, -1], [1, 0], [2, 1]]
    csc = sp.csc_matrix(X)
    assert_array_equal(csc_median_axis_0(csc), np.array([0.5, -0.5]))
    X = [[0, -2], [-1, -5], [1, -3]]
    csc = sp.csc_matrix(X)
    assert_array_equal(csc_median_axis_0(csc), np.array([0., -3]))

    # Test that it raises an Error for non-csc matrices.
    assert_raises(TypeError, csc_median_axis_0, sp.csr_matrix(X))
