提交 3fb22c7a authored 作者: ballasn 提交者: GitHub

Merge pull request #4681 from kumarkrishna/improve_theano_var

Uses corrected two pass algorithm in theano.tensor.var
......@@ -3175,7 +3175,7 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
@constructor
def var(input, axis=None, ddof=0, keepdims=False):
def var(input, axis=None, ddof=0, keepdims=False, corrected=False):
"""
Computes the variance along the given axis(es) of a tensor `input`.
......@@ -3190,18 +3190,24 @@ def var(input, axis=None, ddof=0, keepdims=False):
If this is set to True, the axes which are reduced are
left in the result as dimensions with size one. With this option,
the result will broadcast correctly against the original tensor.
corrected : bool
If this is set to True, the 'corrected_two_pass' algorithm is
used to compute the variance.
Refer : http://www.cs.yale.edu/publications/techreports/tr222.pdf
Notes
-----
It uses the two-pass algorithm for more stable results.
Default uses the two-pass algorithm (reference below).
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
There exist other implementations that are even more stable, but probably
slower.
Also supports the 'corrected_two_pass' algorithm (via the 'corrected' flag),
which is numerically more stable. There exist other implementations that
offer better stability, but they are probably slower.
"""
if isinstance(ddof, (bool)):
raise ValueError('Parameter keepdims is now at index 3: (input, axis=None, ddof=0, keepdims=False)')
raise ValueError('Parameter keepdims is now at index 3: (input, \
axis=None, ddof=0, keepdims=False, corrected=False)')
input_ndim = input.type.ndim
if axis is None:
......@@ -3227,39 +3233,60 @@ def var(input, axis=None, ddof=0, keepdims=False):
v = sum((centered_input ** 2), axis=axis, keepdims=keepdims)
for i in axis:
v = true_div(v, shp[i])
# use 'corrected_two_pass' algorithm
if corrected:
if ddof == 0:
error = mean(centered_input, axis, keepdims=keepdims) ** 2
else:
shp = shape(input) - ddof
shp_inp = shape(input)
error = sum(centered_input, axis=axis, keepdims=keepdims) ** 2
for i in axis:
error = true_div(error, shp[i] * shp_inp[i])
v = v - error
v.name = 'var'
return v
@constructor
def std(input, axis=None, ddof=0, keepdims=False):
def std(input, axis=None, ddof=0, keepdims=False, corrected=False):
"""
Computes the standard deviation along the given axis(es) of a tensor `input`.
Parameters
----------
axis : None or int or (list of int) (see `Sum`)
Compute the standard deviation along this axis of the tensor.
axis: None or int or (list of int) (see `Sum`)
Compute the variance along this axis of the tensor.
None means all axes (like numpy).
ddof: Degrees of freedom; 0 would compute the ML estimate, 1 would compute
the unbiased estimate.
keepdims : bool
If this is set to True, the axes which are reduced are left in the
result as dimensions with size one. With this option, the result will
broadcast correctly against the original tensor.
If this is set to True, the axes which are reduced are
left in the result as dimensions with size one. With this option,
the result will broadcast correctly against the original tensor.
corrected : bool
If this is set to True, the 'corrected_two_pass' algorithm is
used to compute the variance.
Refer : http://www.cs.yale.edu/publications/techreports/tr222.pdf
Notes
-----
It calls `var()` and `var()` uses the two-pass algorithm for more stable
results.
It calls 'var()' and 'var()' uses the two-pass algorithm (reference below).
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
There exist other implementations that are even more stable, but probably
slower.
Function 'var()' also supports 'corrected_two_pass' algorithm (using the
'corrected' flag) which is numerically more stable. There exist other
implementations that offer better stability, but probably slower.
"""
if isinstance(ddof, (bool)):
raise ValueError('Parameter keepdims is now at index 3: (input, axis=None, ddof=0, keepdims=False)')
raise ValueError('Parameter keepdims is now at index 3: (input, \
axis=None, ddof=0, keepdims=False, corrected=False)')
ret = sqrt(var(input=input, axis=axis, ddof=ddof, keepdims=keepdims))
ret = sqrt(var(input=input, axis=axis, ddof=ddof,
keepdims=keepdims, corrected=corrected))
ret.name = 'std'
return ret
......
......@@ -3611,16 +3611,16 @@ class T_Join_and_Split(unittest.TestCase):
f = function([a, b], s, mode=self.mode)
v = numpy.zeros((2, 3, 2))
v[:,:,0] = v1
v[:,:,1] = v2
out = f(v1, v2)
v[:,:,1] = v2
out = f(v1, v2)
self.assertTrue(v.shape == out.shape)
self.assertTrue(numpy.all(v == out))
s = stack([a, b], axis=-2)
f = function([a, b], s, mode=self.mode)
v = numpy.zeros((2, 2, 3))
v[:,0,:] = v1
v[:,1,:] = v2
out = f(v1, v2)
v[:,1,:] = v2
out = f(v1, v2)
self.assertTrue(v.shape == out.shape)
self.assertTrue(numpy.all(v == out))
# Testing out-of-bounds axis
......@@ -5470,7 +5470,7 @@ def test_tile():
# Test 1,2,3,4-dimensional cases.
# Test input x has the shape [2], [2, 4], [2, 4, 3], [2, 4, 3, 5].
test_shape = [2, 4, 3, 5]
k = 0
k = 0
for xtype in [vector(), matrix(), tensor3(), tensor4()]:
x = xtype
k = k+1
......@@ -5528,7 +5528,7 @@ def test_tile():
reps_ = r[:k-1]
f = function([x], tile(x, reps_, ndim_))
assert numpy.all( f(x_) == numpy.tile(x_, [1, 1] + reps_))
# error raising test: ndim not specified when reps is vector
reps = ivector()
numpy.testing.assert_raises(ValueError, tile, x, reps)
......@@ -5536,7 +5536,7 @@ def test_tile():
# error raising test: not an integer
for reps in [2.5, fscalar(), fvector()]:
numpy.testing.assert_raises(ValueError, tile, x, reps)
# error raising test: the dimension of reps exceeds 1
reps = imatrix()
numpy.testing.assert_raises(ValueError, tile, x, reps)
......@@ -5547,14 +5547,14 @@ def test_tile():
if k > 1:
ndim = k-1
numpy.testing.assert_raises(ValueError, tile, x, reps, ndim)
# error raising test: reps is list, len(reps) > ndim
r = [2, 3, 4, 5, 6]
reps = r[:k+1]
ndim = k
numpy.testing.assert_raises(ValueError, tile, x, reps, ndim)
# error raising test:
# error raising test:
# reps is tensor.vector and len(reps_value) > ndim,
# reps_value is the real value when executing the function.
reps = ivector()
......@@ -6352,8 +6352,6 @@ def test_var():
f = function([a], var(a))
a_val = numpy.arange(60).reshape(3, 4, 5)
# print numpy.var(a_val)
# print f(a_val)
assert numpy.allclose(numpy.var(a_val), f(a_val))
f = function([a], var(a, axis=0))
......@@ -6368,12 +6366,31 @@ def test_var():
f = function([a], var(a, axis=0, ddof=0))
assert numpy.allclose(numpy.var(a_val, axis=0, ddof=0), f(a_val))
f = function([a], var(a, axis=1,ddof=1))
assert numpy.allclose(numpy.var(a_val, axis=1,ddof=1), f(a_val))
f = function([a], var(a, axis=1, ddof=1))
assert numpy.allclose(numpy.var(a_val, axis=1, ddof=1), f(a_val))
f = function([a], var(a, axis=2, ddof=1))
assert numpy.allclose(numpy.var(a_val, axis=2, ddof=1), f(a_val))
f = function([a], var(a, ddof=0, corrected=True))
mean_a = numpy.mean(a_val)
centered_a = a_val - mean_a
v = numpy.mean(centered_a ** 2)
error = (numpy.mean(centered_a)) ** 2
v = v - error
assert numpy.allclose(v, f(a_val))
f = function([a], var(a, axis=2, ddof=1, corrected=True))
mean_a = numpy.mean(a_val, axis=2, keepdims=True)
centered_a = a_val - mean_a
v = numpy.var(a_val, axis=2, ddof=1)
shp_inp = numpy.shape(a_val)
shp = shp_inp - numpy.array(1)
error = (numpy.sum(centered_a, axis=2)) ** 2
error = numpy.true_divide(error, shp[1] * shp_inp[1])
v = v - error
assert numpy.allclose(v, f(a_val))
class T_sum(unittest.TestCase):
def test_sum_overflow(self):
......
......@@ -643,13 +643,15 @@ class _tensor_py_operators(object):
dtype=dtype, keepdims=keepdims,
acc_dtype=acc_dtype)
def var(self, axis=None, keepdims=False):
def var(self, axis=None, ddof=0, keepdims=False, corrected=False):
"""See `theano.tensor.var`."""
return theano.tensor.basic.var(self, axis, keepdims=keepdims)
return theano.tensor.basic.var(self, axis=axis, ddof=ddof,
keepdims=keepdims, corrected=corrected)
def std(self, axis=None, keepdims=False):
def std(self, axis=None, ddof=0, keepdims=False, corrected=False):
"""See `theano.tensor.std`."""
return theano.tensor.basic.std(self, axis, keepdims=keepdims)
return theano.tensor.basic.std(self, axis=axis, ddof=ddof,
keepdims=keepdims, corrected=corrected)
def min(self, axis=None, keepdims=False):
"""See `theano.tensor.min`."""
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论