提交 3fb22c7a 作者: ballasn 提交者: GitHub

Merge pull request #4681 from kumarkrishna/improve_theano_var

Uses corrected two pass algorithm in theano.tensor.var
...@@ -3175,7 +3175,7 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False, ...@@ -3175,7 +3175,7 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
@constructor @constructor
def var(input, axis=None, ddof=0, keepdims=False): def var(input, axis=None, ddof=0, keepdims=False, corrected=False):
""" """
Computes the variance along the given axis(es) of a tensor `input`. Computes the variance along the given axis(es) of a tensor `input`.
...@@ -3190,18 +3190,24 @@ def var(input, axis=None, ddof=0, keepdims=False): ...@@ -3190,18 +3190,24 @@ def var(input, axis=None, ddof=0, keepdims=False):
If this is set to True, the axes which are reduced are If this is set to True, the axes which are reduced are
left in the result as dimensions with size one. With this option, left in the result as dimensions with size one. With this option,
the result will broadcast correctly against the original tensor. the result will broadcast correctly against the original tensor.
corrected : bool
If this is set to True, the 'corrected_two_pass' algorithm is
used to compute the variance.
Refer : http://www.cs.yale.edu/publications/techreports/tr222.pdf
Notes Notes
----- -----
It uses the two-pass algorithm for more stable results. Default uses the two-pass algorithm (reference below).
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
There exist other implementations that are even more stable, but probably Also supports 'corrected_two_pass' algorithm (using the 'corrected' flag)
slower. which is numerically more stable. There exist other implementations that
offer better stability, but are probably slower.
""" """
if isinstance(ddof, (bool)): if isinstance(ddof, (bool)):
raise ValueError('Parameter keepdims is now at index 3: (input, axis=None, ddof=0, keepdims=False)') raise ValueError('Parameter keepdims is now at index 3: (input, \
axis=None, ddof=0, keepdims=False, corrected=False)')
input_ndim = input.type.ndim input_ndim = input.type.ndim
if axis is None: if axis is None:
...@@ -3227,39 +3233,60 @@ def var(input, axis=None, ddof=0, keepdims=False): ...@@ -3227,39 +3233,60 @@ def var(input, axis=None, ddof=0, keepdims=False):
v = sum((centered_input ** 2), axis=axis, keepdims=keepdims) v = sum((centered_input ** 2), axis=axis, keepdims=keepdims)
for i in axis: for i in axis:
v = true_div(v, shp[i]) v = true_div(v, shp[i])
# use 'corrected_two_pass' algorithm
if corrected:
if ddof == 0:
error = mean(centered_input, axis, keepdims=keepdims) ** 2
else:
shp = shape(input) - ddof
shp_inp = shape(input)
error = sum(centered_input, axis=axis, keepdims=keepdims) ** 2
for i in axis:
error = true_div(error, shp[i] * shp_inp[i])
v = v - error
v.name = 'var' v.name = 'var'
return v return v
@constructor @constructor
def std(input, axis=None, ddof=0, keepdims=False): def std(input, axis=None, ddof=0, keepdims=False, corrected=False):
""" """
Computes the standard deviation along the given axis(es) of a tensor `input`. Computes the standard deviation along the given axis(es) of a tensor `input`.
Parameters Parameters
---------- ----------
axis : None or int or (list of int) (see `Sum`) axis: None or int or (list of int) (see `Sum`)
Compute the standard deviation along this axis of the tensor. Compute the standard deviation along this axis of the tensor.
None means all axes (like numpy). None means all axes (like numpy).
ddof: Degrees of freedom; 0 would compute the ML estimate, 1 would compute
the unbiased estimate.
keepdims : bool keepdims : bool
If this is set to True, the axes which are reduced are left in the If this is set to True, the axes which are reduced are
result as dimensions with size one. With this option, the result will left in the result as dimensions with size one. With this option,
broadcast correctly against the original tensor. the result will broadcast correctly against the original tensor.
corrected : bool
If this is set to True, the 'corrected_two_pass' algorithm is
used to compute the variance.
Refer : http://www.cs.yale.edu/publications/techreports/tr222.pdf
Notes Notes
----- -----
It calls `var()` and `var()` uses the two-pass algorithm for more stable It calls 'var()' and 'var()' uses the two-pass algorithm (reference below).
results.
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
There exist other implementations that are even more stable, but probably Function 'var()' also supports 'corrected_two_pass' algorithm (using the
slower. 'corrected' flag) which is numerically more stable. There exist other
implementations that offer better stability, but are probably slower.
""" """
if isinstance(ddof, (bool)): if isinstance(ddof, (bool)):
raise ValueError('Parameter keepdims is now at index 3: (input, axis=None, ddof=0, keepdims=False)') raise ValueError('Parameter keepdims is now at index 3: (input, \
axis=None, ddof=0, keepdims=False, corrected=False)')
ret = sqrt(var(input=input, axis=axis, ddof=ddof, keepdims=keepdims)) ret = sqrt(var(input=input, axis=axis, ddof=ddof,
keepdims=keepdims, corrected=corrected))
ret.name = 'std' ret.name = 'std'
return ret return ret
......
...@@ -3611,16 +3611,16 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3611,16 +3611,16 @@ class T_Join_and_Split(unittest.TestCase):
f = function([a, b], s, mode=self.mode) f = function([a, b], s, mode=self.mode)
v = numpy.zeros((2, 3, 2)) v = numpy.zeros((2, 3, 2))
v[:,:,0] = v1 v[:,:,0] = v1
v[:,:,1] = v2 v[:,:,1] = v2
out = f(v1, v2) out = f(v1, v2)
self.assertTrue(v.shape == out.shape) self.assertTrue(v.shape == out.shape)
self.assertTrue(numpy.all(v == out)) self.assertTrue(numpy.all(v == out))
s = stack([a, b], axis=-2) s = stack([a, b], axis=-2)
f = function([a, b], s, mode=self.mode) f = function([a, b], s, mode=self.mode)
v = numpy.zeros((2, 2, 3)) v = numpy.zeros((2, 2, 3))
v[:,0,:] = v1 v[:,0,:] = v1
v[:,1,:] = v2 v[:,1,:] = v2
out = f(v1, v2) out = f(v1, v2)
self.assertTrue(v.shape == out.shape) self.assertTrue(v.shape == out.shape)
self.assertTrue(numpy.all(v == out)) self.assertTrue(numpy.all(v == out))
# Testing out-of-bounds axis # Testing out-of-bounds axis
...@@ -5470,7 +5470,7 @@ def test_tile(): ...@@ -5470,7 +5470,7 @@ def test_tile():
# Test 1,2,3,4-dimensional cases. # Test 1,2,3,4-dimensional cases.
# Test input x has the shape [2], [2, 4], [2, 4, 3], [2, 4, 3, 5]. # Test input x has the shape [2], [2, 4], [2, 4, 3], [2, 4, 3, 5].
test_shape = [2, 4, 3, 5] test_shape = [2, 4, 3, 5]
k = 0 k = 0
for xtype in [vector(), matrix(), tensor3(), tensor4()]: for xtype in [vector(), matrix(), tensor3(), tensor4()]:
x = xtype x = xtype
k = k+1 k = k+1
...@@ -5528,7 +5528,7 @@ def test_tile(): ...@@ -5528,7 +5528,7 @@ def test_tile():
reps_ = r[:k-1] reps_ = r[:k-1]
f = function([x], tile(x, reps_, ndim_)) f = function([x], tile(x, reps_, ndim_))
assert numpy.all( f(x_) == numpy.tile(x_, [1, 1] + reps_)) assert numpy.all( f(x_) == numpy.tile(x_, [1, 1] + reps_))
# error raising test: ndim not specified when reps is vector # error raising test: ndim not specified when reps is vector
reps = ivector() reps = ivector()
numpy.testing.assert_raises(ValueError, tile, x, reps) numpy.testing.assert_raises(ValueError, tile, x, reps)
...@@ -5536,7 +5536,7 @@ def test_tile(): ...@@ -5536,7 +5536,7 @@ def test_tile():
# error raising test: not an integer # error raising test: not an integer
for reps in [2.5, fscalar(), fvector()]: for reps in [2.5, fscalar(), fvector()]:
numpy.testing.assert_raises(ValueError, tile, x, reps) numpy.testing.assert_raises(ValueError, tile, x, reps)
# error raising test: the dimension of reps exceeds 1 # error raising test: the dimension of reps exceeds 1
reps = imatrix() reps = imatrix()
numpy.testing.assert_raises(ValueError, tile, x, reps) numpy.testing.assert_raises(ValueError, tile, x, reps)
...@@ -5547,14 +5547,14 @@ def test_tile(): ...@@ -5547,14 +5547,14 @@ def test_tile():
if k > 1: if k > 1:
ndim = k-1 ndim = k-1
numpy.testing.assert_raises(ValueError, tile, x, reps, ndim) numpy.testing.assert_raises(ValueError, tile, x, reps, ndim)
# error raising test: reps is list, len(reps) > ndim # error raising test: reps is list, len(reps) > ndim
r = [2, 3, 4, 5, 6] r = [2, 3, 4, 5, 6]
reps = r[:k+1] reps = r[:k+1]
ndim = k ndim = k
numpy.testing.assert_raises(ValueError, tile, x, reps, ndim) numpy.testing.assert_raises(ValueError, tile, x, reps, ndim)
# error raising test: # error raising test:
# reps is tensor.vector and len(reps_value) > ndim, # reps is tensor.vector and len(reps_value) > ndim,
# reps_value is the real value when executing the function. # reps_value is the real value when executing the function.
reps = ivector() reps = ivector()
...@@ -6352,8 +6352,6 @@ def test_var(): ...@@ -6352,8 +6352,6 @@ def test_var():
f = function([a], var(a)) f = function([a], var(a))
a_val = numpy.arange(60).reshape(3, 4, 5) a_val = numpy.arange(60).reshape(3, 4, 5)
# print numpy.var(a_val)
# print f(a_val)
assert numpy.allclose(numpy.var(a_val), f(a_val)) assert numpy.allclose(numpy.var(a_val), f(a_val))
f = function([a], var(a, axis=0)) f = function([a], var(a, axis=0))
...@@ -6368,12 +6366,31 @@ def test_var(): ...@@ -6368,12 +6366,31 @@ def test_var():
f = function([a], var(a, axis=0, ddof=0)) f = function([a], var(a, axis=0, ddof=0))
assert numpy.allclose(numpy.var(a_val, axis=0, ddof=0), f(a_val)) assert numpy.allclose(numpy.var(a_val, axis=0, ddof=0), f(a_val))
f = function([a], var(a, axis=1,ddof=1)) f = function([a], var(a, axis=1, ddof=1))
assert numpy.allclose(numpy.var(a_val, axis=1,ddof=1), f(a_val)) assert numpy.allclose(numpy.var(a_val, axis=1, ddof=1), f(a_val))
f = function([a], var(a, axis=2, ddof=1)) f = function([a], var(a, axis=2, ddof=1))
assert numpy.allclose(numpy.var(a_val, axis=2, ddof=1), f(a_val)) assert numpy.allclose(numpy.var(a_val, axis=2, ddof=1), f(a_val))
f = function([a], var(a, ddof=0, corrected=True))
mean_a = numpy.mean(a_val)
centered_a = a_val - mean_a
v = numpy.mean(centered_a ** 2)
error = (numpy.mean(centered_a)) ** 2
v = v - error
assert numpy.allclose(v, f(a_val))
f = function([a], var(a, axis=2, ddof=1, corrected=True))
mean_a = numpy.mean(a_val, axis=2, keepdims=True)
centered_a = a_val - mean_a
v = numpy.var(a_val, axis=2, ddof=1)
shp_inp = numpy.shape(a_val)
shp = shp_inp - numpy.array(1)
error = (numpy.sum(centered_a, axis=2)) ** 2
error = numpy.true_divide(error, shp[1] * shp_inp[1])
v = v - error
assert numpy.allclose(v, f(a_val))
class T_sum(unittest.TestCase): class T_sum(unittest.TestCase):
def test_sum_overflow(self): def test_sum_overflow(self):
......
...@@ -643,13 +643,15 @@ class _tensor_py_operators(object): ...@@ -643,13 +643,15 @@ class _tensor_py_operators(object):
dtype=dtype, keepdims=keepdims, dtype=dtype, keepdims=keepdims,
acc_dtype=acc_dtype) acc_dtype=acc_dtype)
def var(self, axis=None, keepdims=False): def var(self, axis=None, ddof=0, keepdims=False, corrected=False):
"""See `theano.tensor.var`.""" """See `theano.tensor.var`."""
return theano.tensor.basic.var(self, axis, keepdims=keepdims) return theano.tensor.basic.var(self, axis=axis, ddof=ddof,
keepdims=keepdims, corrected=corrected)
def std(self, axis=None, keepdims=False): def std(self, axis=None, ddof=0, keepdims=False, corrected=False):
"""See `theano.tensor.std`.""" """See `theano.tensor.std`."""
return theano.tensor.basic.std(self, axis, keepdims=keepdims) return theano.tensor.basic.std(self, axis=axis, ddof=ddof,
keepdims=keepdims, corrected=corrected)
def min(self, axis=None, keepdims=False): def min(self, axis=None, keepdims=False):
"""See `theano.tensor.min`.""" """See `theano.tensor.min`."""
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论