Merge pull request #1226 from lamblin/stable_reduce

More stable reduce operations by default

Merge pull request #1226 from lamblin/stable_reduce
3cb9ac35 · nouiz · 3b31bfa8 · 47ad172d · 3cb9ac35 · 3cb9ac35
--- a/doc/library/tensor/basic.txt
+++ b/doc/library/tensor/basic.txt
@@ -715,13 +715,34 @@ Reductions
    if axis=None, Theano 0.5rc1 or later: argmin over the flattened tensor (like numpy)
                  older: then axis is assumed to be ndim(x)-1

-.. function:: sum(x, axis=None, keepdims=False)
+.. function:: sum(x, axis=None, dtype=None, keepdims=False, acc_dtype=None)

    :Parameter: *x* -  symbolic Tensor (or compatible)
    :Parameter: *axis* - axis or axes along which to compute the sum
+    :Parameter: *dtype* - The dtype of the returned tensor.
+        If None, then we use the default dtype which is the same as
+        the input tensor's dtype except when:
+
+        - the input dtype is a signed integer of precision < 64 bit, in
+          which case we use int64
+        - the input dtype is an unsigned integer of precision < 64 bit, in
+          which case we use uint64
+
+        This default dtype does *not* depend on the value of "acc_dtype".
+
    :Parameter: *keepdims* - (boolean) If this is set to True, the axes which are reduced are
 		left in the result as dimensions with size one. With this option, the result
 		will broadcast correctly against the original tensor.
+
+    :Parameter: *acc_dtype* -  The dtype of the internal accumulator.
+        If None (default), we use the dtype in the list below,
+        or the input dtype if its precision is higher:
+
+        - for int dtypes, we use at least int64;
+        - for uint dtypes, we use at least uint64;
+        - for float dtypes, we use at least float64;
+        - for complex dtypes, we use at least complex128.
+
    :Returns: sum of *x* along *axis*

    axis can be:
@@ -729,13 +750,34 @@ Reductions
     * an *int* - computed along this axis
     * a *list of ints* - computed along these axes

-.. function:: prod(x, axis=None, keepdims=False)
+.. function:: prod(x, axis=None, dtype=None, keepdims=False, acc_dtype=None)

    :Parameter: *x* -  symbolic Tensor (or compatible)
    :Parameter: *axis* - axis or axes along which to compute the product
+    :Parameter: *dtype* - The dtype of the returned tensor.
+        If None, then we use the default dtype which is the same as
+        the input tensor's dtype except when:
+
+        - the input dtype is a signed integer of precision < 64 bit, in
+          which case we use int64
+        - the input dtype is an unsigned integer of precision < 64 bit, in
+          which case we use uint64
+
+        This default dtype does *not* depend on the value of "acc_dtype".
+
    :Parameter: *keepdims* - (boolean) If this is set to True, the axes which are reduced are
 		left in the result as dimensions with size one. With this option, the result
 		will broadcast correctly against the original tensor.
+
+    :Parameter: *acc_dtype* -  The dtype of the internal accumulator.
+        If None (default), we use the dtype in the list below,
+        or the input dtype if its precision is higher:
+
+        - for int dtypes, we use at least int64;
+        - for uint dtypes, we use at least uint64;
+        - for float dtypes, we use at least float64;
+        - for complex dtypes, we use at least complex128.
+
    :Returns: product of every term in *x* along *axis*

    axis can be:
@@ -743,13 +785,22 @@ Reductions
     * an *int* - computed along this axis
     * a *list of ints* - computed along these axes

-.. function:: mean(x, axis=None, keepdims=False)
+.. function:: mean(x, axis=None, dtype=None, keepdims=False, acc_dtype=None)

    :Parameter: *x* -  symbolic Tensor (or compatible)
    :Parameter: *axis* - axis or axes along which to compute the mean
+    :Parameter: *dtype* - The dtype to cast the result of the inner summation into.
+        For instance, by default, a sum of a float32 tensor will be
+        done in float64 (acc_dtype would be float64 by default),
+        but that result will be cast back to float32.
    :Parameter: *keepdims* - (boolean) If this is set to True, the axes which are reduced are
 		left in the result as dimensions with size one. With this option, the result
 		will broadcast correctly against the original tensor.
+    :Parameter: *acc_dtype* -  The dtype of the internal accumulator of the
+        inner summation. This will not necessarily be the dtype of the
+        output (in particular if it is a discrete (int/uint) dtype, the
+        output will be in a float type).  If None, then we use the same
+        rules as :func:`sum`.
    :Returns: mean value of *x* along *axis*

    axis can be:

--- a/theano/gradient.py
+++ b/theano/gradient.py
@@ -458,7 +458,8 @@ def grad(cost, wrt, consider_constant=None,
            g_cost = g_cost.astype(cost.type.dtype)
        # DO NOT enforce g_cost to be 0 if cost is an integer.
        # This is to be enforced by the Op.grad method for the Op that outputs cost.
-        assert g_cost not in tensor.discrete_dtypes
+        if hasattr(g_cost.type, 'dtype'):
+            assert g_cost.type.dtype not in tensor.discrete_dtypes

        grad_dict[cost] = g_cost


--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -1826,13 +1826,15 @@ class _tensor_py_operators:

    dot = __dot__

-    def sum(self, axis=None, dtype=None, keepdims=False):
+    def sum(self, axis=None, dtype=None, keepdims=False, acc_dtype=None):
        """See `theano.tensor.sum`"""
-        return sum(self, axis=axis, dtype=dtype, keepdims=keepdims)
+        return sum(self, axis=axis, dtype=dtype, keepdims=keepdims,
+                acc_dtype=acc_dtype)

-    def prod(self, axis=None, dtype=None, keepdims=False):
+    def prod(self, axis=None, dtype=None, keepdims=False, acc_dtype=None):
        """See `theano.tensor.prod`"""
-        return prod(self, axis=axis, dtype=dtype, keepdims=keepdims)
+        return prod(self, axis=axis, dtype=dtype, keepdims=keepdims,
+                acc_dtype=acc_dtype)

    def norm(self, L, axis=None):
        if L == 0:
@@ -1842,9 +1844,10 @@ class _tensor_py_operators:
        # optimizations will/should catch cases like L=1, L=2
        return pow(pow(abs_(self), L).sum(axis=axis), 1.0 / L)

-    def mean(self, axis=None, dtype=None, keepdims=False):
+    def mean(self, axis=None, dtype=None, keepdims=False, acc_dtype=None):
        """See `theano.tensor.mean`"""
-        return mean(self, axis=axis, dtype=dtype, keepdims=keepdims)
+        return mean(self, axis=axis, dtype=dtype, keepdims=keepdims,
+                acc_dtype=acc_dtype)

    def var(self, axis=None, keepdims=False):
        """See `theano.tensor.var`"""
@@ -3777,7 +3780,7 @@ pprint.assign(tensor_copy, printing.IgnorePrinter())


 @constructor
-def sum(input, axis=None, dtype=None, keepdims=False):
+def sum(input, axis=None, dtype=None, keepdims=False, acc_dtype=None):
    """
    Computes the sum along the given axis(es) of a tensor `input`

@@ -3790,10 +3793,10 @@ def sum(input, axis=None, dtype=None, keepdims=False):

    For full documentation see ``tensor.elemwise.Sum``.
    In particular please pay attention to the important warning when using
-    a custom dtype.
+    a custom acc_dtype.
    """

-    out = elemwise.Sum(axis=axis, dtype=dtype)(input)
+    out = elemwise.Sum(axis=axis, dtype=dtype, acc_dtype=acc_dtype)(input)

    if keepdims:
        out = makeKeepDims(input, out, axis)
@@ -3803,7 +3806,7 @@ pprint.assign(Sum(), printing.FunctionPrinter('sum'))


 @constructor
-def prod(input, axis=None, dtype=None, keepdims=False):
+def prod(input, axis=None, dtype=None, keepdims=False, acc_dtype=None):
    """
    Computes the product along the given axis(es) of a tensor `input`

@@ -3817,7 +3820,7 @@ def prod(input, axis=None, dtype=None, keepdims=False):
    For full documentation see ``tensor.elemwise.Prod``.
    """

-    out = elemwise.Prod(axis, dtype=dtype)(input)
+    out = elemwise.Prod(axis, dtype=dtype, acc_dtype=acc_dtype)(input)

    if keepdims:
        out = makeKeepDims(input, out, axis)
@@ -3868,7 +3871,8 @@ class Mean(elemwise.CAReduce):


 @constructor
-def mean(input, axis=None, dtype=None, op=False, keepdims=False):
+def mean(input, axis=None, dtype=None, op=False, keepdims=False,
+        acc_dtype=None):
    """
    Computes the mean value along the given axis(es) of a tensor `input`

@@ -3876,17 +3880,23 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False):
                 None means all axes (like numpy).
    :type axis: None or int or (list of int) (see `Sum`)

-    :param dtype: dtype to use for the inner summation. This will not
-                  necessarily be the dtype of the output (in particular
-                  if it is a discrete (int/uint) dtype, the output will
-                  be in a float type).
-                  If None, then we use the same rules as `sum()`.
+    :param dtype: dtype to cast the result of the inner summation into.
+        For instance, by default, a sum of a float32 tensor will be
+        done in float64 (acc_dtype would be float64 by default),
+        but that result will be cast back to float32.
    :type dtype: None or string

    :param keepdims: If this is set to True, the axes which are reduced are
        left in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original tensor.

+    :param acc_dtype: dtype to use for the inner summation. This will not
+                  necessarily be the dtype of the output (in particular
+                  if it is a discrete (int/uint) dtype, the output will
+                  be in a float type).
+                  If None, then we use the same rules as `sum()`.
+    :type acc_dtype: None or string
+
    :note: for gpu, if you specify dtype=float32, everything will be done
           on the gpu.
    """
@@ -3898,6 +3908,12 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False):
                    'and will always use float64. If you want to specify '
                    'the dtype, call tensor.mean(..., op=False).',
                    dtype)
+        if acc_dtype not in (None, 'float64'):
+            raise NotImplementedError(
+                    'The Mean op does not support the acc_dtype argument, '
+                    'and will always use float64. If you want to specify '
+                    'acc_dtype, call tensor.mean(..., op=False).',
+                    dtype)
        out = Mean(axis)(input)
        if keepdims:
            out = makeKeepDims(input, out, axis)
@@ -3911,7 +3927,8 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False):
        # Let sum() infer the appropriate dtype.
        sum_dtype = None

-    s = sum(input, axis=axis, dtype=sum_dtype, keepdims=keepdims)
+    s = sum(input, axis=axis, dtype=sum_dtype, keepdims=keepdims,
+            acc_dtype=acc_dtype)
    shp = shape(input)

    # Cast shp into a float type

--- a/theano/tensor/elemwise.py
+++ b/theano/tensor/elemwise.py
--- a/theano/tensor/tests/test_elemwise.py
+++ b/theano/tensor/tests/test_elemwise.py