Commit 3cb9ac35 authored by nouiz

Merge pull request #1226 from lamblin/stable_reduce

More stable reduce operations by default
......@@ -715,13 +715,34 @@ Reductions
if axis=None, Theano 0.5rc1 or later: argmin over the flattened tensor (like numpy)
older: then axis is assumed to be ndim(x)-1
.. function:: sum(x, axis=None, keepdims=False)
.. function:: sum(x, axis=None, dtype=None, keepdims=False, acc_dtype=None)
:Parameter: *x* - symbolic Tensor (or compatible)
:Parameter: *axis* - axis or axes along which to compute the sum
:Parameter: *dtype* - The dtype of the returned tensor.
If None, then we use the default dtype which is the same as
the input tensor's dtype except when:
- the input dtype is a signed integer of precision < 64 bit, in
which case we use int64
- the input dtype is an unsigned integer of precision < 64 bit, in
which case we use uint64
This default dtype does _not_ depend on the value of "acc_dtype".
:Parameter: *keepdims* - (boolean) If this is set to True, the axes which are reduced are
left in the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor.
:Parameter: *acc_dtype* - The dtype of the internal accumulator.
If None (default), we use the dtype in the list below,
or the input dtype if its precision is higher:
- for int dtypes, we use at least int64;
- for uint dtypes, we use at least uint64;
- for float dtypes, we use at least float64;
- for complex dtypes, we use at least complex128.
:Returns: sum of *x* along *axis*
axis can be:
......@@ -729,13 +750,34 @@ Reductions
* an *int* - computed along this axis
* a *list of ints* - computed along these axes
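For illustration, a minimal sketch (assuming the usual ``import theano.tensor as T``; the variable names are only for this example)::

    import theano.tensor as T

    x = T.matrix(dtype='float32')
    s = T.sum(x)  # equivalently: x.sum()
    # The output keeps the input dtype; the accumulation is done in float64,
    # following the default rules for dtype and acc_dtype described above.
    assert s.dtype == 'float32'
    assert s.owner.op.acc_dtype == 'float64'

    i = T.matrix(dtype='int8')
    # Small integer inputs are summed into int64 by default.
    assert T.sum(i).dtype == 'int64'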
.. function:: prod(x, axis=None, keepdims=False)
.. function:: prod(x, axis=None, dtype=None, keepdims=False, acc_dtype=None)
:Parameter: *x* - symbolic Tensor (or compatible)
:Parameter: *axis* - axis or axes along which to compute the product
:Parameter: *dtype* - The dtype of the returned tensor.
If None, then we use the default dtype which is the same as
the input tensor's dtype except when:
- the input dtype is a signed integer of precision < 64 bit, in
which case we use int64
- the input dtype is an unsigned integer of precision < 64 bit, in
which case we use uint64
This default dtype does _not_ depend on the value of "acc_dtype".
:Parameter: *keepdims* - (boolean) If this is set to True, the axes which are reduced are
left in the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor.
:Parameter: *acc_dtype* - The dtype of the internal accumulator.
If None (default), we use the dtype in the list below,
or the input dtype if its precision is higher:
- for int dtypes, we use at least int64;
- for uint dtypes, we use at least uint64;
- for float dtypes, we use at least float64;
- for complex dtypes, we use at least complex128.
:Returns: product of every term in *x* along *axis*
axis can be:
......@@ -743,13 +785,22 @@ Reductions
* an *int* - computed along this axis
* a *list of ints* - computed along these axes
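For example (a sketch of the rules above; the exact error message may differ), an accumulator less precise than the input is rejected, while a less precise ``dtype`` only casts the final result::

    import theano.tensor as T

    x = T.matrix(dtype='float64')

    # Downcasting only the final result is allowed: the product is
    # accumulated in float64, then cast to float32 at the end.
    p = x.prod(dtype='float32')
    assert p.dtype == 'float32'
    assert p.owner.op.acc_dtype == 'float64'

    # Requesting an accumulator less precise than the input raises a TypeError.
    try:
        x.prod(acc_dtype='float32')
    except TypeError:
        pass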
.. function:: mean(x, axis=None, keepdims=False)
.. function:: mean(x, axis=None, dtype=None, keepdims=False, acc_dtype=None)
:Parameter: *x* - symbolic Tensor (or compatible)
:Parameter: *axis* - axis or axes along which to compute the mean
:Parameter: *dtype* - The dtype to cast the result of the inner summation into.
For instance, by default, a sum of a float32 tensor will be
done in float64 (acc_dtype would be float64 by default),
but that result will be cast back to float32.
:Parameter: *keepdims* - (boolean) If this is set to True, the axes which are reduced are
left in the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor.
:Parameter: *acc_dtype* - The dtype of the internal accumulator of the
inner summation. This will not necessarily be the dtype of the
output (in particular if it is a discrete (int/uint) dtype, the
output will be in a float type). If None, then we use the same
rules as :ref:`sum()`.
:Returns: mean value of *x* along *axis*
axis can be:
......
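The practical effect of the wider default accumulator is numerical stability. A short sketch mirroring the tests added in this change, with values chosen so that float32 accumulation would cancel catastrophically::

    import numpy
    import theano

    x = theano.shared(numpy.asarray([1e8, 1, -1e8], dtype='float32'))
    f = theano.function([], [x.sum(), x.mean()])
    s_val, m_val = f()
    # With a float32 accumulator, 1e8 + 1 == 1e8 and the sum would be 0.
    # The default float64 accumulator keeps the results close to 1 and 1/3.
    assert numpy.allclose(s_val, 1)
    assert numpy.allclose(m_val, 1. / 3)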
......@@ -458,7 +458,8 @@ def grad(cost, wrt, consider_constant=None,
g_cost = g_cost.astype(cost.type.dtype)
# DO NOT enforce g_cost to be 0 if cost is an integer.
# This is to be enforced by the Op.grad method for the Op that outputs cost.
assert g_cost not in tensor.discrete_dtypes
if hasattr(g_cost.type, 'dtype'):
assert g_cost.type.dtype not in tensor.discrete_dtypes
grad_dict[cost] = g_cost
......
......@@ -1826,13 +1826,15 @@ class _tensor_py_operators:
dot = __dot__
def sum(self, axis=None, dtype=None, keepdims=False):
def sum(self, axis=None, dtype=None, keepdims=False, acc_dtype=None):
"""See `theano.tensor.sum`"""
return sum(self, axis=axis, dtype=dtype, keepdims=keepdims)
return sum(self, axis=axis, dtype=dtype, keepdims=keepdims,
acc_dtype=acc_dtype)
def prod(self, axis=None, dtype=None, keepdims=False):
def prod(self, axis=None, dtype=None, keepdims=False, acc_dtype=None):
"""See `theano.tensor.prod`"""
return prod(self, axis=axis, dtype=dtype, keepdims=keepdims)
return prod(self, axis=axis, dtype=dtype, keepdims=keepdims,
acc_dtype=acc_dtype)
def norm(self, L, axis=None):
if L == 0:
......@@ -1842,9 +1844,10 @@ class _tensor_py_operators:
# optimizations will/should catch cases like L=1, L=2
return pow(pow(abs_(self), L).sum(axis=axis), 1.0 / L)
def mean(self, axis=None, dtype=None, keepdims=False):
def mean(self, axis=None, dtype=None, keepdims=False, acc_dtype=None):
"""See `theano.tensor.mean`"""
return mean(self, axis=axis, dtype=dtype, keepdims=keepdims)
return mean(self, axis=axis, dtype=dtype, keepdims=keepdims,
acc_dtype=acc_dtype)
def var(self, axis=None, keepdims=False):
"""See `theano.tensor.var`"""
......@@ -3777,7 +3780,7 @@ pprint.assign(tensor_copy, printing.IgnorePrinter())
@constructor
def sum(input, axis=None, dtype=None, keepdims=False):
def sum(input, axis=None, dtype=None, keepdims=False, acc_dtype=None):
"""
Computes the sum along the given axis(es) of a tensor `input`
......@@ -3790,10 +3793,10 @@ def sum(input, axis=None, dtype=None, keepdims=False):
For full documentation see ``tensor.elemwise.Sum``.
In particular please pay attention to the important warning when using
a custom dtype.
a custom acc_dtype.
"""
out = elemwise.Sum(axis=axis, dtype=dtype)(input)
out = elemwise.Sum(axis=axis, dtype=dtype, acc_dtype=acc_dtype)(input)
if keepdims:
out = makeKeepDims(input, out, axis)
......@@ -3803,7 +3806,7 @@ pprint.assign(Sum(), printing.FunctionPrinter('sum'))
@constructor
def prod(input, axis=None, dtype=None, keepdims=False):
def prod(input, axis=None, dtype=None, keepdims=False, acc_dtype=None):
"""
Computes the product along the given axis(es) of a tensor `input`
......@@ -3817,7 +3820,7 @@ def prod(input, axis=None, dtype=None, keepdims=False):
For full documentation see ``tensor.elemwise.Prod``.
"""
out = elemwise.Prod(axis, dtype=dtype)(input)
out = elemwise.Prod(axis, dtype=dtype, acc_dtype=acc_dtype)(input)
if keepdims:
out = makeKeepDims(input, out, axis)
......@@ -3868,7 +3871,8 @@ class Mean(elemwise.CAReduce):
@constructor
def mean(input, axis=None, dtype=None, op=False, keepdims=False):
def mean(input, axis=None, dtype=None, op=False, keepdims=False,
acc_dtype=None):
"""
Computes the mean value along the given axis(es) of a tensor `input`
......@@ -3876,17 +3880,23 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False):
None means all axes (like numpy).
:type axis: None or int or (list of int) (see `Sum`)
:param dtype: dtype to use for the inner summation. This will not
necessarily be the dtype of the output (in particular
if it is a discrete (int/uint) dtype, the output will
be in a float type).
If None, then we use the same rules as `sum()`.
:param dtype: dtype to cast the result of the inner summation into.
For instance, by default, a sum of a float32 tensor will be
done in float64 (acc_dtype would be float64 by default),
but that result will be cast back to float32.
:type dtype: None or string
:param keepdims: If this is set to True, the axes which are reduced are
left in the result as dimensions with size one. With this option,
the result will broadcast correctly against the original tensor.
:param acc_dtype: dtype to use for the inner summation. This will not
necessarily be the dtype of the output (in particular
if it is a discrete (int/uint) dtype, the output will
be in a float type).
If None, then we use the same rules as `sum()`.
:type acc_dtype: None or string
:note: for the GPU, if you specify dtype=float32, everything will be done
on the GPU.
"""
......@@ -3898,6 +3908,12 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False):
'and will always use float64. If you want to specify '
'the dtype, call tensor.mean(..., op=False).',
dtype)
if acc_dtype not in (None, 'float64'):
raise NotImplementedError(
'The Mean op does not support the acc_dtype argument, '
'and will always use float64. If you want to specify '
'acc_dtype, call tensor.mean(..., op=False).',
acc_dtype)
out = Mean(axis)(input)
if keepdims:
out = makeKeepDims(input, out, axis)
......@@ -3911,7 +3927,8 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False):
# Let sum() infer the appropriate dtype.
sum_dtype = None
s = sum(input, axis=axis, dtype=sum_dtype, keepdims=keepdims)
s = sum(input, axis=axis, dtype=sum_dtype, keepdims=keepdims,
acc_dtype=acc_dtype)
shp = shape(input)
# Cast shp into a float type
......
......@@ -19,6 +19,10 @@ from theano.tensor import elemwise_cgen as cgen
config = theano.config
# We cannot import discrete_dtypes from tensor.basic yet,
# so we redefine them here
discrete_dtypes = map(str, scalar.discrete_types)
# tensor depends on elemwise to provide definitions for several ops
# but elemwise needs to make TensorType instances, so we have these as
......@@ -1279,6 +1283,12 @@ class CAReduce(Op):
axis = range(input.ndim)
variable = input
to_reduce = reversed(sorted(axis))
if hasattr(self, 'acc_dtype'):
acc_dtype = self.acc_dtype
else:
acc_dtype = node.outputs[0].type.dtype
if to_reduce:
for dimension in to_reduce:
# If it's a zero-size array, use scalar_op.identity
......@@ -1289,7 +1299,7 @@ class CAReduce(Op):
v_shape = list(variable.shape)
del v_shape[dimension]
variable = numpy.empty(tuple(v_shape),
dtype=variable.dtype)
dtype=acc_dtype)
variable.fill(self.scalar_op.identity)
else:
raise ValueError((
......@@ -1307,7 +1317,8 @@ class CAReduce(Op):
variable = self.ufunc.reduce(variable, dimension,
dtype='object')
else:
variable = self.ufunc.reduce(variable, dimension)
variable = self.ufunc.reduce(variable, dimension,
dtype=acc_dtype)
variable = numpy.asarray(variable)
if numpy.may_share_memory(variable, input):
......@@ -1317,7 +1328,9 @@ class CAReduce(Op):
output[0] = theano._asarray(variable,
dtype=node.outputs[0].type.dtype)
else:
output[0] = numpy.copy(variable)
# Force a copy
output[0] = numpy.array(variable, copy=True,
dtype=node.outputs[0].type.dtype)
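# Illustrative sketch (not part of this patch): perform() now accumulates
# with ufunc.reduce(dtype=acc_dtype) and only casts to the output dtype at
# the end. The same pattern in plain numpy looks like this:
import numpy
a = numpy.array([1e8, 1, -1e8], dtype='float32')
acc = numpy.add.reduce(a, dtype='float64')           # accumulate in float64
out = numpy.array(acc, copy=True, dtype='float32')   # cast to the output dtype
assert numpy.allclose(out, 1.0)                      # float32 accumulation would give 0.0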
def infer_shape(self, node, shapes):
ishape, = shapes
......@@ -1339,6 +1352,14 @@ class CAReduce(Op):
idtype = input.type.dtype_specs()[1]
odtype = output.type.dtype_specs()[1]
if hasattr(self, 'acc_dtype'):
acc_type = TensorType(
broadcastable=node.outputs[0].broadcastable,
dtype=self.acc_dtype)
adtype = acc_type.dtype_specs()[1]
else:
adtype = odtype
axis = self.axis
if axis is None:
axis = range(len(input.type.broadcastable))
......@@ -1356,13 +1377,25 @@ class CAReduce(Op):
for i, (input, iname) in enumerate(izip(node.inputs, inames)):
sub['lv%i' % i] = iname
decl = cgen.make_declare([order], [idtype], sub)
decl = ""
if adtype != odtype:
# Create an accumulator variable different from the output
aname = "acc"
decl = acc_type.c_declare(aname, sub)
decl += acc_type.c_init(aname, sub)
else:
# the output is the accumulator variable
aname = oname
decl += cgen.make_declare([order], [idtype], sub)
checks = cgen.make_checks([order], [idtype], sub)
alloc = ""
i += 1
sub['lv%i' % i] = oname
sub['olv'] = oname
# Allocate output buffer
alloc += cgen.make_declare(
[range(nnested) + ['x'] * len(axis)],
[odtype], dict(sub, lv0=oname))
......@@ -1371,6 +1404,19 @@ class CAReduce(Op):
[range(nnested) + ['x'] * len(axis)],
[odtype], dict(sub, lv0=oname))
if adtype != odtype:
# Allocate accumulation buffer
sub['lv%i' % i] = aname
sub['olv'] = aname
alloc += cgen.make_declare(
[range(nnested) + ['x'] * len(axis)],
[adtype], dict(sub, lv0=aname))
alloc += cgen.make_alloc([order1], adtype, sub)
alloc += cgen.make_checks(
[range(nnested) + ['x'] * len(axis)],
[adtype], dict(sub, lv0=aname))
if hasattr(self.scalar_op, 'identity'):
identity = self.scalar_op.identity
elif self.scalar_op in [scalar.maximum, scalar.minimum]:
......@@ -1414,7 +1460,7 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
task0_decl = (
"%(dtype)s& %(name)s_i = *%(name)s_iter;\n"
"%(name)s_i = %(identity)s;"
% dict(dtype=odtype, name=onames[0], identity=identity))
% dict(dtype=adtype, name=aname, identity=identity))
task1_decl = ("%(dtype)s& %(name)s_i = *%(name)s_iter;\n"
% dict(dtype=idtype, name=inames[0]))
......@@ -1427,8 +1473,8 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
[Scalar(dtype=output.type.dtype)()
for input in node.outputs]),
None,
["%s_i" % onames[0], "%s_i" % inames[0]],
["%s_i" % onames[0]],
["%s_i" % aname, "%s_i" % inames[0]],
["%s_i" % aname],
sub)
code1 = """
{
......@@ -1450,8 +1496,16 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
all_code = [task0_decl + code1]
loop = cgen.make_loop(
[order, range(nnested) + ['x'] * len(axis)],
[idtype, odtype], all_code, sub)
return decl, checks, alloc, loop
[idtype, adtype], all_code, sub)
end = ""
if adtype != odtype:
end = """
PyArray_CopyInto(%(oname)s, %(aname)s);
""" % dict(oname=oname, aname=aname)
end += acc_type.c_cleanup(aname, sub)
return decl, checks, alloc, loop, end
def c_code(self, node, name, inames, onames, sub):
code = "\n".join(self._c_all(node, name, inames, onames, sub))
......@@ -1462,7 +1516,7 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
return ['<vector>', '<algorithm>']
def c_code_cache_version_apply(self, node):
version = [4] # the version corresponding to the c code in this Op
version = [5] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op,
......@@ -1532,13 +1586,19 @@ class CAReduceDtype(CAReduce):
Reduces a scalar operation along the specified axis(es).
This subclass of CAReduce accepts an additional "dtype" parameter,
that specifies which dtype will be used for the accumulation.
that specifies which dtype the output should be.
It also accepts an optional "acc_dtype", which specifies the dtype that
will be used for the accumulation.
The accumulation is therefore done in a tensor of dtype "acc_dtype",
and the result is then cast to "dtype" and returned.
If no dtype is provided, one will be inferred so as not to lose
too much precision.
"""
def __init__(self, scalar_op, axis=None, dtype=None):
def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None):
"""
Usage: CAReduceDtype(scalar_op, axis=None, dtype=None, acc_dtype=None)
......@@ -1549,26 +1609,38 @@ class CAReduceDtype(CAReduce):
- list of dimensions that we want to reduce
- if None, all dimensions are reduced
:param dtype: The dtype of the internal accumulator and returned
tensor. If None, then we use the default dtype which is the same as the
input tensor's dtype except when:
:param dtype: The dtype of the returned
tensor. If None, then we use the default dtype which is the same
as the input tensor's dtype except when:
- the input dtype is a signed integer of precision < 64 bit, in
which case we use int64
- the input dtype is an unsigned integer of precision < 64 bit, in
which case we use uint64
This behavior is similar in spirit to that of numpy (except numpy
uses the default machine integer while we always use 64 bit integers to
avoid platform-dependent behavior).
This default dtype does _not_ depend on the value of "acc_dtype".
This behavior is similar in spirit to that of numpy (except numpy
uses the default machine integer while we always use 64 bit
integers to avoid platform-dependent behavior).
:param acc_dtype: The dtype of the internal accumulator.
If None (default), we use the dtype in the list below,
or the input dtype if its precision is higher:
- for int dtypes, we use at least int64;
- for uint dtypes, we use at least uint64;
- for float dtypes, we use at least float64;
- for complex dtypes, we use at least complex128.
"""
CAReduce.__init__(self, scalar_op, axis=axis)
self.dtype = dtype
self.acc_dtype = acc_dtype
def __eq__(self, other):
return CAReduce.__eq__(self, other) and self.dtype == other.dtype
return (CAReduce.__eq__(self, other)
and self.dtype == other.dtype
and self.acc_dtype == other.acc_dtype)
def __hash__(self):
return CAReduce.__hash__(self) ^ hash(self.dtype)
return CAReduce.__hash__(self) ^ hash((self.dtype, self.acc_dtype))
def __setstate__(self, d):
self.__dict__.update(d)
......@@ -1578,6 +1650,11 @@ class CAReduceDtype(CAReduce):
# could be in an apply node with a specified dtype.
self.dtype = "OLD"
if not hasattr(self, "acc_dtype"):
# acc_dtype is not used by any external Op, so we do not
# need to keep the previous behaviour here.
self.acc_dtype = None
def _output_dtype(self, idtype):
dtype = self.dtype
if dtype == "OLD":
......@@ -1599,25 +1676,46 @@ class CAReduceDtype(CAReduce):
uint16='uint64',
uint32='uint64',
).get(idtype, idtype)
elif (dtype in theano.tensor.continuous_dtypes and
idtype in theano.tensor.discrete_dtypes):
# Specifying a continuous output for discrete input is OK
else:
# What matters is that the accumulator dtype does not
# lose precision. The result can then be downcast.
return dtype
def _acc_dtype(self, idtype):
acc_dtype = self.acc_dtype
if acc_dtype is None:
return dict(
int8='int64',
int16='int64',
int32='int64',
uint8='uint64',
uint16='uint64',
uint32='uint64',
float32='float64',
complex64='complex128',
).get(idtype, idtype)
elif (acc_dtype in theano.tensor.continuous_dtypes and
idtype in theano.tensor.discrete_dtypes):
# Specifying a continuous accumulator for discrete input is OK
return acc_dtype
else:
# The conversion has to be considered an upcast.
upcasted_dtype = scalar.upcast(idtype, dtype)
if dtype != upcasted_dtype:
upcasted_dtype = scalar.upcast(idtype, acc_dtype)
if acc_dtype != upcasted_dtype:
raise TypeError(
'Cannot build %s node with input dtype %s '
'and output dtype %s, as precision would be lost. '
'To correct this error, you can either:\n'
' - not specify a dtype, or\n'
' - use a dtype at least as precise as %s.\n'
'and acc_dtype %s, as precision would be lost. '
'To correct this error, you can:\n'
' - not specify acc_dtype, or\n'
' - use an acc_dtype at least as precise as %s, or\n'
' - specify "dtype" instead of "acc_dtype", so '
'the reduction will be precise, but the result will '
'be cast to "dtype" at the end.\n'
'If you are expecting the precision loss, you can '
'use tensor.cast(..., dtype="%s"), either on your '
'input, or on the output of the reduce operation.'
% (self, idtype, dtype, upcasted_dtype, dtype))
return dtype
'use tensor.cast(..., dtype="%s") on your input.'
% (self, idtype, acc_dtype, upcasted_dtype, acc_dtype))
return acc_dtype
def make_node(self, input):
# We need to redefine make_node so that, if self.dtype is None,
......@@ -1625,12 +1723,17 @@ class CAReduceDtype(CAReduce):
# of the appropriate dtype.
input = as_tensor_variable(input)
dtype = self._output_dtype(input.dtype)
acc_dtype = self._acc_dtype(input.dtype)
assert dtype is not None
if dtype == self.dtype:
assert acc_dtype is not None
if dtype == self.dtype and acc_dtype == self.acc_dtype:
# Don't build another instance
op = self
else:
op = self.__class__(axis=self.axis, dtype=dtype)
op = self.__class__(axis=self.axis,
dtype=dtype, acc_dtype=acc_dtype)
assert op.acc_dtype is not None
return CAReduce.make_node(op, input)
......@@ -1643,7 +1746,7 @@ class Sum(CAReduceDtype):
tensor input.
"""
def __init__(self, axis=None, dtype=None):
def __init__(self, axis=None, dtype=None, acc_dtype=None):
"""
Constructor.
......@@ -1658,8 +1761,18 @@ class Sum(CAReduceDtype):
which case we use int64
- the input dtype is an unsigned integer of precision < 64 bit, in
which case we use uint64
This value does not depend on the value of "acc_dtype".
:param acc_dtype: The dtype of the internal accumulator.
If None (default), we use the dtype in the list below,
or the input dtype if its precision is higher:
- for int dtypes, we use at least int64;
- for uint dtypes, we use at least uint64;
- for float dtypes, we use at least float64;
- for complex dtypes, we use at least complex128.
"""
CAReduceDtype.__init__(self, scalar.add, axis=axis, dtype=dtype)
CAReduceDtype.__init__(self, scalar.add, axis=axis,
dtype=dtype, acc_dtype=acc_dtype)
def grad(self, inp, grads):
x, = inp
......@@ -1667,7 +1780,7 @@ class Sum(CAReduceDtype):
out = self(*inp)
if out.dtype.find('int') != -1:
return [x.zeros_like().astype(theano.config.floatX)]
return [theano.tensor.zeros_like(x, dtype=theano.config.floatX)]
gz, = grads
gz = as_tensor_variable(gz)
......@@ -1710,8 +1823,10 @@ class Prod(CAReduceDtype):
difference that this defines the gradient of prod wrt its tensor
input.
"""
def __init__(self, axis=None, dtype=None, no_zeros_in_input=False):
CAReduceDtype.__init__(self, scalar.mul, axis=axis, dtype=dtype)
def __init__(self, axis=None, dtype=None, acc_dtype=None,
no_zeros_in_input=False):
CAReduceDtype.__init__(self, scalar.mul, axis=axis,
dtype=dtype, acc_dtype=acc_dtype)
self.no_zeros_in_input = no_zeros_in_input
def __setstate__(self, dct):
......@@ -1779,8 +1894,11 @@ class Prod(CAReduceDtype):
out = self(*inp)
if out.dtype[0:3] in ('int', 'uin'):
return [prod_in.zeros_like().astype(theano.config.floatX)]
if (out.dtype in discrete_dtypes or
self.acc_dtype in discrete_dtypes):
# There is an int conversion in the way
return [theano.tensor.zeros_like(prod_in,
dtype=theano.config.floatX)]
# Prepare the broadcasting that is used everywhere to broadcast
# over the original groups (ie. broadcast over the elements of a given
......@@ -1893,8 +2011,9 @@ mul_without_zeros = MulWithoutZeros(scalar.upcast_out,
class ProdWithoutZeros(CAReduceDtype):
def __init__(self, axis=None, dtype=None):
CAReduceDtype.__init__(self, mul_without_zeros, axis=axis, dtype=dtype)
def __init__(self, axis=None, dtype=None, acc_dtype=None):
CAReduceDtype.__init__(self, mul_without_zeros, axis=axis,
dtype=dtype, acc_dtype=acc_dtype)
def __str__(self):
if self.axis is None:
......
......@@ -632,6 +632,24 @@ class T_sum_dtype(unittest.TestCase):
uint32='uint64',
).get(dtype, dtype)
def test_sum_default_acc_dtype(self):
"""
Test the default acc_dtype of a sum().
"""
# We try multiple axis combinations even though axis should not matter.
axes = [None, 0, 1, [0], [1], [0, 1]]
for idx, dtype in enumerate(imap(str, theano.scalar.all_types)):
axis = axes[idx % len(axes)]
x = tensor.matrix(dtype=dtype).sum(axis=axis)
assert x.owner.op.acc_dtype == dict(
int8='int64',
int16='int64',
int32='int64',
uint8='uint64',
uint16='uint64',
uint32='uint64',
float32='float64',
complex64='complex128',
).get(dtype, dtype)
def test_sum_custom_dtype(self):
"""
Test the ability to provide your own output dtype for a sum.
......@@ -649,16 +667,44 @@ class T_sum_dtype(unittest.TestCase):
output_dtype.startswith('complex')):
continue
axis = axes[idx % len(axes)]
sum_var = x.sum(dtype=output_dtype, axis=axis)
assert sum_var.dtype == output_dtype
if "complex" in input_dtype:
continue
# Check that we can take the gradient
grad_var = tensor.grad(sum_var.sum(), x,
disconnected_inputs='ignore')
idx += 1
def test_sum_custom_acc_dtype(self):
"""
Test the ability to provide your own accumulator dtype for a sum.
"""
# We try multiple axis combinations even though axis should not matter.
axes = [None, 0, 1, [0], [1], [0, 1]]
idx = 0
for input_dtype in imap(str, theano.scalar.all_types):
x = tensor.matrix(dtype=input_dtype)
for acc_dtype in imap(str, theano.scalar.all_types):
# If the accumulator is complex, the gradient of the sum will
# cast the complex value back to the input dtype. We can't use the
# normal cast from a complex to a non-complex dtype, as it is ambiguous.
if (not input_dtype.startswith('complex') and
acc_dtype.startswith('complex')):
continue
axis = axes[idx % len(axes)]
# If output_dtype would force a downcast, we expect a TypeError
# We always allow int/uint inputs with float/complex outputs.
upcasted_dtype = scalar.upcast(input_dtype, output_dtype)
if (output_dtype == upcasted_dtype or
upcasted_dtype = scalar.upcast(input_dtype, acc_dtype)
if (acc_dtype == upcasted_dtype or
(input_dtype in tensor.discrete_dtypes and
output_dtype in tensor.continuous_dtypes)
acc_dtype in tensor.continuous_dtypes)
):
sum_var = x.sum(dtype=output_dtype, axis=axis)
assert sum_var.dtype == output_dtype
sum_var = x.sum(acc_dtype=acc_dtype, axis=axis)
assert sum_var.owner.op.acc_dtype == acc_dtype
if "complex" in input_dtype:
continue
......@@ -667,10 +713,18 @@ class T_sum_dtype(unittest.TestCase):
disconnected_inputs='ignore')
else:
self.assertRaises(TypeError,
x.sum, dtype=output_dtype, axis=axis)
x.sum, acc_dtype=acc_dtype, axis=axis)
idx += 1
def test_sum_precision(self):
# Check that the default accumulator precision is sufficient
x = theano.shared(numpy.asarray([1e8, 1, -1e8], dtype='float32'))
s = x.sum()
f = theano.function([], s)
s_val = f()
assert numpy.allclose(s_val, 1)
class T_mean_dtype(unittest.TestCase):
def test_mean_default_dtype(self):
......@@ -688,7 +742,6 @@ class T_mean_dtype(unittest.TestCase):
assert x.dtype == dtype, (x, x.dtype, dtype)
def test_mean_custom_dtype(self):
"""
Test the ability to provide your own output dtype for a mean.
"""
......@@ -727,6 +780,14 @@ class T_mean_dtype(unittest.TestCase):
idx += 1
def test_mean_precision(self):
# Check that the default accumulator precision is sufficient
x = theano.shared(numpy.asarray([1e8, 1, -1e8], dtype='float32'))
m = x.mean()
f = theano.function([], m)
m_val = f()
assert numpy.allclose(m_val, 1. / 3)
class T_prod_dtype(unittest.TestCase):
def test_prod_default_dtype(self):
......@@ -747,6 +808,26 @@ class T_prod_dtype(unittest.TestCase):
uint32='uint64',
).get(dtype, dtype)
def test_prod_default_acc_dtype(self):
"""
Test the default acc_dtype of a prod().
"""
# We try multiple axis combinations even though axis should not matter.
axes = [None, 0, 1, [0], [1], [0, 1]]
for idx, dtype in enumerate(imap(str, theano.scalar.all_types)):
axis = axes[idx % len(axes)]
x = tensor.matrix(dtype=dtype).prod(axis=axis)
assert x.owner.op.acc_dtype == dict(
int8='int64',
int16='int64',
int32='int64',
uint8='uint64',
uint16='uint64',
uint32='uint64',
float32='float64',
complex64='complex128',
).get(dtype, dtype)
def test_prod_custom_dtype(self):
"""
Test the ability to provide your own output dtype for a prod.
......@@ -758,24 +839,45 @@ class T_prod_dtype(unittest.TestCase):
x = tensor.matrix(dtype=input_dtype)
for output_dtype in imap(str, theano.scalar.all_types):
axis = axes[idx % len(axes)]
# If output_dtype would force a downcast, we expect a TypeError
prod_var = x.prod(dtype=output_dtype, axis=axis)
assert prod_var.dtype == output_dtype
if "complex" in output_dtype or "complex" in input_dtype:
continue
# Check that we can take the gradient
grad_var = tensor.grad(prod_var.sum(), x,
disconnected_inputs='ignore')
idx += 1
def test_prod_custom_acc_dtype(self):
"""
Test the ability to provide your own acc_dtype for a prod.
"""
# We try multiple axis combinations even though axis should not matter.
axes = [None, 0, 1, [0], [1], [0, 1]]
idx = 0
for input_dtype in imap(str, theano.scalar.all_types):
x = tensor.matrix(dtype=input_dtype)
for acc_dtype in imap(str, theano.scalar.all_types):
axis = axes[idx % len(axes)]
# If acc_dtype would force a downcast, we expect a TypeError
# We always allow int/uint inputs with float/complex outputs.
upcasted_dtype = scalar.upcast(input_dtype, output_dtype)
if (output_dtype == upcasted_dtype or
upcasted_dtype = scalar.upcast(input_dtype, acc_dtype)
if (acc_dtype == upcasted_dtype or
(input_dtype in tensor.discrete_dtypes and
output_dtype in tensor.continuous_dtypes)
acc_dtype in tensor.continuous_dtypes)
):
prod_var = x.prod(dtype=output_dtype, axis=axis)
assert prod_var.dtype == output_dtype
prod_var = x.prod(acc_dtype=acc_dtype, axis=axis)
assert prod_var.owner.op.acc_dtype == acc_dtype
if "complex" in output_dtype:
if "complex" in acc_dtype:
continue
# Check that we can take the gradient
grad_var = tensor.grad(prod_var.sum(), x,
disconnected_inputs='ignore')
else:
self.assertRaises(TypeError,
x.prod, dtype=output_dtype, axis=axis)
x.prod, acc_dtype=acc_dtype, axis=axis)
idx += 1
......@@ -799,6 +901,26 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
uint32='uint64',
).get(dtype, dtype)
def test_prod_without_zeros_default_acc_dtype(self):
"""
Test the default acc_dtype of a ProdWithoutZeros().
"""
# We try multiple axis combinations even though axis should not matter.
axes = [None, 0, 1, [0], [1], [0, 1]]
for idx, dtype in enumerate(imap(str, theano.scalar.all_types)):
axis = axes[idx % len(axes)]
x = ProdWithoutZeros(axis=axis)(tensor.matrix(dtype=dtype))
assert x.owner.op.acc_dtype == dict(
int8='int64',
int16='int64',
int32='int64',
uint8='uint64',
uint16='uint64',
uint32='uint64',
float32='float64',
complex64='complex128'
).get(dtype, dtype)
def test_prod_without_zeros_custom_dtype(self):
"""
Test the ability to provide your own output dtype for a ProdWithoutZeros().
......@@ -810,19 +932,35 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
x = tensor.matrix(dtype=input_dtype)
for output_dtype in imap(str, theano.scalar.all_types):
axis = axes[idx % len(axes)]
# If output_dtype would force a downcast, we expect a TypeError
prod_woz_var = ProdWithoutZeros(
axis=axis, dtype=output_dtype)(x)
assert prod_woz_var.dtype == output_dtype
idx += 1
def test_prod_without_zeros_custom_acc_dtype(self):
"""
Test the ability to provide your own acc_dtype for a ProdWithoutZeros().
"""
# We try multiple axis combinations even though axis should not matter.
axes = [None, 0, 1, [0], [1], [0, 1]]
idx = 0
for input_dtype in imap(str, theano.scalar.all_types):
x = tensor.matrix(dtype=input_dtype)
for acc_dtype in imap(str, theano.scalar.all_types):
axis = axes[idx % len(axes)]
# If acc_dtype would force a downcast, we expect a TypeError
# We always allow int/uint inputs with float/complex outputs.
upcasted_dtype = scalar.upcast(input_dtype, output_dtype)
if (output_dtype == upcasted_dtype or
upcasted_dtype = scalar.upcast(input_dtype, acc_dtype)
if (acc_dtype == upcasted_dtype or
(input_dtype in tensor.discrete_dtypes and
output_dtype in tensor.continuous_dtypes)
acc_dtype in tensor.continuous_dtypes)
):
prod_woz_var = ProdWithoutZeros(
axis=axis, dtype=output_dtype)(x)
assert prod_woz_var.dtype == output_dtype
axis=axis, acc_dtype=acc_dtype)(x)
assert prod_woz_var.owner.op.acc_dtype == acc_dtype
else:
self.assertRaises(TypeError,
ProdWithoutZeros(axis=axis, dtype=output_dtype),
ProdWithoutZeros(axis=axis, acc_dtype=acc_dtype),
x)
idx += 1
......