提交 52cb8ec7 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2228 from lamblin/fix_float16

Prevent computations in float16 in scalar and elemwise
差异被折叠。
...@@ -10,6 +10,7 @@ If you do want to rewrite these tests, bear in mind: ...@@ -10,6 +10,7 @@ If you do want to rewrite these tests, bear in mind:
""" """
import unittest import unittest
import numpy as np
import theano import theano
from theano.gof import FunctionGraph from theano.gof import FunctionGraph
...@@ -20,8 +21,12 @@ from theano.scalar.basic import (floats, float32, float64, ...@@ -20,8 +21,12 @@ from theano.scalar.basic import (floats, float32, float64,
ints, int8, int32, complex64, ints, int8, int32, complex64,
ComplexError, IntDiv, TrueDiv, ComplexError, IntDiv, TrueDiv,
Composite, add, div_proxy, clip, Composite, add, div_proxy, clip,
and_, eq, neq, invert, mul) and_, eq, neq, invert, mul, Scalar)
import numpy from theano.scalar.basic import (
true_div, inv, log, log2, log10, log1p, exp, exp2, expm1, sqrt, deg2rad,
rad2deg, cos, arccos, sin, arcsin, tan, arctan, arctan2, cosh, arccosh,
sinh, arcsinh, tanh, arctanh)
def inputs(): def inputs():
return floats('xyz') return floats('xyz')
...@@ -75,7 +80,7 @@ class test_ScalarOps(unittest.TestCase): ...@@ -75,7 +80,7 @@ class test_ScalarOps(unittest.TestCase):
g3 = theano.gradient.grad(a3, x) g3 = theano.gradient.grad(a3, x)
fn3 = gof.DualLinker().accept(FunctionGraph([x], [g3])).make_function() fn3 = gof.DualLinker().accept(FunctionGraph([x], [g3])).make_function()
rng = numpy.random.RandomState(utt.fetch_seed()) rng = np.random.RandomState(utt.fetch_seed())
ntests = 50 ntests = 50
for i in xrange(ntests): for i in xrange(ntests):
...@@ -235,6 +240,128 @@ class test_logical(unittest.TestCase): ...@@ -235,6 +240,128 @@ class test_logical(unittest.TestCase):
self.assertTrue(fn(a,b) == ~a, (a,)) self.assertTrue(fn(a,b) == ~a, (a,))
# This class does not inherit from unittest.TestCase, because it would
# interfere with the "yield" mechanism that automatically generates tests, see
# http://stackoverflow.com/questions/6689537/nose-test-generators-inside-class
# Therefore, it needs to be named "test_..." or "Test_...", so nose can pick
# it up by name, otherwise the tests would not be executed.
class test_upgrade_to_float(object):
    # Test for Ops whose output has to be floating point, even when all
    # inputs are ints.
    # In particular, when the inputs are int8, the output should be
    # at least float32, not float16.

    # (op, values): value ranges are chosen so the op is defined on them
    # and does not overflow in float32 (e.g. exp up to 88, cosh up to 89).
    # NOTE: list(range(...)) + list(range(...)) is used instead of
    # range(...) + range(...) so the class body also evaluates under
    # Python 3, where range objects cannot be concatenated.
    unary_ops_vals = [
        (inv, list(range(-127, 0)) + list(range(1, 127))),
        (sqrt, range(0, 128)),
        (log, range(1, 128)),
        (log2, range(1, 128)),
        (log10, range(1, 128)),
        (log1p, range(0, 128)),
        (exp, range(-127, 89)),
        (exp2, range(-127, 89)),
        (expm1, range(-127, 89)),
        (deg2rad, range(-127, 128)),
        (rad2deg, range(-127, 128)),
        (cos, range(-127, 128)),
        (arccos, range(-1, 2)),
        (cosh, range(-89, 90)),
        (arccosh, range(1, 128)),
        (sin, range(-127, 128)),
        (arcsin, range(-1, 2)),
        (sinh, range(-89, 90)),
        (arcsinh, range(-127, 128)),
        (tan, range(-3, 4)),
        (arctan, range(-127, 128)),
        (tanh, range(-127, 128)),
        (arctanh, [0])]

    binary_ops_vals = [
        (arctan2, range(-127, 128), range(-127, 128))]

    @staticmethod
    def _test_unary(unary_op, x_range):
        """Compile `unary_op` on an int8 input and on a float32 input,
        and check that, over `x_range`, both functions agree on the
        output dtype and on the output values (within allclose)."""
        xi = int8('xi')
        xf = float32('xf')
        ei = unary_op(xi)
        fi = theano.function([xi], ei)
        ef = unary_op(xf)
        ff = theano.function([xf], ef)
        for x_val in x_range:
            outi = fi(x_val)
            outf = ff(x_val)
            assert outi.dtype == outf.dtype, 'incorrect dtype'
            assert np.allclose(outi, outf), 'insufficient precision'

    @staticmethod
    def _test_binary(binary_op, x_range, y_range):
        """Same as `_test_unary`, but for a binary op: check that the
        int8 and float32 compilations agree in dtype and value over the
        Cartesian product of `x_range` and `y_range`."""
        xi = int8('xi')
        yi = int8('yi')
        xf = float32('xf')
        yf = float32('yf')
        ei = binary_op(xi, yi)
        fi = theano.function([xi, yi], ei)
        ef = binary_op(xf, yf)
        ff = theano.function([xf, yf], ef)
        for x_val in x_range:
            for y_val in y_range:
                outi = fi(x_val, y_val)
                outf = ff(x_val, y_val)
                assert outi.dtype == outf.dtype, 'incorrect dtype'
                assert np.allclose(outi, outf), 'insufficient precision'

    def test_true_div(self):
        # true_div's upcast policy is not exactly "upgrade_to_float",
        # so the test is a little bit different: the reference float
        # graph uses floatX (the dtype true_div upcasts int8 to),
        # not float32.
        x_range = range(-127, 128)
        # list(range(...)) concatenation: see note on unary_ops_vals.
        # Zero is excluded from the denominator values.
        y_range = list(range(-127, 0)) + list(range(1, 127))
        xi = int8('xi')
        yi = int8('yi')
        xf = Scalar(theano.config.floatX)('xf')
        yf = Scalar(theano.config.floatX)('yf')
        ei = true_div(xi, yi)
        fi = theano.function([xi, yi], ei)
        ef = true_div(xf, yf)
        ff = theano.function([xf, yf], ef)
        for x_val in x_range:
            for y_val in y_range:
                outi = fi(x_val, y_val)
                outf = ff(x_val, y_val)
                assert outi.dtype == outf.dtype, 'incorrect dtype'
                assert np.allclose(outi, outf), 'insufficient precision'

    def test_unary(self):
        # Automatically define all individual unary tests (nose test
        # generator: each yielded callable is run as one test).
        for unary_op, x_range in self.unary_ops_vals:
            test_name = 'test_%s' % unary_op.name
            # Bind op and range as default arguments: a plain closure
            # (`lambda: self._test_unary(unary_op, x_range)`) is
            # late-binding, so every yielded test would run with the
            # last (op, range) pair of the loop instead of its own.
            test = lambda op=unary_op, rng=x_range: self._test_unary(op, rng)
            # Name the test so nose reports which op failed.
            test.description = test_name
            yield test

    def test_binary(self):
        # Automatically define all individual binary tests.
        for binary_op, x_range, y_range in self.binary_ops_vals:
            test_name = 'test_%s' % binary_op.name
            # Bind the loop variables as defaults to avoid the
            # late-binding closure bug (see test_unary).
            test = lambda op=binary_op, xr=x_range, yr=y_range: \
                self._test_binary(op, xr, yr)
            test.description = test_name
            yield test
class test_complex_mod(unittest.TestCase): class test_complex_mod(unittest.TestCase):
"""Make sure % fails on complex numbers.""" """Make sure % fails on complex numbers."""
......
...@@ -1812,7 +1812,7 @@ def round(a, mode="half_away_from_zero"): ...@@ -1812,7 +1812,7 @@ def round(a, mode="half_away_from_zero"):
raise Exception("round mode %s is not implemented." % mode) raise Exception("round mode %s is not implemented." % mode)
@_scal_elemwise_with_nfunc('around', 1, -1) @_scal_elemwise_with_nfunc('around', 1, 1)
def round_half_to_even(a): def round_half_to_even(a):
"""round_half_to_even(a)""" """round_half_to_even(a)"""
...@@ -1952,20 +1952,20 @@ def chi2sf(x, k): ...@@ -1952,20 +1952,20 @@ def chi2sf(x, k):
#numpy.real(float32) return a view on the inputs. #numpy.real(float32) return a view on the inputs.
#@_scal_elemwise_with_nfunc('real', 1, -1) #@_scal_elemwise_with_nfunc('real', 1, 1)
@_scal_elemwise @_scal_elemwise
def real(z): def real(z):
"""Return real component of complex-valued tensor `z`""" """Return real component of complex-valued tensor `z`"""
_tensor_py_operators.real = property(real) _tensor_py_operators.real = property(real)
@_scal_elemwise_with_nfunc('imag', 1, -1) @_scal_elemwise_with_nfunc('imag', 1, 1)
def imag(z): def imag(z):
"""Return imaginary component of complex-valued tensor `z`""" """Return imaginary component of complex-valued tensor `z`"""
_tensor_py_operators.imag = property(imag) _tensor_py_operators.imag = property(imag)
@_scal_elemwise_with_nfunc('angle', 1, -1) @_scal_elemwise_with_nfunc('angle', 1, 1)
def angle(z): def angle(z):
"""Return polar-coordinate angle of complex-valued tensor `z`""" """Return polar-coordinate angle of complex-valued tensor `z`"""
...@@ -1975,7 +1975,7 @@ def complex(real, imag): ...@@ -1975,7 +1975,7 @@ def complex(real, imag):
"""Return complex-valued tensor with `real` and `imag` components""" """Return complex-valued tensor with `real` and `imag` components"""
@_scal_elemwise_with_nfunc('conj', 1, -1) @_scal_elemwise_with_nfunc('conj', 1, 1)
def conj(z): def conj(z):
"""Return the complex conjugate of `z`.""" """Return the complex conjugate of `z`."""
......
...@@ -18,9 +18,10 @@ from theano.tensor import elemwise_cgen as cgen ...@@ -18,9 +18,10 @@ from theano.tensor import elemwise_cgen as cgen
config = theano.config config = theano.config
# We cannot import discrete_dtypes from tensor.basic yet, # We cannot import discrete_dtypes or float_dtypes from tensor.basic yet,
# so we redefine them here # so we redefine them here
discrete_dtypes = map(str, scalar.discrete_types) discrete_dtypes = map(str, scalar.discrete_types)
float_dtypes = map(str, scalar.float_types)
# tensor depends on elemwise to provide definitions for several ops # tensor depends on elemwise to provide definitions for several ops
...@@ -472,14 +473,11 @@ class Elemwise(OpenMPOp): ...@@ -472,14 +473,11 @@ class Elemwise(OpenMPOp):
the input's storage. (Just like destroymap, but without the lists.) the input's storage. (Just like destroymap, but without the lists.)
* nfunc_spec: either None or a tuple of three elements, * nfunc_spec: either None or a tuple of three elements,
(nfunc_name, nin, nout) such that getattr(numpy, nfunc_name) (nfunc_name, nin, nout) such that getattr(numpy, nfunc_name)
implements this operation, takes nin inputs and abs(nout) outputs implements this operation, takes nin inputs and nout outputs.
(nout < 0 if the numpy function does not provide the option of Note that nin cannot always be inferred from the scalar op's
providing a numpy array to store the results in). Note that nin own nin field because that value is sometimes 0 (meaning a
cannot always be inferred from the scalar op's own nin field variable number of inputs), whereas the numpy function may
because that value is sometimes 0 (meaning a variable number of not have varargs.
inputs), whereas the numpy function may not have varargs.
NOTE: as of now, the sign of the nout field is ignored (some work
needs to be done to resize the destinations when needed).
""" """
if inplace_pattern is None: if inplace_pattern is None:
inplace_pattern = {} inplace_pattern = {}
...@@ -819,43 +817,24 @@ class Elemwise(OpenMPOp): ...@@ -819,43 +817,24 @@ class Elemwise(OpenMPOp):
out_shape.append(max(values)) out_shape.append(max(values))
out_shape = tuple(out_shape) out_shape = tuple(out_shape)
# Commented as we don't reuse outputs now. ufunc_args = inputs
# ufunc_kwargs = {}
# if not self.inplace_pattern:
# for output, storage in izip(node.outputs, output_storage):
# odat = storage[0]
# if odat is not None:
# if odat.shape != out_shape:
# # It is unsafe to try to resize odat,
# # we have to allocate output storage.
# odat = None
# if odat is None:
# odat = numpy.ndarray(out_shape, dtype=output.type.dtype)
# storage[0] = odat
# else:
# for i, (output, storage) in enumerate(
# izip(node.outputs, output_storage)):
# #i is an output idx
# if i in self.inplace_pattern:
# odat = inputs[self.inplace_pattern[i]]
# else:
# odat = storage[0]
# if odat is not None:
# if odat.shape != out_shape:
# # It is unsafe to try to resize odat,
# # we have to allocate output storage.
# odat = None
# if odat is None:
# odat = numpy.ndarray(out_shape,
# dtype=output.type.dtype)
# storage[0] = odat
ufunc_args = inputs # + output_storage
if self.nfunc and len(inputs) == self.nfunc_spec[1]: if self.nfunc and len(inputs) == self.nfunc_spec[1]:
ufunc = self.nfunc ufunc = self.nfunc
nout = self.nfunc_spec[2] nout = self.nfunc_spec[2]
if nout < 0: # Numpy ufuncs will sometimes perform operations in
nout = -nout # float16, in particular when the input is int8.
# This is not something that we want, and we do not
# do it in the C code, so we specify that the computation
# should be carried out in the returned dtype.
# This is done via the "sig" kwarg of the ufunc, its value
# should be something like "ff->f", where the characters
# represent the dtype of the inputs and outputs.
out_dtype = node.outputs[0].dtype
if out_dtype in float_dtypes and isinstance(ufunc, numpy.ufunc):
char = numpy.sctype2char(out_dtype)
sig = char * node.nin + '->' + char * node.nout
ufunc_kwargs['sig'] = sig
# Unfortunately, the else case does not allow us to # Unfortunately, the else case does not allow us to
# directly feed the destination arguments to the nfunc # directly feed the destination arguments to the nfunc
# since it sometimes requires resizing. Doing this # since it sometimes requires resizing. Doing this
...@@ -869,7 +848,7 @@ class Elemwise(OpenMPOp): ...@@ -869,7 +848,7 @@ class Elemwise(OpenMPOp):
self.scalar_op.nout)) self.scalar_op.nout))
nout = ufunc.nout nout = ufunc.nout
variables = ufunc(*ufunc_args) variables = ufunc(*ufunc_args, **ufunc_kwargs)
if nout == 1: if nout == 1:
variables = [variables] variables = [variables]
......
...@@ -31,6 +31,11 @@ class ScalarSigmoid(scalar.UnaryScalarOp): ...@@ -31,6 +31,11 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
return 0.0 return 0.0
if x > 30.0: if x > 30.0:
return 1.0 return 1.0
# If x is an int8 or uint8, numpy.exp will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return 1.0 / (1.0 + numpy.exp(-x, sig='f'))
return 1.0 / (1.0 + numpy.exp(-x)) return 1.0 / (1.0 + numpy.exp(-x))
def impl(self, x): def impl(self, x):
...@@ -268,8 +273,11 @@ def hard_sigmoid(x): ...@@ -268,8 +273,11 @@ def hard_sigmoid(x):
Removing the slope and shift does not make it faster. Removing the slope and shift does not make it faster.
""" """
slope = 0.2 # Use the same dtype as determined by "upgrade_to_float",
shift = 0.5 # and perform computation in that dtype.
out_dtype = scalar.upgrade_to_float(scalar.Scalar(dtype=x.dtype))[0].dtype
slope = tensor.constant(0.2, dtype=out_dtype)
shift = tensor.constant(0.5, dtype=out_dtype)
x = (x * slope) + shift x = (x * slope) + shift
x = tensor.clip(x, 0, 1) x = tensor.clip(x, 0, 1)
return x return x
...@@ -300,6 +308,11 @@ class ScalarSoftplus(scalar.UnaryScalarOp): ...@@ -300,6 +308,11 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
return 0.0 return 0.0
if x > 30.0: if x > 30.0:
return x return x
# If x is an int8 or uint8, numpy.exp will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.log1p(numpy.exp(x, sig='f'))
return numpy.log1p(numpy.exp(x)) return numpy.log1p(numpy.exp(x))
def impl(self, x): def impl(self, x):
......
...@@ -16,7 +16,7 @@ from theano.tensor.nnet.sigm import ( ...@@ -16,7 +16,7 @@ from theano.tensor.nnet.sigm import (
register_local_1msigmoid, simplify_mul, register_local_1msigmoid, simplify_mul,
) )
from theano.tensor.tests.test_basic import (makeBroadcastTester, rand, from theano.tensor.tests.test_basic import (makeBroadcastTester, rand,
check_floatX, check_floatX, upcast_int8_nfunc,
_good_broadcast_unary_normal_no_complex) _good_broadcast_unary_normal_no_complex)
...@@ -30,8 +30,8 @@ class T_sigmoid(unittest.TestCase): ...@@ -30,8 +30,8 @@ class T_sigmoid(unittest.TestCase):
SigmoidTester = makeBroadcastTester( SigmoidTester = makeBroadcastTester(
op=sigmoid, op=sigmoid,
expected=lambda inputs: check_floatX( expected=upcast_int8_nfunc(lambda inputs: check_floatX(
inputs, 1/(1+numpy.exp(-inputs))), inputs, 1 / (1 + numpy.exp(-inputs)))),
good=_good_broadcast_unary_normal_no_complex, good=_good_broadcast_unary_normal_no_complex,
#grad=_grad_broadcast_unary_normal, #grad=_grad_broadcast_unary_normal,
name='SigmoidTester', name='SigmoidTester',
...@@ -39,8 +39,8 @@ SigmoidTester = makeBroadcastTester( ...@@ -39,8 +39,8 @@ SigmoidTester = makeBroadcastTester(
UltraFastSigmoidTester = makeBroadcastTester( UltraFastSigmoidTester = makeBroadcastTester(
op=ultra_fast_sigmoid, op=ultra_fast_sigmoid,
expected=lambda inputs: check_floatX( expected=upcast_int8_nfunc(lambda inputs: check_floatX(
inputs, 1/(1+numpy.exp(-inputs))), inputs, 1 / (1 + numpy.exp(-inputs)))),
good=_good_broadcast_unary_normal_no_complex, good=_good_broadcast_unary_normal_no_complex,
#grad=_grad_broadcast_unary_normal, #grad=_grad_broadcast_unary_normal,
name='UltraFastSigmoidTester', name='UltraFastSigmoidTester',
...@@ -49,20 +49,21 @@ UltraFastSigmoidTester = makeBroadcastTester( ...@@ -49,20 +49,21 @@ UltraFastSigmoidTester = makeBroadcastTester(
HardSigmoidTester = makeBroadcastTester( HardSigmoidTester = makeBroadcastTester(
op=hard_sigmoid, op=hard_sigmoid,
expected=lambda inputs: check_floatX( expected=upcast_int8_nfunc(lambda inputs: check_floatX(
inputs, 1/(1+numpy.exp(-inputs))), inputs, 1 / (1 + numpy.exp(-inputs)))),
good=_good_broadcast_unary_normal_no_complex, good=_good_broadcast_unary_normal_no_complex,
#grad=_grad_broadcast_unary_normal, #grad=_grad_broadcast_unary_normal,
name='UltraFastSigmoidTester', name='HardSigmoidTester',
# This is an approx of the sigmoid. That is why we raise eps # This is an approx of the sigmoid. That is why we raise eps
eps=1e-1) eps=1e-1)
SoftplusTester = makeBroadcastTester( SoftplusTester = makeBroadcastTester(
op=softplus, op=softplus,
expected=lambda inputs: check_floatX( expected=upcast_int8_nfunc(lambda inputs: check_floatX(
inputs, numpy.log1p(numpy.exp(inputs))), inputs, numpy.log1p(numpy.exp(inputs)))),
good=_good_broadcast_unary_normal_no_complex, good=dict(_good_broadcast_unary_normal_no_complex,
int8=[numpy.arange(-127, 89, dtype='int8')]),
#grad=_grad_broadcast_unary_normal, #grad=_grad_broadcast_unary_normal,
name='SoftplusTester', name='SoftplusTester',
) )
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论