提交 52cb8ec7 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2228 from lamblin/fix_float16

Prevent computations in float16 in scalar and elemwise
差异被折叠。
......@@ -10,6 +10,7 @@ If you do want to rewrite these tests, bear in mind:
"""
import unittest
import numpy as np
import theano
from theano.gof import FunctionGraph
......@@ -20,8 +21,12 @@ from theano.scalar.basic import (floats, float32, float64,
ints, int8, int32, complex64,
ComplexError, IntDiv, TrueDiv,
Composite, add, div_proxy, clip,
and_, eq, neq, invert, mul)
import numpy
and_, eq, neq, invert, mul, Scalar)
from theano.scalar.basic import (
true_div, inv, log, log2, log10, log1p, exp, exp2, expm1, sqrt, deg2rad,
rad2deg, cos, arccos, sin, arcsin, tan, arctan, arctan2, cosh, arccosh,
sinh, arcsinh, tanh, arctanh)
def inputs():
    """Build the scalar variables shared by the tests in this module.

    Returns whatever ``floats('xyz')`` produces -- presumably one float
    scalar variable per letter (x, y, z); confirm against
    ``theano.scalar.basic.floats``.
    """
    xyz = floats('xyz')
    return xyz
......@@ -75,7 +80,7 @@ class test_ScalarOps(unittest.TestCase):
g3 = theano.gradient.grad(a3, x)
fn3 = gof.DualLinker().accept(FunctionGraph([x], [g3])).make_function()
rng = numpy.random.RandomState(utt.fetch_seed())
rng = np.random.RandomState(utt.fetch_seed())
ntests = 50
for i in xrange(ntests):
......@@ -235,6 +240,128 @@ class test_logical(unittest.TestCase):
self.assertTrue(fn(a,b) == ~a, (a,))
# This class does not inherit from unittest.TestCase, because that would
# interfere with the "yield" mechanism that automatically generates tests
# (see http://stackoverflow.com/questions/6689537/nose-test-generators-inside-class).
# It therefore needs to be named "test_..." or "Test_..." so that nose picks
# it up by name; otherwise the tests would not be executed.
class test_upgrade_to_float(object):
    # Test for Ops whose output has to be floating point, even when all
    # inputs are ints.
    # In particular, when the inputs are int8, the output should be
    # at least float32, not float16.

    # (op, x values) pairs fed to `_test_unary`.  All values fit in int8.
    # NOTE(review): the ranges look chosen to stay inside each op's domain
    # (and below float32 overflow for the exp-like ops) -- confirm before
    # widening any of them.
    # NOTE(review): `range(1, 127)` stops at 126; presumably 127 could be
    # included as well -- confirm.
    # The two halves are turned into lists before concatenation so the
    # expression also works where `range` does not return a list.
    unary_ops_vals = [
        (inv, list(range(-127, 0)) + list(range(1, 127))),
        (sqrt, range(0, 128)),
        (log, range(1, 128)),
        (log2, range(1, 128)),
        (log10, range(1, 128)),
        (log1p, range(0, 128)),
        (exp, range(-127, 89)),
        (exp2, range(-127, 89)),
        (expm1, range(-127, 89)),
        (deg2rad, range(-127, 128)),
        (rad2deg, range(-127, 128)),
        (cos, range(-127, 128)),
        (arccos, range(-1, 2)),
        (cosh, range(-89, 90)),
        (arccosh, range(1, 128)),
        (sin, range(-127, 128)),
        (arcsin, range(-1, 2)),
        (sinh, range(-89, 90)),
        (arcsinh, range(-127, 128)),
        (tan, range(-3, 4)),
        (arctan, range(-127, 128)),
        (tanh, range(-127, 128)),
        (arctanh, [0])]

    # (op, x values, y values) triples fed to `_test_binary`.
    binary_ops_vals = [
        (arctan2, range(-127, 128), range(-127, 128))]

    @staticmethod
    def _test_unary(unary_op, x_range):
        # Compile `unary_op` once on an int8 input and once on a float32
        # input, then require, for every value in `x_range`, that both
        # functions return the same dtype and numerically close results.
        xi = int8('xi')
        xf = float32('xf')

        ei = unary_op(xi)
        fi = theano.function([xi], ei)

        ef = unary_op(xf)
        ff = theano.function([xf], ef)

        for x_val in x_range:
            outi = fi(x_val)
            outf = ff(x_val)

            assert outi.dtype == outf.dtype, 'incorrect dtype'
            assert np.allclose(outi, outf), 'insufficient precision'

    @staticmethod
    def _check_binary(binary_op, x_range, y_range, float_type):
        # Shared body of the binary checks: compile `binary_op` once on
        # int8 inputs and once on `float_type` inputs, then require the
        # same output dtype and numerically close values for every
        # (x, y) pair in `x_range` x `y_range`.
        xi = int8('xi')
        yi = int8('yi')
        xf = float_type('xf')
        yf = float_type('yf')

        ei = binary_op(xi, yi)
        fi = theano.function([xi, yi], ei)

        ef = binary_op(xf, yf)
        ff = theano.function([xf, yf], ef)

        for x_val in x_range:
            for y_val in y_range:
                outi = fi(x_val, y_val)
                outf = ff(x_val, y_val)

                assert outi.dtype == outf.dtype, 'incorrect dtype'
                assert np.allclose(outi, outf), 'insufficient precision'

    @staticmethod
    def _test_binary(binary_op, x_range, y_range):
        # Binary counterpart of `_test_unary`: the int8 result is compared
        # against the float32 result.
        test_upgrade_to_float._check_binary(
            binary_op, x_range, y_range, float32)

    def test_true_div(self):
        # true_div's upcast policy is not exactly "upgrade_to_float",
        # so the test is a little bit different: the reference inputs are
        # of type floatX instead of float32.
        x_range = range(-127, 128)
        # Zero is left out of the divisors.
        y_range = list(range(-127, 0)) + list(range(1, 127))

        self._check_binary(true_div, x_range, y_range,
                           Scalar(theano.config.floatX))

    def test_unary(self):
        # Automatically define all individual unary tests
        for unary_op, x_range in self.unary_ops_vals:
            test_name = 'test_%s' % unary_op.name

            # Bind the loop variables as default arguments.  A plain closure
            # would look them up when the test is *called*, so a runner that
            # collects every yielded test before running any of them would
            # exercise only the last op of the list.
            def test(unary_op=unary_op, x_range=x_range):
                self._test_unary(unary_op, x_range)
            test.description = test_name
            yield test

    def test_binary(self):
        # Automatically define all individual binary tests
        for binary_op, x_range, y_range in self.binary_ops_vals:
            test_name = 'test_%s' % binary_op.name

            # Loop variables are bound as defaults for the same reason as
            # in `test_unary` (late-binding closures).
            def test(binary_op=binary_op, x_range=x_range, y_range=y_range):
                self._test_binary(binary_op, x_range, y_range)
            test.description = test_name
            yield test
class test_complex_mod(unittest.TestCase):
"""Make sure % fails on complex numbers."""
......
......@@ -1812,7 +1812,7 @@ def round(a, mode="half_away_from_zero"):
raise Exception("round mode %s is not implemented." % mode)
@_scal_elemwise_with_nfunc('around', 1, -1)
@_scal_elemwise_with_nfunc('around', 1, 1)
def round_half_to_even(a):
"""round_half_to_even(a)"""
......@@ -1952,20 +1952,20 @@ def chi2sf(x, k):
# numpy.real(float32) returns a view on the inputs.
#@_scal_elemwise_with_nfunc('real', 1, -1)
#@_scal_elemwise_with_nfunc('real', 1, 1)
# The body is only a docstring: everything `real` does comes from the
# `_scal_elemwise` decorator.
@_scal_elemwise
def real(z):
    """Return real component of complex-valued tensor `z`"""


# Expose `real` as a getter-only `real` property on `_tensor_py_operators`.
_tensor_py_operators.real = property(real)
@_scal_elemwise_with_nfunc('imag', 1, -1)
@_scal_elemwise_with_nfunc('imag', 1, 1)
def imag(z):
"""Return imaginary component of complex-valued tensor `z`"""
_tensor_py_operators.imag = property(imag)
@_scal_elemwise_with_nfunc('angle', 1, -1)
@_scal_elemwise_with_nfunc('angle', 1, 1)
def angle(z):
"""Return polar-coordinate angle of complex-valued tensor `z`"""
......@@ -1975,7 +1975,7 @@ def complex(real, imag):
"""Return complex-valued tensor with `real` and `imag` components"""
@_scal_elemwise_with_nfunc('conj', 1, -1)
@_scal_elemwise_with_nfunc('conj', 1, 1)
def conj(z):
"""Return the complex conjugate of `z`."""
......
......@@ -18,9 +18,10 @@ from theano.tensor import elemwise_cgen as cgen
config = theano.config
# We cannot import discrete_dtypes from tensor.basic yet,
# We cannot import discrete_dtypes or float_dtypes from tensor.basic yet,
# so we redefine them here
# Materialise the dtype names as lists.  Where `map` returns a one-shot
# iterator (Python 3), a membership test such as `dtype in float_dtypes`
# would consume it and later tests would wrongly come out False.
discrete_dtypes = list(map(str, scalar.discrete_types))
float_dtypes = list(map(str, scalar.float_types))
# tensor depends on elemwise to provide definitions for several ops
......@@ -472,14 +473,11 @@ class Elemwise(OpenMPOp):
the input's storage. (Just like destroymap, but without the lists.)
* nfunc_spec: either None or a tuple of three elements,
(nfunc_name, nin, nout) such that getattr(numpy, nfunc_name)
implements this operation, takes nin inputs and abs(nout) outputs
(nout < 0 if the numpy function does not provide the option of
providing a numpy array to store the results in). Note that nin
cannot always be inferred from the scalar op's own nin field
because that value is sometimes 0 (meaning a variable number of
inputs), whereas the numpy function may not have varargs.
NOTE: as of now, the sign of the nout field is ignored (some work
needs to be done to resize the destinations when needed).
implements this operation, takes nin inputs and nout outputs.
Note that nin cannot always be inferred from the scalar op's
own nin field because that value is sometimes 0 (meaning a
variable number of inputs), whereas the numpy function may
not have varargs.
"""
if inplace_pattern is None:
inplace_pattern = {}
......@@ -819,43 +817,24 @@ class Elemwise(OpenMPOp):
out_shape.append(max(values))
out_shape = tuple(out_shape)
# Commented as we don't reuse outputs now.
#
# if not self.inplace_pattern:
# for output, storage in izip(node.outputs, output_storage):
# odat = storage[0]
# if odat is not None:
# if odat.shape != out_shape:
# # It is unsafe to try to resize odat,
# # we have to allocate output storage.
# odat = None
# if odat is None:
# odat = numpy.ndarray(out_shape, dtype=output.type.dtype)
# storage[0] = odat
# else:
# for i, (output, storage) in enumerate(
# izip(node.outputs, output_storage)):
# #i is an output idx
# if i in self.inplace_pattern:
# odat = inputs[self.inplace_pattern[i]]
# else:
# odat = storage[0]
# if odat is not None:
# if odat.shape != out_shape:
# # It is unsafe to try to resize odat,
# # we have to allocate output storage.
# odat = None
# if odat is None:
# odat = numpy.ndarray(out_shape,
# dtype=output.type.dtype)
# storage[0] = odat
ufunc_args = inputs # + output_storage
ufunc_args = inputs
ufunc_kwargs = {}
if self.nfunc and len(inputs) == self.nfunc_spec[1]:
ufunc = self.nfunc
nout = self.nfunc_spec[2]
if nout < 0:
nout = -nout
# Numpy ufuncs will sometimes perform operations in
# float16, in particular when the input is int8.
# This is not something that we want, and we do not
# do it in the C code, so we specify that the computation
# should be carried out in the returned dtype.
# This is done via the "sig" kwarg of the ufunc, its value
# should be something like "ff->f", where the characters
# represent the dtype of the inputs and outputs.
out_dtype = node.outputs[0].dtype
if out_dtype in float_dtypes and isinstance(ufunc, numpy.ufunc):
char = numpy.sctype2char(out_dtype)
sig = char * node.nin + '->' + char * node.nout
ufunc_kwargs['sig'] = sig
# Unfortunately, the else case does not allow us to
# directly feed the destination arguments to the nfunc
# since it sometimes requires resizing. Doing this
......@@ -869,7 +848,7 @@ class Elemwise(OpenMPOp):
self.scalar_op.nout))
nout = ufunc.nout
variables = ufunc(*ufunc_args)
variables = ufunc(*ufunc_args, **ufunc_kwargs)
if nout == 1:
variables = [variables]
......
......@@ -31,6 +31,11 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
return 0.0
if x > 30.0:
return 1.0
# If x is an int8 or uint8, numpy.exp will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return 1.0 / (1.0 + numpy.exp(-x, sig='f'))
return 1.0 / (1.0 + numpy.exp(-x))
def impl(self, x):
......@@ -268,8 +273,11 @@ def hard_sigmoid(x):
Removing the slope and shift does not make it faster.
"""
slope = 0.2
shift = 0.5
# Use the same dtype as determined by "upgrade_to_float",
# and perform computation in that dtype.
out_dtype = scalar.upgrade_to_float(scalar.Scalar(dtype=x.dtype))[0].dtype
slope = tensor.constant(0.2, dtype=out_dtype)
shift = tensor.constant(0.5, dtype=out_dtype)
x = (x * slope) + shift
x = tensor.clip(x, 0, 1)
return x
......@@ -300,6 +308,11 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
return 0.0
if x > 30.0:
return x
# If x is an int8 or uint8, numpy.exp will compute the result in
# half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', ''))
if x_dtype in ('int8', 'uint8'):
return numpy.log1p(numpy.exp(x, sig='f'))
return numpy.log1p(numpy.exp(x))
def impl(self, x):
......
......@@ -16,7 +16,7 @@ from theano.tensor.nnet.sigm import (
register_local_1msigmoid, simplify_mul,
)
from theano.tensor.tests.test_basic import (makeBroadcastTester, rand,
check_floatX,
check_floatX, upcast_int8_nfunc,
_good_broadcast_unary_normal_no_complex)
......@@ -30,8 +30,8 @@ class T_sigmoid(unittest.TestCase):
SigmoidTester = makeBroadcastTester(
op=sigmoid,
expected=lambda inputs: check_floatX(
inputs, 1/(1+numpy.exp(-inputs))),
expected=upcast_int8_nfunc(lambda inputs: check_floatX(
inputs, 1 / (1 + numpy.exp(-inputs)))),
good=_good_broadcast_unary_normal_no_complex,
#grad=_grad_broadcast_unary_normal,
name='SigmoidTester',
......@@ -39,8 +39,8 @@ SigmoidTester = makeBroadcastTester(
UltraFastSigmoidTester = makeBroadcastTester(
op=ultra_fast_sigmoid,
expected=lambda inputs: check_floatX(
inputs, 1/(1+numpy.exp(-inputs))),
expected=upcast_int8_nfunc(lambda inputs: check_floatX(
inputs, 1 / (1 + numpy.exp(-inputs)))),
good=_good_broadcast_unary_normal_no_complex,
#grad=_grad_broadcast_unary_normal,
name='UltraFastSigmoidTester',
......@@ -49,20 +49,21 @@ UltraFastSigmoidTester = makeBroadcastTester(
HardSigmoidTester = makeBroadcastTester(
op=hard_sigmoid,
expected=lambda inputs: check_floatX(
inputs, 1/(1+numpy.exp(-inputs))),
expected=upcast_int8_nfunc(lambda inputs: check_floatX(
inputs, 1 / (1 + numpy.exp(-inputs)))),
good=_good_broadcast_unary_normal_no_complex,
#grad=_grad_broadcast_unary_normal,
name='UltraFastSigmoidTester',
name='HardSigmoidTester',
# This is an approx of the sigmoid. That is why we raise eps
eps=1e-1)
SoftplusTester = makeBroadcastTester(
op=softplus,
expected=lambda inputs: check_floatX(
inputs, numpy.log1p(numpy.exp(inputs))),
good=_good_broadcast_unary_normal_no_complex,
expected=upcast_int8_nfunc(lambda inputs: check_floatX(
inputs, numpy.log1p(numpy.exp(inputs)))),
good=dict(_good_broadcast_unary_normal_no_complex,
int8=[numpy.arange(-127, 89, dtype='int8')]),
#grad=_grad_broadcast_unary_normal,
name='SoftplusTester',
)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论