提交 66277226 作者: Frédéric Bastien 提交者: GitHub

Merge pull request #6088 from nouiz/float16

Fix opt crash in float16 and enable C code for MaxAndArgmax and Argmax
...@@ -1911,12 +1911,11 @@ def local_gpu_elemwise_careduce(node): ...@@ -1911,12 +1911,11 @@ def local_gpu_elemwise_careduce(node):
# operation with some reduction pattern will probably result # operation with some reduction pattern will probably result
# in a slowdown. # in a slowdown.
isinstance(node.inputs[0].owner.op.scalar_op, scalar.basic.Sqr)): isinstance(node.inputs[0].owner.op.scalar_op, scalar.basic.Sqr)):
op = node.op
inp = node.inputs[0].owner.inputs[0] inp = node.inputs[0].owner.inputs[0]
return [GpuCAReduceCuda(scalar_op=op.scalar_op, props = node.op._props_dict()
axis=op.axis, props["pre_scalar_op"] = scalar.basic.sqr
reduce_mask=op.reduce_mask, out = GpuCAReduceCuda(**props)(inp)
pre_scalar_op=scalar.basic.sqr)(inp)] return [out]
@local_optimizer(None) @local_optimizer(None)
......
...@@ -1219,6 +1219,7 @@ class MaxAndArgmax(Op): ...@@ -1219,6 +1219,7 @@ class MaxAndArgmax(Op):
E_axis = 'invalid axis' E_axis = 'invalid axis'
params_type = Generic() params_type = Generic()
__props__ = ('axis',) __props__ = ('axis',)
_f16_ok = True
def __init__(self, axis): def __init__(self, axis):
assert isinstance(axis, list) assert isinstance(axis, list)
...@@ -1427,6 +1428,7 @@ class Argmax(Op): ...@@ -1427,6 +1428,7 @@ class Argmax(Op):
nout = 1 nout = 1
E_axis = 'invalid axis' E_axis = 'invalid axis'
__props__ = () __props__ = ()
_f16_ok = True
def make_node(self, x, axis=None): def make_node(self, x, axis=None):
x = _as_tensor_variable(x) x = _as_tensor_variable(x)
......
...@@ -41,7 +41,7 @@ from theano.tensor import ( ...@@ -41,7 +41,7 @@ from theano.tensor import (
inplace, iscalar, matrix, minimum, matrices, maximum, mul, neq, inplace, iscalar, matrix, minimum, matrices, maximum, mul, neq,
Reshape, row, scalar, scalars, second, smallest, stack, sub, Tensor, Reshape, row, scalar, scalars, second, smallest, stack, sub, Tensor,
tensor_copy, tensordot, TensorType, Tri, tri, tril, triu, unbroadcast, tensor_copy, tensordot, TensorType, Tri, tri, tril, triu, unbroadcast,
var, Join, shape, MaxAndArgmax, lscalar, zvector, exp, var, Argmax, Join, shape, MaxAndArgmax, lscalar, zvector, exp,
get_scalar_constant_value, ivector, reshape, scalar_from_tensor, scal, get_scalar_constant_value, ivector, reshape, scalar_from_tensor, scal,
iscalars, arange, dscalars, fvector, imatrix, numeric_grad, iscalars, arange, dscalars, fvector, imatrix, numeric_grad,
opt, lvector, true_div, max, min, Split, roll, opt, lvector, true_div, max, min, Split, roll,
...@@ -106,8 +106,11 @@ def inplace_func(inputs, outputs, mode=None, allow_input_downcast=False, ...@@ -106,8 +106,11 @@ def inplace_func(inputs, outputs, mode=None, allow_input_downcast=False,
name=name) name=name)
def eval_outputs(outputs): def eval_outputs(outputs, ops=(), mode=None):
variables = inplace_func([], outputs)() f = inplace_func([], outputs, mode=mode)
variables = f()
if ops:
assert any(isinstance(node.op, ops) for node in f.maker.fgraph.apply_nodes)
if isinstance(variables, (tuple, list)) and len(variables) == 1: if isinstance(variables, (tuple, list)) and len(variables) == 1:
return variables[0] return variables[0]
return variables return variables
...@@ -3106,6 +3109,21 @@ class T_max_and_argmax(unittest.TestCase): ...@@ -3106,6 +3109,21 @@ class T_max_and_argmax(unittest.TestCase):
v_shape = eval_outputs(max_and_argmax(n, axis)[0].shape) v_shape = eval_outputs(max_and_argmax(n, axis)[0].shape)
assert tuple(v_shape) == np.max(data, np_axis).shape assert tuple(v_shape) == np.max(data, np_axis).shape
def test2_float16(self):
# Check max_and_argmax on float16 data against NumPy's max/argmax
# for several axis specifications.
# Test negative values and a bigger range to make sure NumPy doesn't
# do the argmax as if the data were uint16.
data = (rand(20, 30).astype("float16") - 0.5) * 20
n = shared(data)
# Each pair is (axis passed to max_and_argmax, axis passed to NumPy
# to compute the reference result).
for (axis, np_axis) in [(-1, -1), (0, 0), (1, 1), (None, None),
([0, 1], None), ([1, 0], None),
(NoneConst.clone(), None),
(constant(0), 0)]:
# Passing (MaxAndArgmax,) makes eval_outputs assert that the
# compiled graph contains at least one MaxAndArgmax node.
v, i = eval_outputs(max_and_argmax(n, axis), (MaxAndArgmax,))
assert i.dtype == 'int64'
self.assertTrue(np.all(v == np.max(data, np_axis)))
self.assertTrue(np.all(i == np.argmax(data, np_axis)))
# The symbolic shape of the max output must match NumPy's result shape.
v_shape = eval_outputs(max_and_argmax(n, axis)[0].shape)
assert tuple(v_shape) == np.max(data, np_axis).shape
def test2_invalid(self): def test2_invalid(self):
n = as_tensor_variable(rand(2, 3)) n = as_tensor_variable(rand(2, 3))
# Silence expected error messages # Silence expected error messages
...@@ -3321,6 +3339,19 @@ class T_argmin_argmax(unittest.TestCase): ...@@ -3321,6 +3339,19 @@ class T_argmin_argmax(unittest.TestCase):
v_shape = eval_outputs(fct(n, axis).shape) v_shape = eval_outputs(fct(n, axis).shape)
assert tuple(v_shape) == nfct(data, np_axis).shape assert tuple(v_shape) == nfct(data, np_axis).shape
def test2_float16(self):
# Check argmax and argmin on float16 data against np.argmax/np.argmin
# for several axis specifications.
# Test negative values and a bigger range to make sure NumPy doesn't
# do the argmax as if the data were uint16.
data = (rand(20, 30).astype("float16") - 0.5) * 20
n = shared(data)
# Default mode extended with the "local_max_and_argmax" and
# "uncanonicalize" optimizations.
# NOTE(review): presumably needed so the compiled graph ends up with an
# Argmax node, which the eval_outputs call below asserts - confirm.
mode = get_default_mode().including("local_max_and_argmax", "uncanonicalize")
for fct, nfct in [(argmax, np.argmax), (argmin, np.argmin)]:
# Each pair is (axis passed to fct, axis passed to the NumPy reference).
for (axis, np_axis) in [(-1, -1), (0, 0), (1, 1), (None, None),
([0, 1], None), ([1, 0], None)]:
# Passing (Argmax,) makes eval_outputs assert that the compiled
# graph contains at least one Argmax node.
v = eval_outputs(fct(n, axis), (Argmax,), mode=mode)
self.assertTrue(np.all(v == nfct(data, np_axis)))
# The symbolic output shape must match the NumPy result shape.
v_shape = eval_outputs(fct(n, axis).shape, mode=mode)
assert tuple(v_shape) == nfct(data, np_axis).shape
def test2_invalid(self): def test2_invalid(self):
for fct, nfct in [(argmax, np.argmax), (argmin, np.argmin)]: for fct, nfct in [(argmax, np.argmax), (argmin, np.argmin)]:
n = as_tensor_variable(rand(2, 3)) n = as_tensor_variable(rand(2, 3))
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论