Commit 66277226 authored by Frédéric Bastien, committed by GitHub

Merge pull request #6088 from nouiz/float16

Fix opt crash in float16 and enable C code for MaxAndArgmax and Argmax
......@@ -1911,12 +1911,11 @@ def local_gpu_elemwise_careduce(node):
# operation with some reduction pattern will probably results
# in slow down.
isinstance(node.inputs[0].owner.op.scalar_op, scalar.basic.Sqr)):
op = node.op
inp = node.inputs[0].owner.inputs[0]
return [GpuCAReduceCuda(scalar_op=op.scalar_op,
axis=op.axis,
reduce_mask=op.reduce_mask,
pre_scalar_op=scalar.basic.sqr)(inp)]
props = node.op._props_dict()
props["pre_scalar_op"] = scalar.basic.sqr
out = GpuCAReduceCuda(**props)(inp)
return [out]
@local_optimizer(None)
......
......@@ -1219,6 +1219,7 @@ class MaxAndArgmax(Op):
E_axis = 'invalid axis'
params_type = Generic()
__props__ = ('axis',)
_f16_ok = True
def __init__(self, axis):
assert isinstance(axis, list)
......@@ -1427,6 +1428,7 @@ class Argmax(Op):
nout = 1
E_axis = 'invalid axis'
__props__ = ()
_f16_ok = True
def make_node(self, x, axis=None):
x = _as_tensor_variable(x)
......
......@@ -41,7 +41,7 @@ from theano.tensor import (
inplace, iscalar, matrix, minimum, matrices, maximum, mul, neq,
Reshape, row, scalar, scalars, second, smallest, stack, sub, Tensor,
tensor_copy, tensordot, TensorType, Tri, tri, tril, triu, unbroadcast,
var, Join, shape, MaxAndArgmax, lscalar, zvector, exp,
var, Argmax, Join, shape, MaxAndArgmax, lscalar, zvector, exp,
get_scalar_constant_value, ivector, reshape, scalar_from_tensor, scal,
iscalars, arange, dscalars, fvector, imatrix, numeric_grad,
opt, lvector, true_div, max, min, Split, roll,
......@@ -106,8 +106,11 @@ def inplace_func(inputs, outputs, mode=None, allow_input_downcast=False,
name=name)
def eval_outputs(outputs):
variables = inplace_func([], outputs)()
def eval_outputs(outputs, ops=(), mode=None):
    """Compile *outputs* (a graph with no inputs), run it once and return
    the computed values.

    If *ops* is a non-empty tuple of Op classes, assert that the compiled
    graph contains at least one apply node whose op is an instance of one
    of them.  A one-element list/tuple result is unwrapped to the element.
    """
    fn = inplace_func([], outputs, mode=mode)
    results = fn()
    if ops:
        # Make sure the optimization/lowering we are testing actually
        # produced one of the expected ops in the final graph.
        matching = [node for node in fn.maker.fgraph.apply_nodes
                    if isinstance(node.op, ops)]
        assert matching
    if isinstance(results, (list, tuple)) and len(results) == 1:
        return results[0]
    return results
......@@ -3106,6 +3109,21 @@ class T_max_and_argmax(unittest.TestCase):
v_shape = eval_outputs(max_and_argmax(n, axis)[0].shape)
assert tuple(v_shape) == np.max(data, np_axis).shape
def test2_float16(self):
    # Use negative values and a wide range so that a buggy backend that
    # reinterprets float16 data as uint16 would compute a wrong argmax.
    data = (rand(20, 30).astype("float16") - 0.5) * 20
    n = shared(data)
    cases = [(-1, -1), (0, 0), (1, 1), (None, None),
             ([0, 1], None), ([1, 0], None),
             (NoneConst.clone(), None),
             (constant(0), 0)]
    for axis, np_axis in cases:
        # Require that a MaxAndArgmax node survives in the compiled graph.
        v, i = eval_outputs(max_and_argmax(n, axis), (MaxAndArgmax,))
        assert i.dtype == 'int64'
        self.assertTrue(np.all(v == np.max(data, np_axis)))
        self.assertTrue(np.all(i == np.argmax(data, np_axis)))
        # The shape-only graph must agree with numpy as well.
        v_shape = eval_outputs(max_and_argmax(n, axis)[0].shape)
        assert tuple(v_shape) == np.max(data, np_axis).shape
def test2_invalid(self):
n = as_tensor_variable(rand(2, 3))
# Silence expected error messages
......@@ -3321,6 +3339,19 @@ class T_argmin_argmax(unittest.TestCase):
v_shape = eval_outputs(fct(n, axis).shape)
assert tuple(v_shape) == nfct(data, np_axis).shape
def test2_float16(self):
    # Use negative values and a wide range so that a buggy backend that
    # reinterprets float16 data as uint16 would compute a wrong argmax.
    data = (rand(20, 30).astype("float16") - 0.5) * 20
    n = shared(data)
    mode = get_default_mode().including("local_max_and_argmax",
                                        "uncanonicalize")
    for fct, nfct in [(argmax, np.argmax), (argmin, np.argmin)]:
        for axis, np_axis in [(-1, -1), (0, 0), (1, 1), (None, None),
                              ([0, 1], None), ([1, 0], None)]:
            # Require that an Argmax node survives in the optimized graph.
            out = eval_outputs(fct(n, axis), (Argmax,), mode=mode)
            self.assertTrue(np.all(out == nfct(data, np_axis)))
            # The shape-only graph must agree with numpy as well.
            out_shape = eval_outputs(fct(n, axis).shape, mode=mode)
            assert tuple(out_shape) == nfct(data, np_axis).shape
def test2_invalid(self):
for fct, nfct in [(argmax, np.argmax), (argmin, np.argmin)]:
n = as_tensor_variable(rand(2, 3))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论