Commit 91bc16c3 authored by notoraptor

Optimize MAX(ABS(X)).

Parent c2c28793
...@@ -3765,6 +3765,9 @@ def local_dnn_reduction(node): ...@@ -3765,6 +3765,9 @@ def local_dnn_reduction(node):
scal = 'norm1' scal = 'norm1'
else: else:
return return
elif (isinstance(node.op.scalar_op, theano.scalar.basic.Maximum) and
isinstance(node.op.pre_scalar_op, theano.scalar.basic.Abs)):
scal = 'absmax'
else: else:
return return
......
...@@ -1207,7 +1207,7 @@ def local_gpua_careduce(op, context_name, inputs, outputs): ...@@ -1207,7 +1207,7 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
return False return False
x, = inputs x, = inputs
idtype = x.dtype idtype = x.dtype
adtype = getattr(op, 'acc_dtype', None) adtype = getattr(op, 'acc_dtype', idtype)
odtype = getattr(op, 'dtype', outputs[0].dtype) odtype = getattr(op, 'dtype', outputs[0].dtype)
# Force accumulator to float32 for float32 inputs since tree # Force accumulator to float32 for float32 inputs since tree
......
...@@ -1597,6 +1597,20 @@ def test_dnn_reduction_sum_abs(): ...@@ -1597,6 +1597,20 @@ def test_dnn_reduction_sum_abs():
utt.assert_allclose(np.abs(M_val).sum(axis=axis), f(M_val)) utt.assert_allclose(np.abs(M_val).sum(axis=axis), f(M_val))
def test_dnn_reduction_absmax():
    """Check that abs(M).max(axis=...) is rewritten into a single cuDNN
    GpuDnnReduction node with red_op == 'absmax', and that it computes
    the same values as NumPy.

    NOTE(review): indentation reconstructed from a diff view — the loop
    body placement (function build, graph assert, value check all inside
    the per-axis loop) is the presumed structure; confirm against the
    original file.
    """
    # The 'absmax' reduction requires cuDNN; version >= 6000 is presumably
    # when cudnnReduceTensor with AMAX became available — skip otherwise.
    if not dnn.dnn_available(test_ctx_name) or dnn.version(raises=False) < 6000:
        raise SkipTest(dnn.dnn_available.msg)
    M = T.matrix()
    # Exercise the full reduction (axis=None) and each single axis.
    for axis in (None, 0, 1):
        out = abs(M).max(axis=axis)
        f = theano.function([M], out, mode=mode_with_gpu)
        # The optimizer must have fused max(abs(.)) into one cuDNN
        # reduction node tagged 'absmax'.
        assert any(isinstance(node.op, dnn.GpuDnnReduction) and node.op.red_op == 'absmax'
                   for node in f.maker.fgraph.apply_nodes)
        # Numerical check against the NumPy reference result.
        M_val = np.random.random((4, 5)).astype(theano.config.floatX)
        utt.assert_allclose(np.max(np.abs(M_val), axis=axis), f(M_val))
def dnn_reduction_strides(shp, shuffle, slice): def dnn_reduction_strides(shp, shuffle, slice):
utt.fetch_seed() utt.fetch_seed()
inp = GpuArrayType('float32', (False,) * len(shp), inp = GpuArrayType('float32', (False,) * len(shp),
......
...@@ -363,44 +363,28 @@ def test_local_gpu_elemwise_careduce(): ...@@ -363,44 +363,28 @@ def test_local_gpu_elemwise_careduce():
mode_with_gpu_no_cudnn = mode_with_gpu.excluding('cudnn') mode_with_gpu_no_cudnn = mode_with_gpu.excluding('cudnn')
x = theano.tensor.matrix() x = theano.tensor.matrix()
o = (x * x).sum() def fn_sum_square(x, axis):
f = theano.function([x], o, mode=mode_with_gpu_no_cudnn) return (x * x).sum(axis=axis)
topo = f.maker.fgraph.toposort()
assert len(topo) == 3 def fn_sum_abs(x, axis):
assert isinstance(topo[1].op, GpuCAReduceCuda) return abs(x).sum(axis=axis)
assert topo[1].op.pre_scalar_op == theano.scalar.sqr
assert _check_stack_trace(f) def fn_max_abs(x, axis):
data = np.random.rand(3, 4).astype(theano.config.floatX) return abs(x).max(axis=axis)
utt.assert_allclose(f(data), (data * data).sum())
for fn, pre_scalar_op in ((fn_sum_square, theano.scalar.sqr),
o = (x * x).sum(axis=1) (fn_sum_abs, theano.scalar.abs_),
f = theano.function([x], o, mode=mode_with_gpu_no_cudnn) (fn_max_abs, theano.scalar.abs_)):
topo = f.maker.fgraph.toposort() for axis in (None, 0, 1):
assert len(topo) == 3 o = fn(x, axis)
assert isinstance(topo[1].op, GpuCAReduceCuda) f = theano.function([x], o, mode=mode_with_gpu_no_cudnn)
assert topo[1].op.pre_scalar_op == theano.scalar.sqr topo = f.maker.fgraph.toposort()
assert _check_stack_trace(f) assert len(topo) == 3
utt.assert_allclose(f(data), (data * data).sum(axis=1)) assert isinstance(topo[1].op, GpuCAReduceCuda)
assert topo[1].op.pre_scalar_op == pre_scalar_op
# assert _check_stack_trace(f)
o = abs(x).sum() data = np.random.rand(3, 4).astype(theano.config.floatX)
f = theano.function([x], o, mode=mode_with_gpu_no_cudnn) utt.assert_allclose(fn(data, axis), f(data))
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert isinstance(topo[1].op, GpuCAReduceCuda)
assert topo[1].op.pre_scalar_op == theano.scalar.abs_
assert _check_stack_trace(f)
data = np.random.rand(3, 4).astype(theano.config.floatX)
utt.assert_allclose(f(data), np.abs(data).sum())
o = abs(x).sum(axis=1)
f = theano.function([x], o, mode=mode_with_gpu_no_cudnn)
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert isinstance(topo[1].op, GpuCAReduceCuda)
assert topo[1].op.pre_scalar_op == theano.scalar.abs_
assert _check_stack_trace(f)
utt.assert_allclose(f(data), np.abs(data).sum(axis=1))
def test_local_lift_dot22scalar(): def test_local_lift_dot22scalar():
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment