提交 8655069d authored 作者: notoraptor's avatar notoraptor

Optimize SUM(x^2) to cuDNN reduction.

上级 e6acc109
......@@ -3746,19 +3746,33 @@ def local_dnn_reduction(node):
node.op.acc_dtype == 'float64'):
return
if node.op.pre_scalar_op is not None:
# Might want to handle absmax, avg, norm1, norm2 here
def _identity(a):
return a
def _square(a):
return GpuElemwise(theano.scalar.basic.sqr)(a)
scal = node.op.scalar_op.name
post = _identity
if (isinstance(node.op.pre_scalar_op, theano.scalar.basic.Sqr) and
isinstance(node.op.scalar_op, theano.scalar.basic.Add)):
scal = 'norm2'
post = _square
elif node.op.pre_scalar_op is not None:
# Might want to handle absmax, avg, norm1, and other cases of norm2 here
return
if not cudnn.cudnnReduceTensorOp_t.has_alias(node.op.scalar_op.name):
if not cudnn.cudnnReduceTensorOp_t.has_alias(scal):
return
with inherit_stack_trace(node.outputs):
return (GpuDnnReduction(node.op.scalar_op.name,
node.op.axis,
node.op.acc_dtype,
node.op.dtype,
False)(node.inputs[0]),)
ret = GpuDnnReduction(scal,
node.op.axis,
node.op.acc_dtype,
node.op.dtype,
False)(node.inputs[0])
return [post(ret)]
@register_opt('cudnn')
......
......@@ -1569,6 +1569,19 @@ def test_dnn_reduction_opt():
yield dnn_reduction, 2, idtype, adtype, odtype
def test_dnn_reduction_sum_squares():
    """Check that sum(x**2) is optimized into a cuDNN 'norm2' reduction.

    The optimizer rewrites SUM(SQR(x)) as square(norm2(x)), since
    norm2(x)**2 == sum(x**2).  The 'norm2' reduction op requires
    cuDNN version >= 6000.
    """
    # Fix: raise distinct skip messages — previously a too-old cuDNN
    # reported "dnn not available" even though it *was* available.
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    if dnn.version(raises=False) < 6000:
        raise SkipTest("cuDNN version >= 6000 is required for norm2 reduction")
    M = T.matrix()
    out = (M ** 2).sum()
    f = theano.function([M], out, mode=mode_with_gpu)
    # The compiled graph must contain a GpuDnnReduction node using 'norm2',
    # proving the local_dnn_reduction optimization fired.
    assert any(isinstance(node.op, dnn.GpuDnnReduction) and
               node.op.red_op == 'norm2'
               for node in f.maker.fgraph.apply_nodes)
    M_val = np.random.random((4, 5)).astype(theano.config.floatX)
    # Numerical result must match the plain NumPy sum of squares.
    utt.assert_allclose((M_val ** 2).sum(), f(M_val))
def dnn_reduction_strides(shp, shuffle, slice):
utt.fetch_seed()
inp = GpuArrayType('float32', (False,) * len(shp),
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论