提交 05649d91 authored 作者: Frederic Bastien's avatar Frederic Bastien

Don't scalar float* elemwise unless the result is needed on the GPU.

上级 0c53fb52
......@@ -702,6 +702,13 @@ def local_gpua_elemwise(op, context_name, inputs, outputs):
name = 'Gpu' + name
if len(outputs) > 1:
return
# We move a float* scalar elemwise only if its output is used on the GPU.
# This will trigger a backward pass when needed, but will prevent
# many useless transfers done only to compute a GpuElemwise on a scalar.
if outputs[0].ndim == 0 and len([c for c, _ in outputs[0].clients
if isinstance(c.op, GpuFromHost)]) == 0:
return
have_cuda = False
have_opencl = False
if inputs and isinstance(inputs[0].type, GpuArrayType):
......
......@@ -493,6 +493,27 @@ def test_many_arg_elemwise():
utt.assert_allclose(results_gpu, results_cpu)
def test_not_useless_scalar_gpuelemwise():
    # We don't want to move an elemwise on a scalar to the GPU when the
    # result will NOT be used on the GPU: the transfer would cost more
    # than the scalar computation itself.
    with theano.configparser.change_flags(warn_float64='ignore'):
        X = tensor.fmatrix()
        x = np.random.randn(32, 32).astype(np.float32)
        m1 = theano.shared(np.random.randn(32, 32).astype(np.float32))
        loss = (X - tensor.dot(X, m1)).norm(L=2)
        lr0 = .001
        grad = tensor.grad(loss, m1)
        # Bug fix: the update used undefined `lr1`; the learning rate
        # defined above is `lr0`, so building the function raised NameError.
        train = theano.function(inputs=[X], updates=[(m1, m1 - lr0 * grad)],
                                mode=mode_with_gpu)
        train(x)
        topo = train.maker.fgraph.toposort()
        gemms = [app for app in topo if isinstance(app.op, GpuGemm)]
        assert len(gemms) == 1
        # The scalar coefficient fed to GpuGemm must come from a CPU
        # Elemwise, i.e. the scalar computation was not lifted to the GPU.
        assert isinstance(gemms[0].inputs[1].owner.op, tensor.Elemwise)
def test_local_lift_abstractconv_gpu_shape():
prev = theano.config.on_opt_error
try:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论