提交 bd2ec838 authored 作者: Frederic Bastien's avatar Frederic Bastien

Do not move all scalar float on the GPU.

上级 05649d91
......@@ -703,12 +703,6 @@ def local_gpua_elemwise(op, context_name, inputs, outputs):
if len(outputs) > 1:
return
# We move float* scalar only if the outputs is used on the GPU.
# This will trigger a backward pass when needed, but will prevent
# many useless transfer to only compute GpuElemwise on scalar.
if outputs[0].ndim == 0 and len([c for c, _ in outputs[0].clients
if isinstance(c.op, GpuFromHost)]) == 0:
return
have_cuda = False
have_opencl = False
if inputs and isinstance(inputs[0].type, GpuArrayType):
......
......@@ -502,10 +502,10 @@ def test_not_useless_scalar_gpuelemwise():
x = np.random.randn(32, 32).astype(np.float32)
m1 = theano.shared(np.random.randn(32, 32).astype(np.float32))
loss = (X - tensor.dot(X, m1)).norm(L=2)
lr0 = .001
lr = theano.shared(np.asarray(.001, dtype=np.float32))
grad = tensor.grad(loss, m1)
train = theano.function(inputs=[X], updates=[(m1, m1 - lr1 * grad)],
train = theano.function(inputs=[X], updates=[(m1, m1 - lr * grad)],
mode=mode_with_gpu)
train(x)
topo = train.maker.fgraph.toposort()
......
......@@ -38,8 +38,8 @@ def move_to_gpu(data):
# We don't support complex on the GPU
if str(data.dtype) in tensor.basic.complex_dtypes:
return False
# We don't want scalar int on the GPU.
if data.ndim == 0 and str(data.dtype) in tensor.basic.discrete_dtypes:
# We don't want scalars on the GPU.
if data.ndim == 0:
return False
return True
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论