Commit e6e6a433 authored by nouiz

Merge pull request #1043 from goodfeli/fix_incsubtensor_opt

fix bug in gpu inc subtensor optimization and add a unit test
......@@ -822,6 +822,9 @@ def local_gpu_incsubtensor(node):
gpu_from_host(x),
gpu_from_host(y),
*coords)]
# Incrementing a float32 x results in a float32
# output even if y is float64, so we can downcast
# y to put it on GPU
if type(node.op) == tensor.IncSubtensor and \
node.inputs[0].dtype == "float32":
x, y = node.inputs[0:2]
......@@ -838,6 +841,8 @@ def local_gpu_incsubtensor(node):
go_gpu = True
gpu_y, = y.owner.inputs
else:
if y.dtype != 'float32':
y = tensor.cast(y, 'float32')
gpu_y = gpu_from_host(y)
if go_gpu:
return [host_from_gpu(GpuIncSubtensor(
......
......@@ -377,6 +377,29 @@ class TestIfElse(theano.tests.test_ifelse.test_ifelse):
def get_ifelse(self, n):
return theano.ifelse.IfElse(n, gpu=True, as_view=True)
def test_incsubtensor_mixed():
# This catches a bug that occurred when incrementing
# a float32 tensor by a float64 tensor.
# The result is defined to be float32, so it is OK
# to downcast the float64 increment in order to
# transfer it to the GPU.
# The bug was that the optimization called GpuFromHost
# without casting first, causing the optimization to
# fail.
X = tensor.fmatrix()
Y = tensor.dmatrix()
Z = tensor.inc_subtensor(X[0:1,0:1],Y)
f = theano.function([X,Y], Z, mode=mode_with_gpu)
packed, = f.maker.fgraph.inputs[1].clients
client, idx = packed
print client
assert isinstance(client.op, tensor.Elemwise)
assert isinstance(client.op.scalar_op, theano.scalar.Cast)
packed ,= client.outputs[0].clients
client, idx = packed
assert isinstance(client.op, cuda.GpuFromHost)
if __name__ == '__main__':
test_gpualloc()
test_opt_gpujoin_onlyajoin()
......
Markdown 格式
0%
您向此讨论添加了 0 人。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论