提交 c056820d authored 作者: Frederic's avatar Frederic

Make sure it works with non-scalar tensors, and test it.

上级 a9e821f5
...@@ -1345,7 +1345,8 @@ def local_gpualloc_memset_0(node): ...@@ -1345,7 +1345,8 @@ def local_gpualloc_memset_0(node):
if isinstance(node.op, GpuAlloc) and not node.op.memset_0: if isinstance(node.op, GpuAlloc) and not node.op.memset_0:
inp = node.inputs[0] inp = node.inputs[0]
if (isinstance(inp, CudaNdarrayConstant) and if (isinstance(inp, CudaNdarrayConstant) and
numpy.asarray(inp.data) == 0 and inp.data.size == 1): inp.data.size == 1 and
(numpy.asarray(inp.data) == 0).all()):
new_out = GpuAlloc(memset_0=True)(*node.inputs) new_out = GpuAlloc(memset_0=True)(*node.inputs)
return [new_out] return [new_out]
......
...@@ -73,6 +73,10 @@ def test_gpualloc(): ...@@ -73,6 +73,10 @@ def test_gpualloc():
def test_alloc_memset_0(): def test_alloc_memset_0():
i = tensor.iscalar() i = tensor.iscalar()
z = numpy.zeros((1,), dtype='float32') z = numpy.zeros((1,), dtype='float32')
o = numpy.ones((1,), dtype='float32')
ones = numpy.ones((2,), dtype='float32')
# Test with 0
a = basic_ops.gpu_alloc(cuda.gpu_from_host(tensor.constant(z)), i) a = basic_ops.gpu_alloc(cuda.gpu_from_host(tensor.constant(z)), i)
f = theano.function([i], a, mode=mode_with_gpu) f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
...@@ -80,6 +84,24 @@ def test_alloc_memset_0(): ...@@ -80,6 +84,24 @@ def test_alloc_memset_0():
assert isinstance(topo[0].op, basic_ops.GpuAlloc) and topo[0].op.memset_0 assert isinstance(topo[0].op, basic_ops.GpuAlloc) and topo[0].op.memset_0
assert (numpy.asarray(f(6)) == 0).all() assert (numpy.asarray(f(6)) == 0).all()
# Test with 1
a = basic_ops.gpu_alloc(cuda.gpu_from_host(tensor.constant(o)), i)
f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, basic_ops.GpuAlloc)
assert not topo[0].op.memset_0
assert (numpy.asarray(f(6)) == 1).all()
# Test with 1, 1
a = basic_ops.gpu_alloc(cuda.gpu_from_host(tensor.constant(ones)), i)
f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, basic_ops.GpuAlloc)
assert not topo[0].op.memset_0
assert (numpy.asarray(f(2)) == 1).all()
def test_gpuspecifyshape(): def test_gpuspecifyshape():
x = cuda.shared_constructor(numpy.ones(3,dtype='float32'), 'x') x = cuda.shared_constructor(numpy.ones(3,dtype='float32'), 'x')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论