Commit d304bb64, authored by abergeron

Merge pull request #2725 from nouiz/alloc_empty

Disable GpuAllocEmpty merge
......@@ -11,6 +11,7 @@
.. autofunction:: theano.tensor.signal.downsample.max_pool_2d
.. autofunction:: theano.tensor.signal.downsample.max_pool_2d_same_size
.. function:: fft(*todo)
......
......@@ -655,6 +655,16 @@ class PureOp(object):
"""
return True
def do_merge(self, node):
    """Return whether `node` may be merged with identical nodes in the graph.

    Ops override this to opt out of the merge optimization. Disabling
    merging is very rarely a good idea — do not override this unless you
    understand why an op's apply nodes must stay distinct.
    """
    return True
class Op(utils.object2, PureOp, CLinkerOp):
"""Convenience class to bundle `PureOp` and `CLinkerOp`"""
......
......@@ -517,6 +517,8 @@ class MergeFeature(object):
"""Check if a node can be merged, and queue that replacement."""
if node in self.nodes_seen:
return
if not node.op.do_merge(node):
return
# These asserts ensure that the fgraph has set the clients field
# properly.
......
......@@ -2584,6 +2584,15 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
raise TypeError('index must be vector')
if x_.type.ndim == 0:
raise TypeError('cannot index into a scalar')
if y_.type.ndim > x_.type.ndim:
if self.set_instead_of_inc:
opname = 'set'
else:
opname = 'increment'
raise TypeError(
'cannot %s x subtensor with ndim=%s'
' by y with ndim=%s to x subtensor with ndim=%s ' % (
opname, x_.type.ndim, y_.type.ndim))
return Apply(self, [x_, y_, ilist_], [x_.type()])
......@@ -2750,6 +2759,15 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
raise TypeError('index must be vector')
if x_.type.ndim == 0:
raise TypeError('cannot index into a scalar')
if y_.type.ndim > x_.type.ndim:
if self.set_instead_of_inc:
opname = 'set'
else:
opname = 'increment'
raise TypeError(
'cannot %s x subtensor with ndim=%s'
' by y with ndim=%s to x subtensor with ndim=%s ' % (
opname, x_.type.ndim, y_.type.ndim))
return Apply(self, [x_, y_, ilist_], [x_.type()])
......@@ -3288,6 +3306,9 @@ class GpuAllocEmpty(GpuOp):
# XXX: We could implement and call CudaNdarray.empty(sh) instead.
out[0] = cuda_ndarray.cuda_ndarray.CudaNdarray.zeros(sh)
def do_merge(self, node):
    """Never merge GpuAllocEmpty nodes.

    Each apply must keep its own (uninitialized) output buffer, so two
    identical-looking allocations are not interchangeable.
    """
    return False
def c_code(self, node, name, inputs, out_, sub):
out, = out_
fail = sub['fail']
......@@ -3340,6 +3361,9 @@ class GpuAlloc(GpuAllocEmpty):
"""
__props__ = ('memset_0',)
def do_merge(self, node):
    """Re-enable merging for GpuAlloc.

    Unlike its parent GpuAllocEmpty, GpuAlloc fills its output, so
    identical nodes are interchangeable and safe to merge.
    """
    return True
def __init__(self, memset_0=False):
    """Store the allocation mode.

    :param memset_0: if True, presumably the buffer is zero-filled with a
        memset instead of a generic fill — TODO confirm against the op's
        c_code, which is outside this view.
    """
    self.memset_0 = memset_0
......
......@@ -372,6 +372,26 @@ def test_reshape():
pass
def test_alloc_empty():
    """Check GpuAllocEmpty output properties and that such nodes never merge."""
    # A single gpu_alloc_empty compiles to exactly one apply node and
    # yields a float32 buffer of the requested shape.
    fn = theano.function([], cuda.basic_ops.gpu_alloc_empty(2, 3))
    assert len(fn.maker.fgraph.apply_nodes) == 1
    res = fn()
    assert res.shape == (2, 3)
    assert res.dtype == 'float32'

    # Two identical gpu_alloc_empty outputs must stay as two distinct
    # apply nodes: the merge optimization is disabled for this op.
    fn = theano.function([], [cuda.basic_ops.gpu_alloc_empty(2, 3),
                              cuda.basic_ops.gpu_alloc_empty(2, 3)])
    for res in fn():
        assert res.shape == (2, 3)
        assert res.dtype == 'float32'
    empties = [node for node in fn.maker.fgraph.apply_nodes
               if isinstance(node.op, cuda.basic_ops.GpuAllocEmpty)]
    assert len(empties) == 2
def test_elemwise_empty():
# test with 0 element
a = tcn.shared_constructor(theano._asarray(numpy.random.rand(0, 0),
......@@ -953,8 +973,7 @@ class T_Join_and_Split(theano.tensor.tests.test_basic.T_Join_and_Split):
utt.seed_rng()
self.mode = mode_with_gpu.excluding('constant_folding')
self.join_op = cuda.GpuJoin()
# No gpu split.
self.split_op_class = tensor.Split
self.split_op_class = cuda.GpuSplit
# No Make vector on the gpu, Join used instead
self.make_vector_op = cuda.GpuJoin()
self.floatX = "float32"
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
Register or sign in to post a comment