提交 06aedd59 作者: Olivier Delalleau

Merge pull request #446 from lamblin/fix_do_constant_folding

Do not constant-fold Alloc if it is the output
@@ -2005,14 +2005,19 @@ class GpuAlloc(Op):
return (3,) return (3,)
def do_constant_folding(self, node):
    """Decide whether the output of this GpuAlloc may be constant-folded.

    Folding is refused in two cases:

    * the alloc'd variable is directly a function output
      (``client[0] == 'output'``): a folded constant would have to be
      deep-copied on every function call, which defeats the purpose;
    * any client is an inc-subtensor-like Op (CPU or GPU variants),
      which can work inplace on the freshly allocated buffer.

    :param node: the Apply node this Op belongs to.
    :return: True if constant folding is allowed, False otherwise.
    """
    # Each client is a pair (apply_node_or_'output', input_index).
    for client in node.outputs[0].clients:
        if client[0] == 'output':
            # If the output is a constant, it will have to be deepcopied
            # each time the function is called. So we do not fold.
            return False
        # `basestring` guard is Python 2: skip the string 'output' marker
        # before touching `.op` on a real Apply node.
        elif (not isinstance(client[0], basestring)
              and isinstance(client[0].op, (
                  tensor.IncSubtensor,
                  tensor.AdvancedIncSubtensor1,
                  GpuIncSubtensor,
                  GpuAdvancedIncSubtensor1,
              ))):
            return False
    return True
# Module-level singleton instance of the GpuAlloc Op.
# (The scraped diff duplicated this context line; restored a single copy.)
gpu_alloc = GpuAlloc()
......
@@ -2617,12 +2617,18 @@ class Alloc(gof.Op):
return self.make_node(eval_points[0], *inputs[1:]).outputs return self.make_node(eval_points[0], *inputs[1:]).outputs
def do_constant_folding(self, node):
    """Decide whether the output of this Alloc may be constant-folded.

    Folding is refused in two cases:

    * the alloc'd variable is directly a function output
      (``client[0] == 'output'``): a folded constant would have to be
      deep-copied on every function call, which defeats the purpose;
    * any client is an inc-subtensor-like Op, which can work inplace
      on the freshly allocated buffer.

    :param node: the Apply node this Op belongs to.
    :return: True if constant folding is allowed, False otherwise.
    """
    # Each client is a pair (apply_node_or_'output', input_index).
    for client in node.outputs[0].clients:
        if client[0] == 'output':
            # If the output is a constant, it will have to be deepcopied
            # each time the function is called. So we do not fold.
            return False
        # `basestring` guard is Python 2: skip the string 'output' marker
        # before touching `.op` on a real Apply node.
        elif (not isinstance(client[0], basestring)
              and isinstance(client[0].op, (
                  IncSubtensor,
                  AdvancedIncSubtensor1,
                  AdvancedIncSubtensor,
              ))):
            return False
    return True
......
@@ -1278,8 +1278,11 @@ class TestAlloc(unittest.TestCase):
# Class-level fixtures of TestAlloc (the scraped diff duplicated these
# context lines; restored single copies).
# NOTE(review): these belong at class-body indentation inside TestAlloc.
shared = staticmethod(theano.shared)
allocs = [tensor.Alloc] * 3
def setUp(self):
    # Seed a private RandomState from theano's unittest-tools seed so the
    # random test data is reproducible across runs (replaces the previous
    # use of the global numpy.random state).
    self.rng = numpy.random.RandomState(seed=utt.fetch_seed())
def test_alloc_constant_folding(self): def test_alloc_constant_folding(self):
test_params = numpy.asarray(numpy.random.randn(50 * 60), test_params = numpy.asarray(self.rng.randn(50 * 60),
self.dtype) self.dtype)
some_vector = vector('some_vector', dtype=self.dtype) some_vector = vector('some_vector', dtype=self.dtype)
@@ -1312,6 +1315,20 @@ class TestAlloc(unittest.TestCase):
fobj(test_params) fobj(test_params)
fgrad(test_params) fgrad(test_params)
def test_alloc_output(self):
    # An Alloc whose result is directly a function output must not be
    # constant-folded: folding would force a deep copy of the constant
    # on every function call.
    val = tensor.constant(self.rng.randn(1, 1), dtype=self.dtype)
    for alloc_cls in self.allocs:
        out = alloc_cls()(val, 50, 60)
        fn = theano.function([], out)
        topology = fn.maker.env.toposort()
        # Exactly one alloc node must survive in the compiled graph.
        n_allocs = numpy.sum([isinstance(apply_node.op, alloc_cls)
                              for apply_node in topology])
        assert n_allocs == 1
        # And it must not have been replaced by a DeepCopyOp of a constant.
        folded = isinstance(topology[0].op,
                            theano.compile.function_module.DeepCopyOp)
        assert not folded
def test_eye(): def test_eye():
def check(dtype, N, M_=None, k=0): def check(dtype, N, M_=None, k=0):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论