提交 06aedd59 · 作者: Olivier Delalleau

Merge pull request #446 from lamblin/fix_do_constant_folding

Do not constant-fold Alloc if it is the output
......@@ -2005,14 +2005,19 @@ class GpuAlloc(Op):
return (3,)
def do_constant_folding(self, node):
    """Tell whether this GpuAlloc node may be constant-folded.

    Every client of the node's first output is inspected. Folding is
    refused (``False``) when a client is the function output itself, or
    when a client is an apply node whose op is one of the inc-subtensor
    ops listed below. Otherwise folding is allowed (``True``).

    :param node: the apply node; ``node.outputs[0].clients`` is read.
    :return: ``True`` if the node may be constant-folded, else ``False``.
    """
    for client in node.outputs[0].clients:
        if client[0] == 'output':
            # If the output is a constant, it will have to be deepcopied
            # each time the function is called. So we do not fold.
            return False
        # The string check must come first: string clients (such as
        # 'output') have no ``op`` attribute to inspect.
        elif (not isinstance(client[0], basestring)
              and isinstance(client[0].op, (
                  tensor.IncSubtensor,
                  tensor.AdvancedIncSubtensor1,
                  GpuIncSubtensor,
                  GpuAdvancedIncSubtensor1,
              ))):
            return False
    return True
gpu_alloc = GpuAlloc()
......
......@@ -2617,12 +2617,18 @@ class Alloc(gof.Op):
return self.make_node(eval_points[0], *inputs[1:]).outputs
def do_constant_folding(self, node):
    """Tell whether this Alloc node may be constant-folded.

    Every client of the node's first output is inspected. Folding is
    refused (``False``) when a client is the function output itself, or
    when a client is an apply node whose op is an ``IncSubtensor``,
    ``AdvancedIncSubtensor1`` or ``AdvancedIncSubtensor``. Otherwise
    folding is allowed (``True``).

    :param node: the apply node; ``node.outputs[0].clients`` is read.
    :return: ``True`` if the node may be constant-folded, else ``False``.
    """
    for client in node.outputs[0].clients:
        if client[0] == 'output':
            # If the output is a constant, it will have to be deepcopied
            # each time the function is called. So we do not fold.
            return False
        # The string check must come first: string clients (such as
        # 'output') have no ``op`` attribute to inspect.
        elif (not isinstance(client[0], basestring)
              and isinstance(client[0].op, (
                  IncSubtensor,
                  AdvancedIncSubtensor1,
                  AdvancedIncSubtensor,
              ))):
            return False
    return True
......
......@@ -1278,8 +1278,11 @@ class TestAlloc(unittest.TestCase):
shared = staticmethod(theano.shared)
allocs = [tensor.Alloc] * 3
def setUp(self):
    """Give each test its own RandomState seeded from utt.fetch_seed()."""
    seed = utt.fetch_seed()
    self.rng = numpy.random.RandomState(seed=seed)
def test_alloc_constant_folding(self):
test_params = numpy.asarray(numpy.random.randn(50 * 60),
test_params = numpy.asarray(self.rng.randn(50 * 60),
self.dtype)
some_vector = vector('some_vector', dtype=self.dtype)
......@@ -1312,6 +1315,20 @@ class TestAlloc(unittest.TestCase):
fobj(test_params)
fgrad(test_params)
def test_alloc_output(self):
    """Check that an alloc which is the function output is not folded.

    For every alloc class in ``self.allocs``, a function whose only
    output is the alloc result is compiled; its graph must still hold
    exactly one node of that alloc class, and the first node must not be
    a DeepCopyOp.
    """
    val = tensor.constant(self.rng.randn(1, 1), dtype=self.dtype)
    deep_copy_cls = theano.compile.function_module.DeepCopyOp
    for alloc in self.allocs:
        # The output is the result of the alloc operation,
        # we do not want it to be constant-folded
        out = alloc()(val, 50, 60)
        f = theano.function([], out)
        topo = f.maker.env.toposort()
        is_alloc_node = [isinstance(node.op, alloc) for node in topo]
        assert numpy.sum(is_alloc_node) == 1
        first_op = topo[0].op
        assert not isinstance(first_op, deep_copy_cls)
def test_eye():
def check(dtype, N, M_=None, k=0):
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论