提交 2e20f232 authored 作者: Frederic's avatar Frederic

Make a mechanism so that Alloc and GpuAlloc are not constant-folded in some cases.

上级 7b943a5b
......@@ -217,6 +217,20 @@ following methods:
``__str__`` method include the name of the op and the Op's parameters'
values.
.. function:: do_constant_folding(node)
*Default:* Return True
By default, when optimizations are enabled, we remove during
function compilation any apply node whose inputs are all
constants. We replace such an Apply node with a Theano constant
variable. This way, the apply node is not executed at each function
call. If you want to force the execution of an op during the
function call, make do_constant_folding return False.
As done in the Alloc op, you can return False only in some cases, by
analysing the graph from the node parameter.
At a bare minimum, a new Op must define ``make_node`` and ``perform``, which
have no defaults.
......
......@@ -509,6 +509,16 @@ class PureOp(object):
"""
raise utils.MethodNotDefined("perform", type(self), self.__class__.__name__)
def do_constant_folding(self, node):
    """
    Determine whether an Apply node of this op, all of whose inputs
    are constants, should be constant-folded at compile time.

    Returning False lets each op opt out of constant folding, so it
    can choose its own memory/speed trade-off.  Opting out can also
    make things faster, since a Constant cannot be used as the
    destination of an inplace operation (see *IncSubtensor).

    :param node: the Apply node being considered for folding.
    :return: True (the default) to allow constant folding.
    """
    return True
class Op(utils.object2, PureOp, CLinkerOp):
    """Convenience class to bundle `PureOp` (graph-building interface)
    and `CLinkerOp` (C-code interface) into a single base class."""
......
......@@ -2004,6 +2004,17 @@ class GpuAlloc(Op):
def c_code_cache_version(self):
    # Version tag for the C code cache; bump it whenever the
    # generated C code changes so stale compiled modules are rebuilt.
    return (3,)
def do_constant_folding(self, node):
    """
    Opt out of constant folding when the output feeds an inplace
    increment op.

    Folding would replace the output with a Constant, and a Constant
    cannot be used as the destination of an inplace operation (see
    *IncSubtensor), so folding here would force a copy at each call.

    :param node: the Apply node being considered for folding.
    :return: False if any client is an Inc/SetSubtensor-family op,
        True otherwise.
    """
    # Plain loop instead of any() over a list comprehension: it
    # short-circuits on the first matching client instead of
    # materializing the whole list first.
    for client in node.outputs[0].clients:
        if isinstance(client[0].op, (tensor.IncSubtensor,
                                     tensor.AdvancedIncSubtensor1,
                                     GpuIncSubtensor,
                                     GpuAdvancedIncSubtensor1)):
            return False
    return True
# Module-level instance of GpuAlloc, shared by the graph builders.
gpu_alloc = GpuAlloc()
......
......@@ -728,7 +728,16 @@ def test_gpualloc_output_to_gpu():
assert numpy.allclose(numpy.ones(a.get_value(borrow=True).shape)+9,f_gpu(9))
assert numpy.allclose(f(5),f_gpu(5))
import theano.tensor.tests.test_basic
class TestAlloc(theano.tensor.tests.test_basic.TestAlloc):
    # Re-run the base TestAlloc suite on the GPU.
    dtype = "float32"  # the CUDA backend only supports float32
    mode = mode_with_gpu
    shared = staticmethod(cuda.shared_constructor)
    # One expected Alloc class per base-class test case; the first two
    # cases are moved to the GPU, the last stays a plain CPU Alloc.
    # NOTE(review): presumably mirrors the three cases in the base
    # class's test_alloc_constant_folding -- confirm against it.
    allocs = [B.GpuAlloc, B.GpuAlloc, tensor.Alloc]
class T_Join_and_Split(theano.tensor.tests.test_basic.T_Join_and_Split):
    # Re-run the base join/split tests on the GPU.
    def setUp(self):
        # Seed the RNG so the inherited tests are deterministic.
        utt.seed_rng()
......
......@@ -2616,6 +2616,16 @@ class Alloc(gof.Op):
return [None]
return self.make_node(eval_points[0], *inputs[1:]).outputs
def do_constant_folding(self, node):
    """
    Opt out of constant folding when the output feeds an inplace
    increment op: a Constant cannot be used as the destination of an
    inplace operation (see PureOp.do_constant_folding), so folding
    here would force a copy at each call.

    :param node: the Apply node being considered for folding.
    :return: False if any client is an Inc/SetSubtensor-family op,
        True otherwise.
    """
    # Plain loop instead of python_any() over a list comprehension:
    # it short-circuits on the first matching client instead of
    # materializing the whole list first.
    for client in node.outputs[0].clients:
        if isinstance(client[0].op, (IncSubtensor,
                                     AdvancedIncSubtensor1,
                                     AdvancedIncSubtensor)):
            return False
    return True
# Module-level instance of Alloc, shared by the graph builders, and
# its pretty-printer registration.
alloc = Alloc()
pprint.assign(alloc, printing.FunctionPrinter('alloc'))
......
......@@ -3767,6 +3767,9 @@ def constant_folding(node):
if not isinstance(input, Constant):
return False
#condition: all inputs are constant
if not node.op.do_constant_folding(node):
# The op asked not to be constant-folded.
return False
storage_map = dict([(i, [i.data]) for i in node.inputs])
compute_map = dict([(i, [True]) for i in node.inputs])
......
......@@ -48,6 +48,11 @@ except ImportError:
mode_no_scipy = "FAST_RUN"
floatX = config.floatX
# FAST_COMPILE disables most optimizations; the constant-folding tests
# below need an optimizing mode, so fall back to FAST_RUN in that case.
if config.mode == "FAST_COMPILE":
    mode_opt = "FAST_RUN"
else:
    mode_opt = get_default_mode()

### seed random number generator so that unittests are deterministic ###
utt.seed_rng()
......@@ -1266,6 +1271,48 @@ Alloc13GradTester = makeBroadcastTester(
),
)
class TestAlloc(unittest.TestCase):
    """Check that Alloc nodes are constant-folded, except where folding
    would block the inplace *IncSubtensor optimizations (see
    Alloc.do_constant_folding)."""
    dtype = config.floatX
    mode = mode_opt  # must be an optimizing mode for folding to happen
    shared = staticmethod(theano.shared)
    # One expected Alloc class per test case; GPU subclasses override
    # these attributes to re-run the suite on the GPU.
    allocs = [tensor.Alloc] * 3

    def test_alloc_constant_folding(self):
        test_params = numpy.asarray(numpy.random.randn(50 * 60),
                                    self.dtype)
        # NOTE(review): vector/arange/sum/dot appear to come from the
        # module's theano.tensor star import -- confirm.
        some_vector = vector('some_vector', dtype=self.dtype)
        some_matrix = some_vector.reshape((60, 50))
        variables = self.shared(numpy.ones((50,), dtype=self.dtype))
        idx = tensor.constant(numpy.arange(50))
        # Each case pairs a subtensor graph with the number of Alloc
        # nodes expected to survive in the optimized gradient graph.
        for alloc, (subtensor, n_alloc) in zip(self.allocs, [
            #IncSubtensor
            (some_matrix[:60], 2),
            #AdvancedIncSubtensor1
            (some_matrix[arange(60)], 2),
            #AdvancedIncSubtensor
            (some_matrix[idx, idx], 1)]):
            derp = sum(dot(subtensor, variables))
            fobj = theano.function([some_vector], derp, mode=self.mode)
            grad_derp = theano.grad(derp, some_vector)
            fgrad = theano.function([some_vector], grad_derp,
                                    mode=self.mode)
            # Forward graph: every Alloc should have been folded away.
            topo_obj = fobj.maker.env.toposort()
            assert numpy.sum([isinstance(node.op, alloc)
                              for node in topo_obj]) == 0
            # Gradient graph: Allocs feeding *IncSubtensor ops must
            # survive folding.
            topo_grad = fgrad.maker.env.toposort()
            #print subtensor
            #theano.printing.debugprint(fgrad)
            assert numpy.sum([isinstance(node.op, alloc)
                              for node in topo_grad]) == n_alloc
            # Smoke-check that both compiled functions actually run.
            fobj(test_params)
            fgrad(test_params)
def test_eye():
def check(dtype, N, M_=None, k=0):
# Theano does not accept None as a tensor.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论