提交 394e8cf0 authored 作者: abergeron's avatar abergeron

Merge pull request #1794 from nouiz/recursion_limit

Recursion limit
...@@ -20,7 +20,9 @@ since 2007. But it is also approachable enough to be used in the classroom ...@@ -20,7 +20,9 @@ since 2007. But it is also approachable enough to be used in the classroom
News News
==== ====
* Theano 0.6rc3 was released. Everybody is encouraged to update. * Ian Goodfellow did a `12h class with exercises on Theano <https://github.com/goodfeli/theano_exercises>`_.
* Theano 0.6 was released. Everybody is encouraged to update.
* New technical report on Theano: `Theano: new features and speed improvements <http://arxiv.org/abs/1211.5590>`_. * New technical report on Theano: `Theano: new features and speed improvements <http://arxiv.org/abs/1211.5590>`_.
However, please keep citing the other paper below in scientific work involving Theano. However, please keep citing the other paper below in scientific work involving Theano.
......
...@@ -2974,7 +2974,40 @@ class Composite(ScalarOp): ...@@ -2974,7 +2974,40 @@ class Composite(ScalarOp):
# We need to clone the graph as sometimes its nodes already # We need to clone the graph as sometimes its nodes already
# contain a reference to an fgraph. As we want the Composite # contain a reference to an fgraph. As we want the Composite
# to be picklable, we can't have a reference to fgraph. # to be picklable, we can't have a reference to fgraph.
inputs, outputs = gof.graph.clone(inputs, outputs)
# Also, if there is Composite in the inner graph, we want to
# remove them. In that case, we do a more complicated clone
# that will flatten Composite. We don't need to do this
# recursively, as the way the fusion optimizer works, we have
# only 1 new Composite each time at the output.
if len(outputs) > 1 or not any([isinstance(var.owner.op, Composite)
for var in outputs]):
# No inner Composite
inputs, outputs = gof.graph.clone(inputs, outputs)
else:
# Inner Composite that we need to flatten
assert len(outputs) == 1
# 1. Create a new graph from inputs up to the
# Composite
res = theano.compile.rebuild_collect_shared(
inputs=inputs,
outputs=outputs[0].owner.inputs,
copy_inputs_over=False) # Clone also the inputs
# 2. We continue this partial clone with the graph in
# the inner Composite
res2 = theano.compile.rebuild_collect_shared(
inputs=outputs[0].owner.op.inputs,
outputs=outputs[0].owner.op.outputs,
replace=dict(zip(outputs[0].owner.op.inputs, res[1]))
)
assert len(res2[1]) == len(outputs)
assert len(res[0]) == len(inputs)
assert res[0] != inputs
inputs, outputs = res[0], res2[1]
# Next assert comment just for speed
#assert not any([isinstance(node.op, Composite) for node in
# theano.gof.graph.ops(inputs, outputs)])
self.inputs = copy(inputs) self.inputs = copy(inputs)
self.outputs = copy(outputs) self.outputs = copy(outputs)
self.inputs_type = tuple([input.type for input in inputs]) self.inputs_type = tuple([input.type for input in inputs])
......
...@@ -68,19 +68,17 @@ class test_composite(unittest.TestCase): ...@@ -68,19 +68,17 @@ class test_composite(unittest.TestCase):
fn = gof.DualLinker().accept(g).make_function() fn = gof.DualLinker().accept(g).make_function()
assert fn(1.0, 2.0) == 1.5 assert fn(1.0, 2.0) == 1.5
# def test_sin(self): def test_flatten(self):
# x = inputs() #Test that we flatten multiple Composite.
# e = sin(x) x, y, z = inputs()
# C = Composite([x], [e]) C = Composite([x, y], [x + y])
# c = C.make_node(x) CC = Composite([x, y], [C(x * y, y)])
# # print c.c_code(['x'], ['z'], dict(id = 0)) assert not isinstance(CC.outputs[0].owner.op, Composite)
# g = FunctionGraph([x], [c.out])
# fn = gof.DualLinker().accept(g).make_function() # Test with multiple outputs
# assert fn(0) == 0 CC = Composite([x, y, z], [C(x * y, y), C(x * z, y)])
# assert fn(3.14159265358/2) == 1 #We don't flatten that case.
# assert fn(3.14159265358) == 0 assert isinstance(CC.outputs[0].owner.op, Composite)
# WRITEME: Test for sin, pow, and other scalar ops.
def test_with_constants(self): def test_with_constants(self):
x, y, z = inputs() x, y, z = inputs()
......
...@@ -508,6 +508,12 @@ class EmptyConstantError(NotScalarConstantError): ...@@ -508,6 +508,12 @@ class EmptyConstantError(NotScalarConstantError):
""" """
get_scalar_constant_value_elemwises = (
scal.Cast, scal.Switch,
scal.NEQ, scal.EQ,
scal.LT, scal.GT, scal.LE, scal.GE,
scal.Sub, scal.Add, scal.Mod, scal.Mul,
scal.IntDiv, scal.TrueDiv)
def get_scalar_constant_value(v): def get_scalar_constant_value(v):
"""return the constant scalar(0-D) value underlying variable `v` """return the constant scalar(0-D) value underlying variable `v`
...@@ -562,7 +568,7 @@ def get_scalar_constant_value(v): ...@@ -562,7 +568,7 @@ def get_scalar_constant_value(v):
compile.ops.OutputGuard, compile.ops.OutputGuard,
compile.DeepCopyOp)): compile.DeepCopyOp)):
return get_scalar_constant_value(v.owner.inputs[0]) return get_scalar_constant_value(v.owner.inputs[0])
if (isinstance(v.owner.op, theano.compile.ops.Shape_i) and elif (isinstance(v.owner.op, theano.compile.ops.Shape_i) and
isinstance(v.owner.inputs[0], Constant)): isinstance(v.owner.inputs[0], Constant)):
return v.owner.inputs[0].data.shape[v.owner.op.i] return v.owner.inputs[0].data.shape[v.owner.op.i]
# Don't act as the constant_folding optimization here as this # Don't act as the constant_folding optimization here as this
...@@ -570,26 +576,29 @@ def get_scalar_constant_value(v): ...@@ -570,26 +576,29 @@ def get_scalar_constant_value(v):
# mess with the stabilization optimization and be too slow. # mess with the stabilization optimization and be too slow.
# We put all the scalar Ops used by get_canonical_form_slice() # We put all the scalar Ops used by get_canonical_form_slice()
# to allow it to determine the broadcast pattern correctly. # to allow it to determine the broadcast pattern correctly.
if ((isinstance(v.owner.op, Elemwise) and elif isinstance(v.owner.op, scal.ScalarOp):
isinstance(v.owner.op.scalar_op, scal.Second)) or if isinstance(v.owner.op, scal.Second):
isinstance(v.owner.op, scal.Second)): # We don't need both input to be constant for second
# We don't need both input to be constant for second shape, val = v.owner.inputs
shape, val = v.owner.inputs return get_scalar_constant_value(val)
return get_scalar_constant_value(val) if isinstance(v.owner.op, get_scalar_constant_value_elemwises):
elemwises = (scal.Cast, scal.Switch, const = [get_scalar_constant_value(i)
scal.NEQ, scal.EQ, for i in v.owner.inputs]
scal.LT, scal.GT, scal.LE, scal.GE, ret = [[None]]
scal.Sub, scal.Add, scal.Mod, scal.Mul, v.owner.op.perform(v.owner, const, ret)
scal.IntDiv, scal.TrueDiv) return ret[0][0]
if (isinstance(v.owner.op, Elemwise) and elif isinstance(v.owner.op, Elemwise):
len(v.owner.outputs) == 1 and if isinstance(v.owner.op.scalar_op, scal.Second):
(isinstance(v.owner.op.scalar_op, elemwises) or # We don't need both input to be constant for second
isinstance(v.owner.op, elemwises))): shape, val = v.owner.inputs
const = [get_scalar_constant_value(i) for i in v.owner.inputs] return get_scalar_constant_value(val)
ret = [[None]] elif isinstance(v.owner.op.scalar_op,
v.owner.op.perform(v.owner, const, ret) get_scalar_constant_value_elemwises):
return ret[0][0] const = [get_scalar_constant_value(i) for i in v.owner.inputs]
if isinstance(v.owner.op, theano.tensor.subtensor.Subtensor) and v.ndim == 0: ret = [[None]]
v.owner.op.perform(v.owner, const, ret)
return ret[0][0]
elif isinstance(v.owner.op, theano.tensor.subtensor.Subtensor) and v.ndim == 0:
if isinstance(v.owner.inputs[0], TensorConstant): if isinstance(v.owner.inputs[0], TensorConstant):
cdata = tuple(v.owner.op.get_constant_idx(v.owner.inputs)) cdata = tuple(v.owner.op.get_constant_idx(v.owner.inputs))
try: try:
...@@ -626,7 +635,7 @@ def get_scalar_constant_value(v): ...@@ -626,7 +635,7 @@ def get_scalar_constant_value(v):
# join can cast implicitly its input in some case. # join can cast implicitly its input in some case.
return theano._asarray(ret, dtype=v.type.dtype) return theano._asarray(ret, dtype=v.type.dtype)
if (v.owner.inputs[0].owner and elif (v.owner.inputs[0].owner and
isinstance(v.owner.inputs[0].owner.op, isinstance(v.owner.inputs[0].owner.op,
theano.tensor.opt.MakeVector) and theano.tensor.opt.MakeVector) and
# MakeVector normally accept only scalar as input. # MakeVector normally accept only scalar as input.
......
...@@ -774,8 +774,7 @@ class Elemwise(OpenMPOp): ...@@ -774,8 +774,7 @@ class Elemwise(OpenMPOp):
super(Elemwise, self).perform(node, inputs, output_storage) super(Elemwise, self).perform(node, inputs, output_storage)
maxsize = max(len(input.shape) for input in inputs) maxsize = max(len(input.shape) for input in inputs)
for dims in izip(*[([(1, True)] * (maxsize - len(input.shape)) for dims in izip(*[zip(input.shape, sinput.type.broadcastable)
+ zip(input.shape, sinput.type.broadcastable))
for input, sinput in zip(inputs, node.inputs)]): for input, sinput in zip(inputs, node.inputs)]):
if max(d for d, b in dims) != 1 and (1, False) in dims: if max(d for d, b in dims) != 1 and (1, False) in dims:
# yes there may be more compact ways to write this code, # yes there may be more compact ways to write this code,
...@@ -808,34 +807,36 @@ class Elemwise(OpenMPOp): ...@@ -808,34 +807,36 @@ class Elemwise(OpenMPOp):
out_shape.append(max(values)) out_shape.append(max(values))
out_shape = tuple(out_shape) out_shape = tuple(out_shape)
if not self.inplace_pattern: # Commented as we don't reuse outputs now.
for output, storage in izip(node.outputs, output_storage): #
odat = storage[0] # if not self.inplace_pattern:
if odat is not None: # for output, storage in izip(node.outputs, output_storage):
if odat.shape != out_shape: # odat = storage[0]
# It is unsafe to try to resize odat, # if odat is not None:
# we have to allocate output storage. # if odat.shape != out_shape:
odat = None # # It is unsafe to try to resize odat,
if odat is None: # # we have to allocate output storage.
odat = numpy.ndarray(out_shape, dtype=output.type.dtype) # odat = None
storage[0] = odat # if odat is None:
else: # odat = numpy.ndarray(out_shape, dtype=output.type.dtype)
for i, (output, storage) in enumerate( # storage[0] = odat
izip(node.outputs, output_storage)): # else:
#i is an output idx # for i, (output, storage) in enumerate(
if i in self.inplace_pattern: # izip(node.outputs, output_storage)):
odat = inputs[self.inplace_pattern[i]] # #i is an output idx
else: # if i in self.inplace_pattern:
odat = storage[0] # odat = inputs[self.inplace_pattern[i]]
if odat is not None: # else:
if odat.shape != out_shape: # odat = storage[0]
# It is unsafe to try to resize odat, # if odat is not None:
# we have to allocate output storage. # if odat.shape != out_shape:
odat = None # # It is unsafe to try to resize odat,
if odat is None: # # we have to allocate output storage.
odat = numpy.ndarray(out_shape, # odat = None
dtype=output.type.dtype) # if odat is None:
storage[0] = odat # odat = numpy.ndarray(out_shape,
# dtype=output.type.dtype)
# storage[0] = odat
ufunc_args = inputs # + output_storage ufunc_args = inputs # + output_storage
if self.nfunc and len(inputs) == self.nfunc_spec[1]: if self.nfunc and len(inputs) == self.nfunc_spec[1]:
...@@ -860,26 +861,25 @@ class Elemwise(OpenMPOp): ...@@ -860,26 +861,25 @@ class Elemwise(OpenMPOp):
if nout == 1: if nout == 1:
variables = [variables] variables = [variables]
i = 0
for variable, storage, nout in izip(variables, output_storage, for variable, storage, nout in izip(variables, output_storage,
node.outputs): node.outputs):
if str(getattr(variable, "dtype", "")) == 'object': if getattr(variable, "dtype", "") == 'object':
# Since numpy 1.6, function created with numpy.frompyfunc # Since numpy 1.6, function created with numpy.frompyfunc
# always return an ndarray with dtype object # always return an ndarray with dtype object
variable = numpy.asarray(variable, dtype=nout.dtype) variable = numpy.asarray(variable, dtype=nout.dtype)
# The storage has been resized earlier. if i in self.inplace_pattern:
if hasattr(variable, 'shape'): odat = inputs[self.inplace_pattern[i]]
assert storage[0].shape == variable.shape odat[...] = variable
storage[0] = odat
# Sometimes NumPy return a Python type.
elif not isinstance(variable, numpy.ndarray):
variable = numpy.asarray(variable, nout.dtype)
storage[0] = variable
else: else:
# If variable has not shape, then it is a scalar. storage[0] = variable
assert numpy.prod(storage[0].shape) == 1 i += 1
storage[0][...] = variable
assert str(storage[0].dtype) != 'object'
# the following should be used instead of the previous loop,
# unfortunately it tends to segfault
# self.ufunc(*(ufunc_args+[s[0] for s in output_storage]))
def infer_shape(self, node, i_shapes): def infer_shape(self, node, i_shapes):
rval = [] rval = []
......
...@@ -4888,11 +4888,40 @@ class FusionOptimizer(Optimizer): ...@@ -4888,11 +4888,40 @@ class FusionOptimizer(Optimizer):
print >> stream, blanc, " time_toposort", prof[7] print >> stream, blanc, " time_toposort", prof[7]
def local_add_mul_fusion(node):
"""Fuse consecutive add or mul in one such node with more inputs.
    It is better to fuse add/mul that way than in a Composite node as
    this makes the inner graph of the Composite smaller. This allows
    putting more computation in a Composite before hitting the max
    recursion limit when pickling a Composite.
"""
if (not isinstance(node.op, Elemwise) or
not isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul))):
return False
s_op = node.op.scalar_op.__class__
for inp in node.inputs:
if (inp.owner and
isinstance(inp.owner.op, Elemwise) and
isinstance(inp.owner.op.scalar_op, s_op)):
l = list(node.inputs)
l.remove(inp)
return [node.op(*(l + inp.owner.inputs))]
if config.tensor.local_elemwise_fusion: if config.tensor.local_elemwise_fusion:
_logger.debug("enabling optimization fusion elemwise in fast_run") _logger.debug("enabling optimization fusion elemwise in fast_run")
#Must be after gpu(48.5) and before AddDestroyHandler(49.5) #Must be after gpu(48.5) and before AddDestroyHandler(49.5)
fuse_seqopt = gof.SequenceDB()
fuse_seqopt.register('local_add_mul_fusion',
FusionOptimizer(local_add_mul_fusion),
0, 'fast_run', 'fusion')
fuse_seqopt.register('composite_elemwise_fusion',
FusionOptimizer(local_elemwise_fusion),
1, 'fast_run', 'fusion')
compile.optdb.register('elemwise_fusion', compile.optdb.register('elemwise_fusion',
FusionOptimizer(local_elemwise_fusion), 49, fuse_seqopt, 49,
'fast_run', 'fusion', 'local_elemwise_fusion', 'fast_run', 'fusion', 'local_elemwise_fusion',
'FusionOptimizer') 'FusionOptimizer')
else: else:
......
...@@ -1207,6 +1207,36 @@ class test_fusion(unittest.TestCase): ...@@ -1207,6 +1207,36 @@ class test_fusion(unittest.TestCase):
# Test it on some dummy values # Test it on some dummy values
f(*[range(i, 4 + i) for i in range(35)]) f(*[range(i, 4 + i) for i in range(35)])
def test_pickle_big_fusion(self):
        """In the past, pickling a Composite generated in that case
        crashed with the max recursion limit. So we were not able to
        generate C code in that case.
"""
factors = []
sd = tensor.dscalar()
means = tensor.dvector()
cst_05 = theano.tensor.constant(.5)
cst_m05 = theano.tensor.constant(-.5)
cst_2 = theano.tensor.constant(2)
cst_m2 = theano.tensor.constant(-2)
ones = theano.tensor.constant(numpy.ones(10))
n = 85
if theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
n = 10
for i in range(n):
f = (cst_m05 * sd ** cst_m2 * (ones - means[i]) ** cst_2 +
cst_05 * tensor.log(cst_05 * (sd ** cst_m2) / numpy.pi))
factors.append(tensor.sum(f))
logp = tensor.add(*factors)
vars = [sd, means]
dlogp = function(vars, [theano.grad(logp, v) for v in vars])
dlogp(2, numpy.random.rand(n))
def speed_fusion(self, shared_fn=shared, gpu=False, s=None): def speed_fusion(self, shared_fn=shared, gpu=False, s=None):
""" """
param type s: a slice object param type s: a slice object
...@@ -1676,8 +1706,8 @@ class test_local_subtensor_lift(unittest.TestCase): ...@@ -1676,8 +1706,8 @@ class test_local_subtensor_lift(unittest.TestCase):
f = function([x, y, z], tensor.exp(x + y + z)[0], mode=mode_opt) f = function([x, y, z], tensor.exp(x + y + z)[0], mode=mode_opt)
prog = f.maker.fgraph.toposort() prog = f.maker.fgraph.toposort()
assert isinstance(prog[1].op, tensor.DimShuffle) assert isinstance(prog[0].op, tensor.DimShuffle)
assert isinstance(prog[0].op, tensor.Subtensor) # first subtensor assert isinstance(prog[1].op, tensor.Subtensor) # first subtensor
assert isinstance(prog[2].op, tensor.Subtensor) # first subtensor assert isinstance(prog[2].op, tensor.Subtensor) # first subtensor
assert isinstance(prog[3].op.scalar_op, theano.scalar. assert isinstance(prog[3].op.scalar_op, theano.scalar.
Composite) # Composite{add,add} Composite) # Composite{add,add}
...@@ -1693,8 +1723,8 @@ class test_local_subtensor_lift(unittest.TestCase): ...@@ -1693,8 +1723,8 @@ class test_local_subtensor_lift(unittest.TestCase):
f = function([x, y, z], tensor.exp(x + y + z)[0:2], mode=mode_opt) f = function([x, y, z], tensor.exp(x + y + z)[0:2], mode=mode_opt)
prog = f.maker.fgraph.toposort() prog = f.maker.fgraph.toposort()
assert isinstance(prog[1].op, tensor.DimShuffle) assert isinstance(prog[0].op, tensor.DimShuffle)
assert isinstance(prog[0].op, tensor.Subtensor) # first subtensor assert isinstance(prog[1].op, tensor.Subtensor) # first subtensor
assert isinstance(prog[2].op, tensor.Subtensor) # first subtensor assert isinstance(prog[2].op, tensor.Subtensor) # first subtensor
assert isinstance(prog[3].op.scalar_op, theano.scalar. assert isinstance(prog[3].op.scalar_op, theano.scalar.
Composite) # Composite{add,add} Composite) # Composite{add,add}
...@@ -3402,7 +3432,7 @@ class T_local_erfc(unittest.TestCase): ...@@ -3402,7 +3432,7 @@ class T_local_erfc(unittest.TestCase):
assert len(f.maker.fgraph.apply_nodes) == 1, len(f.maker.fgraph.apply_nodes) assert len(f.maker.fgraph.apply_nodes) == 1, len(f.maker.fgraph.apply_nodes)
assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
assert len(f.maker.fgraph.toposort()[0].fgraph.toposort()[ assert len(f.maker.fgraph.toposort()[0].fgraph.toposort()[
0].op.scalar_op.fgraph.apply_nodes)==2,len(f.maker.fgraph.toposort()[0].fgraph.toposort()[0].op.scalar_op.fgraph.apply_nodes) 0].op.scalar_op.fgraph.apply_nodes)==22,len(f.maker.fgraph.toposort()[0].fgraph.toposort()[0].op.scalar_op.fgraph.apply_nodes)
#TODO: fix this problem #TODO: fix this problem
if theano.config.floatX=="float32" and theano.config.mode in ["DebugMode", "DEBUG_MODE"]: if theano.config.floatX=="float32" and theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
raise KnownFailureTest( raise KnownFailureTest(
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论