提交 f9fc5dfd authored 作者: Dustin Webb's avatar Dustin Webb

Updated local_alloc_elemwise to remove all allocs when possible and to add…

Updated local_alloc_elemwise to remove all allocs when possible and to add assert only when we cannot determine whether the shapes will be correct. Deprecated experimental.local_alloc_elemwise and enabled local_alloc_elemwise by default.
上级 795ded70
......@@ -1529,6 +1529,7 @@ def local_remove_useless_assert(node):
return [assert_(node.inputs[0], *cond)]
@register_specialize
@gof.local_optimizer([T.Elemwise])
def local_alloc_elemwise(node):
"""
......@@ -1536,7 +1537,7 @@ def local_alloc_elemwise(node):
-> elemwise(x, y.TensorType(no broadcast flag))
elemwise(dimshuffle(alloc(x, shp)),... ,y.TensorType(BROADCAST CONDITION))
-> elemwise(x, y.TensorType(no broadcast flag))
-> elemwise(x.dimshuffle(...), y.TensorType(no broadcast flag))
    BROADCAST CONDITION: the condition is that the one input that is
    not to be optimized must have the same broadcast pattern as the
......@@ -1548,99 +1549,124 @@ def local_alloc_elemwise(node):
"""
if not isinstance(node.op, T.Elemwise):
return False
if len(node.outputs) > 1:
#This is a supposition this code make that I'm not sure is always true.
# Ensure all outputs have the same broadcast pattern
# This is a supposition that I'm not sure is always true.
assert all([list(o.type.broadcastable) == list(
node.outputs[0].type.broadcastable) for o in
node.outputs[1:]])
    # The broadcast pattern of the output must match the broadcast pattern of
# at least one of the inputs.
if not any([list(i.type.broadcastable) == list(
node.outputs[0].type.broadcastable) for i in node.inputs]):
return False
if not any([i.owner and (isinstance(i.owner.op, T.Alloc) or \
(isinstance(i.owner.op, T.DimShuffle) and
def dimshuffled_alloc(i):
return (isinstance(i.owner.op, T.DimShuffle) and
i.owner.inputs[0].owner and \
isinstance(i.owner.inputs[0].owner.op, T.Alloc)))
isinstance(i.owner.inputs[0].owner.op, T.Alloc))
# At least one input must have an owner that is either a T.Alloc or a
# T.DimShuffle with an owner that is a T.Alloc -- otherwise there is
# nothing to optimize.
if not any([i.owner
and (isinstance(i.owner.op, T.Alloc) or dimshuffled_alloc(i))
for i in node.inputs]):
return False
no_broad_idx = -1
## Search for input that we can use as a baseline for the dimensions.
assert_op_idx = -1
for idx, i in enumerate(node.inputs):
if not i.owner:
if list(i.type.broadcastable) == [False, ] * i.type.ndim:
no_broad_idx = idx
if i.type.broadcastable == node.outputs[0].type.broadcastable:
# Prefer an input that is not a T.Alloc nor a T.DimShuffle of a
# T.Alloc so that all allocs can be optimized.
if not (i.owner
and (isinstance(i.owner.op, T.Alloc)
or dimshuffled_alloc(i))):
assert_op_idx = idx
break
# It may be the case that only T.Allocs and T.DimShuffle of T.Allocs exist.
if assert_op_idx < 0:
# We want to optimize as many allocs as possible. When there is more
# than one then do all but one.
if len(node.inputs) > 1:
assert_op_idx = 0 # The first one is as good as any to use.
else:
continue
if not any(i.type.broadcastable) and not isinstance(i.owner.op,
T.Alloc):
no_broad_idx = idx
break
elif list(i.type.broadcastable) == list(
node.outputs[0].type.broadcastable) \
and not isinstance(i.owner.op, T.Alloc) \
and not (isinstance(i.owner.op, T.DimShuffle) and
i.owner.inputs[0].owner and \
isinstance(i.owner.inputs[0].owner.op, T.Alloc)):
no_broad_idx = idx
break
# When there is only one input then we can optimize if the
# broadcast patterns of the input and output match.
i = node.inputs[0]
if i.type.broadcastable == node.outputs[0].type.broadcastable:
new_i = []
if isinstance(i.owner.op, T.Alloc):
new_i.append(i.owner.inputs[0])
elif dimshuffled_alloc(i):
new_i.append(i.owner.inputs[0].owner.inputs[0])
assert(len(new_i) > 0)
return node.op(*new_i,
return_list=True)
# Otherwise nothing can be done.
return False
assert no_broad_idx >= 0
assert_op = node.inputs[no_broad_idx]
assert_op = node.inputs[assert_op_idx]
cmp_op = assert_op
new = []
new_i = []
for i in node.inputs:
# Remove alloc
if (i.owner and isinstance(i.owner.op, T.Alloc)
and i.owner.inputs[0].type != i.owner.outputs[0].type):
# when i.owner.inputs[0].type == i.owner.outputs[0].type we
# will remove that alloc later
assert i.type.ndim == cmp_op.ndim
if theano.config.experimental.local_alloc_elemwise_assert:
            if (theano.config.experimental.local_alloc_elemwise_assert
                and not node.fgraph.shape_feature.same_shape(i, cmp_op)):
assert_op = assert_(assert_op,
*[T.eq(i.shape[idx], cmp_op.shape[idx])\
for idx in xrange(i.type.ndim) \
if not i.type.broadcastable[idx]])
new.append(i.owner.inputs[0])
elif i.owner and isinstance(i.owner.op, T.DimShuffle) \
and i.owner.inputs[0].owner \
and isinstance(i.owner.inputs[0].owner.op, T.Alloc):
new_i.append(i.owner.inputs[0])
# Remove Alloc in DimShuffle
elif i.owner and dimshuffled_alloc(i):
assert i.type.ndim == cmp_op.type.ndim
if theano.config.experimental.local_alloc_elemwise_assert:
            if (theano.config.experimental.local_alloc_elemwise_assert
                and not node.fgraph.shape_feature.same_shape(i, cmp_op)):
assert_op = assert_(assert_op,
*[T.eq(i.shape[idx], cmp_op.shape[idx])
for idx in xrange(i.type.ndim)
if not i.type.broadcastable[idx]])
new.append(i.owner.inputs[0].owner.inputs[0])
new_i.append(i.owner.inputs[0].owner.inputs[0])
else:
new.append(i)
new[no_broad_idx] = assert_op
new_i.append(i)
new_i[assert_op_idx] = assert_op
if theano.config.experimental.local_alloc_elemwise_assert:
assert assert_op.owner.op is assert_
return [node.op(*new)]
return node.op(*new_i, return_list=True)
#TODO, global optimizer that lift the assert to the beginning of the graph.
#TODO, when all inputs can be optimized do all except one
#TODO, optimize all inputs when possible -- currently when all inputs have
# an alloc all but one is optimized.
theano.configparser.AddConfigVar('experimental.local_alloc_elemwise',
"If True enable the experimental optimization local_alloc_elemwise",
theano.configparser.BoolParam(False),
theano.configparser.BoolParam(
False,
        is_valid=lambda x: not x
),
in_c_key=False)
#This version if faster but not as save.
#This version is faster but not as safe.
theano.configparser.AddConfigVar('experimental.local_alloc_elemwise_assert',
"If False enable the experimental optimization local_alloc_elemwise"
" but WITHOUT assert into the graph!",
theano.configparser.BoolParam(True),
in_c_key=False)
if theano.config.experimental.local_alloc_elemwise:
#enabled by default when the lifter of assert is done.
register_specialize(local_alloc_elemwise)
else:
    #don't register them in fast_run by default to have them disabled
    #by default, as we are not sure it is
    #always a good idea to replace an alloc with multiple ops.
compile.optdb['specialize'].register("local_alloc_elemwise",
local_alloc_elemwise)
############################
# Constant Canonicalization
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论