提交 f9fc5dfd authored 作者: Dustin Webb's avatar Dustin Webb

Updated local_alloc_elemwise to remove all allocs when possible and to add…

Updated local_alloc_elemwise to remove all allocs when possible and to add assert only when we cannot determine whether the shapes will be correct. Deprecated experimental.local_alloc_elemwise and enabled local_alloc_elemwise by default.
上级 795ded70
...@@ -1529,6 +1529,7 @@ def local_remove_useless_assert(node): ...@@ -1529,6 +1529,7 @@ def local_remove_useless_assert(node):
return [assert_(node.inputs[0], *cond)] return [assert_(node.inputs[0], *cond)]
@register_specialize
@gof.local_optimizer([T.Elemwise]) @gof.local_optimizer([T.Elemwise])
def local_alloc_elemwise(node): def local_alloc_elemwise(node):
""" """
...@@ -1536,7 +1537,7 @@ def local_alloc_elemwise(node): ...@@ -1536,7 +1537,7 @@ def local_alloc_elemwise(node):
-> elemwise(x, y.TensorType(no broadcast flag)) -> elemwise(x, y.TensorType(no broadcast flag))
elemwise(dimshuffle(alloc(x, shp)),... ,y.TensorType(BROADCAST CONDITION)) elemwise(dimshuffle(alloc(x, shp)),... ,y.TensorType(BROADCAST CONDITION))
-> elemwise(x, y.TensorType(no broadcast flag)) -> elemwise(x.dimshuffle(...), y.TensorType(no broadcast flag))
BROADCAST CONDITION: the condition is that the one input that is BROADCAST CONDITION: the condition is that the one input that is
not to be optimized has the same broadcast pattern as the not to be optimized has the same broadcast pattern as the
...@@ -1548,99 +1549,124 @@ def local_alloc_elemwise(node): ...@@ -1548,99 +1549,124 @@ def local_alloc_elemwise(node):
""" """
if not isinstance(node.op, T.Elemwise): if not isinstance(node.op, T.Elemwise):
return False return False
if len(node.outputs) > 1: if len(node.outputs) > 1:
#This is a supposition this code make that I'm not sure is always true. # Ensure all outputs have the same broadcast pattern
# This is a supposition that I'm not sure is always true.
assert all([list(o.type.broadcastable) == list( assert all([list(o.type.broadcastable) == list(
node.outputs[0].type.broadcastable) for o in node.outputs[0].type.broadcastable) for o in
node.outputs[1:]]) node.outputs[1:]])
# The broadcast pattern of the output must match the broadcast pattern of
# at least one of the inputs.
if not any([list(i.type.broadcastable) == list( if not any([list(i.type.broadcastable) == list(
node.outputs[0].type.broadcastable) for i in node.inputs]): node.outputs[0].type.broadcastable) for i in node.inputs]):
return False return False
if not any([i.owner and (isinstance(i.owner.op, T.Alloc) or \
(isinstance(i.owner.op, T.DimShuffle) and def dimshuffled_alloc(i):
i.owner.inputs[0].owner and \ return (isinstance(i.owner.op, T.DimShuffle) and
isinstance(i.owner.inputs[0].owner.op, T.Alloc))) i.owner.inputs[0].owner and \
isinstance(i.owner.inputs[0].owner.op, T.Alloc))
# At least one input must have an owner that is either a T.Alloc or a
# T.DimShuffle with an owner that is a T.Alloc -- otherwise there is
# nothing to optimize.
if not any([i.owner
and (isinstance(i.owner.op, T.Alloc) or dimshuffled_alloc(i))
for i in node.inputs]): for i in node.inputs]):
return False return False
no_broad_idx = -1
## Search for input that we can use as a baseline for the dimensions.
assert_op_idx = -1
for idx, i in enumerate(node.inputs): for idx, i in enumerate(node.inputs):
if not i.owner: if i.type.broadcastable == node.outputs[0].type.broadcastable:
if list(i.type.broadcastable) == [False, ] * i.type.ndim: # Prefer an input that is not a T.Alloc nor a T.DimShuffle of a
no_broad_idx = idx # T.Alloc so that all allocs can be optimized.
if not (i.owner
and (isinstance(i.owner.op, T.Alloc)
or dimshuffled_alloc(i))):
assert_op_idx = idx
break break
else:
continue
if not any(i.type.broadcastable) and not isinstance(i.owner.op,
T.Alloc):
no_broad_idx = idx
break
elif list(i.type.broadcastable) == list(
node.outputs[0].type.broadcastable) \
and not isinstance(i.owner.op, T.Alloc) \
and not (isinstance(i.owner.op, T.DimShuffle) and
i.owner.inputs[0].owner and \
isinstance(i.owner.inputs[0].owner.op, T.Alloc)):
no_broad_idx = idx
break
assert no_broad_idx >= 0 # It may be the case that only T.Allocs and T.DimShuffle of T.Allocs exist.
assert_op = node.inputs[no_broad_idx] if assert_op_idx < 0:
# We want to optimize as many allocs as possible. When there is more
# than one then do all but one.
if len(node.inputs) > 1:
assert_op_idx = 0 # The first one is as good as any to use.
else:
# When there is only one input then we can optimize if the
# broadcast patterns of the input and output match.
i = node.inputs[0]
if i.type.broadcastable == node.outputs[0].type.broadcastable:
new_i = []
if isinstance(i.owner.op, T.Alloc):
new_i.append(i.owner.inputs[0])
elif dimshuffled_alloc(i):
new_i.append(i.owner.inputs[0].owner.inputs[0])
assert(len(new_i) > 0)
return node.op(*new_i,
return_list=True)
# Otherwise nothing can be done.
return False
assert_op = node.inputs[assert_op_idx]
cmp_op = assert_op cmp_op = assert_op
new = [] new_i = []
for i in node.inputs: for i in node.inputs:
# Remove alloc
if (i.owner and isinstance(i.owner.op, T.Alloc) if (i.owner and isinstance(i.owner.op, T.Alloc)
and i.owner.inputs[0].type != i.owner.outputs[0].type): and i.owner.inputs[0].type != i.owner.outputs[0].type):
# when i.owner.inputs[0].type == i.owner.outputs[0].type we # when i.owner.inputs[0].type == i.owner.outputs[0].type we
# will remove that alloc later # will remove that alloc later
assert i.type.ndim == cmp_op.ndim assert i.type.ndim == cmp_op.ndim
if theano.config.experimental.local_alloc_elemwise_assert: if (theano.config.experimental.local_alloc_elemwise_assert
and node.fgraph.shape_feature.same_shape(i, cmp_op)):
assert_op = assert_(assert_op, assert_op = assert_(assert_op,
*[T.eq(i.shape[idx], cmp_op.shape[idx])\ *[T.eq(i.shape[idx], cmp_op.shape[idx])\
for idx in xrange(i.type.ndim) \ for idx in xrange(i.type.ndim) \
if not i.type.broadcastable[idx]]) if not i.type.broadcastable[idx]])
new.append(i.owner.inputs[0]) new_i.append(i.owner.inputs[0])
elif i.owner and isinstance(i.owner.op, T.DimShuffle) \
and i.owner.inputs[0].owner \ # Remove Alloc in DimShuffle
and isinstance(i.owner.inputs[0].owner.op, T.Alloc): elif i.owner and dimshuffled_alloc(i):
assert i.type.ndim == cmp_op.type.ndim assert i.type.ndim == cmp_op.type.ndim
if theano.config.experimental.local_alloc_elemwise_assert: if (theano.config.experimental.local_alloc_elemwise_assert
and node.fgraph.shape_feature.same_shape(i, cmp_op)):
assert_op = assert_(assert_op, assert_op = assert_(assert_op,
*[T.eq(i.shape[idx], cmp_op.shape[idx]) *[T.eq(i.shape[idx], cmp_op.shape[idx])
for idx in xrange(i.type.ndim) for idx in xrange(i.type.ndim)
if not i.type.broadcastable[idx]]) if not i.type.broadcastable[idx]])
new.append(i.owner.inputs[0].owner.inputs[0]) new_i.append(i.owner.inputs[0].owner.inputs[0])
else: else:
new.append(i) new_i.append(i)
new[no_broad_idx] = assert_op new_i[assert_op_idx] = assert_op
if theano.config.experimental.local_alloc_elemwise_assert: if theano.config.experimental.local_alloc_elemwise_assert:
assert assert_op.owner.op is assert_ assert assert_op.owner.op is assert_
return [node.op(*new)] return node.op(*new_i, return_list=True)
#TODO, global optimizer that lift the assert to the beginning of the graph. #TODO, global optimizer that lift the assert to the beginning of the graph.
#TODO, when all inputs can be optimized do all except one #TODO, optimize all inputs when possible -- currently when all inputs have
# an alloc all but one is optimized.
theano.configparser.AddConfigVar('experimental.local_alloc_elemwise', theano.configparser.AddConfigVar('experimental.local_alloc_elemwise',
"If True enable the experimental optimization local_alloc_elemwise", "If True enable the experimental optimization local_alloc_elemwise",
theano.configparser.BoolParam(False), theano.configparser.BoolParam(
in_c_key=False) False,
#This version if faster but not as save. is_valid=lambda x: return not x
),
in_c_key=False)
#This version is faster but not as safe.
theano.configparser.AddConfigVar('experimental.local_alloc_elemwise_assert', theano.configparser.AddConfigVar('experimental.local_alloc_elemwise_assert',
"If False enable the experimental optimization local_alloc_elemwise" "If False enable the experimental optimization local_alloc_elemwise"
" but WITHOUT assert into the graph!", " but WITHOUT assert into the graph!",
theano.configparser.BoolParam(True), theano.configparser.BoolParam(True),
in_c_key=False) in_c_key=False)
if theano.config.experimental.local_alloc_elemwise:
#enabled by default when the lifter of assert is done.
register_specialize(local_alloc_elemwise)
else:
#don't register it in fast_run by default, so that it stays
#disabled by default, as we are not sure it is
#always a good idea to replace an alloc with multiple ops.
compile.optdb['specialize'].register("local_alloc_elemwise",
local_alloc_elemwise)
############################ ############################
# Constant Canonicalization # Constant Canonicalization
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论