Commit 25aca395 authored by Dustin Webb

Parameterized local_elemwise_alloc_op so as to be able to apply it to GpuAlloc.

Parent ba45997f
@@ -1606,136 +1606,141 @@ compile.optdb['specialize'].register('local_remove_all_assert',
                                      local_remove_all_assert,
                                      use_db_name_as_tag=False)
 
-@register_specialize("local_alloc_elemwise")
-@gof.local_optimizer([T.Elemwise])
-def local_elemwise_alloc(node):
-    """
-    elemwise(alloc(x, shp), ..., y.TensorType(BROADCAST CONDITION))
-      -> elemwise(x, y.TensorType(BROADCAST CONDITION))
-
-    elemwise(dimshuffle(alloc(x, shp)), ..., y.TensorType(BROADCAST CONDITION))
-      -> elemwise(x.dimshuffle(...), y.TensorType(BROADCAST CONDITION))
-
-    BROADCAST CONDITION: the input that is not to be optimized must have
-    the same broadcast pattern as the output.
-
-    We can replace the alloc with a dimshuffle, as the elemwise already
-    has the shape info. The dimshuffle will be faster to execute.
-    """
-    if not isinstance(node.op, T.Elemwise):
-        return False
-
-    if len(node.outputs) > 1:
-        # Ensure all outputs have the same broadcast pattern.
-        # This is a supposition that I'm not sure is always true.
-        assert all([o.type.broadcastable ==
-                    node.outputs[0].type.broadcastable for o in
-                    node.outputs[1:]])
-
-    # The broadcast pattern of the output must match the broadcast pattern
-    # of at least one of the inputs.
-    if not any([i.type.broadcastable ==
-                node.outputs[0].type.broadcastable for i in node.inputs]):
-        return False
-
-    def dimshuffled_alloc(i):
-        return (isinstance(i.owner.op, T.DimShuffle) and
-                i.owner.inputs[0].owner and
-                isinstance(i.owner.inputs[0].owner.op, T.Alloc))
-
-    # At least one input must have an owner that is either a T.Alloc or a
-    # T.DimShuffle with an owner that is a T.Alloc -- otherwise there is
-    # nothing to optimize.
-    if not any([i.owner
-                and (isinstance(i.owner.op, T.Alloc) or dimshuffled_alloc(i))
-                for i in node.inputs]):
-        return False
-
-    # Search for an input that we can use as a baseline for the dimensions.
-    assert_op_idx = -1
-    for idx, i in enumerate(node.inputs):
-        if i.type.broadcastable == node.outputs[0].type.broadcastable:
-            # Prefer an input that is not a T.Alloc nor a T.DimShuffle of a
-            # T.Alloc, so that all allocs can be optimized.
-            if not (i.owner
-                    and (isinstance(i.owner.op, T.Alloc)
-                         or dimshuffled_alloc(i))):
-                assert_op_idx = idx
-                break
-
-    # It may be the case that only T.Allocs and T.DimShuffles of T.Allocs
-    # exist.
-    if assert_op_idx < 0:
-        # We want to optimize as many allocs as possible. When there is
-        # more than one, do all but one.
-        # Inputs with an alloc or a dimshuffle of an alloc:
-        l2 = [i for i in node.inputs
-              if (i.owner and (isinstance(i.owner.op, T.Alloc)
-                  or dimshuffled_alloc(i)))]
-
-        # If there is only one alloc or dimshuffle of an alloc, it is the
-        # one we will use for the shape, so no alloc would be removed.
-        if len(l2) > 1:
-            # l contains the indices of the inputs whose broadcast pattern
-            # matches the output; here they are all allocs or dimshuffles
-            # of allocs. Its length is at least one, as we checked above.
-            l = [idx for idx, i in enumerate(node.inputs)
-                 if i.type.broadcastable == node.outputs[0].type.broadcastable]
-            assert_op_idx = l[0]  # The first one is as good as any to use.
-        else:
-            # Nothing would be optimized!
-            return False
-
-    assert_op = node.inputs[assert_op_idx]
-    cmp_op = assert_op
-    new_i = []
-
-    for i in node.inputs:
-        # Remove the alloc.
-        if (i.owner and isinstance(i.owner.op, T.Alloc)
-            and i.owner.inputs[0].type != i.owner.outputs[0].type):
-            # When i.owner.inputs[0].type == i.owner.outputs[0].type we
-            # will remove that alloc later.
-            assert i.type.ndim == cmp_op.ndim
-            if (theano.config.experimental.local_alloc_elemwise_assert
-                and not node.fgraph.shape_feature.same_shape(i, cmp_op)):
-                assert_op = assert_(assert_op,
-                                    *[T.eq(i.shape[idx], cmp_op.shape[idx])
-                                      for idx in xrange(i.type.ndim)
-                                      if not i.type.broadcastable[idx]])
-            new_i.append(i.owner.inputs[0])
-
-        # Remove the alloc inside the dimshuffle.
-        elif i.owner and dimshuffled_alloc(i):
-            assert i.type.ndim == cmp_op.type.ndim
-            if (theano.config.experimental.local_alloc_elemwise_assert
-                and not node.fgraph.shape_feature.same_shape(i, cmp_op)):
-                assert_op = assert_(assert_op,
-                                    *[T.eq(i.shape[idx], cmp_op.shape[idx])
-                                      for idx in xrange(i.type.ndim)
-                                      if not i.type.broadcastable[idx]])
-            alloc_input = i.owner.inputs[0].owner.inputs[0]
-            if alloc_input.ndim != i.owner.inputs[0].ndim:
-                # The alloc can add dimensions to the value. We add a
-                # dimshuffle to add them, and let later optimizations
-                # merge the multiple dimshuffles.
-                nb_dim_to_add = i.owner.inputs[0].ndim - alloc_input.ndim
-                alloc_input = alloc_input.dimshuffle(
-                    ['x'] * nb_dim_to_add + range(alloc_input.ndim))
-
-            # We need to keep the dimshuffle. It could swap axes or add
-            # dimensions anywhere.
-            new_i.append(i.owner.op(alloc_input))
-        else:
-            new_i.append(i)
-    new_i[assert_op_idx] = assert_op
-
-    return node.op(*new_i, return_list=True)
+def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
+    def local_elemwise_alloc(node):
+        """
+        elemwise(alloc(x, shp), ..., y.TensorType(BROADCAST CONDITION))
+          -> elemwise(x, y.TensorType(BROADCAST CONDITION))
+
+        elemwise(dimshuffle(alloc(x, shp)), ..., y.TensorType(BROADCAST CONDITION))
+          -> elemwise(x.dimshuffle(...), y.TensorType(BROADCAST CONDITION))
+
+        BROADCAST CONDITION: the input that is not to be optimized must
+        have the same broadcast pattern as the output.
+
+        We can replace the alloc with a dimshuffle, as the elemwise
+        already has the shape info. The dimshuffle will be faster to
+        execute.
+        """
+        if not isinstance(node.op, ElemwiseOP):
+            return False
+
+        if len(node.outputs) > 1:
+            # Ensure all outputs have the same broadcast pattern.
+            # This is a supposition that I'm not sure is always true.
+            assert all([o.type.broadcastable ==
+                        node.outputs[0].type.broadcastable for o in
+                        node.outputs[1:]])
+
+        # The broadcast pattern of the output must match the broadcast
+        # pattern of at least one of the inputs.
+        if not any([i.type.broadcastable ==
+                    node.outputs[0].type.broadcastable for i in node.inputs]):
+            return False
+
+        def dimshuffled_alloc(i):
+            return (isinstance(i.owner.op, DimShuffleOP) and
+                    i.owner.inputs[0].owner and
+                    isinstance(i.owner.inputs[0].owner.op, AllocOP))
+
+        # At least one input must have an owner that is either an AllocOP
+        # or a DimShuffleOP with an owner that is an AllocOP -- otherwise
+        # there is nothing to optimize.
+        if not any([i.owner
+                    and (isinstance(i.owner.op, AllocOP) or dimshuffled_alloc(i))
+                    for i in node.inputs]):
+            return False
+
+        # Search for an input that we can use as a baseline for the
+        # dimensions.
+        assert_op_idx = -1
+        for idx, i in enumerate(node.inputs):
+            if i.type.broadcastable == node.outputs[0].type.broadcastable:
+                # Prefer an input that is not an AllocOP nor a DimShuffleOP
+                # of an AllocOP, so that all allocs can be optimized.
+                if not (i.owner
+                        and (isinstance(i.owner.op, AllocOP)
+                             or dimshuffled_alloc(i))):
+                    assert_op_idx = idx
+                    break
+
+        # It may be the case that only AllocOPs and DimShuffleOPs of
+        # AllocOPs exist.
+        if assert_op_idx < 0:
+            # We want to optimize as many allocs as possible. When there
+            # is more than one, do all but one.
+            # Inputs with an alloc or a dimshuffle of an alloc:
+            l2 = [i for i in node.inputs
+                  if (i.owner and (isinstance(i.owner.op, AllocOP)
+                      or dimshuffled_alloc(i)))]
+
+            # If there is only one alloc or dimshuffle of an alloc, it is
+            # the one we will use for the shape, so no alloc would be
+            # removed.
+            if len(l2) > 1:
+                # l contains the indices of the inputs whose broadcast
+                # pattern matches the output; here they are all allocs or
+                # dimshuffles of allocs. Its length is at least one, as we
+                # checked above.
+                l = [idx for idx, i in enumerate(node.inputs)
+                     if i.type.broadcastable == node.outputs[0].type.broadcastable]
+                assert_op_idx = l[0]  # The first one is as good as any to use.
+            else:
+                # Nothing would be optimized!
+                return False
+
+        assert_op = node.inputs[assert_op_idx]
+        cmp_op = assert_op
+        new_i = []
+
+        for i in node.inputs:
+            # Remove the alloc.
+            if (i.owner and isinstance(i.owner.op, AllocOP)
+                and i.owner.inputs[0].type != i.owner.outputs[0].type):
+                # When i.owner.inputs[0].type == i.owner.outputs[0].type we
+                # will remove that alloc later.
+                assert i.type.ndim == cmp_op.ndim
+                if (theano.config.experimental.local_alloc_elemwise_assert
+                    and not node.fgraph.shape_feature.same_shape(i, cmp_op)):
+                    assert_op = assert_(assert_op,
+                                        *[T.eq(i.shape[idx], cmp_op.shape[idx])
+                                          for idx in xrange(i.type.ndim)
+                                          if not i.type.broadcastable[idx]])
+                new_i.append(i.owner.inputs[0])
+
+            # Remove the alloc inside the dimshuffle.
+            elif i.owner and dimshuffled_alloc(i):
+                assert i.type.ndim == cmp_op.type.ndim
+                if (theano.config.experimental.local_alloc_elemwise_assert
+                    and not node.fgraph.shape_feature.same_shape(i, cmp_op)):
+                    assert_op = assert_(assert_op,
+                                        *[T.eq(i.shape[idx], cmp_op.shape[idx])
+                                          for idx in xrange(i.type.ndim)
+                                          if not i.type.broadcastable[idx]])
+                alloc_input = i.owner.inputs[0].owner.inputs[0]
+                if alloc_input.ndim != i.owner.inputs[0].ndim:
+                    # The alloc can add dimensions to the value. We add a
+                    # dimshuffle to add them, and let later optimizations
+                    # merge the multiple dimshuffles.
+                    nb_dim_to_add = i.owner.inputs[0].ndim - alloc_input.ndim
+                    alloc_input = alloc_input.dimshuffle(
+                        ['x'] * nb_dim_to_add + range(alloc_input.ndim))
+
+                # We need to keep the dimshuffle. It could swap axes or
+                # add dimensions anywhere.
+                new_i.append(i.owner.op(alloc_input))
+            else:
+                new_i.append(i)
+        new_i[assert_op_idx] = assert_op
+
+        return node.op(*new_i, return_list=True)
+
+    return local_elemwise_alloc
 
 #TODO, global optimizer that lifts the assert to the beginning of the graph.
 #TODO, optimize all inputs when possible -- currently when all inputs have
 #      an alloc, all but one are optimized.
 
+local_elemwise_alloc = register_specialize(gof.local_optimizer([T.Elemwise])(
+    local_elemwise_alloc_op(T.Elemwise, T.Alloc, T.DimShuffle)
+))
+
 theano.configparser.AddConfigVar('experimental.local_alloc_elemwise',
                                  "DEPRECATED: If True, enable the experimental"
                                  " optimization local_alloc_elemwise."
...
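
For context, here is a minimal sketch of the rewrite this optimizer performs on the CPU side. The variable names are illustrative, and the final check assumes the specialize phase ran (e.g. the default FAST_RUN mode):

    import theano
    import theano.tensor as T

    x = T.matrix('x')
    y = T.scalar('y')

    # T.alloc broadcasts the scalar y to the shape of x; the elemwise
    # addition already carries that shape information, so the Alloc is
    # redundant and should be rewritten away by local_elemwise_alloc.
    z = x + T.alloc(y, x.shape[0], x.shape[1])

    f = theano.function([x, y], z)
    # After optimization, no Alloc node should remain in the compiled graph.
    assert not any(isinstance(n.op, T.Alloc)
                   for n in f.maker.fgraph.toposort())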
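The assert_ calls in both branches only trigger when the graph's shape feature cannot prove that the removed alloc and the baseline input have the same shape. A sketch of such a case, assuming theano.config.experimental.local_alloc_elemwise_assert is enabled:

    import theano
    import theano.tensor as T

    s = T.scalar('s')
    y = T.vector('y')
    n = T.iscalar('n')

    # The shape (n,) of T.alloc(s, n) cannot be proven equal to y.shape,
    # so the rewrite elemwise(alloc(s, n), y) -> elemwise(s, y) is guarded:
    # the baseline input y is wrapped in assert_(y, T.eq(n, y.shape[0])),
    # turning a silent shape mismatch into a runtime error.
    z = y + T.alloc(s, n)
    f = theano.function([s, y, n], z)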
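The point of the parameterization is that the same closure can now be instantiated for the GPU ops, per the commit message. The commit itself only touches the CPU side; a GPU registration might look like the following sketch, where GpuElemwise, GpuAlloc and GpuDimShuffle are the existing ops in theano.sandbox.cuda.basic_ops and the registration name and tags are hypothetical:

    from theano import gof
    from theano.sandbox.cuda.basic_ops import (GpuElemwise, GpuAlloc,
                                               GpuDimShuffle)
    from theano.sandbox.cuda.opt import gpu_optimizer
    from theano.tensor.opt import local_elemwise_alloc_op

    # Instantiate the generic optimizer for the GPU ops and register it
    # with the GPU optimizer database.
    local_gpu_elemwise_alloc = gof.local_optimizer([GpuElemwise])(
        local_elemwise_alloc_op(GpuElemwise, GpuAlloc, GpuDimShuffle)
    )
    gpu_optimizer.register('local_gpu_elemwise_alloc',
                           local_gpu_elemwise_alloc, 'fast_run')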