提交 2036661e authored 作者: abergeron's avatar abergeron

Merge pull request #2070 from nouiz/opt_inc_sub

Opt inc sub
...@@ -811,8 +811,8 @@ class ConvOp(OpenMPOp): ...@@ -811,8 +811,8 @@ class ConvOp(OpenMPOp):
shuffled_kerns.name = 'shuffled_for_conv3D(%s)' % flipped_kerns.name shuffled_kerns.name = 'shuffled_for_conv3D(%s)' % flipped_kerns.name
tmp_node = theano.tensor.nnet.conv3D( tmp_node = theano.tensor.nnet.conv3D(
V = shuffled_inputs, V=shuffled_inputs,
W= shuffled_kerns, W=shuffled_kerns,
b=theano.tensor.alloc(numpy.asarray(0, dtype=kerns.dtype), b=theano.tensor.alloc(numpy.asarray(0, dtype=kerns.dtype),
kerns.shape[0]), kerns.shape[0]),
d=(self.dx, self.dy, 1)) d=(self.dx, self.dy, 1))
......
...@@ -799,7 +799,21 @@ class ShapeFeature(object): ...@@ -799,7 +799,21 @@ class ShapeFeature(object):
# #
# worst case, we loop over shape_of and replace things # worst case, we loop over shape_of and replace things
raise NotImplementedError(s_i) raise NotImplementedError(s_i)
elif s_i.type.dtype[:3] in ('int', 'uint'):
# s_i is x.shape[i], we change it to Shape_i.
if (s_i.owner and
isinstance(s_i.owner.op, Subtensor) and
s_i.owner.inputs[0].owner and
isinstance(s_i.owner.inputs[0].owner.op, T.Shape)):
assert s_i.ndim == 0
assert len(s_i.owner.inputs) == 2
try:
i = get_scalar_constant_value(s_i.owner.inputs[1])
s_i = Shape_i(i)(s_i.owner.inputs[0].owner.inputs[0])
except NotScalarConstantError:
pass
if s_i.type.dtype[:3] in ('int', 'uint'):
if getattr(s_i.type, 'ndim', 0): if getattr(s_i.type, 'ndim', 0):
raise TypeError('Shape element must be scalar', s_i) raise TypeError('Shape element must be scalar', s_i)
return s_i return s_i
...@@ -1131,6 +1145,40 @@ class ShapeFeature(object): ...@@ -1131,6 +1145,40 @@ class ShapeFeature(object):
self.set_shape_i(v, ii, new_r) self.set_shape_i(v, ii, new_r)
self.shape_of_reverse_index[r] = set() self.shape_of_reverse_index[r] = set()
def same_shape(self, x, y):
    """Return True if we are able to assert that x and y have the
    same shape.

    The answer is conservative: a falsy result means "could not prove
    the shapes are equal", not "the shapes are known to differ".

    :param x: key looked up in ``self.shape_of``.
    :param y: key looked up in ``self.shape_of``.
    :return: True only when *every* pair of corresponding dimensions
        was shown to be equal; False otherwise.
    """
    sx = self.shape_of[x]
    sy = self.shape_of[y]
    if sx is None or sy is None:
        # No shape information recorded for at least one of them.
        return False
    assert len(sx) == len(sy)

    for dx, dy in zip(sx, sy):
        if dx is dy:
            # Same object for this dimension: nothing to prove.
            continue
        # Need to try to find that they are the same shape. We
        # need to compare the full graph. It could be slow. So for
        # now only the Shape_i case is implemented.
        if not dx.owner or not dy.owner:
            return False
        opx = dx.owner.op
        opy = dy.owner.op
        if not (isinstance(opx, Shape_i) and isinstance(opy, Shape_i)):
            return False
        if not (opx.i == opy.i):
            return False
        # FB: not sure this handles constants correctly.
        if dx.owner.inputs[0] == dy.owner.inputs[0]:
            # This dimension matches; the remaining dimensions still
            # have to be checked, so do not return yet.
            continue
        # To be sure to cover all cases, call equal_computations.
        # Can't use theano.gof.graph.is_same_graph(dx, dy), as it
        # currently expects that dx and dy aren't in a FunctionGraph.
        from theano.scan_module.scan_utils import equal_computations
        if not equal_computations([dx], [dy]):
            return False
    # Every dimension pair was proven equal.
    return True
class ShapeOptimizer(Optimizer): class ShapeOptimizer(Optimizer):
"""Optimizer that serves to add ShapeFeature as an fgraph feature. """Optimizer that serves to add ShapeFeature as an fgraph feature.
...@@ -1640,6 +1688,54 @@ def local_upcast_elemwise_constant_inputs(node): ...@@ -1640,6 +1688,54 @@ def local_upcast_elemwise_constant_inputs(node):
################## ##################
@register_canonicalize
@register_specialize
@gof.local_optimizer([IncSubtensor])
def local_useless_inc_subtensor(node):
    """Drop an IncSubtensor that overwrites its whole first input.

    The rewrite applies to a "set" node, or to an "inc" node whose first
    input is the scalar constant zero, when every index is a full slice
    (optionally with a constant step of -1) and the fgraph's shape
    feature can prove both inputs have the same shape.

    Returns a one-element replacement list, or None when the node is
    left untouched.
    """
    if not isinstance(node.op, IncSubtensor):
        return

    target, value = node.inputs[0], node.inputs[1]

    if node.op.set_instead_of_inc is False:
        # A real increment: only useless when incrementing zeros.
        try:
            init = get_scalar_constant_value(target)
        except NotScalarConstantError:
            return
        if init != 0:
            return

    # FB: did not check whether differing ndim/broadcast patterns can
    # actually occur here, but this optimization does not support them.
    if target.ndim != value.ndim:
        return
    if target.broadcastable != value.broadcastable:
        return

    # From here on we have a SetSubtensor, or an IncSubtensor on zeros.
    # Substitute the constant inputs into the index list so we can tell
    # whether all of the original data gets replaced.
    resolved_idx = theano.tensor.subtensor.get_idx_list(node.inputs[1:],
                                                        node.op.idx_list)

    def spans_whole_axis(entry):
        # Full slice, either forward (no step) or with a constant -1 step.
        if not isinstance(entry, slice):
            return False
        if entry.start is not None or entry.stop is not None:
            return False
        return entry.step is None or T.extract_constant(entry.step) == -1

    if not all(spans_whole_axis(entry) for entry in resolved_idx):
        return

    # IncSubtensor broadcasts node.inputs[1] onto node.inputs[0] based on
    # run time shapes, so the shapes must be proven identical.
    if not hasattr(node.fgraph, 'shape_feature'):
        return
    if not node.fgraph.shape_feature.same_shape(target, value):
        return

    no_step_anywhere = all(entry.step is None
                           for entry in node.op.idx_list)
    if no_step_anywhere:
        # No reversal and same shape: the new value is the whole result.
        return [value]
    # Some axis has a step: apply the same indexing to the new value.
    return [Subtensor(node.op.idx_list)(*node.inputs[1:])]
@register_canonicalize @register_canonicalize
@register_specialize @register_specialize
@gof.local_optimizer([Subtensor]) @gof.local_optimizer([Subtensor])
...@@ -3366,11 +3462,17 @@ ALL_REDUCE = [T.elemwise.CAReduce, T.elemwise.All, T.elemwise.Any, ...@@ -3366,11 +3462,17 @@ ALL_REDUCE = [T.elemwise.CAReduce, T.elemwise.All, T.elemwise.Any,
T.elemwise.Sum, T.elemwise.Prod, T.elemwise.Sum, T.elemwise.Prod,
T.elemwise.ProdWithoutZeros] T.elemwise.ProdWithoutZeros]
@register_canonicalize @register_canonicalize
@register_uncanonicalize # Needed for MaxAndArgmax -> CAReduce @register_uncanonicalize # Needed for MaxAndArgmax -> CAReduce
@gof.local_optimizer(ALL_REDUCE) @gof.local_optimizer(ALL_REDUCE)
def local_reduce_join(node): def local_reduce_join(node):
"""Max(Join(a,b), axis=0) -> Maximum(a,b) """ """Reduce{scalar.op}(Join(a, b), axis=0) -> Elemwise{scalar.op}(a, b)
:note: supported scalar.op are Maximum, Minimum in some cases and
Add and Mul in all cases.
"""
if (isinstance(node.op, T.CAReduce) and if (isinstance(node.op, T.CAReduce) and
node.inputs[0].owner and node.inputs[0].owner and
isinstance(node.inputs[0].owner.op, T.Join)): isinstance(node.inputs[0].owner.op, T.Join)):
...@@ -3385,6 +3487,9 @@ def local_reduce_join(node): ...@@ -3385,6 +3487,9 @@ def local_reduce_join(node):
return return
elif not isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul)): elif not isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul)):
return return
elif len(join.inputs) <= 2:
# This is a useless join, that will get removed by another opt.
return
new_inp = [] new_inp = []
for inp in join.inputs[1:]: for inp in join.inputs[1:]:
......
...@@ -1571,6 +1571,53 @@ def test_log_add(): ...@@ -1571,6 +1571,53 @@ def test_log_add():
#TODO: (write and) test that the optimization works with Sum in addition to working with Add. #TODO: (write and) test that the optimization works with Sum in addition to working with Add.
def test_local_useless_inc_subtensor():
    """Check when local_useless_inc_subtensor removes an IncSubtensor.

    Covers full slices (forward and reversed) with and without shape
    information, preservation of run time shape errors and of
    broadcasting, and a strided slice that must not be optimized.
    """
    x = tensor.matrix('x')
    y = tensor.matrix('y')
    for sub in [slice(None), slice(None, None, -1)]:
        o = tensor.set_subtensor(x[::, sub], y)
        f = theano.function([x, y], o)
        o_shape = tensor.set_subtensor(x[::, sub],
                                       tensor.specify_shape(y, x.shape))
        f_shape = theano.function([x, y], o_shape)

        # Test with shape info
        topo = f_shape.maker.fgraph.toposort()
        assert not any(isinstance(n.op, tensor.IncSubtensor) for n in topo)
        out = f_shape([[2, 3]], [[3, 4]])
        assert (out == numpy.asarray([[3, 4]])[::, sub]).all()

        # Test that without shape info, we don't apply the opt.
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 1
        assert isinstance(topo[0].op, tensor.IncSubtensor)
        out = f([[2, 3]], [[3, 4]])
        assert (out == numpy.asarray([[3, 4]])[::, sub]).all()

        # Test that we don't remove shape error.
        # The failure is raised from the ``else`` clause: an
        # ``assert False`` inside the ``try`` body would be swallowed by
        # the ``except AssertionError`` below and could never fail.
        try:
            f([[2, 3]], [[3, 4], [4, 5]])
        except (ValueError, AssertionError):
            pass
        else:
            raise AssertionError(
                'Expected a shape error for mismatched inputs')

        # Test that we don't remove broadcastability
        out = f([[2, 3], [3, 4]], [[5, 6]])
        assert (out == numpy.asarray([[5, 6], [5, 6]])[::, sub]).all()

    # Test that we do not optimize others strides even when sub and y
    # have same shapes
    sub = x[::, ::2]
    o_shape = tensor.set_subtensor(sub,
                                   tensor.specify_shape(y, sub.shape))
    f_shape = theano.function([x, y], o_shape)
    topo = f_shape.maker.fgraph.toposort()
    assert any(isinstance(n.op, tensor.IncSubtensor) for n in topo)
    out = f_shape([[2, 3, 6, 7]], [[8, 9]])
    assert (out == numpy.asarray([[8, 3, 9, 7]])).all()
def test_local_useless_subtensor(): def test_local_useless_subtensor():
x = tensor.matrix('x') x = tensor.matrix('x')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论