提交 3262c424 作者: Pascal Lamblin

Expand local_useless_reshape

Also move introduction of dimshuffle into another optimization, and update tests
上级 1a620aad
...@@ -39,7 +39,7 @@ from theano import scalar ...@@ -39,7 +39,7 @@ from theano import scalar
from theano.scalar import basic from theano.scalar import basic
from theano.tensor import basic as T from theano.tensor import basic as T
from theano import compile # to register the optimizer built by this file from theano import compile # to register the optimizer built by this file
from theano.compile.ops import Shape_i from theano.compile.ops import Shape, Shape_i
from theano.tensor.type import (values_eq_approx_remove_inf, from theano.tensor.type import (values_eq_approx_remove_inf,
values_eq_approx_remove_nan, values_eq_approx_remove_nan,
values_eq_approx_remove_inf_nan) values_eq_approx_remove_inf_nan)
...@@ -4176,7 +4176,96 @@ def local_useless_reshape(node): ...@@ -4176,7 +4176,96 @@ def local_useless_reshape(node):
Remove two kinds of useless reshape. Remove two kinds of useless reshape.
Remove Reshape when both the input and output have a single dimension. Remove Reshape when both the input and output have a single dimension.
Remove Reshape when reshaping to the shape of the input.
"""
op = node.op
if not isinstance(op, Reshape):
return False
input = node.inputs[0]
output = node.outputs[0]
output_shape = node.inputs[1]
if input.ndim != output.ndim:
return False
# Simple case: both input and output have a single dimension
if (input.ndim == 1 and output.ndim == 1 and
input.broadcastable == output.broadcastable):
return [input]
# Second case: all the shapes match the input shape
# Match Reshape(x, x.shape)
if output_shape.owner and isinstance(output_shape.owner.op, Shape):
shape_input = output_shape.owner.inputs[0]
if shape_input == input:
return [input]
# Match Reshape(x, [x.shape[0], ..., x.shape[-1]]), accounting for
# broadcastable and constant dimensions
if output_shape.owner and isinstance(output_shape.owner.op, MakeVector):
output_shape_is = output_shape.owner.inputs
if not hasattr(node, 'fgraph'):
shape_feature = None
else:
shape_feature = getattr(node.fgraph, 'shape_feature', None)
shape_match = [False] * input.ndim
for dim in xrange(input.ndim):
outshp_i = output_shape_is[dim]
# Match Shape_i{dim}(input)
if (outshp_i.owner and isinstance(outshp_i.owner.op, Shape_i) and
outshp_i.owner.op.i == dim and
outshp_i.owner.inputs[0] == input):
shape_match[dim] = True
continue
# Match Shape(input)[dim]
if (outshp_i.owner and isinstance(outshp_i.owner.op, Subtensor) and
len(outshp_i.owner.inputs) == 2 and
extract_constant(outshp_i.owner.inputs[1]) == dim):
subtensor_inp = outshp_i.owner.inputs[0]
if (subtensor_inp.owner and
isinstance(subtensor_inp.owner.op, Shape)):
shape_input_i = subtensor_inp.owner.inputs[0]
if shape_input_i == input:
shape_match[dim] = True
continue
# Match 1 if input.broadcastable[dim] is True
if (input.broadcastable[dim] and
extract_constant(outshp_i, only_process_constants=1) == 1):
shape_match[dim] = True
continue
# Match shape_of[input][dim] or its constant equivalent
if shape_feature:
inpshp_i = shape_feature.get_shape(input, dim)
if (inpshp_i == outshp_i or
(extract_constant(inpshp_i, only_process_constants=1) ==
extract_constant(outshp_i, only_process_constants=1))):
shape_match[dim] = True
continue
if all(shape_match):
return [input]
# TODO later: if all the shapes except one match, we may want to
# consider it useless as well, like we do in the 1-dim case.
@register_canonicalize
@gof.local_optimizer([T.Reshape])
def local_reshape_to_dimshuffle(node):
"""
Broadcastable dimensions in Reshape are replaced with dimshuffle. Broadcastable dimensions in Reshape are replaced with dimshuffle.
The goal is to avoid using reshape to add or remove broadcastable
dimensions, but use dimshuffle instead, so dimshuffles can cancel out
or be removed later on.
For example: For example:
- reshape(v, (m,)) --> v # if v.ndim == 1 - reshape(v, (m,)) --> v # if v.ndim == 1
- reshape(x, (1, n)) --> dimshuffle{x,0}(reshape(x, (n,)) - reshape(x, (1, n)) --> dimshuffle{x,0}(reshape(x, (n,))
...@@ -4192,10 +4281,6 @@ def local_useless_reshape(node): ...@@ -4192,10 +4281,6 @@ def local_useless_reshape(node):
output = node.outputs[0] output = node.outputs[0]
output_shape = node.inputs[1] output_shape = node.inputs[1]
if (input.ndim == 1 and output.ndim == 1 and
input.broadcastable == output.broadcastable):
return [input]
dimshuffle_new_order = [] dimshuffle_new_order = []
new_output_shape = [] new_output_shape = []
index = 0 # index over the output of the new reshape index = 0 # index over the output of the new reshape
......
...@@ -35,6 +35,7 @@ from theano.tensor.opt import ( ...@@ -35,6 +35,7 @@ from theano.tensor.opt import (
local_useless_alloc, local_useless_alloc,
local_greedy_distributor, local_greedy_distributor,
local_useless_reshape, local_useless_reshape,
local_reshape_to_dimshuffle,
mul_canonizer, mul_canonizer,
out2in, out2in,
Shape_i, Shape_i,
...@@ -6179,7 +6180,28 @@ class Test_local_useless_reshape(unittest.TestCase): ...@@ -6179,7 +6180,28 @@ class Test_local_useless_reshape(unittest.TestCase):
assert not any(isinstance(n.op, tensor.basic.Reshape) for n in topo) assert not any(isinstance(n.op, tensor.basic.Reshape) for n in topo)
def test_1(self): def test_1(self):
reshape_lift = out2in(local_useless_reshape) x = theano.tensor.matrix('x')
r = x.reshape(x.shape)
m0 = theano.compile.get_default_mode()
m1 = m0.including('local_useless_reshape')
f1 = theano.function([x], r, mode=m1)
topo = f1.maker.fgraph.toposort()
assert not any(isinstance(n.op, tensor.basic.Reshape) for n in topo)
m2 = m1.excluding('ShapeOpt')
f2 = theano.function([x], r, mode=m2)
topo = f2.maker.fgraph.toposort()
assert not any(isinstance(n.op, tensor.basic.Reshape) for n in topo)
class Test_local_reshape_to_dimshuffle(unittest.TestCase):
def setUp(self):
self.rng = numpy.random.RandomState(utt.fetch_seed())
def test_1(self):
reshape_lift = out2in(local_reshape_to_dimshuffle)
useless_reshape = out2in(local_useless_reshape)
x = shared(self.rng.randn(4,)) x = shared(self.rng.randn(4,))
y = shared(self.rng.randn(5, 6)) y = shared(self.rng.randn(5, 6))
reshape_x = tensor.reshape(x, (1, 4)) reshape_x = tensor.reshape(x, (1, 4))
...@@ -6194,6 +6216,7 @@ class Test_local_useless_reshape(unittest.TestCase): ...@@ -6194,6 +6216,7 @@ class Test_local_useless_reshape(unittest.TestCase):
"TensorConstant{[1 5 1 6 1 1]})]")) "TensorConstant{[1 5 1 6 1 1]})]"))
reshape_lift.optimize(g) reshape_lift.optimize(g)
useless_reshape.optimize(g)
self.assertTrue(str(g) == "[DimShuffle{x,0}" self.assertTrue(str(g) == "[DimShuffle{x,0}"
"(<TensorType(float64, vector)>), " "(<TensorType(float64, vector)>), "
"DimShuffle{x,0,x,1,x,x}" "DimShuffle{x,0,x,1,x,x}"
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论