Commit c13853ad authored by Frédéric Bastien

Merge pull request #3392 from nouiz/aalmah-elemwise_opt

elemwise opt
......@@ -89,7 +89,7 @@ _logger = logging.getLogger('theano.scan_module.scan_opt')
list_opt_slice = [tensor.opt.local_abs_merge,
tensor.opt.local_mul_switch_sink,
tensor.opt.local_upcast_elemwise_constant_inputs,
tensor.opt.local_remove_switch_const_cond,
tensor.opt.local_useless_switch,
tensor.opt.constant_folding]
......
......@@ -1554,9 +1554,24 @@ def local_useless_elemwise(node):
mul(x) -> x
add(x) -> x
identity(x) -> x
and(x,1) -> x
and(x,0) -> zeros_like(x)
or(x,0) -> x
or(x,1) -> ones_like(x)
xor(x,x) -> zeros_like(x)
"""
if isinstance(node.op, T.Elemwise):
def zeros_like(node, in_idx):
# it is the same var in the graph. That will always be true
return [T.fill(node.inputs[in_idx],
T.constant(0.0, dtype=node.outputs[0].type.dtype))]
def ones_like(node, in_idx):
# it is the same var in the graph. That will always be true
return [T.fill(node.inputs[in_idx],
T.constant(1.0, dtype=node.outputs[0].type.dtype))]
if node.op.scalar_op == theano.scalar.eq and len(node.inputs) == 2:
if node.inputs[0] == node.inputs[1]:
# it is the same var in the graph. That will always be true
......@@ -1581,14 +1596,57 @@ def local_useless_elemwise(node):
elif node.op.scalar_op == theano.scalar.mul and len(node.inputs) == 1:
# No need to copy over any stack trace
return [node.inputs[0]]
elif node.op.scalar_op == theano.scalar.add and len(node.inputs) == 1:
# No need to copy over any stack trace
return [node.inputs[0]]
elif (node.op.scalar_op == theano.scalar.identity and
len(node.inputs) == 1):
# No need to copy over any stack trace
return [node.inputs[0]]
elif (isinstance(node.op.scalar_op, scalar.AND) and
len(node.inputs) == 2):
if isinstance(node.inputs[0], T.TensorConstant):
const_val = T.extract_constant(node.inputs[0])
if not isinstance(const_val, Variable):
if const_val == 0:
return zeros_like(node, 1)
else:
return [node.inputs[1]]
if isinstance(node.inputs[1], T.TensorConstant):
const_val = T.extract_constant(node.inputs[1])
if not isinstance(const_val, Variable):
if const_val == 0:
return zeros_like(node, 0)
else:
return [node.inputs[0]]
elif (isinstance(node.op.scalar_op, scalar.OR) and
len(node.inputs) == 2):
if isinstance(node.inputs[0], T.TensorConstant):
const_val = T.extract_constant(node.inputs[0])
if not isinstance(const_val, Variable):
if const_val == 0:
return [node.inputs[1]]
else:
return ones_like(node, 1)
if isinstance(node.inputs[1], T.TensorConstant):
const_val = T.extract_constant(node.inputs[1])
if not isinstance(const_val, Variable):
if const_val == 0:
return [node.inputs[0]]
else:
return ones_like(node, 0)
elif (isinstance(node.op.scalar_op, scalar.XOR) and
len(node.inputs) == 2):
if node.inputs[0] is node.inputs[1]:
return zeros_like(node, 0)
@register_specialize
@gof.local_optimizer([T.Elemwise])
......@@ -2389,7 +2447,7 @@ def merge_two_slices(slice1, len1, slice2, len2):
"""
list_opt = [local_abs_merge, local_mul_switch_sink,
local_upcast_elemwise_constant_inputs,
local_remove_switch_const_cond, constant_folding]
local_useless_switch, constant_folding]
if type(slice1) is not slice:
raise ValueError(('First provided slice should actually be of type'
......@@ -2767,10 +2825,11 @@ def local_inplace_setsubtensor(node):
"""
if isinstance(node.op, IncSubtensor) and not node.op.inplace:
dta = node.op.destroyhandler_tolerate_aliased
new_op = node.op.__class__(
node.op.idx_list, inplace=True,
set_instead_of_inc=node.op.set_instead_of_inc,
destroyhandler_tolerate_aliased=node.op.destroyhandler_tolerate_aliased)
destroyhandler_tolerate_aliased=dta)
new_node = new_op(*node.inputs)
return [new_node]
return False
......@@ -3206,15 +3265,18 @@ def local_join_make_vector(node):
# Switch opts #
###############
@register_canonicalize
@register_canonicalize('fast_compile', 'local_remove_switch_const_cond')
@register_specialize
@gof.local_optimizer([T.Elemwise])
def local_remove_switch_const_cond(node):
def local_useless_switch(node):
"""
This optimization makes the following changes in the graph:
T.switch(cond,left,right) -->
if cond is constant and cond == 0: right
if cond is constant and cond != 0: left
if left is right -> left
T.switch(le(shape_i{id}(X), 0), 0, shape_i{id}(X)) -> shape_i{id}(X)
"""
if (isinstance(node.op, T.Elemwise) and
isinstance(node.op.scalar_op, scalar.basic.Switch)):
......@@ -3235,7 +3297,25 @@ def local_remove_switch_const_cond(node):
out = T.alloc(out, *[node.outputs[0].shape[i] for i
in xrange(out.ndim)])
return [out]
# if left is right -> left
if node.inputs[1] is node.inputs[2]:
return [node.inputs[1]]
# This case happens with scan.
# Elemwise{switch}(le(shape_i{id}(X), 0), 0, shape_i{id}(X)) -> shape_i{id}(X)
left = node.inputs[1]
right = node.inputs[2]
cond_var = node.inputs[0]
if cond_var.owner and \
isinstance(cond_var.owner.op, T.Elemwise) and \
isinstance(cond_var.owner.op.scalar_op, scalar.LE) and \
cond_var.owner.inputs[0].owner and \
isinstance(cond_var.owner.inputs[0].owner.op, Shape_i) and \
T.extract_constant(cond_var.owner.inputs[1]) == 0 and \
T.extract_constant(left) == 0 and \
right is cond_var.owner.inputs[0]:
assert right.type == node.outputs[0].type
return [right]
return False
return False
......@@ -4136,6 +4216,110 @@ def local_elemwise_sub_zeros(node):
return [T.zeros_like(node.inputs[0])]
@register_specialize
@register_stabilize
@register_canonicalize
@gof.local_optimizer([T.Elemwise])
def local_useless_elemwise_comparison(node):
    """Replace elemwise comparisons whose result is statically known.

    # Comparing to itself is constant
    Elemwise[{LT,GT}](X, X) -> Elemwise[zeros](X)
    Elemwise[{LE,GE}](X, X) -> Elemwise[ones](X)
    Elemwise[{minimum,maximum}](X, X) -> X

    # Comparing shape to 0 can be constant
    Elemwise[LT](X.shape[i], 0) -> Elemwise[zeros](X)
    Elemwise[GE](X.shape[i], 0) -> Elemwise[ones](X)
    Elemwise[maximum](X.shape[i], 0) -> X.shape[i]
    Elemwise[maximum](0, X.shape[i]) -> X.shape[i]
    Elemwise[minimum](X.shape[i], 0) -> 0
    Elemwise[minimum](0, X.shape[i]) -> 0

    # The shape can be replaced with sum of shapes
    Elemwise[LT](add([anything that is shapes]), 0) -> Elemwise[zeros](X)
    Elemwise[GE](add([anything that is shapes]), 0) -> Elemwise[ones](X)

    :note: These cases appear in the graph generated by scan.
           These optimizations will make the graph easier to read.
    """
    if not isinstance(node.op, T.Elemwise):
        return
    if node.op.scalar_op.nin != 2:
        return

    def _is_shape_i(var):
        # True iff `var` is directly the output of a Shape_i op.
        # (Shapes are non-negative, which is what justifies the rewrites.)
        return var.owner is not None and isinstance(var.owner.op, Shape_i)

    def _is_zero_const(var):
        # True iff `var` wraps the constant 0.  extract_constant returns a
        # plain scalar/ndarray for constants and a Variable otherwise; the
        # `== 0` comparison is then False for Variables.
        return T.extract_constant(var) == 0

    def _is_sum_of_shapes(var):
        # True iff `var` is an elemwise add whose inputs are all Shape_i
        # outputs (a sum of shapes is also non-negative).
        return (var.owner is not None and
                isinstance(var.owner.op, Elemwise) and
                isinstance(var.owner.op.scalar_op, scalar.Add) and
                all(_is_shape_i(v) for v in var.owner.inputs))

    x, y = node.inputs
    op = node.op.scalar_op
    out_dtype = node.outputs[0].dtype

    # Elemwise[{LT,GT}](X, X) -> Elemwise[zeros](X)
    if isinstance(op, (scalar.LT, scalar.GT)) and x is y:
        return [T.zeros_like(x, dtype=out_dtype)]
    # Elemwise[{LE,GE}](X, X) -> Elemwise[ones](X)
    if isinstance(op, (scalar.LE, scalar.GE)) and x is y:
        return [T.ones_like(x, dtype=out_dtype)]
    # Elemwise[{minimum,maximum}](X, X) -> X
    if isinstance(op, (scalar.Minimum, scalar.Maximum)) and x is y:
        return [x]

    # Elemwise[LT](X.shape[i], 0) -> Elemwise[zeros](X)
    if isinstance(op, scalar.LT) and _is_shape_i(x) and _is_zero_const(y):
        return [T.zeros_like(x, dtype=out_dtype)]
    # Elemwise[GE](X.shape[i], 0) -> Elemwise[ones](X)
    if isinstance(op, scalar.GE) and _is_shape_i(x) and _is_zero_const(y):
        return [T.ones_like(x, dtype=out_dtype)]
    # Elemwise[maximum](X.shape[i], 0) -> X.shape[i]
    if isinstance(op, scalar.Maximum) and _is_shape_i(x) and _is_zero_const(y):
        return [x]
    # Elemwise[maximum](0, X.shape[i]) -> X.shape[i]
    if isinstance(op, scalar.Maximum) and _is_zero_const(x) and _is_shape_i(y):
        return [y]
    # Elemwise[minimum](X.shape[i], 0) -> 0
    if isinstance(op, scalar.Minimum) and _is_shape_i(x) and _is_zero_const(y):
        return [T.zeros_like(x, dtype=out_dtype)]
    # Elemwise[minimum](0, X.shape[i]) -> 0
    if isinstance(op, scalar.Minimum) and _is_zero_const(x) and _is_shape_i(y):
        return [T.zeros_like(y, dtype=out_dtype)]

    # Elemwise[LT](add([shapes]), 0) -> Elemwise[zeros](X)
    if isinstance(op, scalar.LT) and _is_sum_of_shapes(x) and _is_zero_const(y):
        return [T.zeros_like(x, dtype=out_dtype)]
    # Elemwise[GE](add([shapes]), 0) -> Elemwise[ones](X)
    if isinstance(op, scalar.GE) and _is_sum_of_shapes(x) and _is_zero_const(y):
        return [T.ones_like(x, dtype=out_dtype)]
    return
@register_canonicalize
@register_specialize
@gof.local_optimizer([T.Sum, T.elemwise.Prod])
......
......@@ -3135,6 +3135,201 @@ def test_local_fill_useless():
assert T.Alloc in ops
f(m_, x_)
class Test_local_useless_elemwise_comparison(unittest.TestCase):
    def test_local_useless_elemwise_comparison(self):
        # TODO: test each case individually.
        # The following case is what made me discover those cases.
        X = T.matrix('X')
        Y = T.vector('Y')
        X_sum, updates = theano.scan(fn=lambda x: x.sum(),
                                     outputs_info=None,
                                     sequences=[X],
                                     non_sequences=None)
        Z = X_sum + Y
        theano.printing.debugprint(Z)
        # here is the output for the debug print:
        """
        Elemwise{add,no_inplace} [@A] ''
        |for{cpu,scan_fn} [@B] ''
        | |Subtensor{int64} [@C] ''
        | | |Shape [@D] ''
        | | | |Subtensor{int64::} [@E] 'X[0:]'
        | | | |X [@F]
        | | | |Constant{0} [@G]
        | | |Constant{0} [@H]
        | |Subtensor{:int64:} [@I] ''
        | | |Subtensor{int64::} [@E] 'X[0:]'
        | | |ScalarFromTensor [@J] ''
        | | |Subtensor{int64} [@C] ''
        | |Subtensor{int64} [@C] ''
        |Y [@K]

        Inner graphs of the scan ops:

        for{cpu,scan_fn} [@B] ''
        >Sum{acc_dtype=float64} [@L] ''
        > |X[t] [@M] -> [@I]
        """
        mode = theano.compile.get_default_mode().excluding('fusion')
        f = theano.function([X, Y], Z, mode=mode)
        theano.printing.debugprint(f, print_type=True)
        # here is the output for the debug print:
        """
        Elemwise{Add}[(0, 0)] [@A] <TensorType(float64, vector)> '' 7
        |for{cpu,scan_fn} [@B] <TensorType(float64, vector)> '' 6
        | |Shape_i{0} [@C] <TensorType(int64, scalar)> '' 0
        | | |X [@D] <TensorType(float64, matrix)>
        | |Subtensor{int64:int64:int8} [@E] <TensorType(float64, matrix)> '' 5
        | | |X [@D] <TensorType(float64, matrix)>
        | | |ScalarFromTensor [@F] <int64> '' 4
        | | | |Elemwise{switch,no_inplace} [@G] <TensorType(int64, scalar)> '' 3
        | | | |Elemwise{le,no_inplace} [@H] <TensorType(int8, scalar)> '' 2
        | | | | |Shape_i{0} [@C] <TensorType(int64, scalar)> '' 0
        | | | | |TensorConstant{0} [@I] <TensorType(int8, scalar)>
        | | | |TensorConstant{0} [@I] <TensorType(int8, scalar)>
        | | | |TensorConstant{0} [@J] <TensorType(int64, scalar)>
        | | |ScalarFromTensor [@K] <int64> '' 1
        | | | |Shape_i{0} [@C] <TensorType(int64, scalar)> '' 0
        | | |Constant{1} [@L] <int8>
        | |Shape_i{0} [@C] <TensorType(int64, scalar)> '' 0
        |Y [@M] <TensorType(float64, vector)>

        Inner graphs of the scan ops:

        for{cpu,scan_fn} [@B] <TensorType(float64, vector)> ''
        >Sum{acc_dtype=float64} [@N] <TensorType(float64, scalar)> ''
        > |X[t] [@O] <TensorType(float64, vector)> -> [@E]
        """

    def assert_eqs_const(self, f, val):
        # The compiled graph must be a single deep-copy of a tensor
        # constant equal to `val`.
        nodes = f.maker.fgraph.toposort()
        node = nodes[0]
        assert len(nodes) == 1, nodes
        assert node.op == deep_copy_op, node.op
        assert len(node.inputs) == 1, node.inputs
        assert isinstance(node.inputs[0], T.TensorConstant), node
        assert T.extract_constant(node.inputs[0]) == val, val

    def assert_identity(self, f):
        # The compiled graph must be a single deep-copy of its input.
        nodes = f.maker.fgraph.toposort()
        assert len(nodes) == 1
        assert nodes[0].op == deep_copy_op
        x_val = 10
        assert f(x_val) == x_val

    def test_inequality_with_self(self):
        x = T.scalar('x', dtype=config.floatX)
        mode = theano.compile.get_default_mode().including(
            'local_useless_elemwise_comparison')
        # Strict comparisons of x with itself are constant 0, non-strict
        # ones are constant 1.
        for cmp_op, expected in [(T.lt, 0), (T.le, 1), (T.gt, 0), (T.ge, 1)]:
            f = theano.function([x], cmp_op(x, x), mode=mode)
            self.assert_eqs_const(f, expected)
        # minimum/maximum of x with itself is x.
        for extremum in (T.minimum, T.maximum):
            f = theano.function([x], extremum(x, x), mode=mode)
            self.assert_identity(f)

    def test_shape_inequality_with_self(self):
        x = T.vector('x', dtype=config.floatX)
        mode = theano.compile.get_default_mode().including(
            'local_useless_elemwise_comparison',
            'local_shape_to_shape_i',
            'local_track_shape_i',
            'local_subtensor_make_vector')
        # A shape is never negative.
        f = theano.function([x], T.lt(x.shape[0], 0), mode=mode)
        self.assert_eqs_const(f, 0)
        f = theano.function([x], T.ge(x.shape[0], 0), mode=mode)
        self.assert_eqs_const(f, 1)

        # maximum(shape, 0) and maximum(0, shape) reduce to the shape itself.
        x_val = numpy.ones(100, dtype=config.floatX)
        for args in [(x.shape[0], 0), (0, x.shape[0])]:
            f = theano.function([x], T.maximum(*args), mode=mode)
            nodes = f.maker.fgraph.toposort()
            assert len(nodes) == 1
            assert isinstance(nodes[0].op, Shape_i), nodes[0].op
            assert f(x_val) == x_val.shape[0]

        # minimum(shape, 0) and minimum(0, shape) reduce to constant 0.
        f = theano.function([x], T.minimum(x.shape[0], 0), mode=mode)
        self.assert_eqs_const(f, 0)
        f = theano.function([x], T.minimum(0, x.shape[0]), mode=mode)
        self.assert_eqs_const(f, 0)

    def test_shape_add_inequality(self):
        x = T.vector('x', dtype=config.floatX)
        mode = theano.compile.get_default_mode().including(
            'local_useless_elemwise_comparison',
            'local_shape_to_shape_i',
            'local_track_shape_i',
            'local_subtensor_make_vector')
        y = T.vector('y', dtype=config.floatX)
        # A sum of shapes is also never negative.
        f = theano.function([x, y], T.lt(x.shape[0] + y.shape[0], 0),
                            mode=mode)
        self.assert_eqs_const(f, 0)
        f = theano.function([x, y], T.ge(x.shape[0] + y.shape[0], 0),
                            mode=mode)
        self.assert_eqs_const(f, 1)

    def test_and(self):
        mode = theano.compile.get_default_mode().including('canonicalize')
        x = T.scalar('x', dtype='int8')
        # and(x, 0) is constant 0; and(x, 1) is x.  Both argument orders.
        for args in [(x, 0), (0, x)]:
            f = theano.function([x], T.and_(*args), mode=mode)
            self.assert_eqs_const(f, 0)
        for args in [(x, 1), (1, x)]:
            f = theano.function([x], T.and_(*args), mode=mode)
            self.assert_identity(f)

    def test_or(self):
        mode = theano.compile.get_default_mode().including('canonicalize')
        x = T.scalar('x', dtype='int8')
        # or(x, 1) is constant 1; or(x, 0) is x.  Both argument orders.
        for args in [(x, 1), (1, x)]:
            f = theano.function([x], T.or_(*args), mode=mode)
            self.assert_eqs_const(f, 1)
        for args in [(x, 0), (0, x)]:
            f = theano.function([x], T.or_(*args), mode=mode)
            self.assert_identity(f)

    def test_xor(self):
        mode = theano.compile.get_default_mode().including('canonicalize')
        x = T.scalar('x', dtype='int8')
        # xor(x, x) is constant 0.
        f = theano.function([x], T.xor(x, x), mode=mode)
        self.assert_eqs_const(f, 0)
class Test_local_useless_alloc(unittest.TestCase):
def setUp(self):
......@@ -4446,6 +4641,53 @@ class test_local_remove_switch_const_cond(unittest.TestCase):
vy = numpy.array([[7, 8, 9], [10, 11, 12]], dtype=dtype2)
assert numpy.all(f(vx, vy) == vx)
def test_left_is_right(self):
    # When both branches of a switch are the very same variable, the
    # switch must be removed no matter what the condition is: constant
    # true, constant false, or fully symbolic.
    for dtype1 in ['int32', 'int64']:
        x = theano.tensor.matrix('x', dtype=dtype1)
        varc = theano.tensor.matrix('varc', dtype=dtype1)
        out_true = theano.tensor.switch(1, x, x)
        out_false = theano.tensor.switch(0, x, x)
        out_sym = theano.tensor.switch(varc, x, x)
        f1 = theano.function([x], out_true, mode=self.mode)
        f0 = theano.function([x], out_false, mode=self.mode)
        f2 = theano.function([x, varc], out_sym, mode=self.mode)

        # Each compiled graph must reduce to a single deep-copy of x.
        for f in (f1, f0, f2):
            nodes = f.maker.fgraph.toposort()
            assert len(nodes) == 1
            assert nodes[0].op == deep_copy_op

        vx = numpy.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1)
        vc = numpy.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1)
        assert numpy.all(f1(vx) == vx)
        assert numpy.all(f0(vx) == vx)
        assert numpy.all(f2(vx, vc) == vx)
def test_shape_le_0(self):
    # switch(le(shape_i(X), 0), 0, shape_i(X)) must collapse to
    # shape_i(X): a shape is never negative, so the condition can only
    # pick the branch that is already the shape (or an equal 0).
    vx_shape = (0, 5)
    for dtype1 in ['float32', 'float64']:
        x = theano.tensor.matrix('x', dtype=dtype1)
        vx = numpy.random.randn(*vx_shape).astype(dtype1)
        for axis in (0, 1):
            z = theano.tensor.switch(
                theano.tensor.le(x.shape[axis], 0), 0, x.shape[axis])
            f = theano.function([x], z, mode=self.mode)
            # The whole expression must have been replaced by Shape_i.
            assert isinstance(f.maker.fgraph.toposort()[0].op, Shape_i)
            assert f(vx) == vx_shape[axis]
def test_broadcast1(self):
# test switch(cst, matrix, row)
x = theano.tensor.matrix('x', dtype='int32')
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register or sign in to comment