Commit 280f0c80 authored by nouiz

Merge pull request #1089 from lamblin/fix_softmaxgrad

Make opt work when shape has been specified.
@@ -1378,7 +1378,21 @@ def local_argmax_pushdown(node):
 def _check_rows_is_arange_len_labels(rows, labels):
-    '''Check that 'rows' is the same node as T.arange(labels.shape[0])'''
+    '''Check that 'rows' is the same node as T.arange(labels.shape[0])
+
+    Also considers the case where labels.shape[0] is constant and equal
+    to 1, and T.arange(labels.shape[0]) has been constant-folded into 0.
+    '''
+    if labels.owner and hasattr(labels.owner.fgraph, 'shape_feature'):
+        shape_of = labels.owner.fgraph.shape_feature.shape_of
+        # TODO: consider cases where shape_of[labels] is constant, and
+        # has a value different from 1.
+        # This case is harder, as _is_const only accepts a scalar value
+        # as second argument, so checking for
+        # _is_const(rows, numpy.arange(...)) does not work for the moment.
+        if len(shape_of[labels]) == 1 and _is_const(shape_of[labels][0], 1):
+            return _is_const(rows, 0)
     if rows.owner and isinstance(rows.owner.op, tensor.ARange):
         start, stop, step = rows.owner.inputs
......
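The case this hunk handles, as a minimal sketch (it assumes a working Theano install; the variable names mirror test_optimize_xent_vector4 below and are otherwise illustrative). Once y is declared to have length 1, T.arange(y.shape[0]) is constant-folded into the constant [0] before the cross-entropy optimization runs, so the old check, which only matched a live ARange node, rejected the pattern:

    import numpy
    import theano
    import theano.tensor as T
    from theano.tensor.nnet import softmax

    x = T.vector('x')
    y_ = T.lvector('y_')
    # Declaring the length lets the shape feature infer y.shape[0] == 1,
    # which in turn lets arange(y.shape[0]) be constant-folded into [0].
    y = T.specify_shape(y_, (1,))
    nll = T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y]))
    f = theano.function([x, y_], nll)
    f(numpy.random.randn(5).astype(theano.config.floatX),
      numpy.asarray([2]))

With the new branch in _check_rows_is_arange_len_labels, a constant 0 in place of 'rows' is accepted whenever labels is known to have length 1, so a graph like the one above should compile to the fused cross-entropy op rather than an explicit AdvancedSubtensor, as the assertions in test_optimize_xent_vector4 below check.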
@@ -779,13 +779,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
         x = T.vector('x')
         y = T.lvector('y')
-        def print_graph(func):
-            for i, node in enumerate(func.maker.fgraph.toposort()):
-                print i, node
-            # Last node should be the output
-            print i, printing.pprint(node.outputs[0])
-            print
         ## Test that a biased softmax is optimized correctly
         bias_expressions = [
             T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
@@ -794,7 +787,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
         for expr in bias_expressions:
             f = theano.function([x, y], expr, mode=mode)
             if verbose:
-                print_graph(f)
+                printing.debugprint(f)
             try:
                 ops = [node.op for node in f.maker.fgraph.toposort()]
                 assert len(ops) == 5
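An aside on the mechanical change in these test hunks: each removed print_graph helper is superseded by theano.printing.debugprint, which already renders a compiled function's optimized graph. A one-line sketch of the standalone equivalent (assuming f is any compiled Theano function):

    theano.printing.debugprint(f)  # textual dump of the optimized graph, one node per line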
@@ -807,7 +800,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
                 raise
             g = theano.function([x, y], T.grad(expr, x), mode=mode)
             if verbose:
-                print_graph(g)
+                printing.debugprint(g)
             try:
                 ops = [node.op for node in g.maker.fgraph.toposort()]
                 assert len(ops) == 4
@@ -833,13 +826,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
         b = T.vector('b')
         y = T.lvector('y')
-        def print_graph(func):
-            for i, node in enumerate(func.maker.fgraph.toposort()):
-                print i, node
-            # Last node should be the output
-            print i, printing.pprint(node.outputs[0])
-            print
         ## Test that a biased softmax is optimized correctly
         bias_expressions = [
             T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
@@ -850,7 +836,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
         for expr in bias_expressions:
             f = theano.function([x, b, y], expr, mode=mode)
             if verbose:
-                print_graph(f)
+                printing.debugprint(f)
             try:
                 ops = [node.op for node in f.maker.fgraph.toposort()]
                 # [big_op, sum, dim_shuffle]
@@ -871,7 +857,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
                 config.warn.sum_div_dimshuffle_bug = backup
             if verbose:
-                print_graph(g)
+                printing.debugprint(g)
             try:
                 ops = [node.op for node in g.maker.fgraph.toposort()]
                 assert len(ops) <= 6
@@ -885,7 +871,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
     def test_optimize_xent_vector3(self):
         # Same as test_optimize_xent_vector2, but y is the result of
-        # a "flatten", and it somehow makes the constant-folding
+        # a "flatten", and it used to make the constant-folding
         # of arange(y.shape[0]) happen before the xent optimization
         verbose = 0
         mode = theano.compile.mode.get_default_mode()
@@ -901,13 +887,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
         y_ = T.lvector('y_')
         y = y_.flatten()
-        def print_graph(func):
-            for i, node in enumerate(func.maker.fgraph.toposort()):
-                print i, node
-            # Last node should be the output
-            print i, printing.pprint(node.outputs[0])
-            print
         ## Test that a biased softmax is optimized correctly
         bias_expressions = [
             T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
@@ -918,7 +897,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
         for expr in bias_expressions:
             f = theano.function([x, b, y_], expr, mode=mode)
             if verbose:
-                print_graph(f)
+                printing.debugprint(f)
             try:
                 ops = [node.op for node in f.maker.fgraph.toposort()]
                 # [big_op, sum, dim_shuffle, flatten]
@@ -939,7 +918,69 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
                 config.warn.sum_div_dimshuffle_bug = backup
             if verbose:
-                print_graph(g)
+                printing.debugprint(g)
             try:
                 ops = [node.op for node in g.maker.fgraph.toposort()]
                 assert len(ops) <= 6
                 assert crossentropy_softmax_1hot_with_bias_dx in ops
                 assert softmax_with_bias in ops
                 assert softmax_grad not in ops
                 g(x_val, b_val, y_val)
             except Exception:
                 theano.printing.debugprint(g)
                 raise
+
+    def test_optimize_xent_vector4(self):
+        # Same as test_optimize_xent_vector2, but y is the result of
+        # a "specify_shape" that indicates its length is 1, so the
+        # constant-folding of arange(y.shape[0]) happens before the
+        # xent optimization
+        verbose = 0
+        mode = theano.compile.mode.get_default_mode()
+        if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
+            mode = 'FAST_RUN'
+        rng = numpy.random.RandomState(utt.fetch_seed())
+        x_val = rng.randn(5).astype(config.floatX)
+        b_val = rng.randn(5).astype(config.floatX)
+        y_val = numpy.asarray([2])
+
+        x = T.vector('x')
+        b = T.vector('b')
+        y_ = T.lvector('y_')
+        y = T.specify_shape(y_, (1,))
+
+        ## Test that a biased softmax is optimized correctly
+        bias_expressions = [
+            T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
+            -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
+            -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
+            T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
+
+        for expr in bias_expressions:
+            f = theano.function([x, b, y_], expr, mode=mode)
+            if verbose:
+                printing.debugprint(f)
+            try:
+                ops = [node.op for node in f.maker.fgraph.toposort()]
+                # [big_op, sum, dim_shuffle, specify_shape]
+                assert len(ops) <= 4
+                assert crossentropy_softmax_argmax_1hot_with_bias in ops
+                assert not [1 for o in ops
+                            if isinstance(o, T.AdvancedSubtensor)]
+                f(x_val, b_val, y_val)
+            except Exception:
+                theano.printing.debugprint(f)
+                raise
+
+            backup = config.warn.sum_div_dimshuffle_bug
+            config.warn.sum_div_dimshuffle_bug = False
+            try:
+                g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
+            finally:
+                config.warn.sum_div_dimshuffle_bug = backup
+            if verbose:
+                printing.debugprint(g)
+            try:
+                ops = [node.op for node in g.maker.fgraph.toposort()]
+                assert len(ops) <= 6
@@ -964,12 +1005,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
         y = T.lvector('y')
         a = T.scalar('a')
-        def print_graph(func):
-            for i, node in enumerate(func.maker.fgraph.toposort()):
-                print i, node
-            # Last node should be the output
-            print i, pprint(node.outputs[0])
         def validate_fn_graph(func):
             # The graph of the function should not have softmax anymore
             has_cx1hot = False
......