提交 b22ae136 authored 作者: Harm de Vries's avatar Harm de Vries

Some graph optimizations are not working

上级 4a2b55eb
...@@ -572,6 +572,10 @@ softmax_op = Softmax() ...@@ -572,6 +572,10 @@ softmax_op = Softmax()
def softmax_graph(c): def softmax_graph(c):
return tensor.exp(c) / tensor.exp(c).sum(axis=-1, keepdims=True) return tensor.exp(c) / tensor.exp(c).sum(axis=-1, keepdims=True)
def softmax(c):
    """Apply softmax to `c`, padding a 1-D input up to a 1-row matrix first.

    `softmax_graph` (and the underlying Softmax op) operates on 2-D input,
    so a vector is promoted via `tensor.shape_padleft` before dispatch.
    """
    padded = tensor.shape_padleft(c, n_ones=1) if c.ndim == 1 else c
    return softmax_graph(padded)
@opt.register_specialize('fast_compile_gpu') @opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax_op]) @gof.local_optimizer([softmax_op])
...@@ -640,7 +644,7 @@ def softmax_simplifier(numerators, denominators): ...@@ -640,7 +644,7 @@ def softmax_simplifier(numerators, denominators):
if not numerator.type.dtype.startswith('float'): if not numerator.type.dtype.startswith('float'):
continue continue
if not numerator.type.broadcastable == (False, False): if numerator.ndim != 2:
continue continue
if numerator.owner and numerator.owner.op == tensor.exp: if numerator.owner and numerator.owner.op == tensor.exp:
x = numerator.owner.inputs[0] x = numerator.owner.inputs[0]
...@@ -667,6 +671,7 @@ def softmax_simplifier(numerators, denominators): ...@@ -667,6 +671,7 @@ def softmax_simplifier(numerators, denominators):
numerators.remove(numerator) numerators.remove(numerator)
denominators.remove(matching_denom) denominators.remove(matching_denom)
numerators.append(softmax_op(x)) numerators.append(softmax_op(x))
return numerators, denominators return numerators, denominators
opt.local_mul_canonizer.add_simplifier(softmax_simplifier, opt.local_mul_canonizer.add_simplifier(softmax_simplifier,
'softmax_simplifier') 'softmax_simplifier')
...@@ -728,7 +733,7 @@ if 0: ...@@ -728,7 +733,7 @@ if 0:
rest.append(add_in) rest.append(add_in)
# print 'maybe_ds =', maybe_ds # print 'maybe_ds =', maybe_ds
# if maybe_ds: # if maybe_ds:
# print 'maybe_ds.ndim =', maybe_ds.ndim, ', maybe_sm.ndim =', maybe_sm.ndim # print 'maybe_ds.ndim =', maybe_ds.ndim, ', maybe_sm.ndim =', maybe_sm.ndim
continue continue
if maybe_sm is mul_inputs[0]: if maybe_sm is mul_inputs[0]:
......
...@@ -21,7 +21,7 @@ from theano.tensor.nnet import (categorical_crossentropy, ...@@ -21,7 +21,7 @@ from theano.tensor.nnet import (categorical_crossentropy,
CrossentropySoftmaxArgmax1HotWithBias, CrossentropySoftmaxArgmax1HotWithBias,
CrossentropyCategorical1Hot, CrossentropyCategorical1Hot,
CrossentropyCategorical1HotGrad, CrossentropyCategorical1HotGrad,
sigmoid, softplus, Softmax, sigmoid, softplus, Softmax, softmax,
softmax_op, softmax_graph, SoftmaxWithBias, softmax_op, softmax_graph, SoftmaxWithBias,
softmax_grad, softmax_grad,
softmax_with_bias, SoftmaxGrad, softmax_with_bias, SoftmaxGrad,
...@@ -612,10 +612,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -612,10 +612,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Basic case # Basic case
expressions = [ expressions = [
T.sum(-T.log(softmax_graph(x)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(x))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax_graph(x))[T.arange(y.shape[0]), y]) T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y])
] ]
for expr in expressions: for expr in expressions:
# Verify the optimizer worked on the expressions # Verify the optimizer worked on the expressions
...@@ -650,10 +650,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -650,10 +650,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax_graph(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax_graph(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
...@@ -683,10 +683,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -683,10 +683,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that using "mean" instead of sum works, too # Test that using "mean" instead of sum works, too
mean_expressions = [ mean_expressions = [
T.mean(-T.log(softmax_graph(x)[T.arange(y.shape[0]), y])), T.mean(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax_graph(x)[T.arange(y.shape[0]), y])), -T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax_graph(x))[T.arange(y.shape[0]), y]), -T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax_graph(x))[T.arange(y.shape[0]), y])] T.mean(-T.log(softmax(x))[T.arange(y.shape[0]), y])]
for expr in mean_expressions: for expr in mean_expressions:
f = theano.function([x, y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
...@@ -720,10 +720,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -720,10 +720,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise raise
mean_bias_expressions = [ mean_bias_expressions = [
T.mean(-T.log(softmax_graph(x + b)[T.arange(y.shape[0]), y])), T.mean(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax_graph(b + x)[T.arange(y.shape[0]), y])), -T.mean(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax_graph(x + b))[T.arange(y.shape[0]), y]), -T.mean(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax_graph(b + x))[T.arange(y.shape[0]), y])] T.mean(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in mean_bias_expressions: for expr in mean_bias_expressions:
f = theano.function([x, b, y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
...@@ -764,10 +764,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -764,10 +764,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
y = T.lvector('y') y = T.lvector('y')
yi = T.cast(y, 'int32') yi = T.cast(y, 'int32')
expressions = [ expressions = [
T.sum(-T.log(softmax_graph(x)[T.arange(yi.shape[0]), yi])), T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
-T.sum(T.log(softmax_graph(x)[T.arange(yi.shape[0]), yi])), -T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
-T.sum(T.log(softmax_graph(x))[T.arange(yi.shape[0]), yi]), -T.sum(T.log(softmax(x))[T.arange(yi.shape[0]), yi]),
T.sum(-T.log(softmax_graph(x))[T.arange(yi.shape[0]), yi]) T.sum(-T.log(softmax(x))[T.arange(yi.shape[0]), yi])
] ]
for expr in expressions: for expr in expressions:
...@@ -815,8 +815,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -815,8 +815,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax_graph(x)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(x)[T.arange(y.shape[0]), y]))] -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
...@@ -862,10 +862,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -862,10 +862,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax_graph(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax_graph(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
...@@ -923,10 +923,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -923,10 +923,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax_graph(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax_graph(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y_], expr, mode=mode) f = theano.function([x, b, y_], expr, mode=mode)
...@@ -985,10 +985,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -985,10 +985,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax_graph(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax_graph(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y_], expr, mode=mode) f = theano.function([x, b, y_], expr, mode=mode)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论