提交 4a2b55eb 作者: Harm de Vries

Replace softmax op with softmax graph, add test for 2nd derivative

上级 9f000926
...@@ -738,7 +738,7 @@ class T_Scan(unittest.TestCase): ...@@ -738,7 +738,7 @@ class T_Scan(unittest.TestCase):
def forward_scanner(x_t): def forward_scanner(x_t):
a2_t = tensor.dot(x_t, W) a2_t = tensor.dot(x_t, W)
y_t = tensor.nnet.softmax(a2_t) y_t = tensor.nnet.softmax_graph(a2_t)
return y_t return y_t
y, _ = theano.scan(fn=forward_scanner, sequences=x, y, _ = theano.scan(fn=forward_scanner, sequences=x,
......
...@@ -570,7 +570,7 @@ class Softmax(gof.Op): ...@@ -570,7 +570,7 @@ class Softmax(gof.Op):
softmax_op = Softmax() softmax_op = Softmax()
def softmax_graph(c): def softmax_graph(c):
return tensor.exp(c) / tensor.exp(c).sum(axis=1, keepdims=True) return tensor.exp(c) / tensor.exp(c).sum(axis=-1, keepdims=True)
@opt.register_specialize('fast_compile_gpu') @opt.register_specialize('fast_compile_gpu')
...@@ -666,7 +666,7 @@ def softmax_simplifier(numerators, denominators): ...@@ -666,7 +666,7 @@ def softmax_simplifier(numerators, denominators):
if matching_denom: if matching_denom:
numerators.remove(numerator) numerators.remove(numerator)
denominators.remove(matching_denom) denominators.remove(matching_denom)
numerators.append(softmax(x)) numerators.append(softmax_op(x))
return numerators, denominators return numerators, denominators
opt.local_mul_canonizer.add_simplifier(softmax_simplifier, opt.local_mul_canonizer.add_simplifier(softmax_simplifier,
'softmax_simplifier') 'softmax_simplifier')
......
...@@ -21,8 +21,8 @@ from theano.tensor.nnet import (categorical_crossentropy, ...@@ -21,8 +21,8 @@ from theano.tensor.nnet import (categorical_crossentropy,
CrossentropySoftmaxArgmax1HotWithBias, CrossentropySoftmaxArgmax1HotWithBias,
CrossentropyCategorical1Hot, CrossentropyCategorical1Hot,
CrossentropyCategorical1HotGrad, CrossentropyCategorical1HotGrad,
sigmoid, softplus, sigmoid, softplus, Softmax,
Softmax, softmax_op, softmax_graph, SoftmaxWithBias, softmax_op, softmax_graph, SoftmaxWithBias,
softmax_grad, softmax_grad,
softmax_with_bias, SoftmaxGrad, softmax_with_bias, SoftmaxGrad,
Prepend_scalar_constant_to_each_row, Prepend_scalar_constant_to_each_row,
...@@ -74,7 +74,7 @@ class T_Softmax(utt.InferShapeTester): ...@@ -74,7 +74,7 @@ class T_Softmax(utt.InferShapeTester):
admat = matrix() admat = matrix()
admat_val = numpy.random.rand(3, 4).astype(config.floatX) admat_val = numpy.random.rand(3, 4).astype(config.floatX)
self._compile_and_check([admat], [Softmax()(admat)], self._compile_and_check([admat], [Softmax()(admat)],
[admat_val], Softmax) [admat_val], Softmax)
def test_vector(self): def test_vector(self):
x = T.vector() x = T.vector()
...@@ -612,10 +612,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -612,10 +612,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Basic case # Basic case
expressions = [ expressions = [
T.sum(-T.log(softmax_op(x)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax_graph(x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_op(x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax_graph(x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_op(x))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax_graph(x))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax_op(x))[T.arange(y.shape[0]), y]) T.sum(-T.log(softmax_graph(x))[T.arange(y.shape[0]), y])
] ]
for expr in expressions: for expr in expressions:
# Verify the optimizer worked on the expressions # Verify the optimizer worked on the expressions
...@@ -650,10 +650,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -650,10 +650,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax_op(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax_graph(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_op(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax_graph(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_op(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax_graph(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax_op(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax_graph(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
...@@ -683,10 +683,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -683,10 +683,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that using "mean" instead of sum works, too # Test that using "mean" instead of sum works, too
mean_expressions = [ mean_expressions = [
T.mean(-T.log(softmax_op(x)[T.arange(y.shape[0]), y])), T.mean(-T.log(softmax_graph(x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax_op(x)[T.arange(y.shape[0]), y])), -T.mean(T.log(softmax_graph(x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax_op(x))[T.arange(y.shape[0]), y]), -T.mean(T.log(softmax_graph(x))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax_op(x))[T.arange(y.shape[0]), y])] T.mean(-T.log(softmax_graph(x))[T.arange(y.shape[0]), y])]
for expr in mean_expressions: for expr in mean_expressions:
f = theano.function([x, y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
...@@ -720,10 +720,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -720,10 +720,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise raise
mean_bias_expressions = [ mean_bias_expressions = [
T.mean(-T.log(softmax_op(x + b)[T.arange(y.shape[0]), y])), T.mean(-T.log(softmax_graph(x + b)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax_op(b + x)[T.arange(y.shape[0]), y])), -T.mean(T.log(softmax_graph(b + x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax_op(x + b))[T.arange(y.shape[0]), y]), -T.mean(T.log(softmax_graph(x + b))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax_op(b + x))[T.arange(y.shape[0]), y])] T.mean(-T.log(softmax_graph(b + x))[T.arange(y.shape[0]), y])]
for expr in mean_bias_expressions: for expr in mean_bias_expressions:
f = theano.function([x, b, y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
...@@ -764,10 +764,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -764,10 +764,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
y = T.lvector('y') y = T.lvector('y')
yi = T.cast(y, 'int32') yi = T.cast(y, 'int32')
expressions = [ expressions = [
T.sum(-T.log(softmax_op(x)[T.arange(yi.shape[0]), yi])), T.sum(-T.log(softmax_graph(x)[T.arange(yi.shape[0]), yi])),
-T.sum(T.log(softmax_op(x)[T.arange(yi.shape[0]), yi])), -T.sum(T.log(softmax_graph(x)[T.arange(yi.shape[0]), yi])),
-T.sum(T.log(softmax_op(x))[T.arange(yi.shape[0]), yi]), -T.sum(T.log(softmax_graph(x))[T.arange(yi.shape[0]), yi]),
T.sum(-T.log(softmax_op(x))[T.arange(yi.shape[0]), yi]) T.sum(-T.log(softmax_graph(x))[T.arange(yi.shape[0]), yi])
] ]
for expr in expressions: for expr in expressions:
...@@ -815,8 +815,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -815,8 +815,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax_op(x)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax_graph(x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_op(x)[T.arange(y.shape[0]), y]))] -T.sum(T.log(softmax_graph(x)[T.arange(y.shape[0]), y]))]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
...@@ -862,10 +862,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -862,10 +862,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax_op(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax_graph(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_op(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax_graph(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_op(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax_graph(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax_op(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax_graph(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
...@@ -923,10 +923,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -923,10 +923,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax_op(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax_graph(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_op(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax_graph(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_op(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax_graph(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax_op(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax_graph(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y_], expr, mode=mode) f = theano.function([x, b, y_], expr, mode=mode)
...@@ -985,10 +985,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -985,10 +985,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax_op(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax_graph(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_op(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax_graph(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_op(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax_graph(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax_op(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax_graph(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y_], expr, mode=mode) f = theano.function([x, b, y_], expr, mode=mode)
...@@ -1071,25 +1071,25 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -1071,25 +1071,25 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Cases to test # Cases to test
expressions = [ expressions = [
a * T.sum(-T.log(softmax_op(x)[T.arange(y.shape[0]), y])), a * T.sum(-T.log(softmax_graph(x)[T.arange(y.shape[0]), y])),
-a * T.sum(T.log(softmax_op(x)[T.arange(y.shape[0]), y])), -a * T.sum(T.log(softmax_graph(x)[T.arange(y.shape[0]), y])),
a * (-T.sum(T.log(softmax_op(x)[T.arange(y.shape[0]), y]))), a * (-T.sum(T.log(softmax_graph(x)[T.arange(y.shape[0]), y]))),
a * T.sum(T.log(softmax_op(x)[T.arange(y.shape[0]), y])), a * T.sum(T.log(softmax_graph(x)[T.arange(y.shape[0]), y])),
a * T.sum(-T.log(softmax_op(x))[T.arange(y.shape[0]), y]), a * T.sum(-T.log(softmax_graph(x))[T.arange(y.shape[0]), y]),
-a * T.sum(T.log(softmax_op(x))[T.arange(y.shape[0]), y]), -a * T.sum(T.log(softmax_graph(x))[T.arange(y.shape[0]), y]),
a * (-T.sum(T.log(softmax_op(x))[T.arange(y.shape[0]), y])), a * (-T.sum(T.log(softmax_graph(x))[T.arange(y.shape[0]), y])),
a * T.sum(T.log(softmax_op(x))[T.arange(y.shape[0]), y]), a * T.sum(T.log(softmax_graph(x))[T.arange(y.shape[0]), y]),
a * T.mean(-T.log(softmax_op(x)[T.arange(y.shape[0]), y])), a * T.mean(-T.log(softmax_graph(x)[T.arange(y.shape[0]), y])),
-a * T.mean(T.log(softmax_op(x)[T.arange(y.shape[0]), y])), -a * T.mean(T.log(softmax_graph(x)[T.arange(y.shape[0]), y])),
a * (-T.mean(T.log(softmax_op(x)[T.arange(y.shape[0]), y]))), a * (-T.mean(T.log(softmax_graph(x)[T.arange(y.shape[0]), y]))),
a * T.mean(T.log(softmax_op(x)[T.arange(y.shape[0]), y])), a * T.mean(T.log(softmax_graph(x)[T.arange(y.shape[0]), y])),
a * T.mean(-T.log(softmax_op(x))[T.arange(y.shape[0]), y]), a * T.mean(-T.log(softmax_graph(x))[T.arange(y.shape[0]), y]),
-a * T.mean(T.log(softmax_op(x))[T.arange(y.shape[0]), y]), -a * T.mean(T.log(softmax_graph(x))[T.arange(y.shape[0]), y]),
a * (-T.mean(T.log(softmax_op(x))[T.arange(y.shape[0]), y])), a * (-T.mean(T.log(softmax_graph(x))[T.arange(y.shape[0]), y])),
a * T.mean(T.log(softmax_op(x))[T.arange(y.shape[0]), y]), a * T.mean(T.log(softmax_graph(x))[T.arange(y.shape[0]), y]),
] ]
for expr in expressions: for expr in expressions:
...@@ -1374,8 +1374,15 @@ class Test_softmax_opt: ...@@ -1374,8 +1374,15 @@ class Test_softmax_opt:
# REPEAT 3 CASES in presence of log(softmax) with the advanced indexing # REPEAT 3 CASES in presence of log(softmax) with the advanced indexing
# etc. # etc.
def test_softmax(): def test_softmax_graph():
from theano.tensor.nnet import softmax_graph rng = numpy.random.RandomState(utt.fetch_seed())
x = theano.shared(rng.normal(size=(3, 4)))
def f(inputs):
y = softmax_graph(x)
z = (y**2).mean()
return theano.grad(z, x, known_grads={y: inputs})
utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test_stabilize_log_softmax(): def test_stabilize_log_softmax():
...@@ -1383,7 +1390,7 @@ def test_stabilize_log_softmax(): ...@@ -1383,7 +1390,7 @@ def test_stabilize_log_softmax():
mode = mode.including('local_log_softmax', 'specialize') mode = mode.including('local_log_softmax', 'specialize')
x = matrix() x = matrix()
y = theano.tensor.nnet.softmax(x) y = theano.tensor.nnet.softmax_graph(x)
z = theano.tensor.log(y) z = theano.tensor.log(y)
f = theano.function([x], z, mode=mode) f = theano.function([x], z, mode=mode)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论