提交 9f000926 作者: Harm de Vries

Replaced softmax with either softmax_op or softmax_graph

上级 c6ccaeeb
...@@ -413,7 +413,7 @@ class Softmax(gof.Op): ...@@ -413,7 +413,7 @@ class Softmax(gof.Op):
def grad(self, inp, grads): def grad(self, inp, grads):
x, = inp x, = inp
g_sm, = grads g_sm, = grads
sm = softmax(x) sm = softmax_op(x)
return [softmax_grad(g_sm, sm)] return [softmax_grad(g_sm, sm)]
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
...@@ -578,7 +578,7 @@ def softmax_graph(c): ...@@ -578,7 +578,7 @@ def softmax_graph(c):
def local_softmax_with_bias(node): def local_softmax_with_bias(node):
"""Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias) """Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias)
""" """
if node.op == softmax: if node.op == softmax_op:
x, = node.inputs x, = node.inputs
if x.owner and x.owner.op == tensor.add: if x.owner and x.owner.op == tensor.add:
vectors = [] vectors = []
...@@ -1406,7 +1406,7 @@ def crossentropy_to_crossentropy_with_softmax(fgraph): ...@@ -1406,7 +1406,7 @@ def crossentropy_to_crossentropy_with_softmax(fgraph):
if node.op == crossentropy_categorical_1hot: if node.op == crossentropy_categorical_1hot:
nll, = node.outputs nll, = node.outputs
sm, one_of_n = node.inputs sm, one_of_n = node.inputs
if sm.owner and sm.owner.op == softmax: if sm.owner and sm.owner.op == softmax_op:
x, = sm.owner.inputs x, = sm.owner.inputs
new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(x, new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(x,
tensor.zeros_like(x[0]), one_of_n) tensor.zeros_like(x[0]), one_of_n)
...@@ -1556,7 +1556,7 @@ def local_advanced_indexing_crossentropy_onehot(node): ...@@ -1556,7 +1556,7 @@ def local_advanced_indexing_crossentropy_onehot(node):
except Exception: except Exception:
pass pass
if sm is not None and sm.owner and sm.owner.op in (softmax, if sm is not None and sm.owner and sm.owner.op in (softmax_op,
softmax_with_bias): softmax_with_bias):
sm_w_bias = local_softmax_with_bias.transform(sm.owner) sm_w_bias = local_softmax_with_bias.transform(sm.owner)
if sm_w_bias: if sm_w_bias:
...@@ -1586,7 +1586,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node): ...@@ -1586,7 +1586,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
except Exception: except Exception:
return return
if (sm is not None) and sm.owner and (sm.owner.op in (softmax, if (sm is not None) and sm.owner and (sm.owner.op in (softmax_op,
softmax_with_bias)): softmax_with_bias)):
sm_w_bias = local_softmax_with_bias.transform(sm.owner) sm_w_bias = local_softmax_with_bias.transform(sm.owner)
if sm_w_bias: if sm_w_bias:
...@@ -2056,7 +2056,7 @@ def make_out_pattern(X): ...@@ -2056,7 +2056,7 @@ def make_out_pattern(X):
return out_var return out_var
local_log_softmax = gof.PatternSub(in_pattern=(tensor.log, (softmax, 'x')), local_log_softmax = gof.PatternSub(in_pattern=(tensor.log, (softmax_op, 'x')),
out_pattern=(make_out_pattern, 'x'), out_pattern=(make_out_pattern, 'x'),
allow_multiple_clients=True) allow_multiple_clients=True)
......
...@@ -22,7 +22,7 @@ from theano.tensor.nnet import (categorical_crossentropy, ...@@ -22,7 +22,7 @@ from theano.tensor.nnet import (categorical_crossentropy,
CrossentropyCategorical1Hot, CrossentropyCategorical1Hot,
CrossentropyCategorical1HotGrad, CrossentropyCategorical1HotGrad,
sigmoid, softplus, sigmoid, softplus,
Softmax, softmax_op, SoftmaxWithBias, Softmax, softmax_op, softmax_graph, SoftmaxWithBias,
softmax_grad, softmax_grad,
softmax_with_bias, SoftmaxGrad, softmax_with_bias, SoftmaxGrad,
Prepend_scalar_constant_to_each_row, Prepend_scalar_constant_to_each_row,
...@@ -52,22 +52,22 @@ class T_Softmax(utt.InferShapeTester): ...@@ -52,22 +52,22 @@ class T_Softmax(utt.InferShapeTester):
def test0(self): def test0(self):
def f(a): def f(a):
return softmax(a)[:, 0] return softmax_op(a)[:, 0]
utt.verify_grad(f, [numpy.random.rand(3, 4)]) utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test1(self): def test1(self):
def f(a): def f(a):
return softmax(a)[:, 1] return softmax_op(a)[:, 1]
utt.verify_grad(f, [numpy.random.rand(3, 4)]) utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test2(self): def test2(self):
def f(a): def f(a):
return softmax(a)[:, 2] return softmax_op(a)[:, 2]
utt.verify_grad(f, [numpy.random.rand(3, 4)]) utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test3(self): def test3(self):
def f(a): def f(a):
return softmax(a)[:, 3] return softmax_op(a)[:, 3]
utt.verify_grad(f, [numpy.random.rand(3, 4)]) utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test_infer_shape(self): def test_infer_shape(self):
...@@ -78,14 +78,14 @@ class T_Softmax(utt.InferShapeTester): ...@@ -78,14 +78,14 @@ class T_Softmax(utt.InferShapeTester):
def test_vector(self): def test_vector(self):
x = T.vector() x = T.vector()
f = theano.function([x], softmax(x)) f = theano.function([x], softmax_op(x))
xv = numpy.random.randn(6).astype(config.floatX) xv = numpy.random.randn(6).astype(config.floatX)
assert numpy.allclose(f(xv), numpy.exp(xv) / numpy.exp(xv).sum()) assert numpy.allclose(f(xv), numpy.exp(xv) / numpy.exp(xv).sum())
def test_vector_grad(self): def test_vector_grad(self):
def f(a): def f(a):
return softmax(a) return softmax_op(a)
utt.verify_grad(f, [numpy.random.rand(4)]) utt.verify_grad(f, [numpy.random.rand(4)])
...@@ -127,10 +127,10 @@ class T_SoftmaxWithBias(utt.InferShapeTester): ...@@ -127,10 +127,10 @@ class T_SoftmaxWithBias(utt.InferShapeTester):
vbias = theano.shared(value=0.1, name='vbias') # 0.01 vbias = theano.shared(value=0.1, name='vbias') # 0.01
hid = T.vector('hid') hid = T.vector('hid')
f = theano.function([hid], f = theano.function([hid],
T.nnet.softmax(T.dot(hid, W.T) + vbias)) T.nnet.softmax_op(T.dot(hid, W.T) + vbias))
ops = [node.op for node in f.maker.fgraph.toposort()] ops = [node.op for node in f.maker.fgraph.toposort()]
assert softmax_with_bias not in ops assert softmax_with_bias not in ops
assert softmax in ops assert softmax_op in ops
f([0, 1, 0]) f([0, 1, 0])
# print f.maker.fgraph.toposort() # print f.maker.fgraph.toposort()
...@@ -398,7 +398,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -398,7 +398,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x, one_of_n], [x, one_of_n],
[op(softmax(x), one_of_n)]) [op(softmax_op(x), one_of_n)])
assert fgraph.outputs[0].owner.op == op assert fgraph.outputs[0].owner.op == op
theano.compile.mode.optdb.query( theano.compile.mode.optdb.query(
...@@ -414,7 +414,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -414,7 +414,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
op = crossentropy_categorical_1hot op = crossentropy_categorical_1hot
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x, one_of_n], [x, one_of_n],
[op(softmax(x), one_of_n)]) [op(softmax_op(x), one_of_n)])
assert fgraph.outputs[0].owner.op == op assert fgraph.outputs[0].owner.op == op
theano.compile.mode.optdb.query( theano.compile.mode.optdb.query(
...@@ -432,7 +432,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -432,7 +432,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x, b, one_of_n], [x, b, one_of_n],
[op(softmax(x + b), one_of_n)]) [op(softmax_op(x + b), one_of_n)])
assert fgraph.outputs[0].owner.op == op assert fgraph.outputs[0].owner.op == op
# print 'BEFORE' # print 'BEFORE'
...@@ -464,7 +464,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -464,7 +464,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x, b, c, one_of_n], [x, b, c, one_of_n],
[op(softmax(T.add(x, b, c)), one_of_n)]) [op(softmax_op(T.add(x, b, c)), one_of_n)])
assert fgraph.outputs[0].owner.op == op assert fgraph.outputs[0].owner.op == op
# print 'BEFORE' # print 'BEFORE'
...@@ -492,7 +492,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -492,7 +492,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
op = crossentropy_categorical_1hot op = crossentropy_categorical_1hot
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x, b, one_of_n], [x, b, one_of_n],
[op(softmax(x + b), one_of_n)]) [op(softmax_op(x + b), one_of_n)])
assert fgraph.outputs[0].owner.op == op assert fgraph.outputs[0].owner.op == op
# print 'BEFORE' # print 'BEFORE'
# for node in fgraph.toposort(): # for node in fgraph.toposort():
...@@ -515,7 +515,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -515,7 +515,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
x = tensor.matrix('x') x = tensor.matrix('x')
one_of_n = tensor.lvector('one_of_n') one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot op = crossentropy_categorical_1hot
xe = op(softmax(x), one_of_n) xe = op(softmax_op(x), one_of_n)
sum_xe = tensor.sum(xe) sum_xe = tensor.sum(xe)
g_x = tensor.grad(sum_xe, x) g_x = tensor.grad(sum_xe, x)
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
...@@ -544,7 +544,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -544,7 +544,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
has_cx1hot = True has_cx1hot = True
if node.op == crossentropy_softmax_1hot_with_bias_dx: if node.op == crossentropy_softmax_1hot_with_bias_dx:
has_cx1hotdx = True has_cx1hotdx = True
if node.op == softmax: if node.op == softmax_op:
has_softmax = True has_softmax = True
if node.op == softmax_grad: if node.op == softmax_grad:
has_softmaxdx = True has_softmaxdx = True
...@@ -557,7 +557,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -557,7 +557,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
x = tensor.vector('x') x = tensor.vector('x')
one_of_n = tensor.lvector('one_of_n') one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot op = crossentropy_categorical_1hot
xe = op(softmax(x), one_of_n) xe = op(softmax_op(x), one_of_n)
sum_xe = tensor.sum(xe) sum_xe = tensor.sum(xe)
g_x = tensor.grad(sum_xe, x) g_x = tensor.grad(sum_xe, x)
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
...@@ -586,7 +586,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -586,7 +586,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
has_cx1hot = True has_cx1hot = True
if node.op == crossentropy_softmax_1hot_with_bias_dx: if node.op == crossentropy_softmax_1hot_with_bias_dx:
has_cx1hotdx = True has_cx1hotdx = True
if node.op == softmax: if node.op == softmax_op:
has_softmax = True has_softmax = True
if node.op == softmax_grad: if node.op == softmax_grad:
has_softmaxdx = True has_softmaxdx = True
...@@ -612,10 +612,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -612,10 +612,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Basic case # Basic case
expressions = [ expressions = [
T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax_op(x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax_op(x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax_op(x))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y]) T.sum(-T.log(softmax_op(x))[T.arange(y.shape[0]), y])
] ]
for expr in expressions: for expr in expressions:
# Verify the optimizer worked on the expressions # Verify the optimizer worked on the expressions
...@@ -641,7 +641,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -641,7 +641,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
ops = [node.op for node in g.maker.fgraph.toposort()] ops = [node.op for node in g.maker.fgraph.toposort()]
assert len(ops) == 2 assert len(ops) == 2
assert crossentropy_softmax_1hot_with_bias_dx in ops assert crossentropy_softmax_1hot_with_bias_dx in ops
assert softmax in ops assert softmax_op in ops
assert softmax_grad not in ops assert softmax_grad not in ops
g(x_val, y_val) g(x_val, y_val)
except Exception: except Exception:
...@@ -650,10 +650,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -650,10 +650,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax_op(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax_op(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax_op(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax_op(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
...@@ -683,10 +683,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -683,10 +683,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that using "mean" instead of sum works, too # Test that using "mean" instead of sum works, too
mean_expressions = [ mean_expressions = [
T.mean(-T.log(softmax(x)[T.arange(y.shape[0]), y])), T.mean(-T.log(softmax_op(x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y])), -T.mean(T.log(softmax_op(x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y]), -T.mean(T.log(softmax_op(x))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax(x))[T.arange(y.shape[0]), y])] T.mean(-T.log(softmax_op(x))[T.arange(y.shape[0]), y])]
for expr in mean_expressions: for expr in mean_expressions:
f = theano.function([x, y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
...@@ -712,7 +712,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -712,7 +712,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
#there's an extra dimshuffle in there #there's an extra dimshuffle in there
# but I can't think of a good rule to get rid of it # but I can't think of a good rule to get rid of it
assert crossentropy_softmax_1hot_with_bias_dx in ops assert crossentropy_softmax_1hot_with_bias_dx in ops
assert softmax in ops assert softmax_op in ops
assert softmax_grad not in ops assert softmax_grad not in ops
g(x_val, y_val) g(x_val, y_val)
except Exception: except Exception:
...@@ -720,10 +720,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -720,10 +720,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise raise
mean_bias_expressions = [ mean_bias_expressions = [
T.mean(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])), T.mean(-T.log(softmax_op(x + b)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(b + x)[T.arange(y.shape[0]), y])), -T.mean(T.log(softmax_op(b + x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(x + b))[T.arange(y.shape[0]), y]), -T.mean(T.log(softmax_op(x + b))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])] T.mean(-T.log(softmax_op(b + x))[T.arange(y.shape[0]), y])]
for expr in mean_bias_expressions: for expr in mean_bias_expressions:
f = theano.function([x, b, y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
...@@ -764,10 +764,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -764,10 +764,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
y = T.lvector('y') y = T.lvector('y')
yi = T.cast(y, 'int32') yi = T.cast(y, 'int32')
expressions = [ expressions = [
T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])), T.sum(-T.log(softmax_op(x)[T.arange(yi.shape[0]), yi])),
-T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])), -T.sum(T.log(softmax_op(x)[T.arange(yi.shape[0]), yi])),
-T.sum(T.log(softmax(x))[T.arange(yi.shape[0]), yi]), -T.sum(T.log(softmax_op(x))[T.arange(yi.shape[0]), yi]),
T.sum(-T.log(softmax(x))[T.arange(yi.shape[0]), yi]) T.sum(-T.log(softmax_op(x))[T.arange(yi.shape[0]), yi])
] ]
for expr in expressions: for expr in expressions:
...@@ -794,7 +794,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -794,7 +794,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
ops = [node.op for node in g.maker.fgraph.toposort()] ops = [node.op for node in g.maker.fgraph.toposort()]
assert len(ops) == 3 assert len(ops) == 3
assert crossentropy_softmax_1hot_with_bias_dx in ops assert crossentropy_softmax_1hot_with_bias_dx in ops
assert softmax in ops assert softmax_op in ops
assert softmax_grad not in ops assert softmax_grad not in ops
g(x_val, y_val) g(x_val, y_val)
except Exception: except Exception:
...@@ -815,8 +815,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -815,8 +815,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax_op(x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))] -T.sum(T.log(softmax_op(x)[T.arange(y.shape[0]), y]))]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
...@@ -839,7 +839,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -839,7 +839,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
ops = [node.op for node in g.maker.fgraph.toposort()] ops = [node.op for node in g.maker.fgraph.toposort()]
assert len(ops) == 4 assert len(ops) == 4
assert crossentropy_softmax_1hot_with_bias_dx in ops assert crossentropy_softmax_1hot_with_bias_dx in ops
assert softmax in ops assert softmax_op in ops
assert softmax_grad not in ops assert softmax_grad not in ops
g(x_val, y_val) g(x_val, y_val)
except Exception: except Exception:
...@@ -862,10 +862,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -862,10 +862,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax_op(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax_op(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax_op(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax_op(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
...@@ -923,10 +923,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -923,10 +923,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax_op(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax_op(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax_op(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax_op(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y_], expr, mode=mode) f = theano.function([x, b, y_], expr, mode=mode)
...@@ -985,10 +985,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -985,10 +985,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax_op(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax_op(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax_op(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax_op(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y_], expr, mode=mode) f = theano.function([x, b, y_], expr, mode=mode)
...@@ -1046,7 +1046,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -1046,7 +1046,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for node in func.maker.fgraph.toposort(): for node in func.maker.fgraph.toposort():
if node.op == crossentropy_softmax_argmax_1hot_with_bias: if node.op == crossentropy_softmax_argmax_1hot_with_bias:
has_cx1hot = True has_cx1hot = True
if node.op == softmax: if node.op == softmax_op:
has_softmax = True has_softmax = True
assert has_cx1hot assert has_cx1hot
...@@ -1060,7 +1060,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -1060,7 +1060,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for node in func.maker.fgraph.toposort(): for node in func.maker.fgraph.toposort():
if node.op == crossentropy_softmax_1hot_with_bias_dx: if node.op == crossentropy_softmax_1hot_with_bias_dx:
has_cx1hotdx = True has_cx1hotdx = True
if node.op == softmax: if node.op == softmax_op:
has_softmax = True has_softmax = True
if node.op == softmax_grad: if node.op == softmax_grad:
has_softmaxdx = True has_softmaxdx = True
...@@ -1071,25 +1071,25 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -1071,25 +1071,25 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Cases to test # Cases to test
expressions = [ expressions = [
a * T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])), a * T.sum(-T.log(softmax_op(x)[T.arange(y.shape[0]), y])),
-a * T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])), -a * T.sum(T.log(softmax_op(x)[T.arange(y.shape[0]), y])),
a * (-T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))), a * (-T.sum(T.log(softmax_op(x)[T.arange(y.shape[0]), y]))),
a * T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])), a * T.sum(T.log(softmax_op(x)[T.arange(y.shape[0]), y])),
a * T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y]), a * T.sum(-T.log(softmax_op(x))[T.arange(y.shape[0]), y]),
-a * T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]), -a * T.sum(T.log(softmax_op(x))[T.arange(y.shape[0]), y]),
a * (-T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y])), a * (-T.sum(T.log(softmax_op(x))[T.arange(y.shape[0]), y])),
a * T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]), a * T.sum(T.log(softmax_op(x))[T.arange(y.shape[0]), y]),
a * T.mean(-T.log(softmax(x)[T.arange(y.shape[0]), y])), a * T.mean(-T.log(softmax_op(x)[T.arange(y.shape[0]), y])),
-a * T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y])), -a * T.mean(T.log(softmax_op(x)[T.arange(y.shape[0]), y])),
a * (-T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y]))), a * (-T.mean(T.log(softmax_op(x)[T.arange(y.shape[0]), y]))),
a * T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y])), a * T.mean(T.log(softmax_op(x)[T.arange(y.shape[0]), y])),
a * T.mean(-T.log(softmax(x))[T.arange(y.shape[0]), y]), a * T.mean(-T.log(softmax_op(x))[T.arange(y.shape[0]), y]),
-a * T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y]), -a * T.mean(T.log(softmax_op(x))[T.arange(y.shape[0]), y]),
a * (-T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y])), a * (-T.mean(T.log(softmax_op(x))[T.arange(y.shape[0]), y])),
a * T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y]), a * T.mean(T.log(softmax_op(x))[T.arange(y.shape[0]), y]),
] ]
for expr in expressions: for expr in expressions:
...@@ -1130,7 +1130,7 @@ def test_argmax_pushdown(): ...@@ -1130,7 +1130,7 @@ def test_argmax_pushdown():
# test that the max_and_argmax is pushed down if the max is not used # test that the max_and_argmax is pushed down if the max is not used
out = tensor.max_and_argmax( out = tensor.max_and_argmax(
softmax(tensor.exp(tensor.tanh(sigmoid(x)))), softmax_graph(tensor.exp(tensor.tanh(sigmoid(x)))),
axis=-1)[1] axis=-1)[1]
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x], [x],
...@@ -1147,7 +1147,7 @@ def test_argmax_pushdown(): ...@@ -1147,7 +1147,7 @@ def test_argmax_pushdown():
x = tensor.matrix() x = tensor.matrix()
# test that the max_and_argmax is not pushed down if the max is used # test that the max_and_argmax is not pushed down if the max is used
out = tensor.max_and_argmax( out = tensor.max_and_argmax(
softmax(tensor.exp(tensor.tanh(sigmoid(x)))), softmax_graph(tensor.exp(tensor.tanh(sigmoid(x)))),
axis=-1)[0] axis=-1)[0]
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x], [x],
...@@ -1236,7 +1236,7 @@ def test_asymptotic_32(): ...@@ -1236,7 +1236,7 @@ def test_asymptotic_32():
x2 = tensor.dvector() x2 = tensor.dvector()
y = tensor.lvector() y = tensor.lvector()
c = categorical_crossentropy(softmax(x + x2), y) c = categorical_crossentropy(softmax_graph(x + x2), y)
f = theano.function([x, y, x2], [c.sum(), f = theano.function([x, y, x2], [c.sum(),
tensor.grad(c.sum(), x)], mode='FAST_RUN') tensor.grad(c.sum(), x)], mode='FAST_RUN')
if 0: if 0:
...@@ -1293,7 +1293,7 @@ class Test_softmax_opt: ...@@ -1293,7 +1293,7 @@ class Test_softmax_opt:
# printing.debugprint(f) # printing.debugprint(f)
# print '===' # print '==='
assert len(f_ops) == 1 assert len(f_ops) == 1
assert softmax in f_ops assert softmax_op in f_ops
f(self.rng.rand(3, 4).astype(config.floatX)) f(self.rng.rand(3, 4).astype(config.floatX))
def test_basic_keepdims(self): def test_basic_keepdims(self):
...@@ -1307,7 +1307,7 @@ class Test_softmax_opt: ...@@ -1307,7 +1307,7 @@ class Test_softmax_opt:
# printing.debugprint(f) # printing.debugprint(f)
# print '===' # print '==='
assert len(f_ops) == 1 assert len(f_ops) == 1
assert softmax in f_ops assert softmax_op in f_ops
f(self.rng.rand(3, 4).astype(config.floatX)) f(self.rng.rand(3, 4).astype(config.floatX))
def test_grad(self): def test_grad(self):
...@@ -1329,7 +1329,7 @@ class Test_softmax_opt: ...@@ -1329,7 +1329,7 @@ class Test_softmax_opt:
raise SkipTest('Optimization not enabled for the moment') raise SkipTest('Optimization not enabled for the moment')
assert len(g_ops) == 2 assert len(g_ops) == 2
assert softmax in g_ops assert softmax_op in g_ops
assert softmax_grad in g_ops assert softmax_grad in g_ops
g(self.rng.rand(3, 4), self.rng.uniform(.5, 1, (3, 4))) g(self.rng.rand(3, 4), self.rng.uniform(.5, 1, (3, 4)))
...@@ -1375,8 +1375,7 @@ class Test_softmax_opt: ...@@ -1375,8 +1375,7 @@ class Test_softmax_opt:
# etc. # etc.
def test_softmax(): def test_softmax():
from theano.tensor.nnet import softmax from theano.tensor.nnet import softmax_graph
def test_stabilize_log_softmax(): def test_stabilize_log_softmax():
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论