Commit e05036f0 authored by Frédéric Bastien

Merge pull request #2847 from harmdevries89/softmax_iss2050

[MRG] softmax function that builds an expression instead of using the softmax op
......@@ -739,7 +739,7 @@ class T_Scan(unittest.TestCase):
def forward_scanner(x_t):
    """One scan step: affine projection of x_t followed by softmax.

    Uses softmax_graph (expression-based softmax) rather than the
    Softmax op; `W` is captured from the enclosing scope.
    """
    a2_t = tensor.dot(x_t, W)
    # Diff residue removed: the stale `tensor.nnet.softmax(a2_t)`
    # assignment was dead code, immediately overwritten below.
    y_t = tensor.nnet.softmax_graph(a2_t)
    return y_t
y, _ = theano.scan(fn=forward_scanner, sequences=x,
......
......@@ -78,12 +78,17 @@ class SoftmaxWithBias(gof.Op):
if b.shape[0] != x.shape[1]:
    raise ValueError('b must have same number of columns as x')
# Vectorized, numerically stable softmax of (x + b) along each row:
# subtracting the per-row max before exp avoids overflow.
# (The superseded per-row Python loop and its commented-out copy,
# left over from the diff, have been removed: the loop's result was
# immediately overwritten by the vectorized computation below.)
x_plus_b = x + b[None, :]
e_x = numpy.exp(x_plus_b - x_plus_b.max(axis=1)[:, None])
e_x *= 1.0 / e_x.sum(axis=1)[:, None]
output_storage[0][0] = e_x
def grad(self, inp, grads):
x, b = inp
......@@ -304,8 +309,17 @@ class SoftmaxGrad(gof.Op):
dx[i] = dy_times_sm_i - sum(dy_times_sm_i) * sm[i]
output_storage[0][0] = dx
def grad(self, inp, grads):
    """Gradient of SoftmaxGrad with respect to its inputs (dy, sm).

    `g` is the gradient flowing into the output of SoftmaxGrad.
    Returns (g_dy, g_sm), the gradients w.r.t. dy and sm.
    (The stale `raise NotImplementedError()` stub left over from the
    diff has been removed; PEP8 spacing applied around operators.)
    """
    dy, sm = inp
    g, = grads
    # tmp = g - sum(g * sm, axis=1), broadcast back over columns.
    tmp = g + tensor.neg(tensor.sum(g * sm, axis=1).dimshuffle((0, 'x')))
    g_dy = tmp * sm
    tmp2 = tensor.sum(dy * sm, axis=1).dimshuffle((0, 'x'))
    g_sm = tmp * dy - g * tmp2
    return g_dy, g_sm
def infer_shape(self, node, shape):
    """The output has the same shape as the second input, sm."""
    dy_shape, sm_shape = shape
    return [sm_shape]
......@@ -414,7 +428,7 @@ class Softmax(gof.Op):
def grad(self, inp, grads):
    """Gradient of the Softmax op: softmax_grad(g_sm, softmax_op(x)).

    (Diff residue removed: the stale `sm = softmax(x)` line was dead,
    immediately overwritten by the `softmax_op` version.)
    """
    x, = inp
    g_sm, = grads
    sm = softmax_op(x)
    return [softmax_grad(g_sm, sm)]
def R_op(self, inputs, eval_points):
......@@ -568,15 +582,20 @@ class Softmax(gof.Op):
def c_code_cache_version():
    """Version tag for the C-code cache; bump when generated C changes."""
    version = (3,)
    return version
# Single shared instance of the Softmax op. The stale
# `softmax = Softmax()` binding from before the rename was removed:
# it conflicted with (and was shadowed by) the `softmax` wrapper below.
softmax_op = Softmax()


def softmax_graph(c):
    """Softmax built as a graph of elementary ops (exp / sum) instead
    of the monolithic Softmax op; normalizes along the last axis."""
    return tensor.exp(c) / tensor.exp(c).sum(axis=-1, keepdims=True)


def softmax(c):
    """Softmax of `c`: thin wrapper applying the shared `softmax_op`."""
    return softmax_op(c)
@opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax])
@gof.local_optimizer([softmax_op])
def local_softmax_with_bias(node):
"""Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias)
"""
if node.op == softmax:
if node.op == softmax_op:
x, = node.inputs
if x.owner and x.owner.op == tensor.add:
vectors = []
......@@ -638,7 +657,7 @@ def softmax_simplifier(numerators, denominators):
if not numerator.type.dtype.startswith('float'):
continue
if not numerator.type.broadcastable == (False, False):
if numerator.ndim != 2:
continue
if numerator.owner and numerator.owner.op == tensor.exp:
x = numerator.owner.inputs[0]
......@@ -664,7 +683,8 @@ def softmax_simplifier(numerators, denominators):
if matching_denom:
numerators.remove(numerator)
denominators.remove(matching_denom)
numerators.append(softmax(x))
numerators.append(softmax_op(x))
return numerators, denominators
# Register softmax_simplifier with the mul canonizer so that
# exp(x) / sum(exp(x)) patterns are rewritten into the softmax op.
opt.local_mul_canonizer.add_simplifier(softmax_simplifier,
'softmax_simplifier')
......@@ -1404,7 +1424,7 @@ def crossentropy_to_crossentropy_with_softmax(fgraph):
if node.op == crossentropy_categorical_1hot:
nll, = node.outputs
sm, one_of_n = node.inputs
if sm.owner and sm.owner.op == softmax:
if sm.owner and sm.owner.op == softmax_op:
x, = sm.owner.inputs
new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(x,
tensor.zeros_like(x[0]), one_of_n)
......@@ -1450,7 +1470,7 @@ def local_softmax_grad_to_crossentropy_with_softmax_grad(node):
def local_argmax_pushdown(node):
if node.op == tensor._max_and_argmax and node.inputs[0].owner and \
len(node.outputs[0].clients) > 0 and node.inputs[0].owner.op in \
(softmax, softplus, tensor.exp, tensor.log, tensor.tanh, sigmoid,
(softmax_op, softplus, tensor.exp, tensor.log, tensor.tanh, sigmoid,
softmax_with_bias):
if theano.config.warn.argmax_pushdown_bug:
logging.getLogger('theano.tensor.nnet.nnet').warn("WARNING: there "
......@@ -1466,7 +1486,7 @@ def local_argmax_pushdown(node):
x_max, x_argmax = node.outputs
x, axis = node.inputs
# TODO: Make a list/set of monotonic ops...
if x.owner and x.owner.op in (softmax, softplus, tensor.exp,
if x.owner and x.owner.op in (softmax_op, softplus, tensor.exp,
tensor.log, tensor.tanh, sigmoid):
pre_x, = x.owner.inputs
return tensor._max_and_argmax(pre_x, axis)
......@@ -1554,7 +1574,7 @@ def local_advanced_indexing_crossentropy_onehot(node):
except Exception:
pass
if sm is not None and sm.owner and sm.owner.op in (softmax,
if sm is not None and sm.owner and sm.owner.op in (softmax_op,
softmax_with_bias):
sm_w_bias = local_softmax_with_bias.transform(sm.owner)
if sm_w_bias:
......@@ -1584,7 +1604,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
except Exception:
return
if (sm is not None) and sm.owner and (sm.owner.op in (softmax,
if (sm is not None) and sm.owner and (sm.owner.op in (softmax_op,
softmax_with_bias)):
sm_w_bias = local_softmax_with_bias.transform(sm.owner)
if sm_w_bias:
......@@ -2054,7 +2074,7 @@ def make_out_pattern(X):
return out_var
# Rewrite log(softmax_op(x)) into the numerically stable form built by
# make_out_pattern. (Diff residue removed: the stale duplicate
# assignment referencing the old `softmax` name is gone.)
local_log_softmax = gof.PatternSub(in_pattern=(tensor.log, (softmax_op, 'x')),
                                   out_pattern=(make_out_pattern, 'x'),
                                   allow_multiple_clients=True)
......
Markdown is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Finish editing this comment first!
Register or sign in to comment