提交 60a89d0b authored 作者: James Bergstra's avatar James Bergstra

Moved sigmoid() and softmax() to new file, added some related optimizations and tests.

上级 525000a6
from nnet import *
from sigm import softplus, sigmoid, sigmoid_inplace, scalar_sigmoid
......@@ -4,89 +4,14 @@
"""
from theano import gof
from theano import scalar
from theano import printing
from theano.printing import pprint
from theano.tensor import basic as tensor
from theano.tensor import elemwise
from theano.tensor import opt
from theano.compile import optdb
import numpy
############
#
# SCALAR OPS
#
class ScalarSigmoid(scalar.UnaryScalarOp):
@staticmethod
def st_impl(x):
if x < -30.0:
return 0.0
if x > 30.0:
return 1.0
return 1.0 / (1.0 + numpy.exp(-x))
def impl(self, x):
return ScalarSigmoid.st_impl(x)
def grad(self, (x,), (gz,)):
y = scalar_sigmoid(x)
return [gz * y * (1.0 - y)]
def c_code(self, node, name, (x,), (z,), sub):
if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
# print i, repr( theano._asarray(1.0, dtype=dt) / (theano._asarray(1.0, dtype=dt) + numpy.exp(-theano._asarray([i,-i], dtype=dt))))
# the boundary checks prevent us from generating inf
return """%(z)s = %(x)s < -88.0f ? 0.0 : %(x)s > 15.0f ? 1.0f : 1.0f /(1.0f + exp(-%(x)s));""" % locals()
elif node.inputs[0].type == scalar.float64:
return """%(z)s = %(x)s < -709.0 ? 0.0 : %(x)s > 19.0 ? 1.0 : 1.0 /(1.0+exp(-%(x)s));""" % locals()
else:
raise NotImplementedError('only floatingpoint is implemented')
def c_code_cache_version(self):
v = super(ScalarSigmoid, self).c_code_cache_version()
if v:
return (2,) + v
else:
return v
scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')
pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid'))
class ScalarSoftplus(scalar.UnaryScalarOp):
@staticmethod
def static_impl(x):
if x < -30.0:
return 0.0
if x > 30.0:
return x
return numpy.log1p(numpy.exp(x))
def impl(self, x):
return ScalarSoftplus.static_impl(x)
def grad(self, (x,), (gz,)):
return [gz * scalar_sigmoid(x)]
def c_code(self, node, name, (x,), (z,), sub):
if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
# print i, repr( numpy.log1p(numpy.exp(theano._asarray([i,-i], dtype=dt))))
# the boundary checks prevent us from generating inf
return """%(z)s = %(x)s < -103.0f ? 0.0 : %(x)s > 14.0f ? %(x)s : log1p(exp(%(x)s));""" % locals()
elif node.inputs[0].type == scalar.float64:
return """%(z)s = %(x)s < -745.0 ? 0.0 : %(x)s > 16.0 ? %(x)s : log1p(exp(%(x)s));""" % locals()
else:
raise NotImplementedError('only floatingpoint is implemented')
def c_code_cache_version(self):
v = super(ScalarSoftplus, self).c_code_cache_version()
if v:
return (2,) + v
else:
return v
scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus')
softplus = elemwise.Elemwise(scalar_softplus, name='softplus')
pprint.assign(softplus, printing.FunctionPrinter('softplus'))
from .sigm import sigmoid
############
......@@ -1351,6 +1276,7 @@ def categorical_crossentropy(coding_dist, true_dist):
raise TypeError('rank mismatch between coding and true distributions')
from theano import scalar
class Prepend_scalar_constant_to_each_row(gof.Op):
def __init__(self, val = 0):
......@@ -1440,14 +1366,3 @@ prepend_scalar_to_each_row = Prepend_scalar_to_each_row()
prepend_0_to_each_row = Prepend_scalar_constant_to_each_row(0.)
prepend_1_to_each_row = Prepend_scalar_constant_to_each_row(1.)
logsigm_to_softplus = gof.PatternSub(
(tensor.log, (sigmoid, 'x')),
(tensor.neg, (softplus, (tensor.neg, 'x'))),
allow_multiple_clients = True)
log1msigm_to_softplus = gof.PatternSub(
(tensor.log, (tensor.sub, tensor.constant([[1.0]]), (sigmoid, 'x'))),
(tensor.neg, (softplus, 'x')),
allow_multiple_clients = True)
opt.register_specialize(logsigm_to_softplus, name = 'logsigm_to_softplus')
opt.register_specialize(log1msigm_to_softplus, name = 'log1msigm_to_softplus')
"""Ops and optimizations: sigmoid, softplus
These functions implement special cases of exp and log to improve numerical stability.
"""
import numpy
from theano import gof
from theano import scalar
from theano import printing
from theano.tensor import basic as tensor
from theano.printing import pprint
from theano.tensor import elemwise
from theano.tensor import opt
from theano.compile import optdb
############
#
# SCALAR OPS
#
class ScalarSigmoid(scalar.UnaryScalarOp):
@staticmethod
def st_impl(x):
if x < -30.0:
return 0.0
if x > 30.0:
return 1.0
return 1.0 / (1.0 + numpy.exp(-x))
def impl(self, x):
return ScalarSigmoid.st_impl(x)
def grad(self, (x,), (gz,)):
y = scalar_sigmoid(x)
return [gz * y * (1.0 - y)]
def c_code(self, node, name, (x,), (z,), sub):
if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
# print i, repr( theano._asarray(1.0, dtype=dt) / (theano._asarray(1.0, dtype=dt) + numpy.exp(-theano._asarray([i,-i], dtype=dt))))
# the boundary checks prevent us from generating inf
return """%(z)s = %(x)s < -88.0f ? 0.0 : %(x)s > 15.0f ? 1.0f : 1.0f /(1.0f + exp(-%(x)s));""" % locals()
elif node.inputs[0].type == scalar.float64:
return """%(z)s = %(x)s < -709.0 ? 0.0 : %(x)s > 19.0 ? 1.0 : 1.0 /(1.0+exp(-%(x)s));""" % locals()
else:
raise NotImplementedError('only floatingpoint is implemented')
def c_code_cache_version(self):
v = super(ScalarSigmoid, self).c_code_cache_version()
if v:
return (2,) + v
else:
return v
scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')
sigmoid_inplace = elemwise.Elemwise(
ScalarSigmoid(scalar.transfer_type(0)),
inplace_pattern={0:0},
name='sigmoid_inplace',
)
pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid'))
class ScalarSoftplus(scalar.UnaryScalarOp):
@staticmethod
def static_impl(x):
if x < -30.0:
return 0.0
if x > 30.0:
return x
return numpy.log1p(numpy.exp(x))
def impl(self, x):
return ScalarSoftplus.static_impl(x)
def grad(self, (x,), (gz,)):
return [gz * scalar_sigmoid(x)]
def c_code(self, node, name, (x,), (z,), sub):
if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
# print i, repr( numpy.log1p(numpy.exp(theano._asarray([i,-i], dtype=dt))))
# the boundary checks prevent us from generating inf
return """%(z)s = %(x)s < -103.0f ? 0.0 : %(x)s > 14.0f ? %(x)s : log1p(exp(%(x)s));""" % locals()
elif node.inputs[0].type == scalar.float64:
return """%(z)s = %(x)s < -745.0 ? 0.0 : %(x)s > 16.0 ? %(x)s : log1p(exp(%(x)s));""" % locals()
else:
raise NotImplementedError('only floatingpoint is implemented')
def c_code_cache_version(self):
v = super(ScalarSoftplus, self).c_code_cache_version()
if v:
return (2,) + v
else:
return v
scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus')
softplus = elemwise.Elemwise(scalar_softplus, name='softplus')
pprint.assign(softplus, printing.FunctionPrinter('softplus'))
logsigm_to_softplus = gof.PatternSub(
(tensor.log, (sigmoid, 'x')),
(tensor.neg, (softplus, (tensor.neg, 'x'))),
allow_multiple_clients = True)
log1msigm_to_softplus = gof.PatternSub(
(tensor.log, (tensor.sub, tensor.constant([[1.0]]), (sigmoid, 'x'))),
(tensor.neg, (softplus, 'x')),
allow_multiple_clients = True)
opt.register_specialize(logsigm_to_softplus, name = 'logsigm_to_softplus')
opt.register_specialize(log1msigm_to_softplus, name = 'log1msigm_to_softplus')
def is_1pexp(t):
# if t is of form (1+exp(x)), return x
# else return None
if t.owner and t.owner.op == tensor.add:
scalars, scalar_inputs, nonconsts = \
opt.scalarconsts_rest(t.owner.inputs)
# scalar_inputs are potentially dimshuffled and fill'd scalars
if len(nonconsts) == 1:
maybe_exp = nonconsts[0]
if maybe_exp.owner and maybe_exp.owner.op == tensor.exp:
return False, maybe_exp.owner.inputs[0]
return None
def is_exp(t):
# if t is of form (exp(x)) then return x
# else return None
neg = False
if t.owner and t.owner.op == tensor.neg:
t = t.owner.inputs[0]
neg = True
if t.owner and t.owner.op == tensor.exp:
return neg, t.owner.inputs[0]
def partition_num_or_denom(r, f):
if r.owner and r.owner.op == tensor.mul:
a = r.owner.inputs
else:
a = [r]
# ugly 2.4-compatible thing
f_terms = []
neg = False
rest = []
for t in a:
f_t = f(t)
if f_t is None:
rest.append(t)
else:
neg_t, f_t = f_t
f_terms.append(f_t)
neg ^= neg_t #bit flip if neg_t is true
return f_terms, rest, neg
@opt.register_specialize
@opt.register_canonicalize
@gof.local_optimizer([tensor.true_div])
def local_exp_over_1_plus_exp(node):
"""exp(x)/(1+exp(x)) -> sigm(x)
c/(1+exp(x)) -> c*sigm(-x)
"""
# this optimization should be done for numerical stability
# so we don't care to check client counts
if node.op == tensor.true_div:
#find all the exp() terms in the numerator
num, denom = node.inputs
num_exp_x, num_rest, num_neg = partition_num_or_denom(num, is_exp)
denom_1pexp, denom_rest, denom_neg = partition_num_or_denom(denom, is_1pexp)
sigmoids = []
for t in denom_1pexp:
if t in num_exp_x:
# case: exp(x) /(1+exp(x))
sigmoids.append(sigmoid(t))
del num_exp_x[num_exp_x.index(t)]
else:
# case: 1/(1+exp(x))
sigmoids.append(sigmoid(-t))
if not sigmoids: # we didn't find any. abort
return
# put the new numerator together
new_num = sigmoids + [tensor.exp(t) for t in num_exp_x] + num_rest
if len(new_num) == 1:
new_num = new_num[0]
else:
new_num = tensor.mul(*new_num)
if num_neg ^ denom_neg:
new_num = -new_num
if len(denom_rest) == 0:
return [new_num]
elif len(denom_rest) == 1:
return [new_num / denom_rest[0]]
else:
return [new_num / tensor.mul(*denom_rest)]
@opt.register_specialize
@opt.register_canonicalize
@gof.local_optimizer([tensor.mul])
def local_sigm_times_exp(node):
"""
exp(x)*sigm(-x) -> -sigm(x)
"""
# this is a numerical stability thing, so we dont check clients
if node.op == tensor.mul:
exp_x = []
exp_minus_x = []
sigm_x = []
sigm_minus_x = []
other = []
neg = False
for i in node.inputs:
while i.owner and i.owner.op == tensor.neg:
neg ^= True
i = i.owner.inputs[0]
if i.owner and i.owner.op == tensor.exp:
exp_arg = i.owner.inputs[0]
if exp_arg.owner and exp_arg.owner.op == tensor.neg:
exp_minus_x.append(exp_arg.owner.inputs[0])
else:
exp_x.append(exp_arg)
elif i.owner and i.owner.op == sigmoid:
sigm_arg = i.owner.inputs[0]
if sigm_arg.owner and sigm_arg.owner.op == tensor.neg:
sigm_minus_x.append(sigm_arg.owner.inputs[0])
else:
sigm_x.append(sigm_arg)
else:
other.append(i)
# remove matched pairs in exp_x and sigm_minus_x
did_something = False
for i in exp_x:
if i in sigm_minus_x:
del sigm_minus_x[sigm_minus_x.index(i)]
other.append(sigmoid(i))
did_something = True
else:
other.append(i)
# remove matched pairs in exp_minus_x and sigm_x
for i in exp_minus_x:
if i in sigm_x:
del sigm_x[sigm_x.index(i)]
other.append(sigm(-i))
did_something = True
else:
other.append(i)
if did_something:
terms = other + [sigmoid(x) for x in sigm_x] \
+ [sigmoid(-x) for x in sigm_minus_x]
if len(terms)>1:
rval = tensor.mul(*terms)
else:
rval = terms[0]
if neg:
return [-rval]
else:
return [rval]
@opt.register_specialize
@opt.register_canonicalize
@gof.local_optimizer([tensor.inv])
def local_inv_1_plus_exp(node):
"""
1/(1+exp(x)) -> sigm(-x)
"""
# this optimization should be done for numerical stability
# so we don't care to check client counts
if node.op == tensor.inv:
inv_arg = node.inputs[0]
if inv_arg.owner and inv_arg.owner.op == tensor.add:
scalars, scalar_inputs, nonconsts = \
opt.scalarconsts_rest(inv_arg.owner.inputs)
# scalar_inputs are potentially dimshuffled and fill'd scalars
if len(nonconsts) == 1:
if nonconsts[0].owner and nonconsts[0].owner.op == tensor.exp:
if scalars and numpy.allclose(numpy.sum(scalars), 1):
return opt._fill_chain(
sigmoid(tensor.neg(nonconsts[0].owner.inputs[0])),
scalar_inputs)
@opt.register_specialize
@gof.local_optimizer([tensor.sub])
def local_1msigmoid(node):
"""
1-sigm(x) -> sigm(-x)
"""
# this optimization is for speed alone
# so we do check the client count on the sigmoid
if node.op == tensor.sub:
sub_l, sub_r = node.inputs
if len(sub_r.clients) > 1:
return # we probably need both sigm and 1-sigm
if sub_r.owner and sub_r.owner.op == sigmoid:
try:
val_l = opt.get_constant_value(sub_l)
except Exception, e:
return
if numpy.allclose(numpy.sum(val_l), 1):
return [sigmoid(-sub_r.owner.inputs[0])]
import unittest
import theano
from theano import tensor as T
from theano import gof
import numpy
from theano.tests import unittest_tools as utt
from theano.tensor.tests import test_basic as TT
from theano.tensor.nnet import *
class T_sigmoid(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def test_elemwise(self):
utt.verify_grad(sigmoid, [numpy.random.rand(3,4)])
class T_softplus(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def test_elemwise(self):
utt.verify_grad(softplus, [numpy.random.rand(3,4)])
class T_sigmoid_opts(unittest.TestCase):
def test_exp_over_1_plus_exp(self):
m = theano.config.mode
if m == 'FAST_COMPILE':
m = 'FAST_RUN'
x = T.dvector()
# tests exp_over_1_plus_exp
f = theano.function([x], T.exp(x)/(1+T.exp(x)), mode=m)
#theano.printing.debugprint(f)
assert [node.op for node in f.maker.env.toposort()] == [sigmoid]
# tests inv_1_plus_exp
f = theano.function([x], T.fill(x,1.0) / (1+T.exp(-x)), mode=m)
#theano.printing.debugprint(f)
assert [node.op for node in f.maker.env.toposort()] == [sigmoid]
# tests inv_1_plus_exp with neg
f = theano.function([x], T.fill(x,-1.0) / (1+T.exp(-x)), mode=m)
#theano.printing.debugprint(f)
assert [node.op for node in f.maker.env.toposort()] == [sigmoid,
T.inplace.neg_inplace]
# tests double inv_1_plus_exp with neg
f = theano.function([x], (T.fill(x,-1.0)*T.exp(x)) / ((1+T.exp(x))*(1+T.exp(-x))), mode=m)
#theano.printing.debugprint(f)
assert [node.op for node in f.maker.env.toposort()] == [sigmoid,
T.mul]
def test_1msigmoid(self):
m = theano.config.mode
if m == 'FAST_COMPILE':
m = 'FAST_RUN'
x = T.fmatrix()
# tests exp_over_1_plus_exp
f = theano.function([x], 1 - T.exp(x)/(1+T.exp(x)), mode=m)
theano.printing.debugprint(f)
assert [node.op for node in f.maker.env.toposort()] == [tensor.neg, sigmoid_inplace]
# tests inv_1_plus_exp
f = theano.function([x], 1 - T.fill(x,1.0) / (1+T.exp(-x)), mode=m)
theano.printing.debugprint(f)
assert [node.op for node in f.maker.env.toposort()] == [tensor.neg,
sigmoid_inplace]
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论