提交 383d965b 作者: Pascal Lamblin

Move test_softmax_grad_optimizations inside of T_CrossentropyCategorical1Hot,

add new test test_scale_cost.
上级 5d367913
...@@ -223,89 +223,13 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -223,89 +223,13 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
assert not has_softmax assert not has_softmax
assert not has_softmaxdx assert not has_softmaxdx
def test_argmax_pushdown(): def test_get_rid_of_advanced_indexing_version_of_xent(self):
x = tensor.dmatrix()
env = gof.Env(
[x],
[tensor.max(softmax(tensor.exp(tensor.tanh(sigmoid(x)))))])
theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(env)
#print 'AFTER'
#for node in env.toposort():
#print node.op
assert len(env.toposort()) == 2 # an output_guard is second
assert env.toposort()[0].op == tensor._max_and_argmax
def test_argmax_pushdown_bias():
x = tensor.dmatrix()
b = tensor.dvector()
env = gof.Env(
[x,b],
[tensor.max(softmax_with_bias(x, b))])
theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(env)
print 'AFTER'
for node in env.toposort():
print node.op
assert len(env.toposort()) == 4
assert isinstance(env.toposort()[0].op, tensor.DimShuffle)
assert isinstance(env.toposort()[1].op, tensor.Elemwise)
assert isinstance(env.toposort()[2].op, tensor.MaxAndArgmax)
assert str(env.toposort()[3].op) == 'OutputGuard'
def test_asymptotic_32():
"""
This test makes sure that our functions behave sensibly when huge values are present
"""
for dtype in 'float32', 'float64':
if dtype == 'float32':
x = tensor.fmatrix()
x2 = tensor.fvector()
else:
x = tensor.dmatrix()
x2 = tensor.dvector()
y = tensor.lvector()
c = categorical_crossentropy(softmax(x+x2), y)
f = theano.function([x,y,x2], [c.sum(), tensor.grad(c, x)])
if 0:
for i, n in enumerate( f.maker.env.toposort()):
print i, n
xval = numpy.zeros((5, 5), dtype=dtype)
x2val = numpy.zeros(5, dtype=xval.dtype)
for i in xrange(100):
cval, gxval = f(xval, numpy.arange(5), x2val)
xval -= 100.3 * gxval
#print cval, gxval
assert cval == 0 # no problem going to zero error
#what about when x gets really big?
xval = numpy.zeros((5, 5), dtype=dtype)
x2val = numpy.zeros(5, dtype=xval.dtype)
for i in xrange(100):
cval, gxval = f(xval, numpy.arange(5), x2val)
xval += 100000.3 * gxval
#print cval, gxval
assert cval > 61750000
assert gxval[0,0] == -1.0
assert gxval[0,1] == 0.25
def test_get_rid_of_advanced_indexing_version_of_xent():
verbose = 0 verbose = 0
if 0: mode = 'DEBUG_MODE' # TODO: add the optimization in FAST_COMPILE?
else: mode = 'FAST_RUN' # In the mean time, run it as 'FAST_RUN' instead
mode = theano.compile.mode.get_default_mode()
if mode == 'FAST_COMPILE':
mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
...@@ -322,13 +246,15 @@ def test_get_rid_of_advanced_indexing_version_of_xent(): ...@@ -322,13 +246,15 @@ def test_get_rid_of_advanced_indexing_version_of_xent():
print i, node print i, node
# Last node should be the output # Last node should be the output
print i, pprint(node.outputs[0]) print i, pprint(node.outputs[0])
print
## Basic case ## Basic case
expressions = [ expressions = [
T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y])
]
for expr in expressions: for expr in expressions:
# Verify the optimizer worked on the expressions # Verify the optimizer worked on the expressions
...@@ -397,6 +323,183 @@ def test_get_rid_of_advanced_indexing_version_of_xent(): ...@@ -397,6 +323,183 @@ def test_get_rid_of_advanced_indexing_version_of_xent():
g(x_val, b_val, y_val) g(x_val, b_val, y_val)
def test_scale_cost(self):
    """Check the cross-entropy optimizations on costs scaled by a scalar.

    Every expression multiplies a sum or a mean of log(softmax(x))
    entries, selected by advanced indexing with y, by the scalar a; the
    negation is placed at various positions, or left out.  For each
    expression three functions are compiled (the cost, its gradient wrt
    x, and its gradient wrt x with an explicit g_cost); each one is
    checked for its number of nodes and for the ops it contains, and is
    then run once.
    """
    # TODO: add the optimization in FAST_COMPILE?
    # In the mean time, run it as 'FAST_RUN' instead.
    # get_default_mode() returns a Mode object rather than a mode name,
    # so it is compared with the FAST_COMPILE Mode object; comparing it
    # with the string 'FAST_COMPILE' would never be true.
    mode = theano.compile.mode.get_default_mode()
    if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
        mode = 'FAST_RUN'

    rng = numpy.random.RandomState(utt.fetch_seed())
    x_val = rng.randn(3, 5)
    y_val = numpy.asarray([2, 4, 1])

    x = T.dmatrix('x')
    y = T.lvector('y')
    a = T.dscalar('a')

    def validate_fn_graph(func):
        # The graph of the function should not have softmax anymore
        nodes = func.maker.env.toposort()
        assert any(node.op == crossentropy_softmax_argmax_1hot_with_bias
                   for node in nodes)
        assert not any(node.op == softmax for node in nodes)

    def validate_grad_graph(func):
        # The graph of the gradient should not have softmaxgrad anymore
        nodes = func.maker.env.toposort()
        assert any(node.op == crossentropy_softmax_1hot_with_bias_dx
                   for node in nodes)
        assert any(node.op == softmax for node in nodes)
        assert not any(node.op == softmax_grad for node in nodes)

    ## Cases to test
    expressions = [
        a * T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
        -a * T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])),
        a * (-T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))),
        a * T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])),

        a * T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y]),
        -a * T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),
        a * (-T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y])),
        a * T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),

        a * T.mean(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
        -a * T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y])),
        a * (-T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y]))),
        a * T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y])),

        a * T.mean(-T.log(softmax(x))[T.arange(y.shape[0]), y]),
        -a * T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y]),
        a * (-T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y])),
        a * T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y]),
        ]

    for expr in expressions:
        # Verify the optimizer worked on the expressions
        f = theano.function([x, y, a], expr, mode=mode)
        assert 5 <= len(f.maker.env.toposort()) <= 10
        validate_fn_graph(f)
        f(x_val, y_val, 0.1)

        # Verify the gradient wrt x
        g = theano.function([x, y, a], T.grad(expr, x), mode=mode)
        assert 5 <= len(g.maker.env.toposort()) <= 12
        validate_grad_graph(g)
        g(x_val, y_val, 0.1)

        # Verify the gradient when providing output gradient
        h = theano.function([x, y, a],
                            T.grad(expr, x, g_cost=a * x.sum()),
                            mode=mode)
        assert 8 <= len(h.maker.env.toposort()) <= 17
        validate_grad_graph(h)
        h(x_val, y_val, 0.1)
def test_argmax_pushdown():
    """Check the FAST_RUN graph of max(softmax(exp(tanh(sigmoid(x))))).

    After optimization the graph is expected to hold exactly two nodes,
    the first one applying tensor._max_and_argmax.
    """
    mat = tensor.dmatrix()
    squashed = tensor.exp(tensor.tanh(sigmoid(mat)))
    cost = tensor.max(softmax(squashed))
    env = gof.Env([mat], [cost])

    optimizer = theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN)
    optimizer.optimize(env)

    # Sort the optimized graph once and reuse the result below.
    nodes = env.toposort()

    # an output_guard is second
    assert len(nodes) == 2
    assert nodes[0].op == tensor._max_and_argmax
def test_argmax_pushdown_bias():
x = tensor.dmatrix()
b = tensor.dvector()
env = gof.Env(
[x,b],
[tensor.max(softmax_with_bias(x, b))])
theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(env)
print 'AFTER'
for node in env.toposort():
print node.op
assert len(env.toposort()) == 4
assert isinstance(env.toposort()[0].op, tensor.DimShuffle)
assert isinstance(env.toposort()[1].op, tensor.Elemwise)
assert isinstance(env.toposort()[2].op, tensor.MaxAndArgmax)
assert str(env.toposort()[3].op) == 'OutputGuard'
def test_asymptotic_32():
"""
This test makes sure that our functions behave sensibly when huge values are present
"""
for dtype in 'float32', 'float64':
if dtype == 'float32':
x = tensor.fmatrix()
x2 = tensor.fvector()
else:
x = tensor.dmatrix()
x2 = tensor.dvector()
y = tensor.lvector()
c = categorical_crossentropy(softmax(x+x2), y)
f = theano.function([x,y,x2], [c.sum(), tensor.grad(c, x)])
if 0:
for i, n in enumerate( f.maker.env.toposort()):
print i, n
xval = numpy.zeros((5, 5), dtype=dtype)
x2val = numpy.zeros(5, dtype=xval.dtype)
for i in xrange(100):
cval, gxval = f(xval, numpy.arange(5), x2val)
xval -= 100.3 * gxval
#print cval, gxval
assert cval == 0 # no problem going to zero error
#what about when x gets really big?
xval = numpy.zeros((5, 5), dtype=dtype)
x2val = numpy.zeros(5, dtype=xval.dtype)
for i in xrange(100):
cval, gxval = f(xval, numpy.arange(5), x2val)
xval += 100000.3 * gxval
#print cval, gxval
assert cval > 61750000
assert gxval[0,0] == -1.0
assert gxval[0,1] == 0.25
# hint - call the argmax push-down optimization first too # hint - call the argmax push-down optimization first too
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论