Commit fe026b97 authored by Eric Larsen, committed by Frederic

test_nnet.py: layout cleanup

Parent 9c66685f
@@ -22,42 +22,53 @@ from theano.tensor.nnet import (categorical_crossentropy,
CrossentropyCategorical1Hot,
CrossentropyCategorical1HotGrad,
sigmoid, softplus,
Softmax, softmax, SoftmaxWithBias, softmax_grad,
Softmax, softmax, SoftmaxWithBias,
softmax_grad,
softmax_with_bias, SoftmaxGrad,
Prepend_scalar_constant_to_each_row,
Prepend_scalar_to_each_row)
from theano.tensor import dmatrix, dvector, lvector, dscalar
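
Editor's note (not part of this commit): a minimal NumPy sketch of the
semantics the tests below assume. softmax maps each row x_i to
exp(x_i) / sum_j exp(x_i[j]), so every output row sums to 1. The helper name
softmax_ref is the editor's own, not a name from this file.

    import numpy

    def softmax_ref(x):
        # Subtract the row max first: softmax is shift-invariant, so this
        # changes nothing but guards exp() against overflow.
        e = numpy.exp(x - x.max(axis=1).reshape(-1, 1))
        return e / e.sum(axis=1).reshape(-1, 1)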
class T_sigmoid(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def test_elemwise(self):
utt.verify_grad(sigmoid, [numpy.random.rand(3,4)])
utt.verify_grad(sigmoid, [numpy.random.rand(3, 4)])
class T_softplus(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def test_elemwise(self):
utt.verify_grad(softplus, [numpy.random.rand(3,4)])
utt.verify_grad(softplus, [numpy.random.rand(3, 4)])
class T_Softmax(utt.InferShapeTester):
def test0(self):
def f(a):
return softmax(a)[:,0]
utt.verify_grad(f, [numpy.random.rand(3,4)])
return softmax(a)[:, 0]
utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test1(self):
def f(a):
return softmax(a)[:,1]
utt.verify_grad(f, [numpy.random.rand(3,4)])
return softmax(a)[:, 1]
utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test2(self):
def f(a):
return softmax(a)[:,2]
utt.verify_grad(f, [numpy.random.rand(3,4)])
return softmax(a)[:, 2]
utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test3(self):
def f(a):
return softmax(a)[:,3]
utt.verify_grad(f, [numpy.random.rand(3,4)])
return softmax(a)[:, 3]
utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test_infer_shape(self):
admat = dmatrix()
@@ -71,6 +82,7 @@ class T_Softmax(utt.InferShapeTester):
xv = numpy.random.randn(6).astype(config.floatX)
assert numpy.allclose(f(xv), numpy.exp(xv) / numpy.exp(xv).sum())
def test_vector_grad(self):
def f(a):
return softmax(a)
@@ -78,46 +90,49 @@ class T_Softmax(utt.InferShapeTester):
class T_SoftmaxWithBias(utt.InferShapeTester):
def test0(self):
def f(a, b):
return softmax_with_bias(a, b)[:,0]
utt.verify_grad(f, [numpy.random.rand(3,4),
return softmax_with_bias(a, b)[:, 0]
utt.verify_grad(f, [numpy.random.rand(3, 4),
numpy.random.rand(4)])
def test1(self):
def f(a, b):
return softmax_with_bias(a, b)[:,1]
utt.verify_grad(f, [numpy.random.rand(3,4),
return softmax_with_bias(a, b)[:, 1]
utt.verify_grad(f, [numpy.random.rand(3, 4),
numpy.random.rand(4)])
def test2(self):
def f(a, b):
return softmax_with_bias(a, b)[:,2]
utt.verify_grad(f, [numpy.random.rand(3,4),
return softmax_with_bias(a, b)[:, 2]
utt.verify_grad(f, [numpy.random.rand(3, 4),
numpy.random.rand(4)])
def test3(self):
def f(a, b):
return softmax_with_bias(a, b)[:,3]
utt.verify_grad(f, [numpy.random.rand(3,4),
return softmax_with_bias(a, b)[:, 3]
utt.verify_grad(f, [numpy.random.rand(3, 4),
numpy.random.rand(4)])
def test_broadcast(self):
# Test that we do not raise an error during optimization for no good
# reason, since softmax_with_bias does not correctly support some/all
# broadcasted input patterns (see the editor's note after this test).
initial_W = numpy.asarray( [[0.1,0.1,0.1], \
[0.1,0.1,0.1], \
[0.1,0.1,0.1]], \
dtype = theano.config.floatX)
W = theano.shared(value = initial_W, name = 'W')
vbias=theano.shared(value=0.1, name='vbias') #0.01
hid=T.vector('hid')
initial_W = numpy.asarray([[0.1, 0.1, 0.1], \
[0.1, 0.1, 0.1], \
[0.1, 0.1, 0.1]], \
dtype=theano.config.floatX)
W = theano.shared(value=initial_W, name='W')
vbias = theano.shared(value=0.1, name='vbias') # 0.01
hid = T.vector('hid')
f = theano.function([hid],
T.nnet.softmax(T.dot(hid, W.T) + vbias))
ops = [node.op for node in f.maker.fgraph.toposort()]
assert softmax_with_bias not in ops
assert softmax in ops
f([0,1,0])
f([0, 1, 0])
#print f.maker.fgraph.toposort()
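
Editor's note, a hedged reading of the comment at the top of this test: vbias
is a 0-d shared scalar that broadcasts across every column of the dot product,
while softmax_with_bias expects a bias vector with one entry per column, so
the substitution would be invalid here; hence the assert that plain softmax
is used instead.

    # T.dot(hid, W.T) + vbias   -> vbias is a scalar, broadcast over 3 columns
    # softmax_with_bias(a, b)   -> b must be a 1-d vector, one entry per column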
def test_infer_shape(self):
@@ -125,13 +140,14 @@ class T_SoftmaxWithBias(utt.InferShapeTester):
advec = dvector()
admat_val = numpy.random.rand(3, 4)
advec_val = numpy.random.rand(4)
self._compile_and_check([admat, advec], [SoftmaxWithBias()(admat, advec)],
self._compile_and_check([admat, advec],
[SoftmaxWithBias()(admat, advec)],
[admat_val, advec_val], SoftmaxWithBias)
class T_SoftmaxGrad(utt.InferShapeTester):
def test_infer_shape(self):
def test_infer_shape(self):
admat = dmatrix()
bdmat = dmatrix()
admat_val = numpy.random.rand(3, 4)
@@ -140,48 +156,60 @@ class T_SoftmaxGrad(utt.InferShapeTester):
[admat_val, bdmat_val], SoftmaxGrad)
class T_CrossentropySoftmax1Hot(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def test0(self):
y_idx = [0,1,3]
y_idx = [0, 1, 3]
def f(a, b):
return crossentropy_softmax_1hot_with_bias(a, b, y_idx)[0]
utt.verify_grad(f, [numpy.random.rand(3,4),
utt.verify_grad(f, [numpy.random.rand(3, 4),
numpy.random.rand(4)])
def test1(self):
y_idx = [0,1,3]
y_idx = [0, 1, 3]
def f(a):
return crossentropy_softmax_1hot(a, y_idx)[0]
utt.verify_grad(f, [numpy.random.rand(3,4)])
utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test_vector(self):
y_idx = [3]
def f(a):
return crossentropy_softmax_1hot(T.shape_padleft(a), y_idx)[0]
utt.verify_grad(f, [numpy.random.rand(4)])
def test_vectors(self):
y_idx = [3]
def f(a, b):
return crossentropy_softmax_1hot(T.shape_padleft(a)+b, y_idx)[0]
return crossentropy_softmax_1hot(T.shape_padleft(a) + b, y_idx)[0]
utt.verify_grad(f, [numpy.random.rand(4), numpy.random.rand(4)])
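
Editor's note: what these ops compute, as a hedged sketch reusing the
softmax_ref helper from the earlier note (both names are the editor's). Per
row i, the first output is the negative log-likelihood of the integer target
y_idx[i]:

    # nll[i] = -numpy.log(softmax_ref(x + b)[i, y_idx[i]])
    # crossentropy_softmax_1hot_with_bias returns a (nll, softmax) pair, which
    # is why the tests above index [0] before calling verify_grad.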
class T_CrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester):
def test0(self):
def f(sm):
return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
numpy.random.rand(10), # Gradient w.r.t. NLL.
sm, # Softmax output.
numpy.random.randint(low=0, high=5, size=10))) # Class indices.
numpy.random.randint(low=0,
high=5, size=10))) # Class indices.
# Build a random softmax output whose rows sum to 1.
softmax_output = numpy.random.rand(10, 5)
softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
utt.verify_grad(f, [softmax_output])
def test1(self):
rng = numpy.random.RandomState(utt.fetch_seed())
softmax_output = rng.rand(10, 5)
softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
def f(dy):
return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
dy,
@@ -208,15 +236,18 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
def setUp(self):
super(T_CrossentropySoftmaxArgmax1HotWithBias, self).setUp()
self.op = theano.tensor.nnet.crossentropy_softmax_argmax_1hot_with_bias
def test0(self):
n_classes = 5
n_samples = 3
# First test gradient when getting a gradient on the NLL output.
def grad_on_nll(x, b):
return self.op(x, b, y_idx=numpy.random.randint(
low=0, high=n_classes, size=n_samples))[0]
utt.verify_grad(grad_on_nll, [numpy.random.rand(n_samples, n_classes),
numpy.random.rand(n_classes)])
# Then test gradient when getting a gradient on the softmax output.
def grad_on_softmax(x, b):
return self.op(x, b, y_idx=numpy.random.randint(
@@ -234,21 +265,21 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
advec_val = rng.rand(5)
alvec_val = rng.randint(low=0, high=5, size=3)
self._compile_and_check([admat, advec, alvec],
CrossentropySoftmaxArgmax1HotWithBias()(admat, advec, alvec),
[admat_val, advec_val, alvec_val],
CrossentropySoftmaxArgmax1HotWithBias)
CrossentropySoftmaxArgmax1HotWithBias()(admat, advec, alvec),
[admat_val, advec_val, alvec_val],
CrossentropySoftmaxArgmax1HotWithBias)
class T_prepend(utt.InferShapeTester):
def test0(self):
x=tensor.matrix('x')
y=Prepend_scalar_constant_to_each_row(4.)(x)
f=theano.function([x],[y])
m=numpy.random.rand(3,5)
x = tensor.matrix('x')
y = Prepend_scalar_constant_to_each_row(4.)(x)
f = theano.function([x], [y])
m = numpy.random.rand(3, 5)
my = f(m)
self.assertTrue(my.shape == (3, 6), my.shape)
self.assertTrue(numpy.all( my[:,0] == 4.0))
self.assertTrue(numpy.all(my[:, 0] == 4.0))
def test_infer_shape(self):
admat = dmatrix()
@@ -262,15 +293,16 @@ class T_prepend(utt.InferShapeTester):
class T_prepend(utt.InferShapeTester):
def test0(self):
"""basic functionality"""
x=tensor.matrix('x')
y=Prepend_scalar_to_each_row()(5.,x)
f=theano.function([x],y)
m=numpy.ones((3,5),dtype="float32")
x = tensor.matrix('x')
y = Prepend_scalar_to_each_row()(5., x)
f = theano.function([x], y)
m = numpy.ones((3, 5), dtype="float32")
my = f(m)
self.assertTrue(my.shape == (3, 6))
self.assertTrue(numpy.all(my[:,0] == 5.0))
self.assertTrue(numpy.all(my[:, 0] == 5.0))
def test_infer_shape(self):
admat = dmatrix()
@@ -300,24 +332,20 @@ class T_CrossentropyCategorical1HotGrad(utt.InferShapeTester):
class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
def test_grad(self):
x = tensor.matrix('x')
one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot
xe = op(x, one_of_n)
f = theano.function([x, one_of_n], xe)
x_val = numpy.asarray([[.4, .6, .0], [.1, .8, .1]],
dtype=config.floatX)
xe_val = f(x_val, [0,1])
xe_val = f(x_val, [0, 1])
assert numpy.allclose(xe_val, -numpy.log([.4, .8]))
def oplike(x):
return op(x, [0,1])
return op(x, [0, 1])
tensor.verify_grad(oplike, [x_val], rng=numpy.random)
@@ -336,7 +364,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
x = tensor.matrix('x')
one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot
xe = op(x, one_of_n)
fgraph = gof.FunctionGraph(
@@ -348,7 +375,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
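
Editor's note, a hedged sketch of the rewrite these optimization tests check.
After OPT_FAST_RUN, the two-node pattern

    crossentropy_categorical_1hot(softmax(x), one_of_n)

is collapsed into the single fused op

    crossentropy_softmax_argmax_1hot_with_bias(x, b, one_of_n)

(the zero-bias substitution is the editor's reading, not stated in this diff),
leaving only an OutputGuard on top; that is the chain the asserts walk through.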
def test_softmax_optimizations_vector(self):
x = tensor.vector('x')
@@ -362,19 +390,19 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_w_bias(self):
x = tensor.matrix('x')
b = tensor.vector('b')
one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot
xe = op(x, one_of_n)
fgraph = gof.FunctionGraph(
[x, b, one_of_n],
[op(softmax(x+b), one_of_n)])
[op(softmax(x + b), one_of_n)])
assert fgraph.outputs[0].owner.op == op
#print 'BEFORE'
@@ -394,7 +422,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
assert len(fgraph.toposort()) == 2
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_w_bias2(self):
x = tensor.matrix('x')
@@ -405,7 +434,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
fgraph = gof.FunctionGraph(
[x, b, c, one_of_n],
[op(softmax(T.add(x,b,c)), one_of_n)])
[op(softmax(T.add(x, b, c)), one_of_n)])
assert fgraph.outputs[0].owner.op == op
#print 'BEFORE'
@@ -423,7 +452,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
assert len(fgraph.toposort()) == 3
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_w_bias_vector(self):
x = tensor.vector('x')
@@ -432,7 +462,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
op = crossentropy_categorical_1hot
fgraph = gof.FunctionGraph(
[x, b, one_of_n],
[op(softmax(x+b), one_of_n)])
[op(softmax(x + b), one_of_n)])
assert fgraph.outputs[0].owner.op == op
#print 'BEFORE'
#for node in fgraph.toposort():
@@ -448,15 +478,14 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
#print '===='
assert len(fgraph.toposort()) == 3
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_grad_optimizations(self):
x = tensor.matrix('x')
one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot
xe = op(softmax(x), one_of_n)
sum_xe = tensor.sum(xe)
g_x = tensor.grad(sum_xe, x)
fgraph = gof.FunctionGraph(
@@ -474,8 +503,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
#for node in fgraph.toposort():
# print node.op, node.inputs
# the function has 9 ops because the dimshuffle and elemwise{second} aren't getting
# cleaned up as well as we'd like.
# the function has 9 ops because the dimshuffle and elemwise{second}
# aren't getting cleaned up as well as we'd like.
has_cx1hot = False
has_cx1hotdx = False
has_softmax = False
@@ -483,13 +512,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for node in fgraph.toposort():
if node.op == crossentropy_softmax_argmax_1hot_with_bias:
has_cx1hot = True
if node.op == crossentropy_softmax_1hot_with_bias_dx :
if node.op == crossentropy_softmax_1hot_with_bias_dx:
has_cx1hotdx = True
if node.op == softmax:
has_softmax = True
if node.op == softmax_grad:
has_softmaxdx = True
assert has_cx1hot
assert has_cx1hotdx
assert not has_softmax
@@ -517,8 +545,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
#for node in fgraph.toposort():
# print node.op, node.inputs
# the function has 9 ops because the dimshuffle and elemwise{second} aren't getting
# cleaned up as well as we'd like.
# the function has 9 ops because the dimshuffle and elemwise{second}
# aren't getting cleaned up as well as we'd like.
has_cx1hot = False
has_cx1hotdx = False
has_softmax = False
@@ -526,13 +554,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for node in fgraph.toposort():
if node.op == crossentropy_softmax_argmax_1hot_with_bias:
has_cx1hot = True
if node.op == crossentropy_softmax_1hot_with_bias_dx :
if node.op == crossentropy_softmax_1hot_with_bias_dx:
has_cx1hotdx = True
if node.op == softmax:
has_softmax = True
if node.op == softmax_grad:
has_softmaxdx = True
assert has_cx1hot
assert has_cx1hotdx
assert not has_softmax
@@ -547,17 +574,14 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
mode = theano.compile.mode.get_default_mode()
if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3,5)
x_val = rng.randn(3, 5)
b_val = rng.randn(5)
y_val = numpy.asarray([2,4,1])
y_val = numpy.asarray([2, 4, 1])
x = T.dmatrix('x')
b = T.dvector('b')
y = T.lvector('y')
## Basic case
expressions = [
T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
@@ -565,10 +589,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
-T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y])
]
for expr in expressions:
# Verify the optimizer worked on the expressions
f = theano.function([x,y], expr, mode=mode)
f = theano.function([x, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
@@ -579,7 +603,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise
# Also verify the gradient wrt x
g = theano.function([x,y], T.grad(expr, x), mode=mode)
g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
@@ -591,23 +615,22 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
## Test that a biased softmax is optimized correctly
bias_expressions = [
T.sum(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b+x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x+b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])]
T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions:
f = theano.function([x,b,y], expr, mode=mode)
f = theano.function([x, b, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
assert len(f.maker.fgraph.toposort()) == 2 # [big_op, sum]
assert len(f.maker.fgraph.toposort()) == 2 # [big_op, sum]
f(x_val, b_val, y_val)
except Exception:
theano.printing.debugprint(f)
raise
g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
@@ -625,7 +648,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
T.mean(-T.log(softmax(x))[T.arange(y.shape[0]), y])]
for expr in mean_expressions:
f = theano.function([x,y], expr, mode=mode)
f = theano.function([x, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
@@ -635,11 +658,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano.printing.debugprint(f)
raise
g = theano.function([x,y], T.grad(expr, x), mode=mode)
g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
assert len(g.maker.fgraph.toposort()) in (6,7) #there's an extra dimshuffle in there
assert len(g.maker.fgraph.toposort()) in (6, 7)
#there's an extra dimshuffle in there
# but I can't think of a good rule to get rid of it
g(x_val, y_val)
except Exception:
@@ -647,13 +671,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise
mean_bias_expressions = [
T.mean(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(b+x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(x+b))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])]
T.mean(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in mean_bias_expressions:
f = theano.function([x,b,y], expr, mode=mode)
f = theano.function([x, b, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
@@ -661,12 +685,11 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
except Exception:
theano.printing.debugprint(f)
raise
g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
assert len(g.maker.fgraph.toposort()) in (6,7)
assert len(g.maker.fgraph.toposort()) in (6, 7)
g(x_val, b_val, y_val)
except Exception:
theano.printing.debugprint(g)
@@ -678,15 +701,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3,5)
x_val = rng.randn(3, 5)
b_val = rng.randn(5)
y_val = numpy.asarray([2,4,1], dtype='int64')
y_val = numpy.asarray([2, 4, 1], dtype='int64')
x = T.dmatrix('x')
b = T.dvector('b')
y = T.lvector('y')
yi = T.cast(y, 'int32')
expressions = [
T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
-T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
@@ -696,7 +717,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for expr in expressions:
# Verify the optimizer worked on the expressions
f = theano.function([x,y], expr, mode=mode)
f = theano.function([x, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
@@ -707,7 +728,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise
# Also verify the gradient wrt x
g = theano.function([x,y], T.grad(expr, x), mode=mode)
g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
@@ -717,7 +738,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano.printing.debugprint(g)
raise
def test_optimize_xent_vector(self):
verbose = 0
mode = theano.compile.mode.get_default_mode()
@@ -743,8 +763,9 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
-T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))]
for expr in bias_expressions:
f = theano.function([x,y], expr, mode=mode)
if verbose: print_graph(f)
f = theano.function([x, y], expr, mode=mode)
if verbose:
print_graph(f)
try:
prev, last = f.maker.fgraph.toposort()[-2:]
assert len(f.maker.fgraph.toposort()) == 5
@@ -752,7 +773,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
except Exception:
theano.printing.debugprint(f)
raise
g = theano.function([x,y], T.grad(expr, x), mode=mode)
print_graph(g)
try:
@@ -789,17 +809,19 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
## Test that a biased softmax is optimized correctly
bias_expressions = [
T.sum(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b+x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x+b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])]
T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions:
f = theano.function([x,b,y], expr, mode=mode)
if verbose: print_graph(f)
f = theano.function([x, b, y], expr, mode=mode)
if verbose:
print_graph(f)
try:
prev, last = f.maker.fgraph.toposort()[-2:]
assert len(f.maker.fgraph.toposort()) == 3 # [big_op, sum, dim_shuffle]
assert len(f.maker.fgraph.toposort()) == 3
# [big_op, sum, dim_shuffle]
f(x_val, b_val, y_val)
except Exception:
theano.printing.debugprint(f)
@@ -808,7 +830,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False
try:
g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
finally:
config.warn.sum_div_dimshuffle_bug = backup
@@ -830,13 +852,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
mode = theano.compile.mode.get_default_mode()
if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3,5)
x_val = rng.randn(3, 5)
b_val = rng.randn(5)
y_val = numpy.asarray([2,4,1])
y_val = numpy.asarray([2, 4, 1])
x = T.dmatrix('x')
b = T.dvector('b')
y = T.lvector('y')
@@ -878,7 +897,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
assert has_softmax
assert not has_softmaxdx
## Cases to test
expressions = [
a * T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
@@ -904,7 +922,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for expr in expressions:
# Verify the optimizer worked on the expressions
f = theano.function([x,y,a], expr, mode=mode)
f = theano.function([x, y, a], expr, mode=mode)
try:
assert 5 <= len(f.maker.fgraph.toposort()) <= 10
validate_fn_graph(f)
@@ -914,7 +932,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise
# Verify the gradient wrt x
g = theano.function([x,y,a], T.grad(expr, x), mode=mode)
g = theano.function([x, y, a], T.grad(expr, x), mode=mode)
try:
assert 5 <= len(g.maker.fgraph.toposort()) <= 12
validate_grad_graph(g)
@@ -924,7 +942,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise
# Verify the gradient when providing output gradient
h = theano.function([x,y,a], T.grad(expr, x, g_cost=a*x.sum()), mode=mode)
h = theano.function([x, y, a],
T.grad(expr, x, g_cost=a * x.sum()), mode=mode)
try:
assert 8 <= len(h.maker.fgraph.toposort()) <= 17
validate_grad_graph(h)
@@ -944,14 +963,13 @@ def test_argmax_pushdown():
fgraph = gof.FunctionGraph(
[x],
[out])
theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
#print 'AFTER'
#for node in fgraph.toposort():
#print node.op
assert len(fgraph.toposort()) == 2 # an output_guard is second
assert len(fgraph.toposort()) == 2 # an output_guard is second
assert fgraph.toposort()[0].op == tensor.basic._max_and_argmax
assert str(fgraph.toposort()[1].op) == 'OutputGuard'
x = tensor.dmatrix()
@@ -981,14 +999,13 @@ def test_argmax_pushdown():
assert isinstance(fgraph.toposort()[2].op.scalar_op, theano.scalar.Maximum)
assert str(fgraph.toposort()[3].op) == 'OutputGuard'
def test_argmax_pushdown_bias():
x = tensor.dmatrix()
b = tensor.dvector()
out = tensor.argmax(softmax_with_bias(x, b), axis=-1)
fgraph = gof.FunctionGraph(
[x,b],
[x, b],
[out])
theano.compile.mode.optdb.query(
@@ -1005,10 +1022,9 @@ def test_argmax_pushdown_bias():
x = tensor.dmatrix()
b = tensor.dvector()
out = tensor.max_and_argmax(softmax_with_bias(x, b), axis=-1)[0]
fgraph = gof.FunctionGraph(
[x,b],
[x, b],
[out])
backup = config.warn.argmax_pushdown_bug
@@ -1028,13 +1044,15 @@ def test_argmax_pushdown_bias():
assert isinstance(fgraph.toposort()[1].op.scalar_op, theano.scalar.Maximum)
assert str(fgraph.toposort()[2].op) == 'OutputGuard'
def test_asymptotic_32():
"""
This test makes sure that our functions behave sensibly when huge values are present
This test makes sure that our functions behave sensibly when
huge values are present
"""
#TODO: consider adding the optimization of crossentropy into the current mode for the
# purpose of running this test
#TODO: consider adding the optimization of crossentropy into the current
# mode for the purpose of running this test
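    # Editor's note (hedged, none of this text is in the diff): why huge
    # values are survivable at all. -log(softmax(x)[y]) == logsumexp(x) - x[y],
    # and logsumexp is numerically stable once the row max m is factored out:
    #     m = x.max()
    #     lse = m + numpy.log(numpy.exp(x - m).sum())
    # so even very large entries of x yield finite costs and gradients.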
for dtype in 'float32', 'float64':
if dtype == 'float32':
@@ -1045,20 +1063,21 @@ def test_asymptotic_32():
x2 = tensor.dvector()
y = tensor.lvector()
c = categorical_crossentropy(softmax(x+x2), y)
f = theano.function([x,y,x2], [c.sum(), tensor.grad(c.sum(), x)], mode='FAST_RUN')
c = categorical_crossentropy(softmax(x + x2), y)
f = theano.function([x, y, x2], [c.sum(),
tensor.grad(c.sum(), x)], mode='FAST_RUN')
if 0:
for i, n in enumerate( f.maker.fgraph.toposort()):
for i, n in enumerate(f.maker.fgraph.toposort()):
print i, n
xval = numpy.zeros((5, 5), dtype=dtype)
x2val = numpy.zeros(5, dtype=xval.dtype)
for i in xrange(100):
cval, gxval = f(xval, numpy.arange(5), x2val)
cval, gxval = f(xval, numpy.arange(5), x2val)
xval -= 100.3 * gxval
#print cval, gxval
assert cval == 0 # no problem going to zero error
assert cval == 0 # no problem going to zero error
#what about when x gets really big?
@@ -1066,56 +1085,55 @@ def test_asymptotic_32():
x2val = numpy.zeros(5, dtype=xval.dtype)
for i in xrange(100):
cval, gxval = f(xval, numpy.arange(5), x2val)
cval, gxval = f(xval, numpy.arange(5), x2val)
xval += 100000.3 * gxval
#print cval, gxval
assert cval > 61750000
assert gxval[0,0] == -1.0
assert gxval[0,1] == 0.25
assert gxval[0, 0] == -1.0
assert gxval[0, 1] == 0.25
class Test_softmax_opt:
# Test that expressions of softmax in terms of exponentiated things divided by row sums
# are replaced by softmax expressions.
# Test that expressions of softmax in terms of exponentiated things
# divided by row sums are replaced by softmax expressions.
#
# Softmax_grad isn't that interesting as an Op, but it's the signature we look for when
# trying to insert CrossEntropySoftmax... grad. So for now, we add softmax_grad to graphs.
# In future, we may modify the CrossEntropySoftmax...grad to look for the more basic
# pattern.
# Softmax_grad isn't that interesting as an Op, but it has the signature
# we look for when trying to insert CrossEntropySoftmax... grad. So for now,
# we add softmax_grad to graphs. In the future, we may modify the
# CrossEntropySoftmax...grad to look for the more basic pattern.
#
def setUp(self):
utt.seed_rng()
self.rng = numpy.random.RandomState(utt.fetch_seed())
self.mode=theano.compile.mode.get_default_mode()
self.mode=self.mode.including('canonicalize')
self.mode = theano.compile.mode.get_default_mode()
self.mode = self.mode.including('canonicalize')
def test_basic(self):
c = T.matrix()
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0,'x')
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0, 'x')
# test that function contains softmax and no div.
f = theano.function([c],p_y, mode=self.mode)
f = theano.function([c], p_y, mode=self.mode)
f_ops = [n.op for n in f.maker.fgraph.toposort()]
#print '--- f ='
#printing.debugprint(f)
#print '==='
assert len(f_ops) == 1
assert softmax in f_ops
f(self.rng.rand(3,4).astype(config.floatX))
f(self.rng.rand(3, 4).astype(config.floatX))
def test_grad(self):
c = T.matrix()
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0,'x')
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0, 'x')
# test that function contains softmax and softmaxgrad
w = T.matrix()
backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False
try:
g = theano.function([c,w],T.grad((p_y*w).sum(), c))
g = theano.function([c, w], T.grad((p_y * w).sum(), c))
finally:
config.warn.sum_div_dimshuffle_bug = backup
g_ops = [n.op for n in g.maker.fgraph.toposort()]
@@ -1127,7 +1145,7 @@ class Test_softmax_opt:
assert len(g_ops) == 2
assert softmax in g_ops
assert softmax_grad in g_ops
g(self.rng.rand(3,4), self.rng.uniform(.5, 1, (3,4)))
g(self.rng.rand(3, 4), self.rng.uniform(.5, 1, (3, 4)))
def test_transpose_basic(self):
# this should be a transposed softmax
@@ -1135,14 +1153,14 @@ class Test_softmax_opt:
p_y = T.exp(c) / T.exp(c).sum(axis=0)
# test that function contains softmax and no div.
f = theano.function([c],p_y)
f = theano.function([c], p_y)
#printing.debugprint(f)
# test that function contains softmax and no div.
backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False
try:
g = theano.function([c],T.grad(p_y.sum(), c))
g = theano.function([c], T.grad(p_y.sum(), c))
finally:
config.warn.sum_div_dimshuffle_bug = backup
#printing.debugprint(g)
@@ -1169,15 +1187,5 @@ class Test_softmax_opt:
# REPEAT 3 CASES in presence of log(softmax) with the advanced indexing etc.
#if __name__ == '__main__':
# unittest.main()
if __name__ == '__main__':
t = T_CrossentropyCategorical1HotGrad('setUp')
t.setUp()
t.test_infer_shape()
unittest.main()