提交 fe026b97 作者: Eric Larsen 提交者: Frederic

test_nnet.py: layout cleanup

上级 9c66685f
...@@ -22,42 +22,53 @@ from theano.tensor.nnet import (categorical_crossentropy, ...@@ -22,42 +22,53 @@ from theano.tensor.nnet import (categorical_crossentropy,
CrossentropyCategorical1Hot, CrossentropyCategorical1Hot,
CrossentropyCategorical1HotGrad, CrossentropyCategorical1HotGrad,
sigmoid, softplus, sigmoid, softplus,
Softmax, softmax, SoftmaxWithBias, softmax_grad, Softmax, softmax, SoftmaxWithBias,
softmax_grad,
softmax_with_bias, SoftmaxGrad, softmax_with_bias, SoftmaxGrad,
Prepend_scalar_constant_to_each_row, Prepend_scalar_constant_to_each_row,
Prepend_scalar_to_each_row) Prepend_scalar_to_each_row)
from theano.tensor import dmatrix, dvector, lvector, dscalar from theano.tensor import dmatrix, dvector, lvector, dscalar
class T_sigmoid(unittest.TestCase): class T_sigmoid(unittest.TestCase):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
def test_elemwise(self): def test_elemwise(self):
utt.verify_grad(sigmoid, [numpy.random.rand(3,4)]) utt.verify_grad(sigmoid, [numpy.random.rand(3, 4)])
class T_softplus(unittest.TestCase): class T_softplus(unittest.TestCase):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
def test_elemwise(self): def test_elemwise(self):
utt.verify_grad(softplus, [numpy.random.rand(3,4)]) utt.verify_grad(softplus, [numpy.random.rand(3, 4)])
class T_Softmax(utt.InferShapeTester): class T_Softmax(utt.InferShapeTester):
def test0(self): def test0(self):
def f(a): def f(a):
return softmax(a)[:,0] return softmax(a)[:, 0]
utt.verify_grad(f, [numpy.random.rand(3,4)]) utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test1(self): def test1(self):
def f(a): def f(a):
return softmax(a)[:,1] return softmax(a)[:, 1]
utt.verify_grad(f, [numpy.random.rand(3,4)]) utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test2(self): def test2(self):
def f(a): def f(a):
return softmax(a)[:,2] return softmax(a)[:, 2]
utt.verify_grad(f, [numpy.random.rand(3,4)]) utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test3(self): def test3(self):
def f(a): def f(a):
return softmax(a)[:,3] return softmax(a)[:, 3]
utt.verify_grad(f, [numpy.random.rand(3,4)]) utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test_infer_shape(self): def test_infer_shape(self):
admat = dmatrix() admat = dmatrix()
...@@ -71,6 +82,7 @@ class T_Softmax(utt.InferShapeTester): ...@@ -71,6 +82,7 @@ class T_Softmax(utt.InferShapeTester):
xv = numpy.random.randn(6).astype(config.floatX) xv = numpy.random.randn(6).astype(config.floatX)
assert numpy.allclose(f(xv), numpy.exp(xv) / numpy.exp(xv).sum()) assert numpy.allclose(f(xv), numpy.exp(xv) / numpy.exp(xv).sum())
def test_vector_grad(self): def test_vector_grad(self):
def f(a): def f(a):
return softmax(a) return softmax(a)
...@@ -81,43 +93,46 @@ class T_SoftmaxWithBias(utt.InferShapeTester): ...@@ -81,43 +93,46 @@ class T_SoftmaxWithBias(utt.InferShapeTester):
def test0(self): def test0(self):
def f(a, b): def f(a, b):
return softmax_with_bias(a, b)[:,0] return softmax_with_bias(a, b)[:, 0]
utt.verify_grad(f, [numpy.random.rand(3,4), utt.verify_grad(f, [numpy.random.rand(3, 4),
numpy.random.rand(4)]) numpy.random.rand(4)])
def test1(self): def test1(self):
def f(a, b): def f(a, b):
return softmax_with_bias(a, b)[:,1] return softmax_with_bias(a, b)[:, 1]
utt.verify_grad(f, [numpy.random.rand(3,4), utt.verify_grad(f, [numpy.random.rand(3, 4),
numpy.random.rand(4)]) numpy.random.rand(4)])
def test2(self): def test2(self):
def f(a, b): def f(a, b):
return softmax_with_bias(a, b)[:,2] return softmax_with_bias(a, b)[:, 2]
utt.verify_grad(f, [numpy.random.rand(3,4), utt.verify_grad(f, [numpy.random.rand(3, 4),
numpy.random.rand(4)]) numpy.random.rand(4)])
def test3(self): def test3(self):
def f(a, b): def f(a, b):
return softmax_with_bias(a, b)[:,3] return softmax_with_bias(a, b)[:, 3]
utt.verify_grad(f, [numpy.random.rand(3,4), utt.verify_grad(f, [numpy.random.rand(3, 4),
numpy.random.rand(4)]) numpy.random.rand(4)])
def test_broadcast(self): def test_broadcast(self):
#test that we don't raise an error during optimization for no good #test that we don't raise an error during optimization for no good
#reason, as softmax_with_bias doesn't correctly support some/all #reason, as softmax_with_bias doesn't correctly support some/all
#broadcasted input patterns #broadcasted input patterns
initial_W = numpy.asarray( [[0.1,0.1,0.1], \ initial_W = numpy.asarray([[0.1, 0.1, 0.1], \
[0.1,0.1,0.1], \ [0.1, 0.1, 0.1], \
[0.1,0.1,0.1]], \ [0.1, 0.1, 0.1]], \
dtype = theano.config.floatX) dtype=theano.config.floatX)
W = theano.shared(value = initial_W, name = 'W') W = theano.shared(value=initial_W, name='W')
vbias=theano.shared(value=0.1, name='vbias') #0.01 vbias = theano.shared(value=0.1, name='vbias') # 0.01
hid=T.vector('hid') hid = T.vector('hid')
f = theano.function([hid], f = theano.function([hid],
T.nnet.softmax(T.dot(hid, W.T) + vbias)) T.nnet.softmax(T.dot(hid, W.T) + vbias))
ops = [node.op for node in f.maker.fgraph.toposort()] ops = [node.op for node in f.maker.fgraph.toposort()]
assert softmax_with_bias not in ops assert softmax_with_bias not in ops
assert softmax in ops assert softmax in ops
f([0,1,0]) f([0, 1, 0])
#print f.maker.fgraph.toposort() #print f.maker.fgraph.toposort()
def test_infer_shape(self): def test_infer_shape(self):
...@@ -125,13 +140,14 @@ class T_SoftmaxWithBias(utt.InferShapeTester): ...@@ -125,13 +140,14 @@ class T_SoftmaxWithBias(utt.InferShapeTester):
advec = dvector() advec = dvector()
admat_val = numpy.random.rand(3, 4) admat_val = numpy.random.rand(3, 4)
advec_val = numpy.random.rand(4) advec_val = numpy.random.rand(4)
self._compile_and_check([admat, advec], [SoftmaxWithBias()(admat, advec)], self._compile_and_check([admat, advec],
[SoftmaxWithBias()(admat, advec)],
[admat_val, advec_val], SoftmaxWithBias) [admat_val, advec_val], SoftmaxWithBias)
class T_SoftmaxGrad(utt.InferShapeTester): class T_SoftmaxGrad(utt.InferShapeTester):
def test_infer_shape(self):
def test_infer_shape(self):
admat = dmatrix() admat = dmatrix()
bdmat = dmatrix() bdmat = dmatrix()
admat_val = numpy.random.rand(3, 4) admat_val = numpy.random.rand(3, 4)
...@@ -140,32 +156,41 @@ class T_SoftmaxGrad(utt.InferShapeTester): ...@@ -140,32 +156,41 @@ class T_SoftmaxGrad(utt.InferShapeTester):
[admat_val, bdmat_val], SoftmaxGrad) [admat_val, bdmat_val], SoftmaxGrad)
class T_CrossentropySoftmax1Hot(unittest.TestCase): class T_CrossentropySoftmax1Hot(unittest.TestCase):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
def test0(self): def test0(self):
y_idx = [0,1,3] y_idx = [0, 1, 3]
def f(a, b): def f(a, b):
return crossentropy_softmax_1hot_with_bias(a, b, y_idx)[0] return crossentropy_softmax_1hot_with_bias(a, b, y_idx)[0]
utt.verify_grad(f, [numpy.random.rand(3,4), utt.verify_grad(f, [numpy.random.rand(3, 4),
numpy.random.rand(4)]) numpy.random.rand(4)])
def test1(self): def test1(self):
y_idx = [0,1,3] y_idx = [0, 1, 3]
def f(a): def f(a):
return crossentropy_softmax_1hot(a, y_idx)[0] return crossentropy_softmax_1hot(a, y_idx)[0]
utt.verify_grad(f, [numpy.random.rand(3,4)]) utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test_vector(self): def test_vector(self):
y_idx = [3] y_idx = [3]
def f(a): def f(a):
return crossentropy_softmax_1hot(T.shape_padleft(a), y_idx)[0] return crossentropy_softmax_1hot(T.shape_padleft(a), y_idx)[0]
utt.verify_grad(f, [numpy.random.rand(4)]) utt.verify_grad(f, [numpy.random.rand(4)])
def test_vectors(self): def test_vectors(self):
y_idx = [3] y_idx = [3]
def f(a, b): def f(a, b):
return crossentropy_softmax_1hot(T.shape_padleft(a)+b, y_idx)[0] return crossentropy_softmax_1hot(T.shape_padleft(a) + b, y_idx)[0]
utt.verify_grad(f, [numpy.random.rand(4), numpy.random.rand(4)]) utt.verify_grad(f, [numpy.random.rand(4), numpy.random.rand(4)])
class T_CrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester): class T_CrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester):
def test0(self): def test0(self):
...@@ -173,15 +198,18 @@ class T_CrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester): ...@@ -173,15 +198,18 @@ class T_CrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester):
return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx( return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
numpy.random.rand(10), # Gradient w.r.t. NLL. numpy.random.rand(10), # Gradient w.r.t. NLL.
sm, # Softmax output. sm, # Softmax output.
numpy.random.randint(low=0, high=5, size=10))) # Class indices. numpy.random.randint(low=0,
high=5, size=10))) # Class indices.
# Build a random softmax output whose rows sum to 1. # Build a random softmax output whose rows sum to 1.
softmax_output = numpy.random.rand(10, 5) softmax_output = numpy.random.rand(10, 5)
softmax_output /= softmax_output.sum(axis=1).reshape(10, 1) softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
utt.verify_grad(f, [softmax_output]) utt.verify_grad(f, [softmax_output])
def test1(self): def test1(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
softmax_output = rng.rand(10, 5) softmax_output = rng.rand(10, 5)
softmax_output /= softmax_output.sum(axis=1).reshape(10, 1) softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
def f(dy): def f(dy):
return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx( return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
dy, dy,
...@@ -208,15 +236,18 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester): ...@@ -208,15 +236,18 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
def setUp(self): def setUp(self):
super(T_CrossentropySoftmaxArgmax1HotWithBias, self).setUp() super(T_CrossentropySoftmaxArgmax1HotWithBias, self).setUp()
self.op = theano.tensor.nnet.crossentropy_softmax_argmax_1hot_with_bias self.op = theano.tensor.nnet.crossentropy_softmax_argmax_1hot_with_bias
def test0(self): def test0(self):
n_classes = 5 n_classes = 5
n_samples = 3 n_samples = 3
# First test gradient when getting a gradient on the NLL output. # First test gradient when getting a gradient on the NLL output.
def grad_on_nll(x, b): def grad_on_nll(x, b):
return self.op(x, b, y_idx=numpy.random.randint( return self.op(x, b, y_idx=numpy.random.randint(
low=0, high=n_classes, size=n_samples))[0] low=0, high=n_classes, size=n_samples))[0]
utt.verify_grad(grad_on_nll, [numpy.random.rand(n_samples, n_classes), utt.verify_grad(grad_on_nll, [numpy.random.rand(n_samples, n_classes),
numpy.random.rand(n_classes)]) numpy.random.rand(n_classes)])
# Then test gradient when getting a gradient on the softmax output. # Then test gradient when getting a gradient on the softmax output.
def grad_on_softmax(x, b): def grad_on_softmax(x, b):
return self.op(x, b, y_idx=numpy.random.randint( return self.op(x, b, y_idx=numpy.random.randint(
...@@ -240,15 +271,15 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester): ...@@ -240,15 +271,15 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
class T_prepend(utt.InferShapeTester): class T_prepend(utt.InferShapeTester):
def test0(self):
x=tensor.matrix('x') def test0(self):
y=Prepend_scalar_constant_to_each_row(4.)(x) x = tensor.matrix('x')
f=theano.function([x],[y]) y = Prepend_scalar_constant_to_each_row(4.)(x)
m=numpy.random.rand(3,5) f = theano.function([x], [y])
m = numpy.random.rand(3, 5)
my = f(m) my = f(m)
self.assertTrue(my.shape == (3, 6), my.shape) self.assertTrue(my.shape == (3, 6), my.shape)
self.assertTrue(numpy.all( my[:,0] == 4.0)) self.assertTrue(numpy.all(my[:, 0] == 4.0))
def test_infer_shape(self): def test_infer_shape(self):
admat = dmatrix() admat = dmatrix()
...@@ -262,15 +293,16 @@ class T_prepend(utt.InferShapeTester): ...@@ -262,15 +293,16 @@ class T_prepend(utt.InferShapeTester):
class T_prepend(utt.InferShapeTester): class T_prepend(utt.InferShapeTester):
def test0(self): def test0(self):
"""basic functionality""" """basic functionality"""
x=tensor.matrix('x') x = tensor.matrix('x')
y=Prepend_scalar_to_each_row()(5.,x) y = Prepend_scalar_to_each_row()(5., x)
f=theano.function([x],y) f = theano.function([x], y)
m=numpy.ones((3,5),dtype="float32") m = numpy.ones((3, 5), dtype="float32")
my = f(m) my = f(m)
self.assertTrue(my.shape == (3, 6)) self.assertTrue(my.shape == (3, 6))
self.assertTrue(numpy.all(my[:,0] == 5.0)) self.assertTrue(numpy.all(my[:, 0] == 5.0))
def test_infer_shape(self): def test_infer_shape(self):
admat = dmatrix() admat = dmatrix()
...@@ -300,24 +332,20 @@ class T_CrossentropyCategorical1HotGrad(utt.InferShapeTester): ...@@ -300,24 +332,20 @@ class T_CrossentropyCategorical1HotGrad(utt.InferShapeTester):
class T_CrossentropyCategorical1Hot(utt.InferShapeTester): class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
def test_grad(self): def test_grad(self):
x = tensor.matrix('x') x = tensor.matrix('x')
one_of_n = tensor.lvector('one_of_n') one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot op = crossentropy_categorical_1hot
xe = op(x, one_of_n) xe = op(x, one_of_n)
f = theano.function([x, one_of_n], xe) f = theano.function([x, one_of_n], xe)
x_val = numpy.asarray([[.4, .6, .0], [.1, .8, .1]], x_val = numpy.asarray([[.4, .6, .0], [.1, .8, .1]],
dtype=config.floatX) dtype=config.floatX)
xe_val = f(x_val, [0,1]) xe_val = f(x_val, [0, 1])
assert numpy.allclose(xe_val, -numpy.log([.4, .8])) assert numpy.allclose(xe_val, -numpy.log([.4, .8]))
def oplike(x): def oplike(x):
return op(x, [0,1]) return op(x, [0, 1])
tensor.verify_grad(oplike, [x_val], rng=numpy.random) tensor.verify_grad(oplike, [x_val], rng=numpy.random)
...@@ -336,7 +364,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -336,7 +364,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
x = tensor.matrix('x') x = tensor.matrix('x')
one_of_n = tensor.lvector('one_of_n') one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot op = crossentropy_categorical_1hot
xe = op(x, one_of_n) xe = op(x, one_of_n)
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
...@@ -348,7 +375,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -348,7 +375,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano.compile.mode.OPT_FAST_RUN).optimize(fgraph) theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard' assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_vector(self): def test_softmax_optimizations_vector(self):
x = tensor.vector('x') x = tensor.vector('x')
...@@ -362,19 +390,19 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -362,19 +390,19 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano.compile.mode.optdb.query( theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(fgraph) theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard' assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_w_bias(self): def test_softmax_optimizations_w_bias(self):
x = tensor.matrix('x') x = tensor.matrix('x')
b = tensor.vector('b') b = tensor.vector('b')
one_of_n = tensor.lvector('one_of_n') one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot op = crossentropy_categorical_1hot
xe = op(x, one_of_n) xe = op(x, one_of_n)
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x, b, one_of_n], [x, b, one_of_n],
[op(softmax(x+b), one_of_n)]) [op(softmax(x + b), one_of_n)])
assert fgraph.outputs[0].owner.op == op assert fgraph.outputs[0].owner.op == op
#print 'BEFORE' #print 'BEFORE'
...@@ -394,7 +422,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -394,7 +422,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
assert len(fgraph.toposort()) == 2 assert len(fgraph.toposort()) == 2
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard' assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_w_bias2(self): def test_softmax_optimizations_w_bias2(self):
x = tensor.matrix('x') x = tensor.matrix('x')
...@@ -405,7 +434,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -405,7 +434,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x, b, c, one_of_n], [x, b, c, one_of_n],
[op(softmax(T.add(x,b,c)), one_of_n)]) [op(softmax(T.add(x, b, c)), one_of_n)])
assert fgraph.outputs[0].owner.op == op assert fgraph.outputs[0].owner.op == op
#print 'BEFORE' #print 'BEFORE'
...@@ -423,7 +452,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -423,7 +452,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
assert len(fgraph.toposort()) == 3 assert len(fgraph.toposort()) == 3
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard' assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_w_bias_vector(self): def test_softmax_optimizations_w_bias_vector(self):
x = tensor.vector('x') x = tensor.vector('x')
...@@ -432,7 +462,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -432,7 +462,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
op = crossentropy_categorical_1hot op = crossentropy_categorical_1hot
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x, b, one_of_n], [x, b, one_of_n],
[op(softmax(x+b), one_of_n)]) [op(softmax(x + b), one_of_n)])
assert fgraph.outputs[0].owner.op == op assert fgraph.outputs[0].owner.op == op
#print 'BEFORE' #print 'BEFORE'
#for node in fgraph.toposort(): #for node in fgraph.toposort():
...@@ -448,15 +478,14 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -448,15 +478,14 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
#print '====' #print '===='
assert len(fgraph.toposort()) == 3 assert len(fgraph.toposort()) == 3
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard' assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_grad_optimizations(self): def test_softmax_grad_optimizations(self):
x = tensor.matrix('x') x = tensor.matrix('x')
one_of_n = tensor.lvector('one_of_n') one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot op = crossentropy_categorical_1hot
xe = op(softmax(x), one_of_n) xe = op(softmax(x), one_of_n)
sum_xe = tensor.sum(xe) sum_xe = tensor.sum(xe)
g_x = tensor.grad(sum_xe, x) g_x = tensor.grad(sum_xe, x)
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
...@@ -474,8 +503,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -474,8 +503,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
#for node in fgraph.toposort(): #for node in fgraph.toposort():
# print node.op, node.inputs # print node.op, node.inputs
# the function has 9 ops because the dimshuffle and elemwise{second} aren't getting # the function has 9 ops because the dimshuffle and elemwise{second}
# cleaned up as well as we'd like. # aren't getting cleaned up as well as we'd like.
has_cx1hot = False has_cx1hot = False
has_cx1hotdx = False has_cx1hotdx = False
has_softmax = False has_softmax = False
...@@ -483,13 +512,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -483,13 +512,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for node in fgraph.toposort(): for node in fgraph.toposort():
if node.op == crossentropy_softmax_argmax_1hot_with_bias: if node.op == crossentropy_softmax_argmax_1hot_with_bias:
has_cx1hot = True has_cx1hot = True
if node.op == crossentropy_softmax_1hot_with_bias_dx : if node.op == crossentropy_softmax_1hot_with_bias_dx:
has_cx1hotdx = True has_cx1hotdx = True
if node.op == softmax: if node.op == softmax:
has_softmax = True has_softmax = True
if node.op == softmax_grad: if node.op == softmax_grad:
has_softmaxdx = True has_softmaxdx = True
assert has_cx1hot assert has_cx1hot
assert has_cx1hotdx assert has_cx1hotdx
assert not has_softmax assert not has_softmax
...@@ -517,8 +545,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -517,8 +545,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
#for node in fgraph.toposort(): #for node in fgraph.toposort():
# print node.op, node.inputs # print node.op, node.inputs
# the function has 9 ops because the dimshuffle and elemwise{second} aren't getting # the function has 9 ops because the dimshuffle and elemwise{second}
# cleaned up as well as we'd like. # aren't getting cleaned up as well as we'd like.
has_cx1hot = False has_cx1hot = False
has_cx1hotdx = False has_cx1hotdx = False
has_softmax = False has_softmax = False
...@@ -526,13 +554,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -526,13 +554,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for node in fgraph.toposort(): for node in fgraph.toposort():
if node.op == crossentropy_softmax_argmax_1hot_with_bias: if node.op == crossentropy_softmax_argmax_1hot_with_bias:
has_cx1hot = True has_cx1hot = True
if node.op == crossentropy_softmax_1hot_with_bias_dx : if node.op == crossentropy_softmax_1hot_with_bias_dx:
has_cx1hotdx = True has_cx1hotdx = True
if node.op == softmax: if node.op == softmax:
has_softmax = True has_softmax = True
if node.op == softmax_grad: if node.op == softmax_grad:
has_softmaxdx = True has_softmaxdx = True
assert has_cx1hot assert has_cx1hot
assert has_cx1hotdx assert has_cx1hotdx
assert not has_softmax assert not has_softmax
...@@ -547,13 +574,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -547,13 +574,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
mode = theano.compile.mode.get_default_mode() mode = theano.compile.mode.get_default_mode()
if mode == theano.compile.mode.get_mode('FAST_COMPILE'): if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN' mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3, 5)
x_val = rng.randn(3,5)
b_val = rng.randn(5) b_val = rng.randn(5)
y_val = numpy.asarray([2,4,1]) y_val = numpy.asarray([2, 4, 1])
x = T.dmatrix('x') x = T.dmatrix('x')
b = T.dvector('b') b = T.dvector('b')
y = T.lvector('y') y = T.lvector('y')
...@@ -565,10 +589,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -565,10 +589,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
-T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y]) T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y])
] ]
for expr in expressions: for expr in expressions:
# Verify the optimizer worked on the expressions # Verify the optimizer worked on the expressions
f = theano.function([x,y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
if verbose: if verbose:
theano.printing.debugprint(f) theano.printing.debugprint(f)
try: try:
...@@ -579,7 +603,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -579,7 +603,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise raise
# Also verify the gradient wrt x # Also verify the gradient wrt x
g = theano.function([x,y], T.grad(expr, x), mode=mode) g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose: if verbose:
theano.printing.debugprint(g) theano.printing.debugprint(g)
try: try:
...@@ -591,13 +615,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -591,13 +615,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
## Test that a biased softmax is optimized correctly ## Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b+x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x+b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x,b,y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
if verbose: if verbose:
theano.printing.debugprint(f) theano.printing.debugprint(f)
try: try:
...@@ -606,8 +630,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -606,8 +630,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
except Exception: except Exception:
theano.printing.debugprint(f) theano.printing.debugprint(f)
raise raise
g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
if verbose: if verbose:
theano.printing.debugprint(g) theano.printing.debugprint(g)
try: try:
...@@ -625,7 +648,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -625,7 +648,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
T.mean(-T.log(softmax(x))[T.arange(y.shape[0]), y])] T.mean(-T.log(softmax(x))[T.arange(y.shape[0]), y])]
for expr in mean_expressions: for expr in mean_expressions:
f = theano.function([x,y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
if verbose: if verbose:
theano.printing.debugprint(f) theano.printing.debugprint(f)
try: try:
...@@ -635,11 +658,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -635,11 +658,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano.printing.debugprint(f) theano.printing.debugprint(f)
raise raise
g = theano.function([x,y], T.grad(expr, x), mode=mode) g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose: if verbose:
theano.printing.debugprint(g) theano.printing.debugprint(g)
try: try:
assert len(g.maker.fgraph.toposort()) in (6,7) #there's an extra dimshuffle in there assert len(g.maker.fgraph.toposort()) in (6, 7)
#there's an extra dimshuffle in there
# but I can't think of a good rule to get rid of it # but I can't think of a good rule to get rid of it
g(x_val, y_val) g(x_val, y_val)
except Exception: except Exception:
...@@ -647,13 +671,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -647,13 +671,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise raise
mean_bias_expressions = [ mean_bias_expressions = [
T.mean(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])), T.mean(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(b+x)[T.arange(y.shape[0]), y])), -T.mean(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(x+b))[T.arange(y.shape[0]), y]), -T.mean(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])] T.mean(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in mean_bias_expressions: for expr in mean_bias_expressions:
f = theano.function([x,b,y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
if verbose: if verbose:
theano.printing.debugprint(f) theano.printing.debugprint(f)
try: try:
...@@ -661,12 +685,11 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -661,12 +685,11 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
except Exception: except Exception:
theano.printing.debugprint(f) theano.printing.debugprint(f)
raise raise
g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
if verbose: if verbose:
theano.printing.debugprint(g) theano.printing.debugprint(g)
try: try:
assert len(g.maker.fgraph.toposort()) in (6,7) assert len(g.maker.fgraph.toposort()) in (6, 7)
g(x_val, b_val, y_val) g(x_val, b_val, y_val)
except Exception: except Exception:
theano.printing.debugprint(g) theano.printing.debugprint(g)
...@@ -678,15 +701,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -678,15 +701,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
if mode == theano.compile.mode.get_mode('FAST_COMPILE'): if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN' mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3,5) x_val = rng.randn(3, 5)
b_val = rng.randn(5) b_val = rng.randn(5)
y_val = numpy.asarray([2,4,1], dtype='int64') y_val = numpy.asarray([2, 4, 1], dtype='int64')
x = T.dmatrix('x') x = T.dmatrix('x')
b = T.dvector('b') b = T.dvector('b')
y = T.lvector('y') y = T.lvector('y')
yi = T.cast(y, 'int32') yi = T.cast(y, 'int32')
expressions = [ expressions = [
T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])), T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
-T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])), -T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
...@@ -696,7 +717,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -696,7 +717,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for expr in expressions: for expr in expressions:
# Verify the optimizer worked on the expressions # Verify the optimizer worked on the expressions
f = theano.function([x,y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
if verbose: if verbose:
theano.printing.debugprint(f) theano.printing.debugprint(f)
try: try:
...@@ -707,7 +728,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -707,7 +728,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise raise
# Also verify the gradient wrt x # Also verify the gradient wrt x
g = theano.function([x,y], T.grad(expr, x), mode=mode) g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose: if verbose:
theano.printing.debugprint(g) theano.printing.debugprint(g)
try: try:
...@@ -717,7 +738,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -717,7 +738,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano.printing.debugprint(g) theano.printing.debugprint(g)
raise raise
def test_optimize_xent_vector(self): def test_optimize_xent_vector(self):
verbose = 0 verbose = 0
mode = theano.compile.mode.get_default_mode() mode = theano.compile.mode.get_default_mode()
...@@ -743,8 +763,9 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -743,8 +763,9 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
-T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))] -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x,y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
if verbose: print_graph(f) if verbose:
print_graph(f)
try: try:
prev, last = f.maker.fgraph.toposort()[-2:] prev, last = f.maker.fgraph.toposort()[-2:]
assert len(f.maker.fgraph.toposort()) == 5 assert len(f.maker.fgraph.toposort()) == 5
...@@ -752,7 +773,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -752,7 +773,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
except Exception: except Exception:
theano.printing.debugprint(f) theano.printing.debugprint(f)
raise raise
g = theano.function([x,y], T.grad(expr, x), mode=mode) g = theano.function([x,y], T.grad(expr, x), mode=mode)
print_graph(g) print_graph(g)
try: try:
...@@ -789,17 +809,19 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -789,17 +809,19 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
## Test that a biased softmax is optimized correctly ## Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b+x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x+b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x,b,y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
if verbose: print_graph(f) if verbose:
print_graph(f)
try: try:
prev, last = f.maker.fgraph.toposort()[-2:] prev, last = f.maker.fgraph.toposort()[-2:]
assert len(f.maker.fgraph.toposort()) == 3 # [big_op, sum, dim_shuffle] assert len(f.maker.fgraph.toposort()) == 3
# [big_op, sum, dim_shuffle]
f(x_val, b_val, y_val) f(x_val, b_val, y_val)
except Exception: except Exception:
theano.printing.debugprint(f) theano.printing.debugprint(f)
...@@ -808,7 +830,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -808,7 +830,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
backup = config.warn.sum_div_dimshuffle_bug backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False config.warn.sum_div_dimshuffle_bug = False
try: try:
g = theano.function([x,b,y], T.grad(expr, x), mode=mode) g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
finally: finally:
config.warn.sum_div_dimshuffle_bug = backup config.warn.sum_div_dimshuffle_bug = backup
...@@ -830,13 +852,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -830,13 +852,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
mode = theano.compile.mode.get_default_mode() mode = theano.compile.mode.get_default_mode()
if mode == theano.compile.mode.get_mode('FAST_COMPILE'): if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN' mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3, 5)
x_val = rng.randn(3,5)
b_val = rng.randn(5) b_val = rng.randn(5)
y_val = numpy.asarray([2,4,1]) y_val = numpy.asarray([2, 4, 1])
x = T.dmatrix('x') x = T.dmatrix('x')
b = T.dvector('b') b = T.dvector('b')
y = T.lvector('y') y = T.lvector('y')
...@@ -878,7 +897,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -878,7 +897,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
assert has_softmax assert has_softmax
assert not has_softmaxdx assert not has_softmaxdx
## Cases to test ## Cases to test
expressions = [ expressions = [
a * T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])), a * T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
...@@ -904,7 +922,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -904,7 +922,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for expr in expressions: for expr in expressions:
# Verify the optimizer worked on the expressions # Verify the optimizer worked on the expressions
f = theano.function([x,y,a], expr, mode=mode) f = theano.function([x, y, a], expr, mode=mode)
try: try:
assert 5 <= len(f.maker.fgraph.toposort()) <= 10 assert 5 <= len(f.maker.fgraph.toposort()) <= 10
validate_fn_graph(f) validate_fn_graph(f)
...@@ -914,7 +932,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -914,7 +932,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise raise
# Verify the gradient wrt x # Verify the gradient wrt x
g = theano.function([x,y,a], T.grad(expr, x), mode=mode) g = theano.function([x, y, a], T.grad(expr, x), mode=mode)
try: try:
assert 5 <= len(g.maker.fgraph.toposort()) <= 12 assert 5 <= len(g.maker.fgraph.toposort()) <= 12
validate_grad_graph(g) validate_grad_graph(g)
...@@ -924,7 +942,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -924,7 +942,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise raise
# Verify the gradient when providing output gradient # Verify the gradient when providing output gradient
h = theano.function([x,y,a], T.grad(expr, x, g_cost=a*x.sum()), mode=mode) h = theano.function([x, y, a],
T.grad(expr, x, g_cost=a * x.sum()), mode=mode)
try: try:
assert 8 <= len(h.maker.fgraph.toposort()) <= 17 assert 8 <= len(h.maker.fgraph.toposort()) <= 17
validate_grad_graph(h) validate_grad_graph(h)
...@@ -944,7 +963,6 @@ def test_argmax_pushdown(): ...@@ -944,7 +963,6 @@ def test_argmax_pushdown():
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x], [x],
[out]) [out])
theano.compile.mode.optdb.query( theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(fgraph) theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
...@@ -981,14 +999,13 @@ def test_argmax_pushdown(): ...@@ -981,14 +999,13 @@ def test_argmax_pushdown():
assert isinstance(fgraph.toposort()[2].op.scalar_op, theano.scalar.Maximum) assert isinstance(fgraph.toposort()[2].op.scalar_op, theano.scalar.Maximum)
assert str(fgraph.toposort()[3].op) == 'OutputGuard' assert str(fgraph.toposort()[3].op) == 'OutputGuard'
def test_argmax_pushdown_bias(): def test_argmax_pushdown_bias():
x = tensor.dmatrix() x = tensor.dmatrix()
b = tensor.dvector() b = tensor.dvector()
out = tensor.argmax(softmax_with_bias(x, b), axis=-1) out = tensor.argmax(softmax_with_bias(x, b), axis=-1)
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x,b], [x, b],
[out]) [out])
theano.compile.mode.optdb.query( theano.compile.mode.optdb.query(
...@@ -1005,10 +1022,9 @@ def test_argmax_pushdown_bias(): ...@@ -1005,10 +1022,9 @@ def test_argmax_pushdown_bias():
x = tensor.dmatrix() x = tensor.dmatrix()
b = tensor.dvector() b = tensor.dvector()
out = tensor.max_and_argmax(softmax_with_bias(x, b), axis=-1)[0] out = tensor.max_and_argmax(softmax_with_bias(x, b), axis=-1)[0]
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x,b], [x, b],
[out]) [out])
backup = config.warn.argmax_pushdown_bug backup = config.warn.argmax_pushdown_bug
...@@ -1028,13 +1044,15 @@ def test_argmax_pushdown_bias(): ...@@ -1028,13 +1044,15 @@ def test_argmax_pushdown_bias():
assert isinstance(fgraph.toposort()[1].op.scalar_op, theano.scalar.Maximum) assert isinstance(fgraph.toposort()[1].op.scalar_op, theano.scalar.Maximum)
assert str(fgraph.toposort()[2].op) == 'OutputGuard' assert str(fgraph.toposort()[2].op) == 'OutputGuard'
def test_asymptotic_32(): def test_asymptotic_32():
""" """
This test makes sure that our functions behave sensibly when huge values are present This test makes sure that our functions behave sensibly when
huge values are present
""" """
#TODO: consider adding the optimization of crossentropy into the current mode for the #TODO: consider adding the optimization of crossentropy into the current
# purpose of running this test # mode for the purpose of running this test
for dtype in 'float32', 'float64': for dtype in 'float32', 'float64':
if dtype == 'float32': if dtype == 'float32':
...@@ -1045,10 +1063,11 @@ def test_asymptotic_32(): ...@@ -1045,10 +1063,11 @@ def test_asymptotic_32():
x2 = tensor.dvector() x2 = tensor.dvector()
y = tensor.lvector() y = tensor.lvector()
c = categorical_crossentropy(softmax(x+x2), y) c = categorical_crossentropy(softmax(x + x2), y)
f = theano.function([x,y,x2], [c.sum(), tensor.grad(c.sum(), x)], mode='FAST_RUN') f = theano.function([x, y, x2], [c.sum(),
tensor.grad(c.sum(), x)], mode='FAST_RUN')
if 0: if 0:
for i, n in enumerate( f.maker.fgraph.toposort()): for i, n in enumerate(f.maker.fgraph.toposort()):
print i, n print i, n
xval = numpy.zeros((5, 5), dtype=dtype) xval = numpy.zeros((5, 5), dtype=dtype)
...@@ -1071,51 +1090,50 @@ def test_asymptotic_32(): ...@@ -1071,51 +1090,50 @@ def test_asymptotic_32():
#print cval, gxval #print cval, gxval
assert cval > 61750000 assert cval > 61750000
assert gxval[0,0] == -1.0 assert gxval[0, 0] == -1.0
assert gxval[0,1] == 0.25 assert gxval[0, 1] == 0.25
class Test_softmax_opt: class Test_softmax_opt:
# Test that expressions of softmax in terms of exponentiated things divided by row sums # Test that expressions of softmax in terms of exponentiated things
# are replaced by softmax expressions. # divided by row sums are replaced by softmax expressions.
# #
# Softmax_grad isn't that interesting as an Op, but it's the signature we look for when # Softmax_grad isn't that interesting as an Op, but it has the signature
# trying to insert CrossEntropySoftmax... grad. So for now, we add softmax_grad to graphs. # we look for when trying to insert CrossEntropySoftmax... grad. So for now,
# In future, we may modify the CrossEntropySoftmax...grad to look for the more basic # we add softmax_grad to graphs. In the future, we may modify the
# pattern. # CrossEntropySoftmax...grad to look for the more basic pattern.
# #
def setUp(self):
    """Prepare a seeded random generator and the compilation mode.

    ``self.rng`` is a ``numpy.random.RandomState`` built from
    ``utt.fetch_seed()``; ``self.mode`` is the default Theano mode with
    'canonicalize' additionally included.
    """
    utt.seed_rng()
    seed = utt.fetch_seed()
    self.rng = numpy.random.RandomState(seed)
    # Start from the default mode and additionally include 'canonicalize'.
    default_mode = theano.compile.mode.get_default_mode()
    self.mode = default_mode.including('canonicalize')
def test_basic(self):
    """exp(c) divided by its row sums must compile to a lone softmax op."""
    c = T.matrix()
    # Two separate exp(c) expressions are built on purpose, exactly as in
    # the hand-written formula: numerator first, then the row sums.
    numerator = T.exp(c)
    row_sums = T.exp(c).sum(axis=1)
    p_y = numerator / row_sums.dimshuffle(0, 'x')

    # test that function contains softmax and no div.
    f = theano.function([c], p_y, mode=self.mode)
    f_ops = [node.op for node in f.maker.fgraph.toposort()]
    assert len(f_ops) == 1
    assert softmax in f_ops

    # Finally run the compiled function once on a random (3, 4) input.
    f(self.rng.rand(3, 4).astype(config.floatX))
def test_grad(self): def test_grad(self):
c = T.matrix() c = T.matrix()
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0,'x') p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0, 'x')
# test that function contains softmax and softmaxgrad # test that function contains softmax and softmaxgrad
w = T.matrix() w = T.matrix()
backup = config.warn.sum_div_dimshuffle_bug backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False config.warn.sum_div_dimshuffle_bug = False
try: try:
g = theano.function([c,w],T.grad((p_y*w).sum(), c)) g = theano.function([c, w],T.grad((p_y*w).sum(), c))
finally: finally:
config.warn.sum_div_dimshuffle_bug = backup config.warn.sum_div_dimshuffle_bug = backup
g_ops = [n.op for n in g.maker.fgraph.toposort()] g_ops = [n.op for n in g.maker.fgraph.toposort()]
...@@ -1127,7 +1145,7 @@ class Test_softmax_opt: ...@@ -1127,7 +1145,7 @@ class Test_softmax_opt:
assert len(g_ops) == 2 assert len(g_ops) == 2
assert softmax in g_ops assert softmax in g_ops
assert softmax_grad in g_ops assert softmax_grad in g_ops
g(self.rng.rand(3,4), self.rng.uniform(.5, 1, (3,4))) g(self.rng.rand(3, 4), self.rng.uniform(.5, 1, (3,4)))
def test_transpose_basic(self): def test_transpose_basic(self):
# this should be a transposed softmax # this should be a transposed softmax
...@@ -1135,14 +1153,14 @@ class Test_softmax_opt: ...@@ -1135,14 +1153,14 @@ class Test_softmax_opt:
p_y = T.exp(c) / T.exp(c).sum(axis=0) p_y = T.exp(c) / T.exp(c).sum(axis=0)
# test that function contains softmax and no div. # test that function contains softmax and no div.
f = theano.function([c],p_y) f = theano.function([c], p_y)
#printing.debugprint(f) #printing.debugprint(f)
# test that function contains softmax and no div. # test that function contains softmax and no div.
backup = config.warn.sum_div_dimshuffle_bug backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False config.warn.sum_div_dimshuffle_bug = False
try: try:
g = theano.function([c],T.grad(p_y.sum(), c)) g = theano.function([c], T.grad(p_y.sum(), c))
finally: finally:
config.warn.sum_div_dimshuffle_bug = backup config.warn.sum_div_dimshuffle_bug = backup
#printing.debugprint(g) #printing.debugprint(g)
...@@ -1169,15 +1187,5 @@ class Test_softmax_opt: ...@@ -1169,15 +1187,5 @@ class Test_softmax_opt:
# REPEAT 3 CASES in presence of log(softmax) with the advanced indexing etc. # REPEAT 3 CASES in presence of log(softmax) with the advanced indexing etc.
# Run every test case defined in this module when the file is executed
# directly as a script.
if __name__ == '__main__':
    unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论