提交 28a095d6 authored 作者: nouiz's avatar nouiz

Merge pull request #730 from larseeri/shape_tensor_nnet

Better infer_shape test for Softmax, SoftmaxWithBias, SoftmaxGrad, CrossentropySoftmaxArgmax1HotWithBias, ConvOp, Conv3D, ConvTransp3D, ConvGrad3D. Added and tested infer_shape for CrossentropySoftmax1HotWithBiasDx, Prepend_scalar_constant_to_each_row, Prepend_scalar_to_each_row and CrossentropyCategorical1HotGrad. Added a disabled CrossentropyCategorical1Hot.infer_shape, see gh-788.
...@@ -857,6 +857,9 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -857,6 +857,9 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
dx[i, y_idx[i]] -= dy[i] # scalar decrement dx[i, y_idx[i]] -= dy[i] # scalar decrement
output_storage[0][0] = dx output_storage[0][0] = dx
def infer_shape(self, node, shapes):
    """Report the symbolic shape of the single output (dx).

    `shapes` holds one shape per input; the gradient code of this Op
    unpacks its inputs as (dy, sm, y_idx), so index 1 is the shape of
    `sm`.  The output is declared to have exactly that shape.

    `node` is not used.
    """
    sm_shape = shapes[1]
    return [sm_shape]
def grad(self, inp, grads): def grad(self, inp, grads):
dy, sm, y_idx = inp dy, sm, y_idx = inp
g_dx, = grads g_dx, = grads
...@@ -1031,8 +1034,11 @@ class CrossentropyCategorical1HotGrad(gof.Op): ...@@ -1031,8 +1034,11 @@ class CrossentropyCategorical1HotGrad(gof.Op):
for i in xrange(len(g_y)): for i in xrange(len(g_y)):
g_coding[i, true_one_of_n[i]] = -g_y[i] / coding_dist[i, g_coding[i, true_one_of_n[i]] = -g_y[i] / coding_dist[i,
true_one_of_n[i]] true_one_of_n[i]]
g_coding_strg[0] = g_coding g_coding_strg[0] = g_coding
def infer_shape(self, node, in_shapes):
    """Report the shape of the single output as the shape of input 1.

    `in_shapes` holds one shape per input of the node.
    NOTE(review): input 1 is presumably `coding_dist` (the output
    g_coding is indexed like coding_dist in perform) -- confirm against
    make_node, which is not visible here.

    `node` is not used.
    """
    second_input_shape = in_shapes[1]
    return [second_input_shape]
crossentropy_categorical_1hot_grad = CrossentropyCategorical1HotGrad() crossentropy_categorical_1hot_grad = CrossentropyCategorical1HotGrad()
...@@ -1091,6 +1097,17 @@ class CrossentropyCategorical1Hot(gof.Op): ...@@ -1091,6 +1097,17 @@ class CrossentropyCategorical1Hot(gof.Op):
y[i] = -numpy.log(coding[i, one_of_n[i]]) y[i] = -numpy.log(coding[i, one_of_n[i]])
y_out[0] = y y_out[0] = y
# Enabling this infer_shape method makes 2 tests fail:
# theano/tensor/nnet/tests/test_nnet.py:T_CrossentropyCategorical1Hot.
#     {test_softmax_grad_optimizations,test_softmax_grad_optimizations_vector}
# This is caused by local_fill_to_alloc, which calls broadcast_like, which
# looks into the shape feature and returns a Rebroadcast instead of an alloc.
# This infer_shape is disabled until we fix the optimizations, or determine
# that this is no longer needed and update the tests.
# See issue gh-788.
# def infer_shape(self, node, in_shapes):
# return [(in_shapes[0][0],)]
def grad(self, inp, grads): def grad(self, inp, grads):
coding, one_of_n = inp coding, one_of_n = inp
g_y, = grads g_y, = grads
...@@ -1121,7 +1138,7 @@ def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph): ...@@ -1121,7 +1138,7 @@ def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(x, b, new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(x, b,
one_of_n) one_of_n)
fgraph.replace_all_validate([(nll, new_nll), (sm, new_sm)], fgraph.replace_all_validate([(nll, new_nll), (sm, new_sm)],
reason="crossentropy_to_crossentropy_with_softmax") reason="crossentropy_to_crossentropy_with_softmax_with_bias")
return True return True
return False return False
...@@ -1645,6 +1662,11 @@ class Prepend_scalar_constant_to_each_row(gof.Op): ...@@ -1645,6 +1662,11 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
out[:, 0].fill(self.val.data) out[:, 0].fill(self.val.data)
out[:, 1:] = mat out[:, 1:] = mat
def infer_shape(self, node, in_shapes):
    """Report the output shape: the input matrix with one extra column.

    perform() writes the constant into column 0 and the input matrix
    into columns 1 onwards, so the row count is unchanged and the
    column count grows by one.

    `in_shapes[0]` is the shape of the only input (the matrix);
    `node` is not used.
    """
    mat_shape = in_shapes[0]
    n_rows = mat_shape[0]
    n_cols = mat_shape[1]
    return [(n_rows, n_cols + 1)]
def grad(self, inp, grads): def grad(self, inp, grads):
mat, = inp mat, = inp
goutput, = grads goutput, = grads
...@@ -1694,6 +1716,10 @@ class Prepend_scalar_to_each_row(gof.Op): ...@@ -1694,6 +1716,10 @@ class Prepend_scalar_to_each_row(gof.Op):
out[:, 0].fill(val) out[:, 0].fill(val)
out[:, 1:] = mat out[:, 1:] = mat
def infer_shape(self, node, in_shapes):
    """Report the output shape: the input matrix with one extra column.

    The Op's inputs are (val, mat); perform() fills column 0 with `val`
    and copies `mat` into columns 1 onwards, so the output keeps the
    row count of `mat` and has one more column.

    `in_shapes[1]` is the shape of `mat`; `node` is not used.
    """
    mat_shape = in_shapes[1]
    n_rows = mat_shape[0]
    n_cols = mat_shape[1]
    return [(n_rows, n_cols + 1)]
def grad(self, inp, grads): def grad(self, inp, grads):
val, mat = inp val, mat = inp
goutput, = grads goutput, = grads
......
...@@ -13,10 +13,10 @@ from theano.tensor.nnet import conv ...@@ -13,10 +13,10 @@ from theano.tensor.nnet import conv
from theano.tensor.basic import _allclose from theano.tensor.basic import _allclose
class TestConv2D(unittest.TestCase): class TestConv2D(utt.InferShapeTester):
def setUp(self): def setUp(self):
utt.seed_rng() super (TestConv2D, self).setUp()
self.input = T.dtensor4('input') self.input = T.dtensor4('input')
self.filters = T.dtensor4('filters') self.filters = T.dtensor4('filters')
...@@ -369,7 +369,6 @@ class TestConv2D(unittest.TestCase): ...@@ -369,7 +369,6 @@ class TestConv2D(unittest.TestCase):
""" """
self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid', self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid',
verify_grad=False) verify_grad=False)
self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid', verify_grad=False)
def speed(self): def speed(self):
n_calls = 20000 n_calls = 20000
...@@ -407,3 +406,100 @@ class TestConv2D(unittest.TestCase): ...@@ -407,3 +406,100 @@ class TestConv2D(unittest.TestCase):
t2 = time.time() t2 = time.time()
print t2 - t1, print t2 - t1,
print print
def test_infer_shape(self):
    """Check ConvOp.infer_shape for several image/filter shapes.

    Every (image shape, filter shape) pair is checked with both the
    'valid' and the 'full' border mode, in that order, with fresh random
    input values drawn before each check.
    """
    # Note: infer_shape is incomplete and thus input and filter shapes
    # must be provided explicitly

    def rand(*shape):
        # Uniform float64 values in [-1, 1).
        r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
        return r * 2 - 1

    adtens = T.dtensor4()
    bdtens = T.dtensor4()

    # (image shape, filter shape) pairs, checked in this order.
    shape_pairs = (
        ((2, 2, 3, 3), (2, 2, 2, 2)),
        ((3, 2, 8, 8), (4, 2, 5, 5)),
        ((3, 2, 7, 5), (5, 2, 3, 2)),
        ((3, 2, 7, 5), (5, 2, 2, 3)),
        ((3, 2, 3, 3), (4, 2, 3, 3)),
    )

    for image_shape, filter_shape in shape_pairs:
        for border_mode in ('valid', 'full'):
            # Fresh lists for every call, as the shapes are handed to
            # conv2d as list objects.
            aivec_val = list(image_shape)
            bivec_val = list(filter_shape)
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            self._compile_and_check(
                [adtens, bdtens],
                [conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
                             border_mode=border_mode)],
                [adtens_val, bdtens_val],
                conv.ConvOp)
if __name__ == '__main__':
    # Script entry point: build a single TestConv2D case by hand and run
    # only its infer_shape test, without going through a test runner.
    case = TestConv2D('setUp')
    case.setUp()
    case.test_infer_shape()
...@@ -3,9 +3,9 @@ import theano ...@@ -3,9 +3,9 @@ import theano
import theano.tensor as T import theano.tensor as T
from theano import function, shared from theano import function, shared
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.tensor.nnet.ConvTransp3D import convTransp3D from theano.tensor.nnet.ConvTransp3D import convTransp3D, ConvTransp3D
from theano.tensor.nnet.ConvGrad3D import convGrad3D from theano.tensor.nnet.ConvGrad3D import convGrad3D, ConvGrad3D
from theano.tensor.nnet.Conv3D import conv3D from theano.tensor.nnet.Conv3D import conv3D, Conv3D
import numpy as N import numpy as N
import copy import copy
import theano.sparse import theano.sparse
...@@ -20,7 +20,9 @@ floatX = theano.config.floatX ...@@ -20,7 +20,9 @@ floatX = theano.config.floatX
# a subset of the tests they will do different things than if you # a subset of the tests they will do different things than if you
# run all of them # run all of them
class DummyConv3D: class DummyConv3D:
"""A dummy version of Conv3D passed to verify_grad """A dummy version of Conv3D passed to verify_grad
Stores a fixed stride, since stride is not differentiable Stores a fixed stride, since stride is not differentiable
Exposes only one scalar argument, which is used as the position Exposes only one scalar argument, which is used as the position
...@@ -30,149 +32,174 @@ class DummyConv3D: ...@@ -30,149 +32,174 @@ class DummyConv3D:
verify_grad will not need to test hundreds of variables. Disadvantage verify_grad will not need to test hundreds of variables. Disadvantage
is we can't be certain that all of them are correct, advantage is that is we can't be certain that all of them are correct, advantage is that
this random projection lets us test lots of variables very quickly """ this random projection lets us test lots of variables very quickly """
def __init__(self, rng, VWbVals, d):
    """
    param: rng    Random number generator used to pick direction of the
    line
    param: VWbVals tuple containing values to test V,W,b around
    param: d    shared variable for d, the stride
    """
    self.V, self.W, self.b = VWbVals

    def random_direction(var):
        # One uniform(-1, 1) entry per element of `var`, wrapped in a
        # shared variable.
        return shared(rng.uniform(-1, 1,
                                  var.get_value(borrow=True).shape))

    # Draw order (dV, dW, db) is kept so the rng stream is consumed the
    # same way as before.
    self.dV = random_direction(self.V)
    self.dW = random_direction(self.W)
    self.db = random_direction(self.b)

    self.d = d
def __call__(self, t):
    """Return conv3D evaluated at (V, W, b) displaced by t along the
    stored directions (dV, dW, db), using the fixed stride d."""
    moved_V = self.V + t * self.dV
    moved_W = self.W + t * self.dW
    moved_b = self.b + t * self.db
    return conv3D(moved_V, moved_W, moved_b, self.d)
class DummyConvGrad3D:
    """A dummy version of convGrad3D exposing a single scalar argument.

    Stores fixed values for V and dCdH plus one random direction for
    each; calling the object with a scalar t evaluates convGrad3D at the
    stored values displaced by t along those directions.
    """

    def __init__(self, rng, VdHvals, d, WShape):
        """
        param: rng    Random number generator used to pick direction of the
        line
        param: VdHvals tuple (V, dCdH) of shared variables holding the
        values to test around
        param: d    shared variable for d, the stride
        param: WShape    value forwarded unchanged to convGrad3D as its
        WShape argument
        """
        self.V, self.dCdH = VdHvals

        # One uniform(-1, 1) direction per stored value, same shape as it.
        self.dV = shared(rng.uniform(-1, 1,
                                     self.V.get_value(borrow=True).shape))
        self.ddCdH = shared(rng.uniform(-1, 1,
                                        self.dCdH.get_value(borrow=True).shape))

        self.d = d
        self.WShape = WShape

    def __call__(self, t):
        """Return convGrad3D at (V + t * dV, dCdH + t * ddCdH)."""
        output = convGrad3D(self.V + t * self.dV, self.d, self.WShape,
                            self.dCdH + t * self.ddCdH)
        return output
class DummyConvTransp3D:
    """A dummy version of convTransp3D exposing a single scalar argument.

    Stores fixed values for W, b and H plus one random direction for
    each; calling the object with a scalar t evaluates convTransp3D at
    the stored values displaced by t along those directions.
    """

    def __init__(self, rng, WbHvals, d, RShape):
        """
        param: rng    Random number generator used to pick direction of the
        line
        param: WbHvals tuple (W, b, H) of shared variables holding the
        values to test around
        param: d    shared variable for d, the stride
        param: RShape    value forwarded unchanged to convTransp3D as its
        RShape argument
        """
        self.W, self.b, self.H = WbHvals

        # Draw the three directions in the order dW, db, dH, then wrap
        # each in a shared variable.  Each attribute is assigned on its
        # own line: splitting a three-target tuple assignment across
        # lines without parentheses or a backslash leaves only two values
        # on the right-hand side.
        self.dW = shared(rng.uniform(-1, 1,
                                     self.W.get_value(borrow=True).shape))
        self.db = shared(rng.uniform(-1, 1,
                                     self.b.get_value(borrow=True).shape))
        self.dH = shared(rng.uniform(-1, 1,
                                     self.H.get_value(borrow=True).shape))

        self.d = d
        self.RShape = RShape

    def __call__(self, t):
        """Return convTransp3D at (W + t * dW, b + t * db, H + t * dH)."""
        output = convTransp3D(self.W + t * self.dW, self.b + t * self.db,
                              self.d, self.H + t * self.dH, self.RShape)
        return output
class TestConv3D(unittest.TestCase):
def setUp(self): class TestConv3D(utt.InferShapeTester):
def setUp(self):
    """Build the shared variables, symbolic graphs and compiled
    functions that the tests of this class use."""
    super(TestConv3D, self).setUp()
    utt.seed_rng()
    self.rng = N.random.RandomState(utt.fetch_seed())

    # Private copy of the default mode with check_py_code switched off.
    mode = copy.copy(theano.compile.mode.get_default_mode())
    mode.check_py_code = False

    # Placeholder shared variables; the tests overwrite their values
    # before use.  N.ndarray(...) allocates without initialising.
    unit_5d = (1, 1, 1, 1, 1)
    self.W = shared(N.ndarray(shape=unit_5d, dtype=floatX))
    self.b = shared(N.zeros(1, dtype=floatX))
    self.rb = shared(N.zeros(1, dtype=floatX))
    self.V = shared(N.ndarray(shape=unit_5d, dtype=floatX))
    self.d = shared(N.ndarray(shape=(3, ), dtype=int))

    # Forward convolution and its shape.
    self.H = conv3D(self.V, self.W, self.b, self.d)
    self.H_func = function([], self.H, mode=mode)
    self.H_shape_func = function([], self.H.shape, mode=mode)

    self.RShape = T.vector(dtype='int64')

    # Transposed convolution applied to an externally supplied H.
    five_d_type = T.TensorType(floatX,
                               (False, False, False, False, False))
    self.otherH = five_d_type(name='otherH')
    self.transp = convTransp3D(self.W, self.rb, self.d,
                               self.otherH, self.RShape)
    self.transp_func = function([self.otherH, self.RShape],
                                self.transp, mode=mode)

    # Transposed convolution applied to the graph's own H.
    self.R = convTransp3D(self.W, self.rb, self.d, self.H, self.RShape)
    self.R_func = function([self.RShape], self.R, mode=mode)
    self.R_shape_func = function([self.RShape], self.R.shape)

    # Sum of squared differences between V and R.
    self.reconsObj = T.sum(T.sqr(self.V - self.R))
    self.reconsObjFunc = function([self.RShape], self.reconsObj,
                                  mode=mode)

    def objective_grads():
        # A fresh list of gradient graphs w.r.t. W, H, V and b, built
        # anew on every call.
        return [T.grad(self.reconsObj, wrt)
                for wrt in (self.W, self.H, self.V, self.b)]

    self.gradientsFunc = function([self.RShape], objective_grads(),
                                  mode=mode)
    self.check_c_against_python = function([self.RShape],
                                           objective_grads(),
                                           mode='DEBUG_MODE')

    self.dCdW_shape_func = function([self.RShape],
                                    T.grad(self.reconsObj, self.W).shape,
                                    mode=mode)
def random_tensor(self, *dims):
    """Return an array of shape `dims` and dtype floatX whose entries
    are drawn by self.rng uniformly from [-0.05, 0.05)."""
    samples = self.rng.uniform(-.05, .05, dims)
    return N.asarray(samples, dtype=floatX)
def randomize(self):
    """Fill W, b, rb, V and the stride d with random values of random,
    mutually compatible sizes.

    Video extents are built as (number of steps) * (filter extent) plus
    a random remainder.  Every draw from self.rng is kept in its original
    position so the sequence of random values is unchanged.
    """
    batchSize = self.rng.randint(1, 4)
    # This draw is overwritten below; it is kept only so the rng stream
    # stays the same.
    videoDur = self.rng.randint(8, 30)
    filterWidth = self.rng.randint(1, 8)
    filterHeight = self.rng.randint(1, 8)
    filterDur = self.rng.randint(1, 8)
    tsteps = self.rng.randint(1, 4)
    rsteps = self.rng.randint(1, 4)
    csteps = self.rng.randint(1, 4)
    videoDur = tsteps * filterDur + self.rng.randint(0, 3)
    videoWidth = csteps * filterWidth + self.rng.randint(0, 3)
    videoHeight = rsteps * filterHeight + self.rng.randint(0, 3)
    numFilters = self.rng.randint(1, 3)
    inputChannels = self.rng.randint(1, 3)

    # Write the three stride components in place into the internal
    # storage of d.
    for axis in range(3):
        self.d.get_value(borrow=True, return_internal_type=True)[axis] = \
            self.rng.randint(1, 15)

    self.W.set_value(self.random_tensor(numFilters, filterHeight,
        filterWidth, filterDur, inputChannels), borrow=True)
    self.b.set_value(self.random_tensor(numFilters), borrow=True)
    self.rb.set_value(self.random_tensor(inputChannels), borrow=True)
    self.V.set_value(self.random_tensor(batchSize, videoHeight,
        videoWidth, videoDur, inputChannels), borrow=True)
    # rb is set a second time, as before; the extra draw is kept so the
    # rng stream is unchanged.
    self.rb.set_value(self.random_tensor(inputChannels), borrow=True)
def test_c_against_python(self): def test_c_against_python(self):
...@@ -180,37 +207,38 @@ class TestConv3D(unittest.TestCase): ...@@ -180,37 +207,38 @@ class TestConv3D(unittest.TestCase):
self.check_c_against_python(self.V.get_value(borrow=True).shape[1:4]) self.check_c_against_python(self.V.get_value(borrow=True).shape[1:4])
def test_c_against_mat_mul(self): def test_c_against_mat_mul(self):
#Use a filter of the same size as the image, so the convolution is just a dense matrix multiply # Use a filter of the same size as the image, so the convolution is
#Check that dense matrix multiplication gives the same result as convolution # just a dense matrix multiply.
# Check that dense matrix multiplication gives the same result as
# convolution.
batchSize = self.rng.randint(1,10) batchSize = self.rng.randint(1, 10)
videoDur = self.rng.randint(3,10) videoDur = self.rng.randint(3, 10)
videoWidth = self.rng.randint(1,5) videoWidth = self.rng.randint(1, 5)
videoHeight = self.rng.randint(1,5) videoHeight = self.rng.randint(1, 5)
filterWidth = videoWidth filterWidth = videoWidth
filterHeight = videoHeight filterHeight = videoHeight
filterDur = videoDur filterDur = videoDur
numFilters = self.rng.randint(1,3) numFilters = self.rng.randint(1, 3)
inputChannels = self.rng.randint(1,4) inputChannels = self.rng.randint(1, 4)
self.d.get_value(borrow=True, return_internal_type=True)[0] = self.rng.randint(1,15)
self.d.get_value(borrow=True, return_internal_type=True)[1] = self.rng.randint(1,15)
self.d.get_value(borrow=True, return_internal_type=True)[2] = self.rng.randint(1,15)
self.d.get_value(borrow=True, return_internal_type=True)[0] = \
self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[1] = \
self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[2] = \
self.rng.randint(1, 15)
self.W.set_value( self.W.set_value(self.random_tensor(numFilters, filterHeight,
self.random_tensor(numFilters,filterHeight,filterWidth,filterDur,inputChannels), filterWidth, filterDur, inputChannels), borrow=True)
borrow=True) self.W.set_value(self.W.get_value(borrow=True) *
(self.W.get_value(borrow=True) < 1e-5), borrow=True)
self.W.set_value(
self.W.get_value(borrow=True) * (self.W.get_value(borrow=True) < 1e-5),
borrow=True)
self.b.set_value(self.random_tensor(numFilters), borrow=True) self.b.set_value(self.random_tensor(numFilters), borrow=True)
self.V.set_value(
self.random_tensor(batchSize,videoHeight,videoWidth,videoDur,inputChannels), self.V.set_value(self.random_tensor(batchSize, videoHeight,
borrow=True) videoWidth, videoDur, inputChannels), borrow=True)
Hv = self.H_func() Hv = self.H_func()
...@@ -220,58 +248,60 @@ class TestConv3D(unittest.TestCase): ...@@ -220,58 +248,60 @@ class TestConv3D(unittest.TestCase):
n = inputChannels * videoHeight * videoWidth * videoDur n = inputChannels * videoHeight * videoWidth * videoDur
W_mat = N.zeros((n, numFilters)) W_mat = N.zeros((n, numFilters))
V_mat = N.zeros((batchSize,n)) V_mat = N.zeros((batchSize, n))
Hv_mat = N.zeros((batchSize, numFilters)) Hv_mat = N.zeros((batchSize, numFilters))
for qi in xrange(0,numFilters): for qi in xrange(0, numFilters):
W_mat[:,qi] = self.W.get_value(borrow=True)[qi,:,:,:,:].reshape((n)) W_mat[:, qi] = \
Hv_mat[:,qi] = Hv[:,0,0,0,qi] self.W.get_value(borrow=True)[qi, :, :, :, :].reshape((n))
for qi in xrange(0,batchSize): Hv_mat[:, qi] = Hv[:, 0, 0, 0, qi]
V_mat[qi,:] = self.V.get_value(borrow=True)[qi,:,:,:,:].reshape((n)) for qi in xrange(0, batchSize):
V_mat[qi, :] = \
self.V.get_value(borrow=True)[qi, :, :, :, :].reshape((n))
H_mat = N.dot(V_mat,W_mat) + self.b.get_value(borrow=True) H_mat = N.dot(V_mat, W_mat) + self.b.get_value(borrow=True)
tol = 1e-5 tol = 1e-5
if floatX == 'float32': if floatX == 'float32':
tol = 1e-4 tol = 1e-4
if N.abs(H_mat-Hv_mat).max() > tol and not N.allclose(H_mat,Hv_mat): if N.abs(H_mat - Hv_mat).max() > tol and not N.allclose(H_mat, Hv_mat):
print H_mat print H_mat
print Hv_mat print Hv_mat
print 'max error: '+str(N.abs(H_mat-Hv_mat).max()) print 'max error: ' + str(N.abs(H_mat - Hv_mat).max())
W.get_value(borrow=True)[W.get_value(borrow=True) != 0] += 1.0 W.get_value(borrow=True)[W.get_value(borrow=True) != 0] += 1.0
print 'min non-zero kernel mag: '+str(N.abs(W.get_value(borrow=True)).min()) print 'min non-zero kernel mag: ' + \
str(N.abs(W.get_value(borrow=True)).min())
assert False assert False
def test_c_against_mat_transp_mul(self): def test_c_against_mat_transp_mul(self):
#Use a filter of the same size as the image, so the convolution is just a dense matrix multiply # Use a filter of the same size as the image, so the convolution is just a
#Check that dense matrix multiplication by the transpose of the matrix gives the same result as ConvTransp # dense matrix multiply.
batchSize = self.rng.randint(1,10) # Check that dense matrix multiplication by the transpose of the matrix
videoDur = self.rng.randint(3,15) # gives the same result as ConvTransp.
videoWidth = self.rng.randint(3,15) batchSize = self.rng.randint(1, 10)
videoHeight = self.rng.randint(3,15) videoDur = self.rng.randint(3, 15)
videoWidth = self.rng.randint(3, 15)
videoHeight = self.rng.randint(3, 15)
filterWidth = videoWidth filterWidth = videoWidth
filterHeight = videoHeight filterHeight = videoHeight
filterDur = videoDur filterDur = videoDur
numFilters = self.rng.randint(1,15) numFilters = self.rng.randint(1, 15)
inputChannels = self.rng.randint(1,15) inputChannels = self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[0] = self.rng.randint(1,15) self.d.get_value(borrow=True, return_internal_type=True)[0] = \
self.d.get_value(borrow=True, return_internal_type=True)[1] = self.rng.randint(1,15) self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[2] = self.rng.randint(1,15) self.d.get_value(borrow=True, return_internal_type=True)[1] = \
self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[2] = \
self.rng.randint(1, 15)
self.W.set_value(
self.random_tensor(numFilters,filterHeight,filterWidth,filterDur,inputChannels), self.W.set_value(self.random_tensor(numFilters, filterHeight,
borrow=True) filterWidth, filterDur, inputChannels), borrow=True)
self.b.set_value(self.random_tensor(numFilters), borrow=True) self.b.set_value(self.random_tensor(numFilters), borrow=True)
self.V.set_value( self.V.set_value(self.random_tensor(batchSize, videoHeight,
self.random_tensor(batchSize,videoHeight,videoWidth,videoDur,inputChannels), videoWidth, videoDur, inputChannels), borrow=True)
borrow=True)
self.rb.set_value(self.random_tensor(inputChannels), borrow=True) self.rb.set_value(self.random_tensor(inputChannels), borrow=True)
H_shape = self.H_shape_func() H_shape = self.H_shape_func()
...@@ -280,96 +310,101 @@ class TestConv3D(unittest.TestCase): ...@@ -280,96 +310,101 @@ class TestConv3D(unittest.TestCase):
assert H_shape[2] == 1 assert H_shape[2] == 1
assert H_shape[3] == 1 assert H_shape[3] == 1
Hv = self.random_tensor( * H_shape ) Hv = self.random_tensor( * H_shape)
Vv = self.transp_func(Hv,[videoHeight,videoWidth,videoDur]) Vv = self.transp_func(Hv, [videoHeight, videoWidth, videoDur])
n = inputChannels * videoHeight * videoWidth * videoDur n = inputChannels * videoHeight * videoWidth * videoDur
rbim = N.zeros((videoHeight,videoWidth,videoDur,inputChannels)) rbim = N.zeros((videoHeight, videoWidth, videoDur, inputChannels))
for qi in xrange(0,inputChannels): for qi in xrange(0, inputChannels):
rbim[:,:,:,qi] = self.rb.get_value(borrow=True)[qi] rbim[:, :, :, qi] = self.rb.get_value(borrow=True)[qi]
rbv = rbim.reshape((n)) rbv = rbim.reshape((n))
W_mat = N.zeros((numFilters, n)) W_mat = N.zeros((numFilters, n))
Vv_mat = N.zeros((n, batchSize)) Vv_mat = N.zeros((n, batchSize))
Hv_mat = N.zeros((numFilters,batchSize)) Hv_mat = N.zeros((numFilters, batchSize))
for qi in xrange(0,numFilters): for qi in xrange(0, numFilters):
W_mat[qi,:] = self.W.get_value(borrow=True)[qi,:,:,:,:].reshape((n)) W_mat[qi, :] = \
Hv_mat[qi,:] = Hv[:,0,0,0,qi] self.W.get_value(borrow=True)[qi, :, :, :, :].reshape((n))
for qi in xrange(0,batchSize): Hv_mat[qi, :] = Hv[:, 0, 0, 0, qi]
Vv_mat[:,qi] = Vv[qi,:,:,:,:].reshape((n)) for qi in xrange(0, batchSize):
Vv_mat[:, qi] = Vv[qi, :, :, :, :].reshape((n))
V_mat = (N.dot(W_mat.transpose(),Hv_mat).transpose() + rbv).transpose() V_mat = (N.dot(W_mat.transpose(), Hv_mat).transpose() + \
rbv).transpose()
if N.abs(V_mat-Vv_mat).max() > 1e-5:
if N.abs(V_mat - Vv_mat).max() > 1e-5:
print V_mat print V_mat
print Vv_mat print Vv_mat
for qq in xrange(V_mat.shape[0]): for qq in xrange(V_mat.shape[0]):
for qqq in xrange(Vv_mat.shape[1]): for qqq in xrange(Vv_mat.shape[1]):
if abs(V_mat[qq,qqq]-Vv_mat[qq,qqq]) > 1e-5: if abs(V_mat[qq, qqq] - Vv_mat[qq, qqq]) > 1e-5:
print 'wrong at '+str((qq,qqq))+': '+str((V_mat[qq,qqq],Vv_mat[qq,qqq])) print ('wrong at ' + str((qq, qqq)) + ': ' +
str(V_mat[qq, qqq], Vv_mat[qq, qqq]))
assert False assert False
def test_c_against_sparse_mat_transp_mul(self): def test_c_against_sparse_mat_transp_mul(self):
#like test_c_against_mat_transp_mul but using a sparse matrix and a kernel that is smaller than the image # like test_c_against_mat_transp_mul but using a sparse matrix and a kernel
# that is smaller than the image
if not theano.sparse.enable_sparse: if not theano.sparse.enable_sparse:
raise SkipTest('Optional package sparse disabled') raise SkipTest('Optional package sparse disabled')
batchSize = self.rng.randint(1,3) batchSize = self.rng.randint(1, 3)
filterWidth = self.rng.randint(1,8) filterWidth = self.rng.randint(1, 8)
filterHeight = self.rng.randint(1,8) filterHeight = self.rng.randint(1, 8)
filterDur = self.rng.randint(1,8) filterDur = self.rng.randint(1, 8)
self.d.get_value(borrow=True, return_internal_type=True)[0] = self.rng.randint(1,15) self.d.get_value(borrow=True, return_internal_type=True)[0] = \
self.d.get_value(borrow=True, return_internal_type=True)[1] = self.rng.randint(1,15) self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[2] = self.rng.randint(1,15) self.d.get_value(borrow=True, return_internal_type=True)[1] = \
self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[2] = \
self.rng.randint(1, 15)
dr = self.d.get_value(borrow=True)[0] dr = self.d.get_value(borrow=True)[0]
dc = self.d.get_value(borrow=True)[1] dc = self.d.get_value(borrow=True)[1]
dt = self.d.get_value(borrow=True)[2] dt = self.d.get_value(borrow=True)[2]
numFilters = self.rng.randint(1, 3)
numFilters = self.rng.randint(1,3) row_steps = self.rng.randint(1, 4)
row_steps = self.rng.randint(1,4) col_steps = self.rng.randint(1, 4)
col_steps = self.rng.randint(1,4) time_steps = self.rng.randint(1, 4)
time_steps = self.rng.randint(1,4)
#print (row_steps,col_steps,time_steps) #print (row_steps,col_steps,time_steps)
videoDur = (time_steps-1)*dt+filterDur + self.rng.randint(0,3) videoDur = (time_steps - 1) * dt + filterDur + \
videoWidth = (col_steps-1)*dc+filterWidth + self.rng.randint(0,3) self.rng.randint(0, 3)
videoHeight = (row_steps-1)*dr+filterHeight + self.rng.randint(0,3) videoWidth = (col_steps - 1) * dc + filterWidth + \
self.rng.randint(0, 3)
videoHeight = (row_steps - 1) * dr + filterHeight + \
self.rng.randint(0, 3)
inputChannels = self.rng.randint(1,15) inputChannels = self.rng.randint(1, 15)
self.W.set_value( self.W.set_value(self.random_tensor(numFilters, filterHeight,
self.random_tensor(numFilters,filterHeight,filterWidth,filterDur,inputChannels), filterWidth, filterDur, inputChannels), borrow=True)
borrow=True)
self.b.set_value(self.random_tensor(numFilters), borrow=True) self.b.set_value(self.random_tensor(numFilters), borrow=True)
#just needed so H_shape works #just needed so H_shape works
self.V.set_value( self.V.set_value(self.random_tensor(batchSize, videoHeight, videoWidth,
self.random_tensor(batchSize,videoHeight,videoWidth,videoDur,inputChannels), videoDur, inputChannels), borrow=True)
borrow=True)
self.rb.set_value(self.random_tensor(inputChannels), borrow=True) self.rb.set_value(self.random_tensor(inputChannels), borrow=True)
H_shape = self.H_shape_func() H_shape = self.H_shape_func()
#make index maps #make index maps
h = N.zeros( H_shape[1:]) h = N.zeros(H_shape[1:])
r = N.zeros( H_shape[1:]) r = N.zeros(H_shape[1:])
c = N.zeros( H_shape[1:]) c = N.zeros(H_shape[1:])
t = N.zeros( H_shape[1:]) t = N.zeros(H_shape[1:])
for qi in xrange(0,H_shape[4]): for qi in xrange(0, H_shape[4]):
h[:,:,:,qi] = qi h[:, :, :, qi] = qi
for qi in xrange(0,H_shape[1]): for qi in xrange(0, H_shape[1]):
r[qi,:,:,:] = qi r[qi, :, :, :] = qi
for qi in xrange(0,H_shape[2]): for qi in xrange(0, H_shape[2]):
c[:,qi,:,:] = qi c[:, qi, :, :] = qi
for qi in xrange(0,H_shape[3]): for qi in xrange(0, H_shape[3]):
t[:,:,qi,:] = qi t[:, :, qi, :] = qi
hn = H_shape[1] * H_shape[2] * H_shape[3] * H_shape[4] hn = H_shape[1] * H_shape[2] * H_shape[3] * H_shape[4]
...@@ -378,21 +413,20 @@ class TestConv3D(unittest.TestCase): ...@@ -378,21 +413,20 @@ class TestConv3D(unittest.TestCase):
c = c.reshape((hn)) c = c.reshape((hn))
t = t.reshape((hn)) t = t.reshape((hn))
Hv = self.random_tensor(*H_shape)
Hv = self.random_tensor( * H_shape ) Vv = self.transp_func(Hv, [videoHeight, videoWidth, videoDur])
Vv = self.transp_func(Hv,[videoHeight,videoWidth,videoDur])
n = inputChannels * videoHeight * videoWidth * videoDur n = inputChannels * videoHeight * videoWidth * videoDur
rbim = N.zeros((videoHeight,videoWidth,videoDur,inputChannels)) rbim = N.zeros((videoHeight, videoWidth, videoDur, inputChannels))
for qi in xrange(0,inputChannels): for qi in xrange(0, inputChannels):
rbim[:,:,:,qi] = self.rb.get_value(borrow=True)[qi] rbim[:, :, :, qi] = self.rb.get_value(borrow=True)[qi]
rbv = rbim.reshape((n)) rbv = rbim.reshape((n))
W_mat = N.zeros((hn,n)) W_mat = N.zeros((hn, n))
Vv_mat = N.zeros((n, batchSize)) Vv_mat = N.zeros((n, batchSize))
Hv_mat = N.zeros((hn,batchSize)) Hv_mat = N.zeros((hn, batchSize))
for qi in xrange(0,hn): for qi in xrange(0, hn):
hi = h[qi] hi = h[qi]
ri = r[qi] ri = r[qi]
ci = c[qi] ci = c[qi]
...@@ -401,57 +435,66 @@ class TestConv3D(unittest.TestCase): ...@@ -401,57 +435,66 @@ class TestConv3D(unittest.TestCase):
placed_filter = N.zeros(self.V.get_value(borrow=True).shape[1:]) placed_filter = N.zeros(self.V.get_value(borrow=True).shape[1:])
placed_filter[ placed_filter[
ri*dr:ri*dr+self.W.get_value(borrow=True).shape[1], ri * dr:ri * dr + self.W.get_value(borrow=True).shape[1],
ci*dc:ci*dc+self.W.get_value(borrow=True).shape[2], ci * dc:ci * dc + self.W.get_value(borrow=True).shape[2],
ti*dt:ti*dt+self.W.get_value(borrow=True).shape[3], ti * dt:ti * dt + self.W.get_value(borrow=True).shape[3],
:] = self.W.get_value(borrow=True)[hi,:,:,:,:] :] = self.W.get_value(borrow=True)[hi, :, :, :, :]
W_mat[qi,:] = placed_filter.reshape((n))
Hv_mat[qi,:] = Hv[:,ri,ci,ti,hi]
for qi in xrange(0,batchSize):
Vv_mat[:,qi] = Vv[qi,:,:,:,:].reshape((n))
W_mat[qi, :] = placed_filter.reshape((n))
Hv_mat[qi, :] = Hv[:, ri, ci, ti, hi]
for qi in xrange(0, batchSize):
Vv_mat[:, qi] = Vv[qi, :, :, :, :].reshape((n))
W_mat_T = sparse.csr_matrix(W_mat.transpose()) W_mat_T = sparse.csr_matrix(W_mat.transpose())
temp = W_mat_T * Hv_mat temp = W_mat_T * Hv_mat
V_mat = (temp.transpose() + rbv).transpose() V_mat = (temp.transpose() + rbv).transpose()
if N.abs(V_mat-Vv_mat).max() > 1e-5: if N.abs(V_mat - Vv_mat).max() > 1e-5:
print 'mul' print 'mul'
print V_mat print V_mat
print 'conv' print 'conv'
print Vv_mat print Vv_mat
for i in xrange(0,n): for i in xrange(0, n):
for j in xrange(0,batchSize): for j in xrange(0, batchSize):
if abs(V_mat[i,j] - Vv_mat[i,j]) > 1e-5: if abs(V_mat[i, j] - Vv_mat[i, j]) > 1e-5:
print 'wrong at %d,%d: %f mul versus %f conv' % (i,j,V_mat[i,j],Vv_mat[i,j]) print ('wrong at %d,%d: %f mul versus %f conv'
% (i, j, V_mat[i, j], Vv_mat[i, j]))
assert False assert False
def test_infer_shape(self):
    """Exercise infer_shape for Conv3D, ConvTransp3D and ConvGrad3D.

    Each graph is handed to the inherited ``_compile_and_check`` helper
    together with the Op class whose shape inference is being tested.
    """
    self.randomize()

    # Dimensions 1..3 of the current video tensor; this is the concrete
    # value supplied for the symbolic ``RShape`` input below.
    video_extent = self.V.get_value(borrow=True).shape[1:4]

    # Conv3D: the graph is compiled without any explicit input.
    self._compile_and_check([], [self.H], [], Conv3D)

    # ConvTransp3D
    self._compile_and_check([self.RShape], [self.R],
                            [video_extent], ConvTransp3D)

    # ConvGrad3D: gradients of the reconstruction objective.
    wrt_variables = (self.W, self.H, self.V, self.b)
    recons_grads = [T.grad(self.reconsObj, wrt) for wrt in wrt_variables]
    self._compile_and_check([self.RShape], recons_grads,
                            [video_extent], ConvGrad3D)
def test_gradient(self):
    """Run verify_grad on the Conv3D, ConvTransp3D and ConvGrad3D dummies."""
    self.randomize()
    rng, V, W, b, d, rb = self.rng, self.V, self.W, self.b, self.d, self.rb
    dCdH = shared(self.random_tensor(*self.H_shape_func()))
    n_checks = 2
    check_grad = theano.tests.unittest_tools.verify_grad

    # Each dummy is built immediately before it is checked, so that the
    # order of operations on the shared ``rng`` stays the same.
    conv = DummyConv3D(rng, (V, W, b), d)
    check_grad(conv, [0.0], n_tests=n_checks)

    conv_transp = DummyConvTransp3D(rng, (W, rb, dCdH), d,
                                    V.get_value(borrow=True).shape[1:4])
    check_grad(conv_transp, [0.0], n_tests=n_checks)

    conv_grad = DummyConvGrad3D(rng, (V, dCdH), d,
                                W.get_value(borrow=True).shape)
    check_grad(conv_grad, [0.0], n_tests=n_checks)
if __name__ == '__main__':
    # Manual entry point: run only the infer_shape test, without a runner.
    # 'setUp' is passed as the method name simply because the TestCase
    # constructor wants the name of an existing method.
    case = TestConv3D('setUp')
    case.setUp()
    case.test_infer_shape()
...@@ -17,47 +17,64 @@ from theano.tensor.nnet import (categorical_crossentropy, ...@@ -17,47 +17,64 @@ from theano.tensor.nnet import (categorical_crossentropy,
crossentropy_softmax_1hot_with_bias, crossentropy_softmax_1hot_with_bias,
crossentropy_softmax_1hot_with_bias_dx, crossentropy_softmax_1hot_with_bias_dx,
crossentropy_softmax_argmax_1hot_with_bias, crossentropy_softmax_argmax_1hot_with_bias,
CrossentropySoftmax1HotWithBiasDx,
CrossentropySoftmaxArgmax1HotWithBias,
CrossentropyCategorical1Hot,
CrossentropyCategorical1HotGrad,
sigmoid, softplus, sigmoid, softplus,
Softmax, softmax, SoftmaxWithBias, softmax_grad, Softmax, softmax, SoftmaxWithBias,
softmax_with_bias, softmax_grad,
softmax_with_bias, SoftmaxGrad,
Prepend_scalar_constant_to_each_row, Prepend_scalar_constant_to_each_row,
Prepend_scalar_to_each_row) Prepend_scalar_to_each_row)
from theano.tensor import dmatrix, dvector, lvector, dscalar
class T_sigmoid(unittest.TestCase):
    """Gradient check for the elementwise ``sigmoid``."""

    def setUp(self):
        # Seed the random number generator through the shared test helper.
        utt.seed_rng()

    def test_elemwise(self):
        """verify_grad on a random 3x4 input."""
        sample = numpy.random.rand(3, 4)
        utt.verify_grad(sigmoid, [sample])
class T_softplus(unittest.TestCase):
    """Gradient check for the elementwise ``softplus``."""

    def setUp(self):
        # Seed the random number generator through the shared test helper.
        utt.seed_rng()

    def test_elemwise(self):
        """verify_grad on a random 3x4 input."""
        sample = numpy.random.rand(3, 4)
        utt.verify_grad(softplus, [sample])
class T_Softmax(utt.InferShapeTester):
class T_Softmax(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def test0(self):
    """Verify the gradient of column 0 of ``softmax`` on a 3x4 input."""
    def column_zero(mat):
        return softmax(mat)[:, 0]

    point = [numpy.random.rand(3, 4)]
    utt.verify_grad(column_zero, point)
def test1(self):
    """Verify the gradient of column 1 of ``softmax`` on a 3x4 input."""
    def column_one(mat):
        return softmax(mat)[:, 1]

    point = [numpy.random.rand(3, 4)]
    utt.verify_grad(column_one, point)
def test2(self):
    """Verify the gradient of column 2 of ``softmax`` on a 3x4 input."""
    def column_two(mat):
        return softmax(mat)[:, 2]

    point = [numpy.random.rand(3, 4)]
    utt.verify_grad(column_two, point)
def test3(self):
    """Verify the gradient of column 3 of ``softmax`` on a 3x4 input."""
    def column_three(mat):
        return softmax(mat)[:, 3]

    point = [numpy.random.rand(3, 4)]
    utt.verify_grad(column_three, point)
def test_infer_shape(self):
    """Check ``Softmax`` shape inference on a 3x4 double matrix."""
    sym_mat = dmatrix()
    mat_value = numpy.random.rand(3, 4)
    softmax_out = Softmax()(sym_mat)
    self._compile_and_check([sym_mat], [softmax_out],
                            [mat_value], Softmax)
def test_vector(self): def test_vector(self):
x = T.vector() x = T.vector()
...@@ -65,109 +82,134 @@ class T_Softmax(unittest.TestCase): ...@@ -65,109 +82,134 @@ class T_Softmax(unittest.TestCase):
xv = numpy.random.randn(6).astype(config.floatX) xv = numpy.random.randn(6).astype(config.floatX)
assert numpy.allclose(f(xv), numpy.exp(xv) / numpy.exp(xv).sum()) assert numpy.allclose(f(xv), numpy.exp(xv) / numpy.exp(xv).sum())
def test_vector_grad(self):
    """Verify the gradient of ``softmax`` applied to a length-4 vector."""
    def softmax_of(vec):
        return softmax(vec)

    point = [numpy.random.rand(4)]
    utt.verify_grad(softmax_of, point)
class T_SoftmaxWithBias(utt.InferShapeTester):
    """Gradient, optimization and infer_shape tests for softmax_with_bias."""

    def _verify_column_grad(self, column):
        # Gradient check restricted to a single output column, evaluated at
        # a random 3x4 matrix and a random length-4 bias.
        def one_column(mat, bias):
            return softmax_with_bias(mat, bias)[:, column]

        utt.verify_grad(one_column, [numpy.random.rand(3, 4),
                                     numpy.random.rand(4)])

    def test0(self):
        self._verify_column_grad(0)

    def test1(self):
        self._verify_column_grad(1)

    def test2(self):
        self._verify_column_grad(2)

    def test3(self):
        self._verify_column_grad(3)

    def test_broadcast(self):
        # Check that optimization does not raise an error for no good
        # reason: softmax_with_bias does not correctly support some/all
        # broadcasted input patterns.
        initial_W = numpy.asarray([[0.1] * 3] * 3,
                                  dtype=theano.config.floatX)
        W = theano.shared(value=initial_W, name='W')
        vbias = theano.shared(value=0.1, name='vbias')  # 0.01
        hid = T.vector('hid')

        activation = T.dot(hid, W.T) + vbias
        f = theano.function([hid], T.nnet.softmax(activation))

        # The compiled graph must use plain softmax, not softmax_with_bias.
        ops = [node.op for node in f.maker.fgraph.toposort()]
        assert softmax_with_bias not in ops
        assert softmax in ops

        f([0, 1, 0])
        #print f.maker.fgraph.toposort()

    def test_infer_shape(self):
        """Check ``SoftmaxWithBias`` shape inference (3x4 matrix, 4-vector)."""
        sym_mat = dmatrix()
        sym_bias = dvector()
        mat_value = numpy.random.rand(3, 4)
        bias_value = numpy.random.rand(4)
        self._compile_and_check([sym_mat, sym_bias],
                                [SoftmaxWithBias()(sym_mat, sym_bias)],
                                [mat_value, bias_value], SoftmaxWithBias)
class T_SoftmaxGrad(utt.InferShapeTester):
    """infer_shape test for the ``SoftmaxGrad`` op."""

    def test_infer_shape(self):
        # Two symbolic double matrices, each fed a random 3x4 value.
        first, second = dmatrix(), dmatrix()
        first_value = numpy.random.rand(3, 4)
        second_value = numpy.random.rand(3, 4)
        grad_out = SoftmaxGrad()(first, second)
        self._compile_and_check([first, second], [grad_out],
                                [first_value, second_value], SoftmaxGrad)
class T_CrossentropySoftmax1Hot(unittest.TestCase):
    """Gradient checks for the crossentropy_softmax_1hot family."""

    def setUp(self):
        # Seed the random number generator through the shared test helper.
        utt.seed_rng()

    def test0(self):
        """Matrix input plus bias vector, with fixed target indices."""
        targets = [0, 1, 3]

        def first_output(mat, bias):
            return crossentropy_softmax_1hot_with_bias(mat, bias, targets)[0]

        utt.verify_grad(first_output, [numpy.random.rand(3, 4),
                                       numpy.random.rand(4)])

    def test1(self):
        """Matrix input without bias."""
        targets = [0, 1, 3]

        def first_output(mat):
            return crossentropy_softmax_1hot(mat, targets)[0]

        utt.verify_grad(first_output, [numpy.random.rand(3, 4)])

    def test_vector(self):
        """Vector input, left-padded to a one-row matrix."""
        targets = [3]

        def first_output(vec):
            as_row = T.shape_padleft(vec)
            return crossentropy_softmax_1hot(as_row, targets)[0]

        utt.verify_grad(first_output, [numpy.random.rand(4)])

    def test_vectors(self):
        """Left-padded vector input added to a second vector."""
        targets = [3]

        def first_output(vec, other):
            summed = T.shape_padleft(vec) + other
            return crossentropy_softmax_1hot(summed, targets)[0]

        utt.verify_grad(first_output, [numpy.random.rand(4),
                                       numpy.random.rand(4)])
class T_CrossentropySoftmax1HotWithBiasDx(unittest.TestCase):
def setUp(self): class T_CrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester):
utt.seed_rng()
def test0(self):
    """Verify the gradient of the Dx op with respect to the softmax input."""
    def dx_wrt_softmax(sm):
        # Random draws happen inside the function, in this order.
        dy = numpy.random.rand(10)  # Gradient w.r.t. NLL.
        y_idx = numpy.random.randint(low=0, high=5, size=10)  # Class indices.
        return theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
            dy,
            sm,  # Softmax output.
            y_idx)

    # Build a random softmax output whose rows sum to 1.
    unnormalized = numpy.random.rand(10, 5)
    row_totals = unnormalized.sum(axis=1).reshape(10, 1)
    softmax_output = unnormalized / row_totals
    utt.verify_grad(dx_wrt_softmax, [softmax_output])
def test1(self): def test1(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
softmax_output = rng.rand(10, 5) softmax_output = rng.rand(10, 5)
softmax_output /= softmax_output.sum(axis=1).reshape(10, 1) softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
def f(dy): def f(dy):
return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx( return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
dy, dy,
...@@ -175,19 +217,38 @@ class T_CrossentropySoftmax1HotWithBiasDx(unittest.TestCase): ...@@ -175,19 +217,38 @@ class T_CrossentropySoftmax1HotWithBiasDx(unittest.TestCase):
rng.randint(low=0, high=5, size=10))) rng.randint(low=0, high=5, size=10)))
utt.verify_grad(f, [rng.rand(10)]) utt.verify_grad(f, [rng.rand(10)])
def test_infer_shape(self):
    """Check ``CrossentropySoftmax1HotWithBiasDx`` shape inference."""
    sym_sm = dmatrix()
    sym_dy = dvector()
    sym_idx = lvector()

    rng = numpy.random.RandomState(utt.fetch_seed())
    # 10x5 random matrix, normalized so that every row sums to 1.
    sm_value = rng.rand(10, 5)
    sm_value = sm_value / sm_value.sum(axis=1).reshape(10, 1)
    dy_value = rng.rand(10)
    idx_value = rng.randint(low=0, high=5, size=10)

    dx_out = CrossentropySoftmax1HotWithBiasDx()(sym_dy, sym_sm, sym_idx)
    self._compile_and_check([sym_dy, sym_sm, sym_idx],
                            [dx_out],
                            [dy_value, sm_value, idx_value],
                            CrossentropySoftmax1HotWithBiasDx)
class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
def setUp(self):
    """Run the parent set-up, then store the op under test on ``self.op``."""
    super(T_CrossentropySoftmaxArgmax1HotWithBias, self).setUp()
    nnet_module = theano.tensor.nnet
    self.op = nnet_module.crossentropy_softmax_argmax_1hot_with_bias
def test0(self): def test0(self):
n_classes = 5 n_classes = 5
n_samples = 3 n_samples = 3
# First test gradient when getting a gradient on the NLL output. # First test gradient when getting a gradient on the NLL output.
def grad_on_nll(x, b): def grad_on_nll(x, b):
return self.op(x, b, y_idx=numpy.random.randint( return self.op(x, b, y_idx=numpy.random.randint(
low=0, high=n_classes, size=n_samples))[0] low=0, high=n_classes, size=n_samples))[0]
utt.verify_grad(grad_on_nll, [numpy.random.rand(n_samples, n_classes), utt.verify_grad(grad_on_nll, [numpy.random.rand(n_samples, n_classes),
numpy.random.rand(n_classes)]) numpy.random.rand(n_classes)])
# Then test gradient when getting a gradient on the softmax output. # Then test gradient when getting a gradient on the softmax output.
def grad_on_softmax(x, b): def grad_on_softmax(x, b):
return self.op(x, b, y_idx=numpy.random.randint( return self.op(x, b, y_idx=numpy.random.randint(
...@@ -197,68 +258,107 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(unittest.TestCase): ...@@ -197,68 +258,107 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(unittest.TestCase):
numpy.random.rand(n_classes)]) numpy.random.rand(n_classes)])
def test_infer_shape(self):
    """Check ``CrossentropySoftmaxArgmax1HotWithBias`` shape inference."""
    sym_x = dmatrix()
    sym_b = dvector()
    sym_idx = lvector()

    rng = numpy.random.RandomState(utt.fetch_seed())
    x_value = rng.rand(3, 5)
    b_value = rng.rand(5)
    idx_value = rng.randint(low=0, high=5, size=3)

    # Applying the op yields several outputs; they are passed on as-is.
    op_outputs = CrossentropySoftmaxArgmax1HotWithBias()(sym_x, sym_b,
                                                         sym_idx)
    self._compile_and_check([sym_x, sym_b, sym_idx],
                            op_outputs,
                            [x_value, b_value, idx_value],
                            CrossentropySoftmaxArgmax1HotWithBias)
class T_prepend(utt.InferShapeTester):
    """Tests for the ops that prepend a scalar column to every matrix row.

    Covers ``Prepend_scalar_constant_to_each_row`` (scalar fixed at
    construction) and ``Prepend_scalar_to_each_row`` (scalar given as an
    input), both for their computed values and for ``infer_shape``.
    """

    def test0(self):
        """A constant 4.0 is prepended: a 3x5 input becomes 3x6."""
        x = tensor.matrix('x')
        y = Prepend_scalar_constant_to_each_row(4.)(x)
        # Request a single output rather than a one-element list, so that
        # ``f`` returns the array itself and ``my.shape`` below is valid.
        f = theano.function([x], y)
        # ``tensor.matrix`` has dtype floatX; cast the value accordingly so
        # the call is not rejected when floatX is float32.
        m = numpy.random.rand(3, 5).astype(config.floatX)
        my = f(m)
        self.assertTrue(my.shape == (3, 6), my.shape)
        self.assertTrue(numpy.all(my[:, 0] == 4.0))

    def test1(self):
        "basic functionality"
        x = tensor.matrix('x')
        y = Prepend_scalar_to_each_row()(5., x)
        f = theano.function([x], y)
        m = numpy.ones((3, 5), dtype="float32")
        my = f(m)
        self.assertTrue(my.shape == (3, 6))
        self.assertTrue(numpy.all(my[:, 0] == 5.0))

    def test_infer_shape(self):
        """Check shape inference of both prepend ops on a 3x5 matrix."""
        admat = dmatrix()
        adscal = dscalar()
        rng = numpy.random.RandomState(utt.fetch_seed())
        admat_val = rng.rand(3, 5)
        adscal_val = rng.rand()

        # Constant variant: the scalar is baked into the op instance.
        self._compile_and_check([admat],
                [Prepend_scalar_constant_to_each_row(adscal_val)(admat)],
                [admat_val],
                Prepend_scalar_constant_to_each_row)

        # Input variant: the scalar is a symbolic input of the graph.
        self._compile_and_check([adscal, admat],
                [Prepend_scalar_to_each_row()(adscal, admat)],
                [adscal_val, admat_val],
                Prepend_scalar_to_each_row)
class T_CrossentropyCategorical1HotGrad(utt.InferShapeTester):
    """infer_shape test for the ``CrossentropyCategorical1HotGrad`` op."""

    def test_infer_shape(self):
        sym_g_y = dvector()
        sym_coding = dmatrix()
        sym_one_of_n = lvector()

        rng = numpy.random.RandomState(utt.fetch_seed())
        g_y_value = rng.rand(3)
        coding_value = rng.rand(3, 2)
        one_of_n_value = [0, 1, 0]

        grad_out = CrossentropyCategorical1HotGrad()(sym_g_y, sym_coding,
                                                     sym_one_of_n)
        self._compile_and_check([sym_g_y, sym_coding, sym_one_of_n],
                                [grad_out],
                                [g_y_value, coding_value, one_of_n_value],
                                CrossentropyCategorical1HotGrad)
class T_CrossentropyCategorical1Hot(unittest.TestCase):
def setUp(self): class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
utt.seed_rng()
def test_grad(self):
    """Check the op's value on a small example, then verify its gradient."""
    op = crossentropy_categorical_1hot
    x = tensor.matrix('x')
    one_of_n = tensor.lvector('one_of_n')
    f = theano.function([x, one_of_n], op(x, one_of_n))

    x_val = numpy.asarray([[.4, .6, .0], [.1, .8, .1]],
                          dtype=config.floatX)
    # Targets [0, 1] select the entries .4 and .8 respectively.
    expected = -numpy.log([.4, .8])
    xe_val = f(x_val, [0, 1])
    assert numpy.allclose(xe_val, expected)

    def with_fixed_targets(coding):
        return op(coding, [0, 1])

    tensor.verify_grad(with_fixed_targets, [x_val], rng=numpy.random)
# see issue gh-788
def est_infer_shape(self):
    """infer_shape check for ``CrossentropyCategorical1Hot`` (disabled).

    The name deliberately lacks the ``test`` prefix so that test runners
    do not collect it; see issue gh-788.
    """
    sym_coding = dmatrix()
    sym_one_of_n = lvector()

    rng = numpy.random.RandomState(utt.fetch_seed())
    coding_value = rng.rand(3, 2)
    one_of_n_value = [0, 1, 0]

    xe_out = CrossentropyCategorical1Hot()(sym_coding, sym_one_of_n)
    self._compile_and_check([sym_coding, sym_one_of_n],
                            [xe_out],
                            [coding_value, one_of_n_value],
                            CrossentropyCategorical1Hot)
def test_softmax_optimizations(self): def test_softmax_optimizations(self):
x = tensor.matrix('x') x = tensor.matrix('x')
one_of_n = tensor.lvector('one_of_n') one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot op = crossentropy_categorical_1hot
xe = op(x, one_of_n) xe = op(x, one_of_n)
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
...@@ -270,7 +370,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -270,7 +370,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
theano.compile.mode.OPT_FAST_RUN).optimize(fgraph) theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard' assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_vector(self): def test_softmax_optimizations_vector(self):
x = tensor.vector('x') x = tensor.vector('x')
...@@ -284,19 +385,19 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -284,19 +385,19 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
theano.compile.mode.optdb.query( theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(fgraph) theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard' assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_w_bias(self): def test_softmax_optimizations_w_bias(self):
x = tensor.matrix('x') x = tensor.matrix('x')
b = tensor.vector('b') b = tensor.vector('b')
one_of_n = tensor.lvector('one_of_n') one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot op = crossentropy_categorical_1hot
xe = op(x, one_of_n) xe = op(x, one_of_n)
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x, b, one_of_n], [x, b, one_of_n],
[op(softmax(x+b), one_of_n)]) [op(softmax(x + b), one_of_n)])
assert fgraph.outputs[0].owner.op == op assert fgraph.outputs[0].owner.op == op
#print 'BEFORE' #print 'BEFORE'
...@@ -316,7 +417,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -316,7 +417,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
assert len(fgraph.toposort()) == 2 assert len(fgraph.toposort()) == 2
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard' assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_w_bias2(self): def test_softmax_optimizations_w_bias2(self):
x = tensor.matrix('x') x = tensor.matrix('x')
...@@ -327,7 +429,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -327,7 +429,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x, b, c, one_of_n], [x, b, c, one_of_n],
[op(softmax(T.add(x,b,c)), one_of_n)]) [op(softmax(T.add(x, b, c)), one_of_n)])
assert fgraph.outputs[0].owner.op == op assert fgraph.outputs[0].owner.op == op
#print 'BEFORE' #print 'BEFORE'
...@@ -345,7 +447,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -345,7 +447,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
assert len(fgraph.toposort()) == 3 assert len(fgraph.toposort()) == 3
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard' assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_w_bias_vector(self): def test_softmax_optimizations_w_bias_vector(self):
x = tensor.vector('x') x = tensor.vector('x')
...@@ -354,7 +457,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -354,7 +457,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
op = crossentropy_categorical_1hot op = crossentropy_categorical_1hot
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x, b, one_of_n], [x, b, one_of_n],
[op(softmax(x+b), one_of_n)]) [op(softmax(x + b), one_of_n)])
assert fgraph.outputs[0].owner.op == op assert fgraph.outputs[0].owner.op == op
#print 'BEFORE' #print 'BEFORE'
#for node in fgraph.toposort(): #for node in fgraph.toposort():
...@@ -370,15 +473,14 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -370,15 +473,14 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
#print '====' #print '===='
assert len(fgraph.toposort()) == 3 assert len(fgraph.toposort()) == 3
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard' assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_grad_optimizations(self): def test_softmax_grad_optimizations(self):
x = tensor.matrix('x') x = tensor.matrix('x')
one_of_n = tensor.lvector('one_of_n') one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot op = crossentropy_categorical_1hot
xe = op(softmax(x), one_of_n) xe = op(softmax(x), one_of_n)
sum_xe = tensor.sum(xe) sum_xe = tensor.sum(xe)
g_x = tensor.grad(sum_xe, x) g_x = tensor.grad(sum_xe, x)
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
...@@ -396,8 +498,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -396,8 +498,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
#for node in fgraph.toposort(): #for node in fgraph.toposort():
# print node.op, node.inputs # print node.op, node.inputs
# the function has 9 ops because the dimshuffle and elemwise{second} aren't getting # the function has 9 ops because the dimshuffle and elemwise{second}
# cleaned up as well as we'd like. # aren't getting cleaned up as well as we'd like.
has_cx1hot = False has_cx1hot = False
has_cx1hotdx = False has_cx1hotdx = False
has_softmax = False has_softmax = False
...@@ -405,13 +507,12 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -405,13 +507,12 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
for node in fgraph.toposort(): for node in fgraph.toposort():
if node.op == crossentropy_softmax_argmax_1hot_with_bias: if node.op == crossentropy_softmax_argmax_1hot_with_bias:
has_cx1hot = True has_cx1hot = True
if node.op == crossentropy_softmax_1hot_with_bias_dx : if node.op == crossentropy_softmax_1hot_with_bias_dx:
has_cx1hotdx = True has_cx1hotdx = True
if node.op == softmax: if node.op == softmax:
has_softmax = True has_softmax = True
if node.op == softmax_grad: if node.op == softmax_grad:
has_softmaxdx = True has_softmaxdx = True
assert has_cx1hot assert has_cx1hot
assert has_cx1hotdx assert has_cx1hotdx
assert not has_softmax assert not has_softmax
...@@ -439,8 +540,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -439,8 +540,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
#for node in fgraph.toposort(): #for node in fgraph.toposort():
# print node.op, node.inputs # print node.op, node.inputs
# the function has 9 ops because the dimshuffle and elemwise{second} aren't getting # the function has 9 ops because the dimshuffle and elemwise{second}
# cleaned up as well as we'd like. # aren't getting cleaned up as well as we'd like.
has_cx1hot = False has_cx1hot = False
has_cx1hotdx = False has_cx1hotdx = False
has_softmax = False has_softmax = False
...@@ -448,13 +549,12 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -448,13 +549,12 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
for node in fgraph.toposort(): for node in fgraph.toposort():
if node.op == crossentropy_softmax_argmax_1hot_with_bias: if node.op == crossentropy_softmax_argmax_1hot_with_bias:
has_cx1hot = True has_cx1hot = True
if node.op == crossentropy_softmax_1hot_with_bias_dx : if node.op == crossentropy_softmax_1hot_with_bias_dx:
has_cx1hotdx = True has_cx1hotdx = True
if node.op == softmax: if node.op == softmax:
has_softmax = True has_softmax = True
if node.op == softmax_grad: if node.op == softmax_grad:
has_softmaxdx = True has_softmaxdx = True
assert has_cx1hot assert has_cx1hot
assert has_cx1hotdx assert has_cx1hotdx
assert not has_softmax assert not has_softmax
...@@ -469,13 +569,10 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -469,13 +569,10 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
mode = theano.compile.mode.get_default_mode() mode = theano.compile.mode.get_default_mode()
if mode == theano.compile.mode.get_mode('FAST_COMPILE'): if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN' mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3, 5)
x_val = rng.randn(3,5)
b_val = rng.randn(5) b_val = rng.randn(5)
y_val = numpy.asarray([2,4,1]) y_val = numpy.asarray([2, 4, 1])
x = T.dmatrix('x') x = T.dmatrix('x')
b = T.dvector('b') b = T.dvector('b')
y = T.lvector('y') y = T.lvector('y')
...@@ -487,10 +584,10 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -487,10 +584,10 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
-T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y]) T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y])
] ]
for expr in expressions: for expr in expressions:
# Verify the optimizer worked on the expressions # Verify the optimizer worked on the expressions
f = theano.function([x,y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
if verbose: if verbose:
theano.printing.debugprint(f) theano.printing.debugprint(f)
try: try:
...@@ -501,7 +598,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -501,7 +598,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
raise raise
# Also verify the gradient wrt x # Also verify the gradient wrt x
g = theano.function([x,y], T.grad(expr, x), mode=mode) g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose: if verbose:
theano.printing.debugprint(g) theano.printing.debugprint(g)
try: try:
...@@ -513,13 +610,13 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -513,13 +610,13 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
## Test that a biased softmax is optimized correctly ## Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b+x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x+b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x,b,y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
if verbose: if verbose:
theano.printing.debugprint(f) theano.printing.debugprint(f)
try: try:
...@@ -528,8 +625,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -528,8 +625,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
except Exception: except Exception:
theano.printing.debugprint(f) theano.printing.debugprint(f)
raise raise
g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
if verbose: if verbose:
theano.printing.debugprint(g) theano.printing.debugprint(g)
try: try:
...@@ -547,7 +643,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -547,7 +643,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
T.mean(-T.log(softmax(x))[T.arange(y.shape[0]), y])] T.mean(-T.log(softmax(x))[T.arange(y.shape[0]), y])]
for expr in mean_expressions: for expr in mean_expressions:
f = theano.function([x,y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
if verbose: if verbose:
theano.printing.debugprint(f) theano.printing.debugprint(f)
try: try:
...@@ -557,11 +653,12 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -557,11 +653,12 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
theano.printing.debugprint(f) theano.printing.debugprint(f)
raise raise
g = theano.function([x,y], T.grad(expr, x), mode=mode) g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose: if verbose:
theano.printing.debugprint(g) theano.printing.debugprint(g)
try: try:
assert len(g.maker.fgraph.toposort()) in (6,7) #there's an extra dimshuffle in there assert len(g.maker.fgraph.toposort()) in (6, 7)
#there's an extra dimshuffle in there
# but I can't think of a good rule to get rid of it # but I can't think of a good rule to get rid of it
g(x_val, y_val) g(x_val, y_val)
except Exception: except Exception:
...@@ -569,13 +666,13 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -569,13 +666,13 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
raise raise
mean_bias_expressions = [ mean_bias_expressions = [
T.mean(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])), T.mean(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(b+x)[T.arange(y.shape[0]), y])), -T.mean(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(x+b))[T.arange(y.shape[0]), y]), -T.mean(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])] T.mean(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in mean_bias_expressions: for expr in mean_bias_expressions:
f = theano.function([x,b,y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
if verbose: if verbose:
theano.printing.debugprint(f) theano.printing.debugprint(f)
try: try:
...@@ -583,12 +680,11 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -583,12 +680,11 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
except Exception: except Exception:
theano.printing.debugprint(f) theano.printing.debugprint(f)
raise raise
g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
if verbose: if verbose:
theano.printing.debugprint(g) theano.printing.debugprint(g)
try: try:
assert len(g.maker.fgraph.toposort()) in (6,7) assert len(g.maker.fgraph.toposort()) in (6, 7)
g(x_val, b_val, y_val) g(x_val, b_val, y_val)
except Exception: except Exception:
theano.printing.debugprint(g) theano.printing.debugprint(g)
...@@ -600,15 +696,13 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -600,15 +696,13 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
if mode == theano.compile.mode.get_mode('FAST_COMPILE'): if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN' mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3,5) x_val = rng.randn(3, 5)
b_val = rng.randn(5) b_val = rng.randn(5)
y_val = numpy.asarray([2,4,1], dtype='int64') y_val = numpy.asarray([2, 4, 1], dtype='int64')
x = T.dmatrix('x') x = T.dmatrix('x')
b = T.dvector('b') b = T.dvector('b')
y = T.lvector('y') y = T.lvector('y')
yi = T.cast(y, 'int32') yi = T.cast(y, 'int32')
expressions = [ expressions = [
T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])), T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
-T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])), -T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
...@@ -618,7 +712,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -618,7 +712,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
for expr in expressions: for expr in expressions:
# Verify the optimizer worked on the expressions # Verify the optimizer worked on the expressions
f = theano.function([x,y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
if verbose: if verbose:
theano.printing.debugprint(f) theano.printing.debugprint(f)
try: try:
...@@ -629,7 +723,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -629,7 +723,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
raise raise
# Also verify the gradient wrt x # Also verify the gradient wrt x
g = theano.function([x,y], T.grad(expr, x), mode=mode) g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose: if verbose:
theano.printing.debugprint(g) theano.printing.debugprint(g)
try: try:
...@@ -639,7 +733,6 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -639,7 +733,6 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
theano.printing.debugprint(g) theano.printing.debugprint(g)
raise raise
def test_optimize_xent_vector(self): def test_optimize_xent_vector(self):
verbose = 0 verbose = 0
mode = theano.compile.mode.get_default_mode() mode = theano.compile.mode.get_default_mode()
...@@ -665,8 +758,9 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -665,8 +758,9 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
-T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))] -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x,y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
if verbose: print_graph(f) if verbose:
print_graph(f)
try: try:
prev, last = f.maker.fgraph.toposort()[-2:] prev, last = f.maker.fgraph.toposort()[-2:]
assert len(f.maker.fgraph.toposort()) == 5 assert len(f.maker.fgraph.toposort()) == 5
...@@ -674,8 +768,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -674,8 +768,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
except Exception: except Exception:
theano.printing.debugprint(f) theano.printing.debugprint(f)
raise raise
g = theano.function([x, y], T.grad(expr, x), mode=mode)
g = theano.function([x,y], T.grad(expr, x), mode=mode)
print_graph(g) print_graph(g)
try: try:
ops = [node.op for node in g.maker.fgraph.toposort()] ops = [node.op for node in g.maker.fgraph.toposort()]
...@@ -711,17 +804,19 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -711,17 +804,19 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
## Test that a biased softmax is optimized correctly ## Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b+x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x+b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x,b,y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
if verbose: print_graph(f) if verbose:
print_graph(f)
try: try:
prev, last = f.maker.fgraph.toposort()[-2:] prev, last = f.maker.fgraph.toposort()[-2:]
assert len(f.maker.fgraph.toposort()) == 3 # [big_op, sum, dim_shuffle] assert len(f.maker.fgraph.toposort()) == 3
# [big_op, sum, dim_shuffle]
f(x_val, b_val, y_val) f(x_val, b_val, y_val)
except Exception: except Exception:
theano.printing.debugprint(f) theano.printing.debugprint(f)
...@@ -730,7 +825,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -730,7 +825,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
backup = config.warn.sum_div_dimshuffle_bug backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False config.warn.sum_div_dimshuffle_bug = False
try: try:
g = theano.function([x,b,y], T.grad(expr, x), mode=mode) g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
finally: finally:
config.warn.sum_div_dimshuffle_bug = backup config.warn.sum_div_dimshuffle_bug = backup
...@@ -752,13 +847,10 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -752,13 +847,10 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
mode = theano.compile.mode.get_default_mode() mode = theano.compile.mode.get_default_mode()
if mode == theano.compile.mode.get_mode('FAST_COMPILE'): if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN' mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3, 5)
x_val = rng.randn(3,5)
b_val = rng.randn(5) b_val = rng.randn(5)
y_val = numpy.asarray([2,4,1]) y_val = numpy.asarray([2, 4, 1])
x = T.dmatrix('x') x = T.dmatrix('x')
b = T.dvector('b') b = T.dvector('b')
y = T.lvector('y') y = T.lvector('y')
...@@ -800,7 +892,6 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -800,7 +892,6 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
assert has_softmax assert has_softmax
assert not has_softmaxdx assert not has_softmaxdx
## Cases to test ## Cases to test
expressions = [ expressions = [
a * T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])), a * T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
...@@ -826,7 +917,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -826,7 +917,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
for expr in expressions: for expr in expressions:
# Verify the optimizer worked on the expressions # Verify the optimizer worked on the expressions
f = theano.function([x,y,a], expr, mode=mode) f = theano.function([x, y, a], expr, mode=mode)
try: try:
assert 5 <= len(f.maker.fgraph.toposort()) <= 10 assert 5 <= len(f.maker.fgraph.toposort()) <= 10
validate_fn_graph(f) validate_fn_graph(f)
...@@ -836,7 +927,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -836,7 +927,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
raise raise
# Verify the gradient wrt x # Verify the gradient wrt x
g = theano.function([x,y,a], T.grad(expr, x), mode=mode) g = theano.function([x, y, a], T.grad(expr, x), mode=mode)
try: try:
assert 5 <= len(g.maker.fgraph.toposort()) <= 12 assert 5 <= len(g.maker.fgraph.toposort()) <= 12
validate_grad_graph(g) validate_grad_graph(g)
...@@ -846,7 +937,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase): ...@@ -846,7 +937,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
raise raise
# Verify the gradient when providing output gradient # Verify the gradient when providing output gradient
h = theano.function([x,y,a], T.grad(expr, x, g_cost=a*x.sum()), mode=mode) h = theano.function([x, y, a],
T.grad(expr, x, g_cost=a * x.sum()), mode=mode)
try: try:
assert 8 <= len(h.maker.fgraph.toposort()) <= 17 assert 8 <= len(h.maker.fgraph.toposort()) <= 17
validate_grad_graph(h) validate_grad_graph(h)
...@@ -866,7 +958,6 @@ def test_argmax_pushdown(): ...@@ -866,7 +958,6 @@ def test_argmax_pushdown():
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x], [x],
[out]) [out])
theano.compile.mode.optdb.query( theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(fgraph) theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
...@@ -910,7 +1001,7 @@ def test_argmax_pushdown_bias(): ...@@ -910,7 +1001,7 @@ def test_argmax_pushdown_bias():
out = tensor.argmax(softmax_with_bias(x, b), axis=-1) out = tensor.argmax(softmax_with_bias(x, b), axis=-1)
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x,b], [x, b],
[out]) [out])
theano.compile.mode.optdb.query( theano.compile.mode.optdb.query(
...@@ -927,10 +1018,9 @@ def test_argmax_pushdown_bias(): ...@@ -927,10 +1018,9 @@ def test_argmax_pushdown_bias():
x = tensor.dmatrix() x = tensor.dmatrix()
b = tensor.dvector() b = tensor.dvector()
out = tensor.max_and_argmax(softmax_with_bias(x, b), axis=-1)[0] out = tensor.max_and_argmax(softmax_with_bias(x, b), axis=-1)[0]
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x,b], [x, b],
[out]) [out])
backup = config.warn.argmax_pushdown_bug backup = config.warn.argmax_pushdown_bug
...@@ -950,13 +1040,15 @@ def test_argmax_pushdown_bias(): ...@@ -950,13 +1040,15 @@ def test_argmax_pushdown_bias():
assert isinstance(fgraph.toposort()[1].op.scalar_op, theano.scalar.Maximum) assert isinstance(fgraph.toposort()[1].op.scalar_op, theano.scalar.Maximum)
assert str(fgraph.toposort()[2].op) == 'OutputGuard' assert str(fgraph.toposort()[2].op) == 'OutputGuard'
def test_asymptotic_32(): def test_asymptotic_32():
""" """
This test makes sure that our functions behave sensibly when huge values are present This test makes sure that our functions behave sensibly when
huge values are present
""" """
#TODO: consider adding the optimization of crossentropy into the current mode for the #TODO: consider adding the optimization of crossentropy into the current
# purpose of running this test # mode for the purpose of running this test
for dtype in 'float32', 'float64': for dtype in 'float32', 'float64':
if dtype == 'float32': if dtype == 'float32':
...@@ -967,16 +1059,16 @@ def test_asymptotic_32(): ...@@ -967,16 +1059,16 @@ def test_asymptotic_32():
x2 = tensor.dvector() x2 = tensor.dvector()
y = tensor.lvector() y = tensor.lvector()
c = categorical_crossentropy(softmax(x+x2), y) c = categorical_crossentropy(softmax(x + x2), y)
f = theano.function([x,y,x2], [c.sum(), tensor.grad(c.sum(), x)], mode='FAST_RUN') f = theano.function([x, y, x2], [c.sum(),
tensor.grad(c.sum(), x)], mode='FAST_RUN')
if 0: if 0:
for i, n in enumerate( f.maker.fgraph.toposort()): for i, n in enumerate(f.maker.fgraph.toposort()):
print i, n print i, n
xval = numpy.zeros((5, 5), dtype=dtype) xval = numpy.zeros((5, 5), dtype=dtype)
x2val = numpy.zeros(5, dtype=xval.dtype) x2val = numpy.zeros(5, dtype=xval.dtype)
for i in xrange(100): for i in xrange(100):
cval, gxval = f(xval, numpy.arange(5), x2val) cval, gxval = f(xval, numpy.arange(5), x2val)
xval -= 100.3 * gxval xval -= 100.3 * gxval
#print cval, gxval #print cval, gxval
...@@ -993,51 +1085,50 @@ def test_asymptotic_32(): ...@@ -993,51 +1085,50 @@ def test_asymptotic_32():
#print cval, gxval #print cval, gxval
assert cval > 61750000 assert cval > 61750000
assert gxval[0,0] == -1.0 assert gxval[0, 0] == -1.0
assert gxval[0,1] == 0.25 assert gxval[0, 1] == 0.25
class Test_softmax_opt: class Test_softmax_opt:
# Test that expressions of softmax in terms of exponentiated things divided by row sums # Test that expressions of softmax in terms of exponentiated things
# are replaced by softmax expressions. # divided by row sums are replaced by softmax expressions.
# #
# Softmax_grad isn't that interesting as an Op, but it's the signature we look for when # Softmax_grad isn't that interesting as an Op, but it has the signature
# trying to insert CrossEntropySoftmax... grad. So for now, we add softmax_grad to graphs. # we look for when trying to insert CrossEntropySoftmax... grad. So, for
# In future, we may modify the CrossEntropySoftmax...grad to look for the more basic # now, we add softmax_grad to graphs. In the future, we may modify the
# pattern. # CrossEntropySoftmax...grad to look for the more basic pattern.
# #
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
self.rng = numpy.random.RandomState(utt.fetch_seed()) self.rng = numpy.random.RandomState(utt.fetch_seed())
self.mode=theano.compile.mode.get_default_mode() self.mode = theano.compile.mode.get_default_mode()
self.mode=self.mode.including('canonicalize') self.mode = self.mode.including('canonicalize')
def test_basic(self): def test_basic(self):
c = T.matrix() c = T.matrix()
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0,'x') p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0, 'x')
# test that function contains softmax and no div. # test that function contains softmax and no div.
f = theano.function([c],p_y, mode=self.mode) f = theano.function([c], p_y, mode=self.mode)
f_ops = [n.op for n in f.maker.fgraph.toposort()] f_ops = [n.op for n in f.maker.fgraph.toposort()]
#print '--- f =' #print '--- f ='
#printing.debugprint(f) #printing.debugprint(f)
#print '===' #print '==='
assert len(f_ops) == 1 assert len(f_ops) == 1
assert softmax in f_ops assert softmax in f_ops
f(self.rng.rand(3,4).astype(config.floatX)) f(self.rng.rand(3, 4).astype(config.floatX))
def test_grad(self): def test_grad(self):
c = T.matrix() c = T.matrix()
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0,'x') p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0, 'x')
# test that function contains softmax and softmaxgrad # test that function contains softmax and softmaxgrad
w = T.matrix() w = T.matrix()
backup = config.warn.sum_div_dimshuffle_bug backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False config.warn.sum_div_dimshuffle_bug = False
try: try:
g = theano.function([c,w],T.grad((p_y*w).sum(), c)) g = theano.function([c, w], T.grad((p_y * w).sum(), c))
finally: finally:
config.warn.sum_div_dimshuffle_bug = backup config.warn.sum_div_dimshuffle_bug = backup
g_ops = [n.op for n in g.maker.fgraph.toposort()] g_ops = [n.op for n in g.maker.fgraph.toposort()]
...@@ -1049,7 +1140,7 @@ class Test_softmax_opt: ...@@ -1049,7 +1140,7 @@ class Test_softmax_opt:
assert len(g_ops) == 2 assert len(g_ops) == 2
assert softmax in g_ops assert softmax in g_ops
assert softmax_grad in g_ops assert softmax_grad in g_ops
g(self.rng.rand(3,4), self.rng.uniform(.5, 1, (3,4))) g(self.rng.rand(3, 4), self.rng.uniform(.5, 1, (3, 4)))
def test_transpose_basic(self): def test_transpose_basic(self):
# this should be a transposed softmax # this should be a transposed softmax
...@@ -1057,14 +1148,14 @@ class Test_softmax_opt: ...@@ -1057,14 +1148,14 @@ class Test_softmax_opt:
p_y = T.exp(c) / T.exp(c).sum(axis=0) p_y = T.exp(c) / T.exp(c).sum(axis=0)
# test that function contains softmax and no div. # test that function contains softmax and no div.
f = theano.function([c],p_y) f = theano.function([c], p_y)
#printing.debugprint(f) #printing.debugprint(f)
# test that function contains softmax and no div. # test that function contains softmax and no div.
backup = config.warn.sum_div_dimshuffle_bug backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False config.warn.sum_div_dimshuffle_bug = False
try: try:
g = theano.function([c],T.grad(p_y.sum(), c)) g = theano.function([c], T.grad(p_y.sum(), c))
finally: finally:
config.warn.sum_div_dimshuffle_bug = backup config.warn.sum_div_dimshuffle_bug = backup
#printing.debugprint(g) #printing.debugprint(g)
...@@ -1089,7 +1180,10 @@ class Test_softmax_opt: ...@@ -1089,7 +1180,10 @@ class Test_softmax_opt:
#printing.debugprint(g) #printing.debugprint(g)
raise SkipTest('Optimization not enabled for the moment') raise SkipTest('Optimization not enabled for the moment')
# REPEAT 3 CASES in presence of log(softmax) with the advanced indexing etc. # REPEAT 3 CASES in presence of log(softmax) with the advanced indexing
# etc.
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论