提交 28a095d6 authored 作者: nouiz's avatar nouiz

Merge pull request #730 from larseeri/shape_tensor_nnet

Better infer_shape test for Softmax, SoftmaxWithBias, SoftmaxGrad, CrossentropySoftmaxArgmax1HotWithBias, ConvOp, Conv3D, ConvTransp3D, ConvGrad3D Added and tested infer_shape for CrossentropySoftmax1HotWithBiasDx,Prepend_scalar_constant_to_each_row and Prepend_scalar_to_each_row, CrossentropyCategorical1HotGrad Added disabled CrossentropyCategorical1Hot.infer_shape, see gh-788
......@@ -857,6 +857,9 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
dx[i, y_idx[i]] -= dy[i] # scalar decrement
output_storage[0][0] = dx
def infer_shape(self, node, shapes):
return [shapes[1]]
def grad(self, inp, grads):
dy, sm, y_idx = inp
g_dx, = grads
......@@ -1031,8 +1034,11 @@ class CrossentropyCategorical1HotGrad(gof.Op):
for i in xrange(len(g_y)):
g_coding[i, true_one_of_n[i]] = -g_y[i] / coding_dist[i,
true_one_of_n[i]]
g_coding_strg[0] = g_coding
def infer_shape(self, node, in_shapes):
return [in_shapes[1]]
crossentropy_categorical_1hot_grad = CrossentropyCategorical1HotGrad()
......@@ -1091,6 +1097,17 @@ class CrossentropyCategorical1Hot(gof.Op):
y[i] = -numpy.log(coding[i, one_of_n[i]])
y_out[0] = y
#Enabling this infer_shape method make 2 tests fail:
#theano/tensor/nnet/tests/test_nnet.py:T_CrossentropyCategorical1Hot.
# {test_softmax_grad_optimizations,test_softmax_grad_optimizations_vector}
# This is caused by the local_fill_to_alloc that call broadcast_like
# that look into the shape feature and return a Rebroadcast instead of an alloc.
# I disable this infer_shape until we fix the optimizations or determine that
# this is not needed anymore and we update the tests.
# see issue gh-788
# def infer_shape(self, node, in_shapes):
# return [(in_shapes[0][0],)]
def grad(self, inp, grads):
coding, one_of_n = inp
g_y, = grads
......@@ -1121,7 +1138,7 @@ def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(x, b,
one_of_n)
fgraph.replace_all_validate([(nll, new_nll), (sm, new_sm)],
reason="crossentropy_to_crossentropy_with_softmax")
reason="crossentropy_to_crossentropy_with_softmax_with_bias")
return True
return False
......@@ -1645,6 +1662,11 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
out[:, 0].fill(self.val.data)
out[:, 1:] = mat
def infer_shape(self, node, in_shapes):
shp = (in_shapes[0][0], in_shapes[0][1] + 1)
return [shp]
def grad(self, inp, grads):
mat, = inp
goutput, = grads
......@@ -1694,6 +1716,10 @@ class Prepend_scalar_to_each_row(gof.Op):
out[:, 0].fill(val)
out[:, 1:] = mat
def infer_shape(self, node, in_shapes):
shp = (in_shapes[1][0], in_shapes[1][1] + 1)
return [shp]
def grad(self, inp, grads):
val, mat = inp
goutput, = grads
......
......@@ -13,10 +13,10 @@ from theano.tensor.nnet import conv
from theano.tensor.basic import _allclose
class TestConv2D(unittest.TestCase):
class TestConv2D(utt.InferShapeTester):
def setUp(self):
utt.seed_rng()
super (TestConv2D, self).setUp()
self.input = T.dtensor4('input')
self.filters = T.dtensor4('filters')
......@@ -368,8 +368,7 @@ class TestConv2D(unittest.TestCase):
gcc bug. So it should not crash anymore
"""
self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid',
verify_grad=False)
self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid', verify_grad=False)
verify_grad=False)
def speed(self):
n_calls = 20000
......@@ -407,3 +406,100 @@ class TestConv2D(unittest.TestCase):
t2 = time.time()
print t2 - t1,
print
def test_infer_shape(self):
# Note: infer_shape is incomplete and thus input and filter shapes
# must be provided explicitly
def rand(*shape):
r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
return r * 2 - 1
adtens = T.dtensor4()
bdtens = T.dtensor4()
aivec_val = [2, 2, 3, 3]
bivec_val = [2, 2, 2, 2]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp)
aivec_val = [2, 2, 3, 3]
bivec_val = [2, 2, 2, 2]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
aivec_val = [3, 2, 8, 8]
bivec_val = [4, 2, 5, 5]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp)
aivec_val = [3, 2, 8, 8]
bivec_val = [4, 2, 5, 5]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
aivec_val = [3, 2, 7, 5]
bivec_val = [5, 2, 3, 2]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp)
aivec_val = [3, 2, 7, 5]
bivec_val = [5, 2, 3, 2]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
aivec_val = [3, 2, 7, 5]
bivec_val = [5, 2, 2, 3]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp)
aivec_val = [3, 2, 7, 5]
bivec_val = [5, 2, 2, 3]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
aivec_val = [3, 2, 3, 3]
bivec_val = [4, 2, 3, 3]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='valid')], [adtens_val, bdtens_val], conv.ConvOp)
aivec_val = [3, 2, 3, 3]
bivec_val = [4, 2, 3, 3]
adtens_val = rand(*aivec_val)
bdtens_val = rand(*bivec_val)
self._compile_and_check([adtens, bdtens],
[conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
if __name__ == '__main__':
t = TestConv2D('setUp')
t.setUp()
t.test_infer_shape()
......@@ -3,9 +3,9 @@ import theano
import theano.tensor as T
from theano import function, shared
from theano.tests import unittest_tools as utt
from theano.tensor.nnet.ConvTransp3D import convTransp3D
from theano.tensor.nnet.ConvGrad3D import convGrad3D
from theano.tensor.nnet.Conv3D import conv3D
from theano.tensor.nnet.ConvTransp3D import convTransp3D, ConvTransp3D
from theano.tensor.nnet.ConvGrad3D import convGrad3D, ConvGrad3D
from theano.tensor.nnet.Conv3D import conv3D, Conv3D
import numpy as N
import copy
import theano.sparse
......@@ -20,7 +20,9 @@ floatX = theano.config.floatX
# a subset of the tests they will do different things than if you
# run all of them
class DummyConv3D:
"""A dummy version of Conv3D passed to verify_grad
Stores a fixed stride, since stride is not differentiable
Exposes only one scalar argument, which is used as the position
......@@ -30,149 +32,174 @@ class DummyConv3D:
verify_grad will not need to test hundreds of variables. Disadvantage
is we can't be certain that all of them are correct, advantange is that
this random projection lets us test lots of variables very quickly """
def __init__(self, rng, VWbVals, d):
"""
param: rng Random number generator used to pick direction of the line
param: rng Random number generator used to pick direction of the
line
param: VWbVals tuple containing values to test V,W,b around
param: d shared variable for d, the stride
"""
self.V, self.W, self.b = VWbVals
self.dV = shared(rng.uniform(-1,1,self.V.get_value(borrow=True).shape))
self.dW = shared(rng.uniform(-1,1,self.W.get_value(borrow=True).shape))
self.db = shared(rng.uniform(-1,1,self.b.get_value(borrow=True).shape))
self.dV = shared(rng.uniform(-1, 1,
self.V.get_value(borrow=True).shape))
self.dW = shared(rng.uniform(-1, 1,
self.W.get_value(borrow=True).shape))
self.db = shared(rng.uniform(-1, 1,
self.b.get_value(borrow=True).shape))
self.d = d
def __call__(self, t):
output = conv3D(self.V+t*self.dV,self.W+t*self.dW,self.b+t*self.db,self.d)
output = conv3D(self.V + t * self.dV, self.W + t * self.dW,
self.b + t * self.db, self.d)
return output
class DummyConvGrad3D:
def __init__(self, rng, VdHvals, d, WShape):
"""
param: rng Random number generator used to pick direction of the line
param: rng Random number generator used to pick direction of the
line
param: VWbVals tuple containing values to test V,W,b around
param: d shared variable for d, the stride
"""
self.V, self.dCdH = VdHvals
self.dV = shared(rng.uniform(-1,1,self.V.get_value(borrow=True).shape))
self.ddCdH = shared(rng.uniform(-1,1,self.dCdH.get_value(borrow=True).shape))
self.dV = shared(rng.uniform(-1, 1,
self.V.get_value(borrow=True).shape))
self.ddCdH = shared(rng.uniform(-1, 1,
self.dCdH.get_value(borrow=True).shape))
self.d = d
self.WShape = WShape
def __call__(self, t):
output = convGrad3D(self.V+t*self.dV,self.d,self.WShape,self.dCdH + t * self.ddCdH)
output = convGrad3D(self.V + t * self.dV, self.d, self.WShape,
self.dCdH + t * self.ddCdH)
return output
class DummyConvTransp3D:
def __init__(self, rng, WbHvals, d, RShape):
"""
param: rng Random number generator used to pick direction of the line
param: rng Random number generator used to pick direction of the
line
param: VWbVals tuple containing values to test V,W,b around
param: d shared variable for d, the stride
"""
self.W, self.b, self.H = WbHvals
self.dW = rng.uniform(-1,1,self.W.get_value(borrow=True).shape)
self.db = rng.uniform(-1,1,self.b.get_value(borrow=True).shape)
self.dH = rng.uniform(-1,1,self.H.get_value(borrow=True).shape)
self.dW, self.db, self.dH = shared(self.dW), shared(self.db), shared(self.dH)
self.dW = rng.uniform(-1, 1, self.W.get_value(borrow=True).shape)
self.db = rng.uniform(-1, 1, self.b.get_value(borrow=True).shape)
self.dH = rng.uniform(-1, 1, self.H.get_value(borrow=True).shape)
self.dW, self.db, self.dH = shared(self.dW), shared(self.db),
shared(self.dH)
self.d = d
self.RShape = RShape
def __call__(self, t):
output = convTransp3D(self.W+t*self.dW,self.b+t*self.db,self.d,self.H+t*self.dH, self.RShape)
output = convTransp3D(self.W + t * self.dW, self.b + t * self.db,
self.d, self.H + t * self.dH, self.RShape)
return output
class TestConv3D(unittest.TestCase):
def setUp(self):
class TestConv3D(utt.InferShapeTester):
def setUp(self):
super(TestConv3D, self).setUp()
utt.seed_rng()
self.rng = N.random.RandomState(utt.fetch_seed())
mode = copy.copy(theano.compile.mode.get_default_mode())
mode.check_py_code = False
self.W = shared(N.ndarray(shape=(1,1,1,1,1), dtype=floatX))
self.b = shared(N.zeros(1,dtype=floatX))
self.rb = shared(N.zeros(1,dtype=floatX))
self.V = shared(N.ndarray(shape=(1,1,1,1,1), dtype=floatX))
self.d = shared(N.ndarray(shape=(3,),dtype=int))
self.W = shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX))
self.b = shared(N.zeros(1, dtype=floatX))
self.rb = shared(N.zeros(1, dtype=floatX))
self.V = shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX))
self.d = shared(N.ndarray(shape=(3, ), dtype=int))
self.H = conv3D(self.V, self.W, self.b, self.d)
self.H_func = function([], self.H, mode = mode)
self.H_shape_func = function( [], self.H.shape, mode = mode)
self.H_func = function([], self.H, mode=mode)
self.H_shape_func = function([], self.H.shape, mode=mode)
self.RShape = T.vector(dtype='int64')
self.otherH = T.TensorType(floatX,(False,False,False,False,False))(name='otherH')
self.transp = convTransp3D(self.W, self.rb, self.d, self.otherH, self.RShape)
self.transp_func = function([self.otherH,self.RShape],self.transp, mode=mode)
self.otherH = T.TensorType(floatX,
(False, False, False, False, False))(name='otherH')
self.transp = convTransp3D(self.W, self.rb, self.d,
self.otherH, self.RShape)
self.transp_func = function([self.otherH, self.RShape],
self.transp, mode=mode)
self.R = convTransp3D(self.W, self.rb, self.d, self.H, self.RShape)
self.R_func = function([self.RShape], self.R, mode = mode)
self.R_func = function([self.RShape], self.R, mode=mode)
self.R_shape_func = function([self.RShape], self.R.shape)
self.reconsObj = T.sum(T.sqr(self.V-self.R))
self.reconsObj = T.sum(T.sqr(self.V - self.R))
self.reconsObjFunc = function([self.RShape], self.reconsObj, mode=mode)
self.gradientsFunc = function([self.RShape], [ T.grad(self.reconsObj, self.W), T.grad(self.reconsObj, self.H), T.grad(self.reconsObj, self.V), T.grad(self.reconsObj,self.b) ] , mode=mode)
self.check_c_against_python = function([self.RShape], [ T.grad(self.reconsObj, self.W), T.grad(self.reconsObj, self.H), T.grad(self.reconsObj, self.V), T.grad(self.reconsObj,self.b) ] , mode='DEBUG_MODE')
self.gradientsFunc = function([self.RShape],
[T.grad(self.reconsObj, self.W), T.grad(self.reconsObj,
self.H), T.grad(self.reconsObj, self.V),
T.grad(self.reconsObj, self.b)], mode=mode)
self.dCdW_shape_func = function([self.RShape], T.grad(self.reconsObj, self.W).shape, mode=mode)
self.check_c_against_python = function([self.RShape],
[T.grad(self.reconsObj, self.W), T.grad(self.reconsObj,
self.H), T.grad(self.reconsObj, self.V),
T.grad(self.reconsObj, self.b)], mode='DEBUG_MODE')
self.dCdW_shape_func = function([self.RShape],
T.grad(self.reconsObj, self.W).shape, mode=mode)
def random_tensor(self,*dims):
return N.asarray(self.rng.uniform(-.05,.05,dims),dtype=floatX)
def random_tensor(self, *dims):
return N.asarray(self.rng.uniform(-.05, .05, dims), dtype=floatX)
def randomize(self):
batchSize = self.rng.randint(1,4)
videoDur = self.rng.randint(8,30)
filterWidth = self.rng.randint(1,8)
filterHeight = self.rng.randint(1,8)
filterDur = self.rng.randint(1,8)
tsteps = self.rng.randint(1,4)
rsteps = self.rng.randint(1,4)
csteps = self.rng.randint(1,4)
videoDur = tsteps * filterDur + self.rng.randint(0,3)
videoWidth = csteps * filterWidth + self.rng.randint(0,3)
videoHeight = rsteps * filterHeight + self.rng.randint(0,3)
numFilters = self.rng.randint(1,3)
inputChannels = self.rng.randint(1,3)
self.d.get_value(borrow=True, return_internal_type=True)[0] = self.rng.randint(1,15)
self.d.get_value(borrow=True, return_internal_type=True)[1] = self.rng.randint(1,15)
self.d.get_value(borrow=True, return_internal_type=True)[2] = self.rng.randint(1,15)
outputHeight = int( (videoHeight - filterHeight) / self.d.get_value(borrow=True)[0] )+1
outputWidth = int( (videoWidth - filterWidth) / self.d.get_value(borrow=True)[1] )+1
outputDur = int( (videoDur - filterDur) / self.d.get_value(borrow=True)[2] ) +1
self.W.set_value(
self.random_tensor(numFilters,filterHeight,filterWidth,filterDur,inputChannels),
borrow=True)
batchSize = self.rng.randint(1, 4)
videoDur = self.rng.randint(8, 30)
filterWidth = self.rng.randint(1, 8)
filterHeight = self.rng.randint(1, 8)
filterDur = self.rng.randint(1, 8)
tsteps = self.rng.randint(1, 4)
rsteps = self.rng.randint(1, 4)
csteps = self.rng.randint(1, 4)
videoDur = tsteps * filterDur + self.rng.randint(0, 3)
videoWidth = csteps * filterWidth + self.rng.randint(0, 3)
videoHeight = rsteps * filterHeight + self.rng.randint(0, 3)
numFilters = self.rng.randint(1, 3)
inputChannels = self.rng.randint(1, 3)
self.d.get_value(borrow=True, return_internal_type=True)[0] = \
self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[1] = \
self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[2] = \
self.rng.randint(1, 15)
outputHeight = int((videoHeight - filterHeight) /
self.d.get_value(borrow=True)[0]) + 1
outputWidth = int((videoWidth - filterWidth) /
self.d.get_value(borrow=True)[1]) + 1
outputDur = int((videoDur - filterDur) /
self.d.get_value(borrow=True)[2]) + 1
self.W.set_value(self.random_tensor(numFilters, filterHeight,
filterWidth, filterDur, inputChannels), borrow=True)
self.b.set_value(self.random_tensor(numFilters), borrow=True)
self.rb.set_value(self.random_tensor(inputChannels), borrow=True)
self.V.set_value(
self.random_tensor(batchSize,videoHeight,videoWidth,videoDur,inputChannels),
borrow=True)
self.V.set_value(self.random_tensor(batchSize, videoHeight,
videoWidth, videoDur, inputChannels), borrow=True)
self.rb.set_value(self.random_tensor(inputChannels), borrow=True)
def test_c_against_python(self):
......@@ -180,37 +207,38 @@ class TestConv3D(unittest.TestCase):
self.check_c_against_python(self.V.get_value(borrow=True).shape[1:4])
def test_c_against_mat_mul(self):
#Use a filter of the same size as the image, so the convolution is just a dense matrix multiply
#Check that dense matrix multiplication gives the same result as convolution
batchSize = self.rng.randint(1,10)
videoDur = self.rng.randint(3,10)
videoWidth = self.rng.randint(1,5)
videoHeight = self.rng.randint(1,5)
filterWidth = videoWidth
filterHeight = videoHeight
filterDur = videoDur
numFilters = self.rng.randint(1,3)
inputChannels = self.rng.randint(1,4)
# Use a filter of the same size as the image, so the convolution is
# just a dense matrix multiply.
# Check that dense matrix multiplication gives the same result as
# convolution.
self.d.get_value(borrow=True, return_internal_type=True)[0] = self.rng.randint(1,15)
self.d.get_value(borrow=True, return_internal_type=True)[1] = self.rng.randint(1,15)
self.d.get_value(borrow=True, return_internal_type=True)[2] = self.rng.randint(1,15)
batchSize = self.rng.randint(1, 10)
videoDur = self.rng.randint(3, 10)
videoWidth = self.rng.randint(1, 5)
videoHeight = self.rng.randint(1, 5)
filterWidth = videoWidth
filterHeight = videoHeight
filterDur = videoDur
numFilters = self.rng.randint(1, 3)
inputChannels = self.rng.randint(1, 4)
self.W.set_value(
self.random_tensor(numFilters,filterHeight,filterWidth,filterDur,inputChannels),
borrow=True)
self.d.get_value(borrow=True, return_internal_type=True)[0] = \
self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[1] = \
self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[2] = \
self.rng.randint(1, 15)
self.W.set_value(
self.W.get_value(borrow=True) * (self.W.get_value(borrow=True) < 1e-5),
borrow=True)
self.W.set_value(self.random_tensor(numFilters, filterHeight,
filterWidth, filterDur, inputChannels), borrow=True)
self.W.set_value(self.W.get_value(borrow=True) *
(self.W.get_value(borrow=True) < 1e-5), borrow=True)
self.b.set_value(self.random_tensor(numFilters), borrow=True)
self.V.set_value(
self.random_tensor(batchSize,videoHeight,videoWidth,videoDur,inputChannels),
borrow=True)
self.V.set_value(self.random_tensor(batchSize, videoHeight,
videoWidth, videoDur, inputChannels), borrow=True)
Hv = self.H_func()
......@@ -220,156 +248,163 @@ class TestConv3D(unittest.TestCase):
n = inputChannels * videoHeight * videoWidth * videoDur
W_mat = N.zeros((n, numFilters))
V_mat = N.zeros((batchSize,n))
V_mat = N.zeros((batchSize, n))
Hv_mat = N.zeros((batchSize, numFilters))
for qi in xrange(0,numFilters):
W_mat[:,qi] = self.W.get_value(borrow=True)[qi,:,:,:,:].reshape((n))
Hv_mat[:,qi] = Hv[:,0,0,0,qi]
for qi in xrange(0,batchSize):
V_mat[qi,:] = self.V.get_value(borrow=True)[qi,:,:,:,:].reshape((n))
for qi in xrange(0, numFilters):
W_mat[:, qi] = \
self.W.get_value(borrow=True)[qi, :, :, :, :].reshape((n))
Hv_mat[:, qi] = Hv[:, 0, 0, 0, qi]
for qi in xrange(0, batchSize):
V_mat[qi, :] = \
self.V.get_value(borrow=True)[qi, :, :, :, :].reshape((n))
H_mat = N.dot(V_mat,W_mat) + self.b.get_value(borrow=True)
H_mat = N.dot(V_mat, W_mat) + self.b.get_value(borrow=True)
tol = 1e-5
if floatX == 'float32':
tol = 1e-4
if N.abs(H_mat-Hv_mat).max() > tol and not N.allclose(H_mat,Hv_mat):
if N.abs(H_mat - Hv_mat).max() > tol and not N.allclose(H_mat, Hv_mat):
print H_mat
print Hv_mat
print 'max error: '+str(N.abs(H_mat-Hv_mat).max())
print 'max error: ' + str(N.abs(H_mat - Hv_mat).max())
W.get_value(borrow=True)[W.get_value(borrow=True) != 0] += 1.0
print 'min non-zero kernel mag: '+str(N.abs(W.get_value(borrow=True)).min())
print 'min non-zero kernel mag: ' + \
str(N.abs(W.get_value(borrow=True)).min())
assert False
def test_c_against_mat_transp_mul(self):
#Use a filter of the same size as the image, so the convolution is just a dense matrix multiply
#Check that dense matrix multiplication by the transpose of the matrix gives the same result as ConvTransp
batchSize = self.rng.randint(1,10)
videoDur = self.rng.randint(3,15)
videoWidth = self.rng.randint(3,15)
videoHeight = self.rng.randint(3,15)
# Use a filter of the same size as the image, so the convolution is just a
# dense matrix multiply.
# Check that dense matrix multiplication by the transpose of the matrix
# gives the same result as ConvTransp.
batchSize = self.rng.randint(1, 10)
videoDur = self.rng.randint(3, 15)
videoWidth = self.rng.randint(3, 15)
videoHeight = self.rng.randint(3, 15)
filterWidth = videoWidth
filterHeight = videoHeight
filterDur = videoDur
numFilters = self.rng.randint(1,15)
inputChannels = self.rng.randint(1,15)
self.d.get_value(borrow=True, return_internal_type=True)[0] = self.rng.randint(1,15)
self.d.get_value(borrow=True, return_internal_type=True)[1] = self.rng.randint(1,15)
self.d.get_value(borrow=True, return_internal_type=True)[2] = self.rng.randint(1,15)
self.W.set_value(
self.random_tensor(numFilters,filterHeight,filterWidth,filterDur,inputChannels),
borrow=True)
numFilters = self.rng.randint(1, 15)
inputChannels = self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[0] = \
self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[1] = \
self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[2] = \
self.rng.randint(1, 15)
self.W.set_value(self.random_tensor(numFilters, filterHeight,
filterWidth, filterDur, inputChannels), borrow=True)
self.b.set_value(self.random_tensor(numFilters), borrow=True)
self.V.set_value(
self.random_tensor(batchSize,videoHeight,videoWidth,videoDur,inputChannels),
borrow=True)
self.V.set_value(self.random_tensor(batchSize, videoHeight,
videoWidth, videoDur, inputChannels), borrow=True)
self.rb.set_value(self.random_tensor(inputChannels), borrow=True)
H_shape = self.H_shape_func()
assert H_shape[1] == 1
assert H_shape[1] == 1
assert H_shape[2] == 1
assert H_shape[3] == 1
Hv = self.random_tensor( * H_shape )
Hv = self.random_tensor( * H_shape)
Vv = self.transp_func(Hv,[videoHeight,videoWidth,videoDur])
Vv = self.transp_func(Hv, [videoHeight, videoWidth, videoDur])
n = inputChannels * videoHeight * videoWidth * videoDur
rbim = N.zeros((videoHeight,videoWidth,videoDur,inputChannels))
for qi in xrange(0,inputChannels):
rbim[:,:,:,qi] = self.rb.get_value(borrow=True)[qi]
rbim = N.zeros((videoHeight, videoWidth, videoDur, inputChannels))
for qi in xrange(0, inputChannels):
rbim[:, :, :, qi] = self.rb.get_value(borrow=True)[qi]
rbv = rbim.reshape((n))
W_mat = N.zeros((numFilters, n))
Vv_mat = N.zeros((n, batchSize))
Hv_mat = N.zeros((numFilters,batchSize))
for qi in xrange(0,numFilters):
W_mat[qi,:] = self.W.get_value(borrow=True)[qi,:,:,:,:].reshape((n))
Hv_mat[qi,:] = Hv[:,0,0,0,qi]
for qi in xrange(0,batchSize):
Vv_mat[:,qi] = Vv[qi,:,:,:,:].reshape((n))
V_mat = (N.dot(W_mat.transpose(),Hv_mat).transpose() + rbv).transpose()
if N.abs(V_mat-Vv_mat).max() > 1e-5:
Hv_mat = N.zeros((numFilters, batchSize))
for qi in xrange(0, numFilters):
W_mat[qi, :] = \
self.W.get_value(borrow=True)[qi, :, :, :, :].reshape((n))
Hv_mat[qi, :] = Hv[:, 0, 0, 0, qi]
for qi in xrange(0, batchSize):
Vv_mat[:, qi] = Vv[qi, :, :, :, :].reshape((n))
V_mat = (N.dot(W_mat.transpose(), Hv_mat).transpose() + \
rbv).transpose()
if N.abs(V_mat - Vv_mat).max() > 1e-5:
print V_mat
print Vv_mat
for qq in xrange(V_mat.shape[0]):
for qqq in xrange(Vv_mat.shape[1]):
if abs(V_mat[qq,qqq]-Vv_mat[qq,qqq]) > 1e-5:
print 'wrong at '+str((qq,qqq))+': '+str((V_mat[qq,qqq],Vv_mat[qq,qqq]))
if abs(V_mat[qq, qqq] - Vv_mat[qq, qqq]) > 1e-5:
print ('wrong at ' + str((qq, qqq)) + ': ' +
str(V_mat[qq, qqq], Vv_mat[qq, qqq]))
assert False
def test_c_against_sparse_mat_transp_mul(self):
#like test_c_against_mat_transp_mul but using a sparse matrix and a kernel that is smaller than the image
# like test_c_against_mat_transp_mul but using a sparse matrix and a kernel
# that is smaller than the image
if not theano.sparse.enable_sparse:
raise SkipTest('Optional package sparse disabled')
batchSize = self.rng.randint(1,3)
filterWidth = self.rng.randint(1,8)
filterHeight = self.rng.randint(1,8)
filterDur = self.rng.randint(1,8)
batchSize = self.rng.randint(1, 3)
filterWidth = self.rng.randint(1, 8)
filterHeight = self.rng.randint(1, 8)
filterDur = self.rng.randint(1, 8)
self.d.get_value(borrow=True, return_internal_type=True)[0] = self.rng.randint(1,15)
self.d.get_value(borrow=True, return_internal_type=True)[1] = self.rng.randint(1,15)
self.d.get_value(borrow=True, return_internal_type=True)[2] = self.rng.randint(1,15)
self.d.get_value(borrow=True, return_internal_type=True)[0] = \
self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[1] = \
self.rng.randint(1, 15)
self.d.get_value(borrow=True, return_internal_type=True)[2] = \
self.rng.randint(1, 15)
dr = self.d.get_value(borrow=True)[0]
dc = self.d.get_value(borrow=True)[1]
dt = self.d.get_value(borrow=True)[2]
numFilters = self.rng.randint(1,3)
row_steps = self.rng.randint(1,4)
col_steps = self.rng.randint(1,4)
time_steps = self.rng.randint(1,4)
numFilters = self.rng.randint(1, 3)
row_steps = self.rng.randint(1, 4)
col_steps = self.rng.randint(1, 4)
time_steps = self.rng.randint(1, 4)
#print (row_steps,col_steps,time_steps)
videoDur = (time_steps-1)*dt+filterDur + self.rng.randint(0,3)
videoWidth = (col_steps-1)*dc+filterWidth + self.rng.randint(0,3)
videoHeight = (row_steps-1)*dr+filterHeight + self.rng.randint(0,3)
videoDur = (time_steps - 1) * dt + filterDur + \
self.rng.randint(0, 3)
videoWidth = (col_steps - 1) * dc + filterWidth + \
self.rng.randint(0, 3)
videoHeight = (row_steps - 1) * dr + filterHeight + \
self.rng.randint(0, 3)
inputChannels = self.rng.randint(1, 15)
inputChannels = self.rng.randint(1,15)
self.W.set_value(
self.random_tensor(numFilters,filterHeight,filterWidth,filterDur,inputChannels),
borrow=True)
self.W.set_value(self.random_tensor(numFilters, filterHeight,
filterWidth, filterDur, inputChannels), borrow=True)
self.b.set_value(self.random_tensor(numFilters), borrow=True)
#just needed so H_shape works
self.V.set_value(
self.random_tensor(batchSize,videoHeight,videoWidth,videoDur,inputChannels),
borrow=True)
self.V.set_value(self.random_tensor(batchSize, videoHeight, videoWidth,
videoDur, inputChannels), borrow=True)
self.rb.set_value(self.random_tensor(inputChannels), borrow=True)
H_shape = self.H_shape_func()
#make index maps
h = N.zeros( H_shape[1:])
r = N.zeros( H_shape[1:])
c = N.zeros( H_shape[1:])
t = N.zeros( H_shape[1:])
for qi in xrange(0,H_shape[4]):
h[:,:,:,qi] = qi
for qi in xrange(0,H_shape[1]):
r[qi,:,:,:] = qi
for qi in xrange(0,H_shape[2]):
c[:,qi,:,:] = qi
for qi in xrange(0,H_shape[3]):
t[:,:,qi,:] = qi
h = N.zeros(H_shape[1:])
r = N.zeros(H_shape[1:])
c = N.zeros(H_shape[1:])
t = N.zeros(H_shape[1:])
for qi in xrange(0, H_shape[4]):
h[:, :, :, qi] = qi
for qi in xrange(0, H_shape[1]):
r[qi, :, :, :] = qi
for qi in xrange(0, H_shape[2]):
c[:, qi, :, :] = qi
for qi in xrange(0, H_shape[3]):
t[:, :, qi, :] = qi
hn = H_shape[1] * H_shape[2] * H_shape[3] * H_shape[4]
......@@ -378,21 +413,20 @@ class TestConv3D(unittest.TestCase):
c = c.reshape((hn))
t = t.reshape((hn))
Hv = self.random_tensor(*H_shape)
Hv = self.random_tensor( * H_shape )
Vv = self.transp_func(Hv,[videoHeight,videoWidth,videoDur])
Vv = self.transp_func(Hv, [videoHeight, videoWidth, videoDur])
n = inputChannels * videoHeight * videoWidth * videoDur
rbim = N.zeros((videoHeight,videoWidth,videoDur,inputChannels))
for qi in xrange(0,inputChannels):
rbim[:,:,:,qi] = self.rb.get_value(borrow=True)[qi]
rbim = N.zeros((videoHeight, videoWidth, videoDur, inputChannels))
for qi in xrange(0, inputChannels):
rbim[:, :, :, qi] = self.rb.get_value(borrow=True)[qi]
rbv = rbim.reshape((n))
W_mat = N.zeros((hn,n))
W_mat = N.zeros((hn, n))
Vv_mat = N.zeros((n, batchSize))
Hv_mat = N.zeros((hn,batchSize))
for qi in xrange(0,hn):
Hv_mat = N.zeros((hn, batchSize))
for qi in xrange(0, hn):
hi = h[qi]
ri = r[qi]
ci = c[qi]
......@@ -401,57 +435,66 @@ class TestConv3D(unittest.TestCase):
placed_filter = N.zeros(self.V.get_value(borrow=True).shape[1:])
placed_filter[
ri*dr:ri*dr+self.W.get_value(borrow=True).shape[1],
ci*dc:ci*dc+self.W.get_value(borrow=True).shape[2],
ti*dt:ti*dt+self.W.get_value(borrow=True).shape[3],
:] = self.W.get_value(borrow=True)[hi,:,:,:,:]
W_mat[qi,:] = placed_filter.reshape((n))
Hv_mat[qi,:] = Hv[:,ri,ci,ti,hi]
for qi in xrange(0,batchSize):
Vv_mat[:,qi] = Vv[qi,:,:,:,:].reshape((n))
ri * dr:ri * dr + self.W.get_value(borrow=True).shape[1],
ci * dc:ci * dc + self.W.get_value(borrow=True).shape[2],
ti * dt:ti * dt + self.W.get_value(borrow=True).shape[3],
:] = self.W.get_value(borrow=True)[hi, :, :, :, :]
W_mat[qi, :] = placed_filter.reshape((n))
Hv_mat[qi, :] = Hv[:, ri, ci, ti, hi]
for qi in xrange(0, batchSize):
Vv_mat[:, qi] = Vv[qi, :, :, :, :].reshape((n))
W_mat_T = sparse.csr_matrix(W_mat.transpose())
temp = W_mat_T * Hv_mat
V_mat = (temp.transpose() + rbv).transpose()
if N.abs(V_mat-Vv_mat).max() > 1e-5:
if N.abs(V_mat - Vv_mat).max() > 1e-5:
print 'mul'
print V_mat
print 'conv'
print Vv_mat
for i in xrange(0,n):
for j in xrange(0,batchSize):
if abs(V_mat[i,j] - Vv_mat[i,j]) > 1e-5:
print 'wrong at %d,%d: %f mul versus %f conv' % (i,j,V_mat[i,j],Vv_mat[i,j])
for i in xrange(0, n):
for j in xrange(0, batchSize):
if abs(V_mat[i, j] - Vv_mat[i, j]) > 1e-5:
print ('wrong at %d,%d: %f mul versus %f conv'
% (i, j, V_mat[i, j], Vv_mat[i, j]))
assert False
def test_infer_shape(self):
self.randomize()
Hv = self.H_func()
H_shape = self.H_shape_func()
assert N.all(Hv.shape == H_shape)
gradients = self.gradientsFunc(self.V.get_value(borrow=True).shape[1:4])
dCdWv = gradients[0]
dCdW_shape = self.dCdW_shape_func(self.V.get_value(borrow=True).shape[1:4])
# Conv3D
self._compile_and_check([], [self.H], [], Conv3D)
assert N.all(dCdWv.shape == dCdW_shape)
Rv = self.R_func(self.V.get_value(borrow=True).shape[1:4])
R_shape = self.R_shape_func(self.V.get_value(borrow=True).shape[1:4])
assert N.all(Rv.shape == R_shape)
# ConvTransp3D
self._compile_and_check([self.RShape], [self.R],
[self.V.get_value(borrow=True).shape[1:4]], ConvTransp3D)
# ConvGrad3D
self._compile_and_check([self.RShape], [T.grad(self.reconsObj, self.W),
T.grad(self.reconsObj, self.H),
T.grad(self.reconsObj, self.V),
T.grad(self.reconsObj, self.b)],
[self.V.get_value(borrow=True).shape[1:4]], ConvGrad3D)
def test_gradient(self):
self.randomize()
rng, V,W,b,d,rb = self.rng, self.V, self.W, self.b, self.d, self.rb
dCdH = shared(self.random_tensor( *self.H_shape_func() ))
rng, V, W, b, d, rb = self.rng, self.V, self.W, self.b, self.d, self.rb
dCdH = shared(self.random_tensor(*self.H_shape_func()))
testsPerDir = 2
theano.tests.unittest_tools.verify_grad(DummyConv3D(rng, (V,W,b), d), [0.0], n_tests=testsPerDir)
theano.tests.unittest_tools.verify_grad(DummyConvTransp3D(rng, (W,rb,dCdH), d,V.get_value(borrow=True).shape[1:4]), [0.0], n_tests=testsPerDir)
theano.tests.unittest_tools.verify_grad(DummyConvGrad3D(rng, (V,dCdH), d, W.get_value(borrow=True).shape), [0.0], n_tests=testsPerDir)
theano.tests.unittest_tools.verify_grad(DummyConv3D(rng, (V, W, b), d),
[0.0], n_tests=testsPerDir)
theano.tests.unittest_tools.verify_grad(DummyConvTransp3D(rng,
(W, rb, dCdH), d, V.get_value(borrow=True).shape[1:4]),
[0.0], n_tests=testsPerDir)
theano.tests.unittest_tools.verify_grad(DummyConvGrad3D(rng, (V,dCdH),
d, W.get_value(borrow=True).shape),
[0.0], n_tests=testsPerDir)
if __name__ == '__main__':
t = TestConv3D('setUp')
t.setUp()
t.test_infer_shape()
......@@ -17,47 +17,64 @@ from theano.tensor.nnet import (categorical_crossentropy,
crossentropy_softmax_1hot_with_bias,
crossentropy_softmax_1hot_with_bias_dx,
crossentropy_softmax_argmax_1hot_with_bias,
CrossentropySoftmax1HotWithBiasDx,
CrossentropySoftmaxArgmax1HotWithBias,
CrossentropyCategorical1Hot,
CrossentropyCategorical1HotGrad,
sigmoid, softplus,
Softmax, softmax, SoftmaxWithBias, softmax_grad,
softmax_with_bias,
Softmax, softmax, SoftmaxWithBias,
softmax_grad,
softmax_with_bias, SoftmaxGrad,
Prepend_scalar_constant_to_each_row,
Prepend_scalar_to_each_row)
from theano.tensor import dmatrix, dvector, lvector, dscalar
class T_sigmoid(unittest.TestCase):
    """Gradient check for the elementwise sigmoid op."""

    def setUp(self):
        utt.seed_rng()

    def test_elemwise(self):
        # Fix: drop the duplicated pre-formatting verify_grad line left
        # over from the diff; one gradient check on a 3x4 input suffices.
        utt.verify_grad(sigmoid, [numpy.random.rand(3, 4)])
class T_softplus(unittest.TestCase):
    """Gradient check for the elementwise softplus op."""

    def setUp(self):
        utt.seed_rng()

    def test_elemwise(self):
        # Fix: drop the duplicated pre-formatting verify_grad line left
        # over from the diff; one gradient check on a 3x4 input suffices.
        utt.verify_grad(softplus, [numpy.random.rand(3, 4)])
class T_Softmax(utt.InferShapeTester):
class T_Softmax(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def test0(self):
def f(a):
return softmax(a)[:,0]
utt.verify_grad(f, [numpy.random.rand(3,4)])
return softmax(a)[:, 0]
utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test1(self):
def f(a):
return softmax(a)[:,1]
utt.verify_grad(f, [numpy.random.rand(3,4)])
return softmax(a)[:, 1]
utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test2(self):
def f(a):
return softmax(a)[:,2]
utt.verify_grad(f, [numpy.random.rand(3,4)])
return softmax(a)[:, 2]
utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test3(self):
def f(a):
return softmax(a)[:,3]
utt.verify_grad(f, [numpy.random.rand(3,4)])
return softmax(a)[:, 3]
utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test_infer_shape(self):
f=theano.function([],softmax(numpy.random.rand(3,4)).shape)
assert all(f()==[3,4])
admat = dmatrix()
admat_val = numpy.random.rand(3, 4)
self._compile_and_check([admat], [Softmax()(admat)],
[admat_val], Softmax)
def test_vector(self):
x = T.vector()
......@@ -65,109 +82,134 @@ class T_Softmax(unittest.TestCase):
xv = numpy.random.randn(6).astype(config.floatX)
assert numpy.allclose(f(xv), numpy.exp(xv) / numpy.exp(xv).sum())
def test_vector_grad(self):
def f(a):
return softmax(a)
utt.verify_grad(f, [numpy.random.rand(4)])
class T_SoftmaxWithBias(utt.InferShapeTester):
    """Tests for softmax_with_bias: per-column gradients, a broadcasting
    regression, and infer_shape.

    Fix: the diff residue left two class headers (old unittest.TestCase
    version plus its setUp) and pre-formatting duplicates of nearly every
    statement; reconstruct the post-merge class.
    """

    def test0(self):
        def f(a, b):
            return softmax_with_bias(a, b)[:, 0]
        utt.verify_grad(f, [numpy.random.rand(3, 4),
                            numpy.random.rand(4)])

    def test1(self):
        def f(a, b):
            return softmax_with_bias(a, b)[:, 1]
        utt.verify_grad(f, [numpy.random.rand(3, 4),
                            numpy.random.rand(4)])

    def test2(self):
        def f(a, b):
            return softmax_with_bias(a, b)[:, 2]
        utt.verify_grad(f, [numpy.random.rand(3, 4),
                            numpy.random.rand(4)])

    def test3(self):
        def f(a, b):
            return softmax_with_bias(a, b)[:, 3]
        utt.verify_grad(f, [numpy.random.rand(3, 4),
                            numpy.random.rand(4)])

    def test_broadcast(self):
        # test that we don't raise an error during optimization for no good
        # reason as softmax_with_bias don't support correctly some/all
        # broadcasted inputs pattern
        initial_W = numpy.asarray([[0.1, 0.1, 0.1],
                                   [0.1, 0.1, 0.1],
                                   [0.1, 0.1, 0.1]],
                                  dtype=theano.config.floatX)
        W = theano.shared(value=initial_W, name='W')
        vbias = theano.shared(value=0.1, name='vbias')  # 0.01
        hid = T.vector('hid')
        f = theano.function([hid],
                            T.nnet.softmax(T.dot(hid, W.T) + vbias))
        ops = [node.op for node in f.maker.fgraph.toposort()]
        # The optimizer must keep plain softmax here, not substitute
        # softmax_with_bias (which mishandles these broadcast patterns).
        assert softmax_with_bias not in ops
        assert softmax in ops
        f([0, 1, 0])
        #print f.maker.fgraph.toposort()

    def test_infer_shape(self):
        """Check SoftmaxWithBias.infer_shape with InferShapeTester."""
        admat = dmatrix()
        advec = dvector()
        admat_val = numpy.random.rand(3, 4)
        advec_val = numpy.random.rand(4)
        self._compile_and_check([admat, advec],
                                [SoftmaxWithBias()(admat, advec)],
                                [admat_val, advec_val], SoftmaxWithBias)
class T_SoftmaxGrad(utt.InferShapeTester):
    """infer_shape test for the SoftmaxGrad op.

    Fix: the old unittest.TestCase header and the old assert-based
    implementation were left interleaved with the new InferShapeTester
    version; keep only the post-merge code.
    """

    def test_infer_shape(self):
        admat = dmatrix()
        bdmat = dmatrix()
        admat_val = numpy.random.rand(3, 4)
        bdmat_val = numpy.random.rand(3, 4)
        self._compile_and_check([admat, bdmat], [SoftmaxGrad()(admat, bdmat)],
                                [admat_val, bdmat_val], SoftmaxGrad)
class T_CrossentropySoftmax1Hot(unittest.TestCase):
    """Gradient checks for crossentropy_softmax_1hot(_with_bias).

    Fix: remove the pre-formatting duplicates (y_idx assignments,
    return statements and verify_grad calls) left by the diff.
    """

    def setUp(self):
        utt.seed_rng()

    def test0(self):
        y_idx = [0, 1, 3]

        def f(a, b):
            return crossentropy_softmax_1hot_with_bias(a, b, y_idx)[0]
        utt.verify_grad(f, [numpy.random.rand(3, 4),
                            numpy.random.rand(4)])

    def test1(self):
        y_idx = [0, 1, 3]

        def f(a):
            return crossentropy_softmax_1hot(a, y_idx)[0]
        utt.verify_grad(f, [numpy.random.rand(3, 4)])

    def test_vector(self):
        y_idx = [3]

        def f(a):
            # shape_padleft turns the vector into a 1-row matrix.
            return crossentropy_softmax_1hot(T.shape_padleft(a), y_idx)[0]
        utt.verify_grad(f, [numpy.random.rand(4)])

    def test_vectors(self):
        y_idx = [3]

        def f(a, b):
            return crossentropy_softmax_1hot(T.shape_padleft(a) + b, y_idx)[0]
        utt.verify_grad(f, [numpy.random.rand(4), numpy.random.rand(4)])
class T_CrossentropySoftmax1HotWithBiasDx(unittest.TestCase):
def setUp(self):
utt.seed_rng()
class T_CrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester):
def test0(self):
def f(sm):
return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
numpy.random.rand(10), # Gradient w.r.t. NLL.
sm, # Softmax output.
numpy.random.randint(low=0, high=5, size=10))) # Class indices.
numpy.random.randint(low=0,
high=5, size=10))) # Class indices.
# Build a random softmax output whose rows sum to 1.
softmax_output = numpy.random.rand(10, 5)
softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
utt.verify_grad(f, [softmax_output])
def test1(self):
rng = numpy.random.RandomState(utt.fetch_seed())
softmax_output = rng.rand(10, 5)
softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
def f(dy):
return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
dy,
......@@ -175,19 +217,38 @@ class T_CrossentropySoftmax1HotWithBiasDx(unittest.TestCase):
rng.randint(low=0, high=5, size=10)))
utt.verify_grad(f, [rng.rand(10)])
class T_CrossentropySoftmaxArgmax1HotWithBias(unittest.TestCase):
def test_infer_shape(self):
admat = dmatrix()
advec = dvector()
alvec = lvector()
rng = numpy.random.RandomState(utt.fetch_seed())
admat_val = rng.rand(10, 5)
admat_val /= admat_val.sum(axis=1).reshape(10, 1)
advec_val = rng.rand(10)
alvec_val = rng.randint(low=0, high=5, size=10)
self._compile_and_check([advec, admat, alvec],
[CrossentropySoftmax1HotWithBiasDx()(advec, admat, alvec)],
[advec_val, admat_val, alvec_val],
CrossentropySoftmax1HotWithBiasDx)
class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
def setUp(self):
utt.seed_rng()
super(T_CrossentropySoftmaxArgmax1HotWithBias, self).setUp()
self.op = theano.tensor.nnet.crossentropy_softmax_argmax_1hot_with_bias
def test0(self):
n_classes = 5
n_samples = 3
# First test gradient when getting a gradient on the NLL output.
def grad_on_nll(x, b):
return self.op(x, b, y_idx=numpy.random.randint(
low=0, high=n_classes, size=n_samples))[0]
utt.verify_grad(grad_on_nll, [numpy.random.rand(n_samples, n_classes),
numpy.random.rand(n_classes)])
# Then test gradient when getting a gradient on the softmax output.
def grad_on_softmax(x, b):
return self.op(x, b, y_idx=numpy.random.randint(
......@@ -197,68 +258,107 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(unittest.TestCase):
numpy.random.rand(n_classes)])
def test_infer_shape(self):
var = self.op(numpy.random.rand(3,5),numpy.random.rand(5), y_idx=numpy.random.randint(
low=0, high=5, size=3))
assert theano.function([],var[0].shape)() == [3]
assert all(theano.function([],var[1].shape)() == [3,5])
assert theano.function([],var[2].shape)() == [3]
admat = dmatrix()
advec = dvector()
alvec = lvector()
rng = numpy.random.RandomState(utt.fetch_seed())
admat_val = rng.rand(3, 5)
advec_val = rng.rand(5)
alvec_val = rng.randint(low=0, high=5, size=3)
self._compile_and_check([admat, advec, alvec],
CrossentropySoftmaxArgmax1HotWithBias()(admat, advec, alvec),
[admat_val, advec_val, alvec_val],
CrossentropySoftmaxArgmax1HotWithBias)
class T_prepend(utt.InferShapeTester):
    """Tests for Prepend_scalar_constant_to_each_row and
    Prepend_scalar_to_each_row.

    Fix: the diff residue left two shadowing T_prepend classes plus the
    pre-formatting duplicates of most statements; reconstruct the single
    post-merge class (test0, test1, test_infer_shape).
    """

    def setUp(self):
        utt.seed_rng()

    def test0(self):
        """basic functionality"""
        x = tensor.matrix('x')
        y = Prepend_scalar_constant_to_each_row(4.)(x)
        f = theano.function([x], [y])
        m = numpy.random.rand(3, 5)
        my = f(m)
        # Prepending one column: (3, 5) -> (3, 6), first column == 4.0.
        self.assertTrue(my.shape == (3, 6), my.shape)
        self.assertTrue(numpy.all(my[:, 0] == 4.0))

    def test1(self):
        "basic functionality"
        x = tensor.matrix('x')
        y = Prepend_scalar_to_each_row()(5., x)
        f = theano.function([x], y)
        m = numpy.ones((3, 5), dtype="float32")
        my = f(m)
        self.assertTrue(my.shape == (3, 6))
        self.assertTrue(numpy.all(my[:, 0] == 5.0))

    def test_infer_shape(self):
        """Check infer_shape of both Prepend ops."""
        admat = dmatrix()
        adscal = dscalar()
        rng = numpy.random.RandomState(utt.fetch_seed())
        admat_val = rng.rand(3, 5)
        adscal_val = rng.rand()
        self._compile_and_check([admat],
            [Prepend_scalar_constant_to_each_row(adscal_val)(admat)],
            [admat_val],
            Prepend_scalar_constant_to_each_row)
        self._compile_and_check([adscal, admat],
            [Prepend_scalar_to_each_row()(adscal, admat)],
            [adscal_val, admat_val],
            Prepend_scalar_to_each_row)
class T_CrossentropyCategorical1Hot(unittest.TestCase):
def setUp(self):
utt.seed_rng()
class T_CrossentropyCategorical1HotGrad(utt.InferShapeTester):
    """infer_shape test for the CrossentropyCategorical1HotGrad op."""

    def test_infer_shape(self):
        g_y = dvector()
        coding = dmatrix()
        one_of_n = lvector()
        rng = numpy.random.RandomState(utt.fetch_seed())
        # Draw the values in the same order as before: vector then matrix.
        g_y_val = rng.rand(3)
        coding_val = rng.rand(3, 2)
        one_of_n_val = [0, 1, 0]
        self._compile_and_check(
            [g_y, coding, one_of_n],
            [CrossentropyCategorical1HotGrad()(g_y, coding, one_of_n)],
            [g_y_val, coding_val, one_of_n_val],
            CrossentropyCategorical1HotGrad)
class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
def test_grad(self):
x = tensor.matrix('x')
one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot
xe = op(x, one_of_n)
f = theano.function([x, one_of_n], xe)
x_val = numpy.asarray([[.4, .6, .0], [.1, .8, .1]],
dtype=config.floatX)
xe_val = f(x_val, [0,1])
xe_val = f(x_val, [0, 1])
assert numpy.allclose(xe_val, -numpy.log([.4, .8]))
def oplike(x):
return op(x, [0,1])
return op(x, [0, 1])
tensor.verify_grad(oplike, [x_val], rng=numpy.random)
# see issue gh-788
def est_infer_shape(self):
admat = dmatrix()
alvec = lvector()
rng = numpy.random.RandomState(utt.fetch_seed())
admat_val = rng.rand(3, 2)
alvec_val = [0, 1, 0]
self._compile_and_check([admat, alvec],
[CrossentropyCategorical1Hot()(admat, alvec)],
[admat_val, alvec_val],
CrossentropyCategorical1Hot)
def test_softmax_optimizations(self):
x = tensor.matrix('x')
one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot
xe = op(x, one_of_n)
fgraph = gof.FunctionGraph(
......@@ -270,7 +370,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_vector(self):
x = tensor.vector('x')
......@@ -284,19 +385,19 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_w_bias(self):
x = tensor.matrix('x')
b = tensor.vector('b')
one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot
xe = op(x, one_of_n)
fgraph = gof.FunctionGraph(
[x, b, one_of_n],
[op(softmax(x+b), one_of_n)])
[op(softmax(x + b), one_of_n)])
assert fgraph.outputs[0].owner.op == op
#print 'BEFORE'
......@@ -316,7 +417,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
assert len(fgraph.toposort()) == 2
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_w_bias2(self):
x = tensor.matrix('x')
......@@ -327,7 +429,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
fgraph = gof.FunctionGraph(
[x, b, c, one_of_n],
[op(softmax(T.add(x,b,c)), one_of_n)])
[op(softmax(T.add(x, b, c)), one_of_n)])
assert fgraph.outputs[0].owner.op == op
#print 'BEFORE'
......@@ -345,7 +447,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
assert len(fgraph.toposort()) == 3
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_w_bias_vector(self):
x = tensor.vector('x')
......@@ -354,7 +457,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
op = crossentropy_categorical_1hot
fgraph = gof.FunctionGraph(
[x, b, one_of_n],
[op(softmax(x+b), one_of_n)])
[op(softmax(x + b), one_of_n)])
assert fgraph.outputs[0].owner.op == op
#print 'BEFORE'
#for node in fgraph.toposort():
......@@ -370,15 +473,14 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
#print '===='
assert len(fgraph.toposort()) == 3
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_grad_optimizations(self):
x = tensor.matrix('x')
one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot
xe = op(softmax(x), one_of_n)
sum_xe = tensor.sum(xe)
g_x = tensor.grad(sum_xe, x)
fgraph = gof.FunctionGraph(
......@@ -396,8 +498,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
#for node in fgraph.toposort():
# print node.op, node.inputs
# the function has 9 ops because the dimshuffle and elemwise{second} aren't getting
# cleaned up as well as we'd like.
# the function has 9 ops because the dimshuffle and elemwise{second}
# aren't getting cleaned up as well as we'd like.
has_cx1hot = False
has_cx1hotdx = False
has_softmax = False
......@@ -405,13 +507,12 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
for node in fgraph.toposort():
if node.op == crossentropy_softmax_argmax_1hot_with_bias:
has_cx1hot = True
if node.op == crossentropy_softmax_1hot_with_bias_dx :
if node.op == crossentropy_softmax_1hot_with_bias_dx:
has_cx1hotdx = True
if node.op == softmax:
has_softmax = True
if node.op == softmax_grad:
has_softmaxdx = True
assert has_cx1hot
assert has_cx1hotdx
assert not has_softmax
......@@ -439,8 +540,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
#for node in fgraph.toposort():
# print node.op, node.inputs
# the function has 9 ops because the dimshuffle and elemwise{second} aren't getting
# cleaned up as well as we'd like.
# the function has 9 ops because the dimshuffle and elemwise{second}
# aren't getting cleaned up as well as we'd like.
has_cx1hot = False
has_cx1hotdx = False
has_softmax = False
......@@ -448,13 +549,12 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
for node in fgraph.toposort():
if node.op == crossentropy_softmax_argmax_1hot_with_bias:
has_cx1hot = True
if node.op == crossentropy_softmax_1hot_with_bias_dx :
if node.op == crossentropy_softmax_1hot_with_bias_dx:
has_cx1hotdx = True
if node.op == softmax:
has_softmax = True
if node.op == softmax_grad:
has_softmaxdx = True
assert has_cx1hot
assert has_cx1hotdx
assert not has_softmax
......@@ -469,13 +569,10 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
mode = theano.compile.mode.get_default_mode()
if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3,5)
x_val = rng.randn(3, 5)
b_val = rng.randn(5)
y_val = numpy.asarray([2,4,1])
y_val = numpy.asarray([2, 4, 1])
x = T.dmatrix('x')
b = T.dvector('b')
y = T.lvector('y')
......@@ -487,10 +584,10 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
-T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y])
]
for expr in expressions:
# Verify the optimizer worked on the expressions
f = theano.function([x,y], expr, mode=mode)
f = theano.function([x, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
......@@ -501,7 +598,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
raise
# Also verify the gradient wrt x
g = theano.function([x,y], T.grad(expr, x), mode=mode)
g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
......@@ -513,23 +610,22 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
## Test that a biased softmax is optimized correctly
bias_expressions = [
T.sum(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b+x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x+b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])]
T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions:
f = theano.function([x,b,y], expr, mode=mode)
f = theano.function([x, b, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
assert len(f.maker.fgraph.toposort()) == 2 # [big_op, sum]
assert len(f.maker.fgraph.toposort()) == 2 # [big_op, sum]
f(x_val, b_val, y_val)
except Exception:
theano.printing.debugprint(f)
raise
g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
......@@ -547,7 +643,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
T.mean(-T.log(softmax(x))[T.arange(y.shape[0]), y])]
for expr in mean_expressions:
f = theano.function([x,y], expr, mode=mode)
f = theano.function([x, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
......@@ -557,11 +653,12 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
theano.printing.debugprint(f)
raise
g = theano.function([x,y], T.grad(expr, x), mode=mode)
g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
assert len(g.maker.fgraph.toposort()) in (6,7) #there's an extra dimshuffle in there
assert len(g.maker.fgraph.toposort()) in (6, 7)
#there's an extra dimshuffle in there
# but I can't think of a good rule to get rid of it
g(x_val, y_val)
except Exception:
......@@ -569,13 +666,13 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
raise
mean_bias_expressions = [
T.mean(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(b+x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(x+b))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])]
T.mean(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in mean_bias_expressions:
f = theano.function([x,b,y], expr, mode=mode)
f = theano.function([x, b, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
......@@ -583,12 +680,11 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
except Exception:
theano.printing.debugprint(f)
raise
g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
assert len(g.maker.fgraph.toposort()) in (6,7)
assert len(g.maker.fgraph.toposort()) in (6, 7)
g(x_val, b_val, y_val)
except Exception:
theano.printing.debugprint(g)
......@@ -600,15 +696,13 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3,5)
x_val = rng.randn(3, 5)
b_val = rng.randn(5)
y_val = numpy.asarray([2,4,1], dtype='int64')
y_val = numpy.asarray([2, 4, 1], dtype='int64')
x = T.dmatrix('x')
b = T.dvector('b')
y = T.lvector('y')
yi = T.cast(y, 'int32')
expressions = [
T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
-T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
......@@ -618,7 +712,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
for expr in expressions:
# Verify the optimizer worked on the expressions
f = theano.function([x,y], expr, mode=mode)
f = theano.function([x, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
......@@ -629,7 +723,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
raise
# Also verify the gradient wrt x
g = theano.function([x,y], T.grad(expr, x), mode=mode)
g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
......@@ -639,7 +733,6 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
theano.printing.debugprint(g)
raise
def test_optimize_xent_vector(self):
verbose = 0
mode = theano.compile.mode.get_default_mode()
......@@ -665,8 +758,9 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
-T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))]
for expr in bias_expressions:
f = theano.function([x,y], expr, mode=mode)
if verbose: print_graph(f)
f = theano.function([x, y], expr, mode=mode)
if verbose:
print_graph(f)
try:
prev, last = f.maker.fgraph.toposort()[-2:]
assert len(f.maker.fgraph.toposort()) == 5
......@@ -674,8 +768,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
except Exception:
theano.printing.debugprint(f)
raise
g = theano.function([x,y], T.grad(expr, x), mode=mode)
g = theano.function([x, y], T.grad(expr, x), mode=mode)
print_graph(g)
try:
ops = [node.op for node in g.maker.fgraph.toposort()]
......@@ -711,17 +804,19 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
## Test that a biased softmax is optimized correctly
bias_expressions = [
T.sum(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b+x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x+b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])]
T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions:
f = theano.function([x,b,y], expr, mode=mode)
if verbose: print_graph(f)
f = theano.function([x, b, y], expr, mode=mode)
if verbose:
print_graph(f)
try:
prev, last = f.maker.fgraph.toposort()[-2:]
assert len(f.maker.fgraph.toposort()) == 3 # [big_op, sum, dim_shuffle]
assert len(f.maker.fgraph.toposort()) == 3
# [big_op, sum, dim_shuffle]
f(x_val, b_val, y_val)
except Exception:
theano.printing.debugprint(f)
......@@ -730,7 +825,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False
try:
g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
finally:
config.warn.sum_div_dimshuffle_bug = backup
......@@ -752,13 +847,10 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
mode = theano.compile.mode.get_default_mode()
if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3,5)
x_val = rng.randn(3, 5)
b_val = rng.randn(5)
y_val = numpy.asarray([2,4,1])
y_val = numpy.asarray([2, 4, 1])
x = T.dmatrix('x')
b = T.dvector('b')
y = T.lvector('y')
......@@ -800,7 +892,6 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
assert has_softmax
assert not has_softmaxdx
## Cases to test
expressions = [
a * T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
......@@ -826,7 +917,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
for expr in expressions:
# Verify the optimizer worked on the expressions
f = theano.function([x,y,a], expr, mode=mode)
f = theano.function([x, y, a], expr, mode=mode)
try:
assert 5 <= len(f.maker.fgraph.toposort()) <= 10
validate_fn_graph(f)
......@@ -836,7 +927,7 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
raise
# Verify the gradient wrt x
g = theano.function([x,y,a], T.grad(expr, x), mode=mode)
g = theano.function([x, y, a], T.grad(expr, x), mode=mode)
try:
assert 5 <= len(g.maker.fgraph.toposort()) <= 12
validate_grad_graph(g)
......@@ -846,7 +937,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
raise
# Verify the gradient when providing output gradient
h = theano.function([x,y,a], T.grad(expr, x, g_cost=a*x.sum()), mode=mode)
h = theano.function([x, y, a],
T.grad(expr, x, g_cost=a * x.sum()), mode=mode)
try:
assert 8 <= len(h.maker.fgraph.toposort()) <= 17
validate_grad_graph(h)
......@@ -866,14 +958,13 @@ def test_argmax_pushdown():
fgraph = gof.FunctionGraph(
[x],
[out])
theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
#print 'AFTER'
#for node in fgraph.toposort():
#print node.op
assert len(fgraph.toposort()) == 2 # an output_guard is second
assert len(fgraph.toposort()) == 2 # an output_guard is second
assert fgraph.toposort()[0].op == tensor.basic._max_and_argmax
assert str(fgraph.toposort()[1].op) == 'OutputGuard'
x = tensor.dmatrix()
......@@ -910,7 +1001,7 @@ def test_argmax_pushdown_bias():
out = tensor.argmax(softmax_with_bias(x, b), axis=-1)
fgraph = gof.FunctionGraph(
[x,b],
[x, b],
[out])
theano.compile.mode.optdb.query(
......@@ -927,10 +1018,9 @@ def test_argmax_pushdown_bias():
x = tensor.dmatrix()
b = tensor.dvector()
out = tensor.max_and_argmax(softmax_with_bias(x, b), axis=-1)[0]
fgraph = gof.FunctionGraph(
[x,b],
[x, b],
[out])
backup = config.warn.argmax_pushdown_bug
......@@ -950,13 +1040,15 @@ def test_argmax_pushdown_bias():
assert isinstance(fgraph.toposort()[1].op.scalar_op, theano.scalar.Maximum)
assert str(fgraph.toposort()[2].op) == 'OutputGuard'
def test_asymptotic_32():
"""
This test makes sure that our functions behave sensibly when huge values are present
This test makes sure that our functions behave sensibly when
huge values are present
"""
#TODO: consider adding the optimization of crossentropy into the current mode for the
# purpose of running this test
#TODO: consider adding the optimization of crossentropy into the current
# mode for the purpose of running this test
for dtype in 'float32', 'float64':
if dtype == 'float32':
......@@ -967,20 +1059,20 @@ def test_asymptotic_32():
x2 = tensor.dvector()
y = tensor.lvector()
c = categorical_crossentropy(softmax(x+x2), y)
f = theano.function([x,y,x2], [c.sum(), tensor.grad(c.sum(), x)], mode='FAST_RUN')
c = categorical_crossentropy(softmax(x + x2), y)
f = theano.function([x, y, x2], [c.sum(),
tensor.grad(c.sum(), x)], mode='FAST_RUN')
if 0:
for i, n in enumerate( f.maker.fgraph.toposort()):
for i, n in enumerate(f.maker.fgraph.toposort()):
print i, n
xval = numpy.zeros((5, 5), dtype=dtype)
x2val = numpy.zeros(5, dtype=xval.dtype)
for i in xrange(100):
cval, gxval = f(xval, numpy.arange(5), x2val)
cval, gxval = f(xval, numpy.arange(5), x2val)
xval -= 100.3 * gxval
#print cval, gxval
assert cval == 0 # no problem going to zero error
assert cval == 0 # no problem going to zero error
#what about when x gets really big?
......@@ -988,56 +1080,55 @@ def test_asymptotic_32():
x2val = numpy.zeros(5, dtype=xval.dtype)
for i in xrange(100):
cval, gxval = f(xval, numpy.arange(5), x2val)
cval, gxval = f(xval, numpy.arange(5), x2val)
xval += 100000.3 * gxval
#print cval, gxval
assert cval > 61750000
assert gxval[0,0] == -1.0
assert gxval[0,1] == 0.25
assert gxval[0, 0] == -1.0
assert gxval[0, 1] == 0.25
class Test_softmax_opt:
# Test that expressions of softmax in terms of exponentiated things divided by row sums
# are replaced by softmax expressions.
# Test that expressions of softmax in terms of exponentiated things
# divided by row sums are replaced by softmax expressions.
#
# Softmax_grad isn't that interesting as an Op, but it's the signature we look for when
# trying to insert CrossEntropySoftmax... grad. So for now, we add softmax_grad to graphs.
# In future, we may modify the CrossEntropySoftmax...grad to look for the more basic
# pattern.
# Softmax_grad isn't that interesting as an Op, but it has the signature
# we look for when trying to insert CrossEntropySoftmax... grad. So, for
# now, we add softmax_grad to graphs. In the future, we may modify the
# CrossEntropySoftmax...grad to look for the more basic pattern.
#
def setUp(self):
utt.seed_rng()
self.rng = numpy.random.RandomState(utt.fetch_seed())
self.mode=theano.compile.mode.get_default_mode()
self.mode=self.mode.including('canonicalize')
self.mode = theano.compile.mode.get_default_mode()
self.mode = self.mode.including('canonicalize')
def test_basic(self):
c = T.matrix()
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0,'x')
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0, 'x')
# test that function contains softmax and no div.
f = theano.function([c],p_y, mode=self.mode)
f = theano.function([c], p_y, mode=self.mode)
f_ops = [n.op for n in f.maker.fgraph.toposort()]
#print '--- f ='
#printing.debugprint(f)
#print '==='
assert len(f_ops) == 1
assert softmax in f_ops
f(self.rng.rand(3,4).astype(config.floatX))
f(self.rng.rand(3, 4).astype(config.floatX))
def test_grad(self):
c = T.matrix()
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0,'x')
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0, 'x')
# test that function contains softmax and softmaxgrad
w = T.matrix()
backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False
try:
g = theano.function([c,w],T.grad((p_y*w).sum(), c))
g = theano.function([c, w], T.grad((p_y * w).sum(), c))
finally:
config.warn.sum_div_dimshuffle_bug = backup
g_ops = [n.op for n in g.maker.fgraph.toposort()]
......@@ -1049,7 +1140,7 @@ class Test_softmax_opt:
assert len(g_ops) == 2
assert softmax in g_ops
assert softmax_grad in g_ops
g(self.rng.rand(3,4), self.rng.uniform(.5, 1, (3,4)))
g(self.rng.rand(3, 4), self.rng.uniform(.5, 1, (3, 4)))
def test_transpose_basic(self):
# this should be a transposed softmax
......@@ -1057,14 +1148,14 @@ class Test_softmax_opt:
p_y = T.exp(c) / T.exp(c).sum(axis=0)
# test that function contains softmax and no div.
f = theano.function([c],p_y)
f = theano.function([c], p_y)
#printing.debugprint(f)
# test that function contains softmax and no div.
backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False
try:
g = theano.function([c],T.grad(p_y.sum(), c))
g = theano.function([c], T.grad(p_y.sum(), c))
finally:
config.warn.sum_div_dimshuffle_bug = backup
#printing.debugprint(g)
......@@ -1089,7 +1180,10 @@ class Test_softmax_opt:
#printing.debugprint(g)
raise SkipTest('Optimization not enabled for the moment')
# REPEAT 3 CASES in presence of log(softmax) with the advanced indexing etc.
# REPEAT 3 CASES in presence of log(softmax) with the advanced indexing
# etc.
if __name__ == '__main__':
unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论