Commit 28a095d6, authored by nouiz

Merge pull request #730 from larseeri/shape_tensor_nnet

Better infer_shape tests for Softmax, SoftmaxWithBias, SoftmaxGrad, CrossentropySoftmaxArgmax1HotWithBias, ConvOp, Conv3D, ConvTransp3D and ConvGrad3D. Added and tested infer_shape for CrossentropySoftmax1HotWithBiasDx, Prepend_scalar_constant_to_each_row, Prepend_scalar_to_each_row and CrossentropyCategorical1HotGrad. Added a disabled CrossentropyCategorical1Hot.infer_shape; see gh-788.
...@@ -857,6 +857,9 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -857,6 +857,9 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
dx[i, y_idx[i]] -= dy[i] # scalar decrement dx[i, y_idx[i]] -= dy[i] # scalar decrement
output_storage[0][0] = dx output_storage[0][0] = dx
def infer_shape(self, node, shapes):
    """The gradient dx has the same shape as the softmax output sm (input 1)."""
    dy_shape, sm_shape, y_idx_shape = shapes
    return [sm_shape]
def grad(self, inp, grads): def grad(self, inp, grads):
dy, sm, y_idx = inp dy, sm, y_idx = inp
g_dx, = grads g_dx, = grads
...@@ -1031,8 +1034,11 @@ class CrossentropyCategorical1HotGrad(gof.Op): ...@@ -1031,8 +1034,11 @@ class CrossentropyCategorical1HotGrad(gof.Op):
for i in xrange(len(g_y)): for i in xrange(len(g_y)):
g_coding[i, true_one_of_n[i]] = -g_y[i] / coding_dist[i, g_coding[i, true_one_of_n[i]] = -g_y[i] / coding_dist[i,
true_one_of_n[i]] true_one_of_n[i]]
g_coding_strg[0] = g_coding g_coding_strg[0] = g_coding
def infer_shape(self, node, in_shapes):
    """g_coding matches the shape of coding_dist (input 1)."""
    # Slicing yields a one-element list, equivalent to [in_shapes[1]].
    return list(in_shapes[1:2])
crossentropy_categorical_1hot_grad = CrossentropyCategorical1HotGrad() crossentropy_categorical_1hot_grad = CrossentropyCategorical1HotGrad()
...@@ -1091,6 +1097,17 @@ class CrossentropyCategorical1Hot(gof.Op): ...@@ -1091,6 +1097,17 @@ class CrossentropyCategorical1Hot(gof.Op):
y[i] = -numpy.log(coding[i, one_of_n[i]]) y[i] = -numpy.log(coding[i, one_of_n[i]])
y_out[0] = y y_out[0] = y
# Enabling this infer_shape method makes 2 tests fail:
# theano/tensor/nnet/tests/test_nnet.py:T_CrossentropyCategorical1Hot.
#     {test_softmax_grad_optimizations,test_softmax_grad_optimizations_vector}
# This is caused by local_fill_to_alloc, which calls broadcast_like;
# that looks into the shape feature and returns a Rebroadcast instead of
# an Alloc. This infer_shape is disabled until we fix the optimizations
# or determine that it is no longer needed and update the tests.
# See issue gh-788.
# def infer_shape(self, node, in_shapes):
# return [(in_shapes[0][0],)]
def grad(self, inp, grads): def grad(self, inp, grads):
coding, one_of_n = inp coding, one_of_n = inp
g_y, = grads g_y, = grads
...@@ -1121,7 +1138,7 @@ def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph): ...@@ -1121,7 +1138,7 @@ def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(x, b, new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(x, b,
one_of_n) one_of_n)
fgraph.replace_all_validate([(nll, new_nll), (sm, new_sm)], fgraph.replace_all_validate([(nll, new_nll), (sm, new_sm)],
reason="crossentropy_to_crossentropy_with_softmax") reason="crossentropy_to_crossentropy_with_softmax_with_bias")
return True return True
return False return False
...@@ -1645,6 +1662,11 @@ class Prepend_scalar_constant_to_each_row(gof.Op): ...@@ -1645,6 +1662,11 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
out[:, 0].fill(self.val.data) out[:, 0].fill(self.val.data)
out[:, 1:] = mat out[:, 1:] = mat
def infer_shape(self, node, in_shapes):
    """Output keeps the row count of the input matrix and gains one column."""
    n_rows, n_cols = in_shapes[0]
    return [(n_rows, n_cols + 1)]
def grad(self, inp, grads): def grad(self, inp, grads):
mat, = inp mat, = inp
goutput, = grads goutput, = grads
...@@ -1694,6 +1716,10 @@ class Prepend_scalar_to_each_row(gof.Op): ...@@ -1694,6 +1716,10 @@ class Prepend_scalar_to_each_row(gof.Op):
out[:, 0].fill(val) out[:, 0].fill(val)
out[:, 1:] = mat out[:, 1:] = mat
def infer_shape(self, node, in_shapes):
    """Output keeps the row count of the matrix (input 1) and gains one column."""
    n_rows, n_cols = in_shapes[1]
    return [(n_rows, n_cols + 1)]
def grad(self, inp, grads): def grad(self, inp, grads):
val, mat = inp val, mat = inp
goutput, = grads goutput, = grads
......
...@@ -13,10 +13,10 @@ from theano.tensor.nnet import conv ...@@ -13,10 +13,10 @@ from theano.tensor.nnet import conv
from theano.tensor.basic import _allclose from theano.tensor.basic import _allclose
class TestConv2D(unittest.TestCase): class TestConv2D(utt.InferShapeTester):
def setUp(self): def setUp(self):
utt.seed_rng() super (TestConv2D, self).setUp()
self.input = T.dtensor4('input') self.input = T.dtensor4('input')
self.filters = T.dtensor4('filters') self.filters = T.dtensor4('filters')
...@@ -368,8 +368,7 @@ class TestConv2D(unittest.TestCase): ...@@ -368,8 +368,7 @@ class TestConv2D(unittest.TestCase):
gcc bug. So it should not crash anymore gcc bug. So it should not crash anymore
""" """
self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid', self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid',
verify_grad=False) verify_grad=False)
self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid', verify_grad=False)
def speed(self): def speed(self):
n_calls = 20000 n_calls = 20000
...@@ -407,3 +406,100 @@ class TestConv2D(unittest.TestCase): ...@@ -407,3 +406,100 @@ class TestConv2D(unittest.TestCase):
t2 = time.time() t2 = time.time()
print t2 - t1, print t2 - t1,
print print
def test_infer_shape(self):
    """Check ConvOp.infer_shape on several (input, filter) shape pairs.

    Note: infer_shape for ConvOp is incomplete, so the input and filter
    shapes must be provided explicitly to ``conv2d``.  Each shape pair
    is exercised with both 'valid' and 'full' border modes, replacing
    the ten copy-pasted stanzas of the original with one data-driven
    loop.
    """
    def rand(*shape):
        # Uniform random values in [-1, 1).
        r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
        return r * 2 - 1

    adtens = T.dtensor4()
    bdtens = T.dtensor4()

    # (input shape, filter shape) pairs; each runs in both border modes.
    shape_pairs = [
        ([2, 2, 3, 3], [2, 2, 2, 2]),
        ([3, 2, 8, 8], [4, 2, 5, 5]),
        ([3, 2, 7, 5], [5, 2, 3, 2]),
        ([3, 2, 7, 5], [5, 2, 2, 3]),
        ([3, 2, 3, 3], [4, 2, 3, 3]),
    ]
    for aivec_val, bivec_val in shape_pairs:
        for border_mode in ('valid', 'full'):
            # Fresh random data per case, mirroring the unrolled original.
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            self._compile_and_check(
                [adtens, bdtens],
                [conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
                             border_mode=border_mode)],
                [adtens_val, bdtens_val], conv.ConvOp)
if __name__ == '__main__':
    # When run as a script, execute only the infer_shape test.
    # TestConv2D('setUp') names an existing method so TestCase.__init__
    # accepts it; the test itself is then driven manually.
    t = TestConv2D('setUp')
    t.setUp()
    t.test_infer_shape()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论