Commit b2afa088 authored by Frédéric Bastien, committed by GitHub

Merge pull request #5552 from abergeron/fix_segfault

Fix batchnorm problems
@@ -2217,6 +2217,7 @@ class _RNNSplitParams(DnnBase):
         %(b)s = pygpu_view(%(w)s, Py_None);
         %(b)s->ga.offset = off;
         %(b)s->ga.dimensions[0] = dims[0];
+        GpuArray_fix_flags(&%(b)s->ga);
         bshp = dims[0];

         err = cudnnGetRNNLinLayerMatrixParams(%(handle)s, %(desc)s, %(layer)s, xdesc, wdesc, w, %(id)s, odesc, &o);
@@ -2249,6 +2250,7 @@ class _RNNSplitParams(DnnBase):
         %(m)s->ga.dimensions[0] = dims[0] / bshp;
         %(m)s->ga.dimensions[1] = bshp;
         %(m)s->ga.strides[1] = %(m)s->ga.dimensions[0] * gpuarray_get_elsize(%(m)s->ga.typecode);
+        GpuArray_fix_flags(&%(m)s->ga);
         """ % kw2

         for i in range(len(outputs) // 2):
@@ -2262,7 +2264,7 @@ class _RNNSplitParams(DnnBase):
         return code

     def c_code_cache_version(self):
-        return (2,)
+        return (3,)


 def _split_rnn_params(w, desc, layer, input_size, dtype, rnn_mode):
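These three hunks patch the C code template generated by _RNNSplitParams. The bias and matrix outputs are built as views into the packed weight array by rewriting ga.offset, ga.dimensions and ga.strides by hand; after such an edit the contiguity flags cached on the GpuArray no longer match its actual layout, so GpuArray_fix_flags is called to recompute them. Bumping c_code_cache_version from (2,) to (3,) invalidates previously compiled modules so the regenerated code is actually used. As a rough NumPy analogy (not the libgpuarray API), a view built from explicit shape and strides gets its flags recomputed by the library that constructs it:

import numpy as np
from numpy.lib.stride_tricks import as_strided

# Rough analogy only: as_strided builds a view from hand-chosen shape and
# strides and recomputes the contiguity flags of the result, which is the
# role GpuArray_fix_flags plays for a hand-edited GpuArray descriptor.
base = np.zeros(24, dtype=np.float32)
view = as_strided(base, shape=(4, 6), strides=(24, 4))
print(view.flags['C_CONTIGUOUS'])  # True: the flags agree with the layout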
...
@@ -30,7 +30,7 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
   for (unsigned int _i = nd; _i > 0; _i--) {
     unsigned int i = _i - 1;
-    strs[i] = PyGpuArray_STRIDE(var, i) ?
+    strs[i] = (PyGpuArray_DIM(var, i) != 1 && PyGpuArray_STRIDE(var, i)) ?
       PyGpuArray_STRIDE(var, i)/ds : default_stride;
     default_stride *= PyGpuArray_DIM(var, i);
     dims[i] = PyGpuArray_DIM(var, i);
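Previously c_set_tensorNd fell back to the computed default stride only when the stored stride was 0. The patched condition also ignores the stored stride whenever the dimension has size 1: the stride of a size-1 (broadcast or view) dimension never affects addressing, but an arbitrary value there can yield a tensor descriptor that cuDNN rejects or mishandles. A minimal Python sketch of the patched rule (fix_strides is a hypothetical helper name; the real logic is the C loop above):

# Mirror of the patched loop: walk the dimensions right to left, keep a
# running contiguous default, and trust the stored stride only when the
# dimension is larger than 1 and the stride is nonzero.
def fix_strides(dims, strides_bytes, elsize):
    strs = [0] * len(dims)
    default_stride = 1
    for i in reversed(range(len(dims))):
        if dims[i] != 1 and strides_bytes[i]:
            strs[i] = strides_bytes[i] // elsize
        else:
            strs[i] = default_stride
        default_stride *= dims[i]
    return strs

print(fix_strides([2, 1, 3], [12, 0, 4], 4))  # -> [3, 3, 1]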
...
@@ -45,7 +45,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
 #ifdef RUNNING_AVERAGES
 #ifdef INPLACE_RUNNING_MEAN
-  Py_XDECREF(out_running_mean);
+  Py_XDECREF(*out_running_mean);
   PyGpuArrayObject *running_mean = in_running_mean;
   Py_INCREF(running_mean);
 #else
@@ -56,7 +56,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
 }
 #endif
 #ifdef INPLACE_RUNNING_VAR
-  Py_XDECREF(out_running_var);
+  Py_XDECREF(*out_running_var);
   PyGpuArrayObject *running_var = in_running_var;
   Py_INCREF(running_var);
 #else
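out_running_mean and out_running_var are output parameters of type PyGpuArrayObject **, so the old code passed Py_XDECREF the pointer-to-pointer itself rather than the object it points to, corrupting reference counts on the in-place path (presumably the segfault the source branch fix_segfault refers to); dereferencing first releases the previous output object as intended. The code path these hunks fix is training-mode batch normalization with in-place running-average updates, roughly as below (a hedged sketch following my reading of the Theano 0.9 gpuarray API; it needs a cuDNN-enabled context to actually compile):

import theano.tensor as T
from theano.gpuarray import dnn

x, scale, bias = T.tensor4('x'), T.tensor4('scale'), T.tensor4('bias')
rmean, rvar = T.tensor4('rmean'), T.tensor4('rvar')

# With running_mean/running_var supplied, two extra outputs carry the
# updated averages; destructive updates on them let the inplace
# optimization select the INPLACE_RUNNING_* variants patched above.
out, mean, invstd, new_rmean, new_rvar = dnn.dnn_batch_normalization_train(
    x, scale, bias, mode='per-activation', running_average_factor=0.1,
    running_mean=rmean, running_var=rvar)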
...
@@ -102,6 +102,8 @@ def test_dnn_conv_inplace():
     """
     if not dnn.dnn_available(test_ctx_name):
         raise SkipTest(dnn.dnn_available.msg)
+    utt.seed_rng()
+
     img_shp = [2, 5, 6, 8]
     kern_shp = [3, 5, 5, 6]
     img = T.tensor4('img')
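This hunk and the ones below prepend utt.seed_rng() to every test that draws random inputs. As I understand the helper (hedged), it reseeds NumPy's global RNG from Theano's unittests.rseed config flag, so a failing run can be reproduced exactly:

import numpy as np
import theano.tests.unittest_tools as utt

# Sketch of what the added calls buy (assuming the default fixed rseed):
# reseeding before generating data makes the test inputs deterministic.
utt.seed_rng()
a = np.random.rand(2, 5, 6, 8).astype('float32')
utt.seed_rng()
b = np.random.rand(2, 5, 6, 8).astype('float32')
assert np.allclose(a, b)  # same seed, same data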
@@ -150,6 +152,7 @@ def test_dnn_conv_inplace():

 def test_pooling():
     if not dnn.dnn_available(test_ctx_name):
         raise SkipTest(dnn.dnn_available.msg)
+    utt.seed_rng()
     # 'average_exc_pad' is disabled for versions < 4004
     if dnn.version(raises=False) < 4004:
@@ -241,6 +244,8 @@ def test_pooling():
 def test_pooling_with_tensor_vars():
     if not dnn.dnn_available(test_ctx_name):
         raise SkipTest(dnn.dnn_available.msg)
+    utt.seed_rng()
+
     x = T.tensor4()
     ws = theano.shared(np.array([2, 2], dtype='int32'))
     stride = theano.shared(np.array([1, 1], dtype='int32'))
@@ -295,6 +300,7 @@ def test_pooling3d():
     # 3d pooling requires version 3 or newer.
     if not dnn.dnn_available(test_ctx_name) or dnn.version(raises=False) < 3000:
         raise SkipTest(dnn.dnn_available.msg)
+    utt.seed_rng()
     # We force the FAST_RUN as we don't want the reference to run in DebugMode.
     mode_without_gpu_ref = theano.compile.mode.get_mode(
@@ -383,6 +389,7 @@ def test_pooling3d():

 def test_pooling_opt():
     if not dnn.dnn_available(test_ctx_name):
         raise SkipTest(dnn.dnn_available.msg)
+    utt.seed_rng()
     # 2D pooling
     x = T.matrix()
@@ -457,6 +464,7 @@ def test_pooling_opt_arbitrary_dimensions():
     if not dnn.dnn_available(test_ctx_name):
         raise SkipTest(dnn.dnn_available.msg)
+    utt.seed_rng()
     # 'average_exc_pad' is disabled for versions < 4004
     if dnn.version(raises=False) < 4004:
@@ -875,6 +883,8 @@ def test_dnn_conv_border_mode():
 def test_dnn_conv_alpha_output_merge():
     if not dnn.dnn_available(test_ctx_name):
         raise SkipTest(dnn.dnn_available.msg)
+    utt.seed_rng()
+
     img = T.tensor4()
     kern = T.tensor4()
     out = T.tensor4()
@@ -938,6 +948,8 @@ def test_dnn_conv_alpha_output_merge():
 def test_dnn_conv_grad():
     if not dnn.dnn_available(test_ctx_name):
         raise SkipTest(dnn.dnn_available.msg)
+    utt.seed_rng()
+
     b = 1
     c = 4
     f = 3
@@ -1010,6 +1022,7 @@ def test_conv3d_fwd():
     if not dnn.dnn_available(test_ctx_name):
         raise SkipTest(dnn.dnn_available.msg)
+    utt.seed_rng()

     def run_conv3d_fwd(inputs_shape, filters_shape, subsample,
                        border_mode, conv_mode):
@@ -1064,6 +1077,7 @@ def test_conv3d_bwd():
     if not dnn.dnn_available(test_ctx_name):
         raise SkipTest(dnn.dnn_available.msg)
+    utt.seed_rng()

     def run_conv3d_bwd(inputs_shape, filters_shape, subsample,
                        border_mode, conv_mode):
@@ -1494,8 +1508,6 @@ def test_dnn_batchnorm_train_without_running_averages():
     # compile and run batch_normalization_train without running averages
     if not dnn.dnn_available(test_ctx_name):
         raise SkipTest(dnn.dnn_available.msg)
-    if dnn.version(raises=False) < 5000:
-        raise SkipTest("batch normalization requires cudnn v5+")
     utt.seed_rng()
     x, scale, bias, dy = T.tensor4('x'), T.tensor4('scale'), T.tensor4('bias'), T.tensor4('dy')
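This hunk and the next three drop the batchnorm tests' explicit cuDNN version guard. dnn.version(raises=False) reports the linked cuDNN release as an integer in the CUDNN_VERSION scheme (major*1000 + minor*100 + patch, e.g. 5103 for 5.1.3), or -1 when it cannot be determined; the guard is presumably redundant because the gpuarray backend's dnn_available() already refuses cuDNN releases older than v5. For reference, the removed pattern (imports reconstructed, hedged):

from nose.plugins.skip import SkipTest
from theano.gpuarray import dnn

# Guard removed by these hunks; 5000 corresponds to cuDNN v5.0.0.
if dnn.version(raises=False) < 5000:
    raise SkipTest("batch normalization requires cudnn v5+")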
@@ -1579,8 +1591,6 @@ def test_dnn_batchnorm_train_inplace():
     # test inplace_running_mean and inplace_running_var
     if not dnn.dnn_available(test_ctx_name):
         raise SkipTest(dnn.dnn_available.msg)
-    if dnn.version(raises=False) < 5000:
-        raise SkipTest("batch normalization requires cudnn v5+")
     utt.seed_rng()
     x, scale, bias = T.tensor4('x'), T.tensor4('scale'), T.tensor4('bias')
@@ -1703,8 +1713,6 @@ def test_batchnorm_inference_inplace():
     # test inplace
     if not dnn.dnn_available(test_ctx_name):
         raise SkipTest(dnn.dnn_available.msg)
-    if dnn.version(raises=False) < 5000:
-        raise SkipTest("batch normalization requires cudnn v5+")
     utt.seed_rng()
     x, scale, bias, mean, var = (T.tensor4(n) for n in ('x', 'scale', 'bias', 'mean', 'var'))
@@ -1732,8 +1740,6 @@ def test_batchnorm_inference_inplace():
 def test_dnn_batchnorm_valid_and_invalid_axes():
     if not dnn.dnn_available(test_ctx_name):
         raise SkipTest(dnn.dnn_available.msg)
-    if dnn.version(raises=False) < 5000:
-        raise SkipTest("batch normalization requires cudnn v5+")
     for vartype in (T.tensor5, T.tensor4, T.tensor3, T.matrix):
         x, scale, bias, mean, var, dy = (vartype(n)
@@ -1783,6 +1789,10 @@ def test_dnn_batchnorm_valid_and_invalid_axes():


 def test_dnn_rnn_gru():
+    if not dnn.dnn_available(test_ctx_name):
+        raise SkipTest(dnn.dnn_available.msg)
+    utt.seed_rng()
+
     # test params
     input_dim = 32
     hidden_dim = 16
@@ -1882,6 +1892,10 @@ def test_dnn_rnn_gru():


 def test_dnn_rnn_gru_bidi():
+    if not dnn.dnn_available(test_ctx_name):
+        raise SkipTest(dnn.dnn_available.msg)
+    utt.seed_rng()
+
     # test params
     input_dim = 32
     hidden_dim = 16
@@ -1931,6 +1945,10 @@ def test_dnn_rnn_gru_bidi():


 def test_dnn_rnn_lstm():
+    if not dnn.dnn_available(test_ctx_name):
+        raise SkipTest(dnn.dnn_available.msg)
+    utt.seed_rng()
+
     # test params
     input_dim = 32
     hidden_dim = 16
@@ -2006,6 +2024,10 @@ def test_dnn_rnn_lstm():


 def test_dnn_rnn_lstm_grad_c():
+    if not dnn.dnn_available(test_ctx_name):
+        raise SkipTest(dnn.dnn_available.msg)
+    utt.seed_rng()
+
     # test params
     input_dim = 32
     hidden_dim = 16
...