Commit 5921fd10, authored by lucasb-eyer, committed by Frederic Bastien

Allow for pooling of empty batch (like for conv in #3715).

Parent commit: 8757c2dc
...@@ -1673,9 +1673,6 @@ if (!CudaNdarray_is_c_contiguous(%(input)s)) { ...@@ -1673,9 +1673,6 @@ if (!CudaNdarray_is_c_contiguous(%(input)s)) {
%(fail)s %(fail)s
} }
if (c_set_tensorNd(%(input)s, %(input_desc)s) != 0)
%(fail)s
int win[%(nd)d]; int win[%(nd)d];
int pad[%(nd)d]; int pad[%(nd)d];
int str[%(nd)d]; int str[%(nd)d];
...@@ -1711,6 +1708,24 @@ if (CudaNdarray_prep_output(&%(out)s, %(nd)s+2, %(out)s_dims) != 0) ...@@ -1711,6 +1708,24 @@ if (CudaNdarray_prep_output(&%(out)s, %(nd)s+2, %(out)s_dims) != 0)
%(fail)s %(fail)s
} }
// if input batch is empty, we return the empty output without calling cuDNN
// (which will fail on zero batch size)
if (CudaNdarray_DIMS(%(input)s)[0] == 0) {
cudaError_t err2 = cudaMemset((%(out)s)->devdata, 0,
CudaNdarray_SIZE(%(out)s) * sizeof(real));
if (err2 != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv could not fill the output with zeros: %%s",
cudaGetErrorString(err2));
%(fail)s
}
// Ideally, "return success" here, but we don't have a %%(done)s
} else {
if (c_set_tensorNd(%(input)s, %(input_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(out)s, %(output_desc)s) != 0) if (c_set_tensorNd(%(out)s, %(output_desc)s) != 0)
%(fail)s %(fail)s
...@@ -1732,6 +1747,8 @@ if (err != CUDNN_STATUS_SUCCESS) { ...@@ -1732,6 +1747,8 @@ if (err != CUDNN_STATUS_SUCCESS) {
cudnnGetErrorString(err)); cudnnGetErrorString(err));
%(fail)s %(fail)s
} }
}
""" % dict(out=out, fail=sub['fail'], """ % dict(out=out, fail=sub['fail'],
name=name, input=inputs[0], name=name, input=inputs[0],
ws=ws, pad=pad, str=stride, ws=ws, pad=pad, str=stride,
...@@ -1756,7 +1773,7 @@ if (err != CUDNN_STATUS_SUCCESS) { ...@@ -1756,7 +1773,7 @@ if (err != CUDNN_STATUS_SUCCESS) {
return [[1], [0], [0], [0]] return [[1], [0], [0], [0]]
def c_code_cache_version(self): def c_code_cache_version(self):
return (8, version()) return (9, version())
class GpuDnnPoolGrad(DnnBase): class GpuDnnPoolGrad(DnnBase):
...@@ -1938,13 +1955,6 @@ if (!CudaNdarray_is_c_contiguous(%(output)s)) { ...@@ -1938,13 +1955,6 @@ if (!CudaNdarray_is_c_contiguous(%(output)s)) {
%(fail)s %(fail)s
} }
if (c_set_tensorNd(%(input)s, %(input_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(input_grad)s, %(input_grad_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(output)s, %(output_desc)s) != 0)
%(fail)s
if (CudaNdarray_prep_output(&%(output_grad)s, if (CudaNdarray_prep_output(&%(output_grad)s,
%(output)s->nd, %(output)s->nd,
CudaNdarray_HOST_DIMS(%(output)s)) != 0) CudaNdarray_HOST_DIMS(%(output)s)) != 0)
...@@ -1952,6 +1962,27 @@ if (CudaNdarray_prep_output(&%(output_grad)s, ...@@ -1952,6 +1962,27 @@ if (CudaNdarray_prep_output(&%(output_grad)s,
%(fail)s %(fail)s
} }
// if input batch is empty, we return the empty output without calling cuDNN
// (which will fail on zero batch size)
if (CudaNdarray_DIMS(%(input)s)[0] == 0) {
cudaError_t err2 = cudaMemset((%(output)s)->devdata, 0,
CudaNdarray_SIZE(%(output)s) * sizeof(real));
if (err2 != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv could not fill the output with zeros: %%s",
cudaGetErrorString(err2));
%(fail)s
}
// Ideally, "return success" here, but we don't have a %%(done)s, so do else.
} else {
if (c_set_tensorNd(%(input)s, %(input_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(input_grad)s, %(input_grad_desc)s) != 0)
%(fail)s
if (c_set_tensorNd(%(output)s, %(output_desc)s) != 0)
%(fail)s
int win[%(nd)d]; int win[%(nd)d];
int pad[%(nd)d]; int pad[%(nd)d];
...@@ -1999,6 +2030,8 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -1999,6 +2030,8 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
cudnnGetErrorString(err%(name)s)); cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
}
""" % dict(output_grad=out_grad, """ % dict(output_grad=out_grad,
fail=sub['fail'], name=name, fail=sub['fail'], name=name,
input=inp, input_grad=inp_grad, output=out, input=inp, input_grad=inp_grad, output=out,
...@@ -2010,7 +2043,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -2010,7 +2043,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
ws=ws, pad=pad, str=stride) ws=ws, pad=pad, str=stride)
def c_code_cache_version(self): def c_code_cache_version(self):
return (8, version()) return (9, version())
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[0]] return [shape[0]]
......
...@@ -546,6 +546,22 @@ def test_pooling_opt_arbitrary_dimensions(): ...@@ -546,6 +546,22 @@ def test_pooling_opt_arbitrary_dimensions():
utt.assert_allclose(res_gpu[1], res_cpu[1]) utt.assert_allclose(res_gpu[1], res_cpu[1])
def test_pooling_empty_batch():
    # A zero-sized batch must not reach cuDNN (which rejects batch size 0);
    # the pooling op should simply produce an empty output of the right shape.
    empty_shape = (0, 5, 6, 8)
    img = T.ftensor4('img')
    pooled = dnn.dnn_pool(img, (2, 2), (2, 2))

    fwd = theano.function([img], pooled, mode=mode_with_gpu)
    out = fwd(numpy.random.rand(*empty_shape).astype('float32'))
    assert out.shape == (0, 5, 3, 4)

    # The gradient pass must likewise tolerate the empty batch; beyond not
    # raising, we can only check that the output shape matches the input.
    grad = T.grad(T.sum(pooled), wrt=img)
    bwd = theano.function([img], grad, mode=mode_with_gpu)
    out = bwd(numpy.random.rand(*empty_shape).astype('float32'))
    assert out.shape == (0, 5, 6, 8)
class test_DnnSoftMax(test_nnet.test_SoftMax): class test_DnnSoftMax(test_nnet.test_SoftMax):
gpu_op = dnn.GpuDnnSoftmax gpu_op = dnn.GpuDnnSoftmax
gpu_grad_op = dnn.GpuDnnSoftmaxGrad gpu_grad_op = dnn.GpuDnnSoftmaxGrad
......
Markdown is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment