提交 e5b3c89d 作者: f0k

GpuCorrMM: more documentation, more error messages

上级 e181b34f
......@@ -569,12 +569,29 @@ class BaseGpuCorrMM(GpuOp):
for f in files]
return reduce(str.__add__, codes)
def c_code(self, bottom, weights, top, direction, sub):
# This is the shared code for GpuCorrMM (direction="forward"),
# GpuCorrMM_gradWeights (direction="backprop weights"), and
# GpuCorrMM_gradInputs (direction="backprop inputs").
# Depending on the direction, one of bottom, weights, top will
# receive the output, while the other two serve as inputs.
def c_code_helper(self, bottom, weights, top, direction, sub):
"""
This generates the C code for GpuCorrMM (direction="forward"),
GpuCorrMM_gradWeights (direction="backprop weights"), and
GpuCorrMM_gradInputs (direction="backprop inputs").
Depending on the direction, one of bottom, weights, top will
receive the output, while the other two serve as inputs.
:param bottom: Variable name of the input images in the forward pass,
or the gradient of the input images in backprop wrt. inputs
:param weights: Variable name of the filters in the forward pass,
or the gradient of the filters in backprop wrt. weights
:param top: Variable name of the output images / feature maps in the
forward pass, or the gradient of the outputs in the backprop passes
:param direction: "forward" to correlate bottom with weights and store
results in top,
"backprop weights" to do a valid convolution of bottom with top
(swapping the first two dimensions) and store results in weights,
and "backprop inputs" to do a full convolution of top with weights
(swapping the first two dimensions) and store results in bottom.
:param sub: Dictionary of substitutions useable to help generating the
C code.
"""
if self.border_mode != "valid":
raise ValueError("mode must be 'valid'")
dH, dW = self.subsample
......@@ -591,6 +608,9 @@ class BaseGpuCorrMM(GpuOp):
elif direction == "backprop inputs":
direction = 2
out = bottom
else:
raise ValueError("direction must be one of 'forward', "
"'backprop weights', 'backprop inputs'")
sub = sub.copy()
sub.update(locals())
......@@ -671,6 +691,13 @@ class BaseGpuCorrMM(GpuOp):
{
Py_XDECREF(%(out)s);
%(out)s = (CudaNdarray*)CudaNdarray_NewDims(4,out_dim);
if (NULL == %(out)s)
{
PyErr_Format(PyExc_RuntimeError,
"BaseGpuCorrMM: Failed to allocate output of %%d x %%d x %%d x %%d",
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
%(fail)s
}
}
// Call CUDA code
......@@ -736,7 +763,7 @@ class GpuCorrMM(BaseGpuCorrMM):
bottom, weights = inp
top, = out_
direction = "forward"
return super(GpuCorrMM, self).c_code(bottom, weights, top, direction, sub)
return super(GpuCorrMM, self).c_code_helper(bottom, weights, top, direction, sub)
def grad(self, inp, grads):
bottom, weights = inp
......@@ -776,7 +803,7 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
bottom, top = inp
weights, = out_
direction = "backprop weights"
return super(GpuCorrMM_gradWeights, self).c_code(bottom, weights, top, direction, sub)
return super(GpuCorrMM_gradWeights, self).c_code_helper(bottom, weights, top, direction, sub)
class GpuCorrMM_gradInputs(BaseGpuCorrMM):
......@@ -806,7 +833,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
weights, top = inp
bottom, = out_
direction = "backprop inputs"
return super(GpuCorrMM_gradInputs, self).c_code(bottom, weights, top, direction, sub)
return super(GpuCorrMM_gradInputs, self).c_code_helper(bottom, weights, top, direction, sub)
##
......
......@@ -225,6 +225,13 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
col_dim[0] = nChannels * kW * kH;
col_dim[1] = topHeight * topWidth;
CudaNdarray* col = (CudaNdarray*)CudaNdarray_NewDims(2, col_dim);
if (NULL == col)
{
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM failed to allocate working memory of %d x %d\n",
col_dim[0], col_dim[1]);
return NULL;
}
// Define some useful variables
const int bottom_stride = CudaNdarray_HOST_STRIDES(bottom)[0];
......@@ -244,6 +251,13 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
// First, im2col
im2col(bottom->devdata + n * bottom_stride, nChannels, bottomHeight,
bottomWidth, kH, kW, padH, padW, dH, dW, col->devdata);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM encountered a CUDA error in im2col: %s\n",
cudaGetErrorString(err));
return NULL;
}
// Second, gemm
cublasStatus_t status = cublasSgemm(handle,
CUBLAS_OP_N, CUBLAS_OP_N,
......@@ -299,7 +313,7 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
sizeof(float) * M_ * K_);
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM encountered a CUDA error: %s\n",
"GpuCorrMM encountered a CUDA error in cudaMemsetAsync: %s\n",
cudaGetErrorString(err));
return NULL;
}
......@@ -308,6 +322,13 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
// First, im2col
im2col(bottom->devdata + n * bottom_stride, nChannels, bottomHeight,
bottomWidth, kH, kW, padH, padW, dH, dW, col->devdata);
err = cudaGetLastError();
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM encountered a CUDA error in im2col: %s\n",
cudaGetErrorString(err));
return NULL;
}
// Second, gemm
cublasStatus_t status = cublasSgemm(handle,
CUBLAS_OP_T, CUBLAS_OP_N,
......@@ -374,6 +395,13 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
// col2im back to the data
col2im(col->devdata, nChannels, bottomHeight, bottomWidth,
kH, kW, padH, padW, dH, dW, bottom->devdata + n * bottom_stride);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM encountered a CUDA error in col2im: %s\n",
cudaGetErrorString(err));
return NULL;
}
}
/*
// Original caffe code for comparison
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论