提交 e5b3c89d，作者：f0k

GpuCorrMM: more documentation, more error messages

上级 e181b34f
...@@ -569,12 +569,29 @@ class BaseGpuCorrMM(GpuOp): ...@@ -569,12 +569,29 @@ class BaseGpuCorrMM(GpuOp):
for f in files] for f in files]
return reduce(str.__add__, codes) return reduce(str.__add__, codes)
def c_code(self, bottom, weights, top, direction, sub): def c_code_helper(self, bottom, weights, top, direction, sub):
# This is the shared code for GpuCorrMM (direction="forward"), """
# GpuCorrMM_gradWeights (direction="backprop weights"), and This generates the C code for GpuCorrMM (direction="forward"),
# GpuCorrMM_gradInputs (direction="backprop inputs"). GpuCorrMM_gradWeights (direction="backprop weights"), and
# Depending on the direction, one of bottom, weights, top will GpuCorrMM_gradInputs (direction="backprop inputs").
# receive the output, while the other two serve as inputs. Depending on the direction, one of bottom, weights, top will
receive the output, while the other two serve as inputs.
:param bottom: Variable name of the input images in the forward pass,
or the gradient of the input images in backprop wrt. inputs
:param weights: Variable name of the filters in the forward pass,
or the gradient of the filters in backprop wrt. weights
:param top: Variable name of the output images / feature maps in the
forward pass, or the gradient of the outputs in the backprop passes
:param direction: "forward" to correlate bottom with weights and store
results in top,
"backprop weights" to do a valid convolution of bottom with top
(swapping the first two dimensions) and store results in weights,
and "backprop inputs" to do a full convolution of top with weights
(swapping the first two dimensions) and store results in bottom.
:param sub: Dictionary of substitutions useable to help generating the
C code.
"""
if self.border_mode != "valid": if self.border_mode != "valid":
raise ValueError("mode must be 'valid'") raise ValueError("mode must be 'valid'")
dH, dW = self.subsample dH, dW = self.subsample
...@@ -591,6 +608,9 @@ class BaseGpuCorrMM(GpuOp): ...@@ -591,6 +608,9 @@ class BaseGpuCorrMM(GpuOp):
elif direction == "backprop inputs": elif direction == "backprop inputs":
direction = 2 direction = 2
out = bottom out = bottom
else:
raise ValueError("direction must be one of 'forward', "
"'backprop weights', 'backprop inputs'")
sub = sub.copy() sub = sub.copy()
sub.update(locals()) sub.update(locals())
...@@ -671,6 +691,13 @@ class BaseGpuCorrMM(GpuOp): ...@@ -671,6 +691,13 @@ class BaseGpuCorrMM(GpuOp):
{ {
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
%(out)s = (CudaNdarray*)CudaNdarray_NewDims(4,out_dim); %(out)s = (CudaNdarray*)CudaNdarray_NewDims(4,out_dim);
if (NULL == %(out)s)
{
PyErr_Format(PyExc_RuntimeError,
"BaseGpuCorrMM: Failed to allocate output of %%d x %%d x %%d x %%d",
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
%(fail)s
}
} }
// Call CUDA code // Call CUDA code
...@@ -736,7 +763,7 @@ class GpuCorrMM(BaseGpuCorrMM): ...@@ -736,7 +763,7 @@ class GpuCorrMM(BaseGpuCorrMM):
bottom, weights = inp bottom, weights = inp
top, = out_ top, = out_
direction = "forward" direction = "forward"
return super(GpuCorrMM, self).c_code(bottom, weights, top, direction, sub) return super(GpuCorrMM, self).c_code_helper(bottom, weights, top, direction, sub)
def grad(self, inp, grads): def grad(self, inp, grads):
bottom, weights = inp bottom, weights = inp
...@@ -776,7 +803,7 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM): ...@@ -776,7 +803,7 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
bottom, top = inp bottom, top = inp
weights, = out_ weights, = out_
direction = "backprop weights" direction = "backprop weights"
return super(GpuCorrMM_gradWeights, self).c_code(bottom, weights, top, direction, sub) return super(GpuCorrMM_gradWeights, self).c_code_helper(bottom, weights, top, direction, sub)
class GpuCorrMM_gradInputs(BaseGpuCorrMM): class GpuCorrMM_gradInputs(BaseGpuCorrMM):
...@@ -806,7 +833,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM): ...@@ -806,7 +833,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
weights, top = inp weights, top = inp
bottom, = out_ bottom, = out_
direction = "backprop inputs" direction = "backprop inputs"
return super(GpuCorrMM_gradInputs, self).c_code(bottom, weights, top, direction, sub) return super(GpuCorrMM_gradInputs, self).c_code_helper(bottom, weights, top, direction, sub)
## ##
......
...@@ -225,6 +225,13 @@ CudaNdarray* corrMM(CudaNdarray *const bottom, ...@@ -225,6 +225,13 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
col_dim[0] = nChannels * kW * kH; col_dim[0] = nChannels * kW * kH;
col_dim[1] = topHeight * topWidth; col_dim[1] = topHeight * topWidth;
CudaNdarray* col = (CudaNdarray*)CudaNdarray_NewDims(2, col_dim); CudaNdarray* col = (CudaNdarray*)CudaNdarray_NewDims(2, col_dim);
if (NULL == col)
{
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM failed to allocate working memory of %d x %d\n",
col_dim[0], col_dim[1]);
return NULL;
}
// Define some useful variables // Define some useful variables
const int bottom_stride = CudaNdarray_HOST_STRIDES(bottom)[0]; const int bottom_stride = CudaNdarray_HOST_STRIDES(bottom)[0];
...@@ -244,6 +251,13 @@ CudaNdarray* corrMM(CudaNdarray *const bottom, ...@@ -244,6 +251,13 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
// First, im2col // First, im2col
im2col(bottom->devdata + n * bottom_stride, nChannels, bottomHeight, im2col(bottom->devdata + n * bottom_stride, nChannels, bottomHeight,
bottomWidth, kH, kW, padH, padW, dH, dW, col->devdata); bottomWidth, kH, kW, padH, padW, dH, dW, col->devdata);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM encountered a CUDA error in im2col: %s\n",
cudaGetErrorString(err));
return NULL;
}
// Second, gemm // Second, gemm
cublasStatus_t status = cublasSgemm(handle, cublasStatus_t status = cublasSgemm(handle,
CUBLAS_OP_N, CUBLAS_OP_N, CUBLAS_OP_N, CUBLAS_OP_N,
...@@ -299,7 +313,7 @@ CudaNdarray* corrMM(CudaNdarray *const bottom, ...@@ -299,7 +313,7 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
sizeof(float) * M_ * K_); sizeof(float) * M_ * K_);
if (err != cudaSuccess) { if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM encountered a CUDA error: %s\n", "GpuCorrMM encountered a CUDA error in cudaMemsetAsync: %s\n",
cudaGetErrorString(err)); cudaGetErrorString(err));
return NULL; return NULL;
} }
...@@ -308,6 +322,13 @@ CudaNdarray* corrMM(CudaNdarray *const bottom, ...@@ -308,6 +322,13 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
// First, im2col // First, im2col
im2col(bottom->devdata + n * bottom_stride, nChannels, bottomHeight, im2col(bottom->devdata + n * bottom_stride, nChannels, bottomHeight,
bottomWidth, kH, kW, padH, padW, dH, dW, col->devdata); bottomWidth, kH, kW, padH, padW, dH, dW, col->devdata);
err = cudaGetLastError();
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM encountered a CUDA error in im2col: %s\n",
cudaGetErrorString(err));
return NULL;
}
// Second, gemm // Second, gemm
cublasStatus_t status = cublasSgemm(handle, cublasStatus_t status = cublasSgemm(handle,
CUBLAS_OP_T, CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_N,
...@@ -374,6 +395,13 @@ CudaNdarray* corrMM(CudaNdarray *const bottom, ...@@ -374,6 +395,13 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
// col2im back to the data // col2im back to the data
col2im(col->devdata, nChannels, bottomHeight, bottomWidth, col2im(col->devdata, nChannels, bottomHeight, bottomWidth,
kH, kW, padH, padW, dH, dW, bottom->devdata + n * bottom_stride); kH, kW, padH, padW, dH, dW, bottom->devdata + n * bottom_stride);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM encountered a CUDA error in col2im: %s\n",
cudaGetErrorString(err));
return NULL;
}
} }
/* /*
// Original caffe code for comparison // Original caffe code for comparison
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论