提交 aca7c197 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #2017 from nouiz/gpu_conv

[ENH] small updates really useful
...@@ -365,6 +365,8 @@ def use(device, ...@@ -365,6 +365,8 @@ def use(device,
# event if another device is selected later. # event if another device is selected later.
cuda_ndarray.cuda_ndarray.CudaNdarray.zeros((2, 3)) cuda_ndarray.cuda_ndarray.CudaNdarray.zeros((2, 3))
use.device_number = active_device_number() use.device_number = active_device_number()
# This is needed to initialize the cublas handle.
gpu_init(use.device_number)
if test_driver: if test_driver:
import theano.sandbox.cuda.tests.test_driver import theano.sandbox.cuda.tests.test_driver
......
...@@ -262,6 +262,8 @@ class GpuGemm(GpuOp): ...@@ -262,6 +262,8 @@ class GpuGemm(GpuOp):
== CudaNdarray_HOST_DIMS(%(z_in)s)[1]) == CudaNdarray_HOST_DIMS(%(z_in)s)[1])
&& (CudaNdarray_HOST_STRIDES(%(z_out)s)[0] >= 0) && (CudaNdarray_HOST_STRIDES(%(z_out)s)[0] >= 0)
&& (CudaNdarray_HOST_STRIDES(%(z_out)s)[1] >= 0) && (CudaNdarray_HOST_STRIDES(%(z_out)s)[1] >= 0)
// The following condition is needed as this is a condition by cublas
// on the memory layout of the output it accepts.
&& ((CudaNdarray_HOST_DIMS(%(z_out)s)[0] <= 1) && ((CudaNdarray_HOST_DIMS(%(z_out)s)[0] <= 1)
|| (CudaNdarray_HOST_STRIDES(%(z_out)s)[0] == 1) || (CudaNdarray_HOST_STRIDES(%(z_out)s)[0] == 1)
|| (CudaNdarray_HOST_DIMS(%(z_out)s)[1] <= 1) || (CudaNdarray_HOST_DIMS(%(z_out)s)[1] <= 1)
...@@ -577,9 +579,8 @@ class GpuCorrMM(GpuOp): ...@@ -577,9 +579,8 @@ class GpuCorrMM(GpuOp):
return ['cuda_ndarray.cuh', '<stdio.h>'] return ['cuda_ndarray.cuh', '<stdio.h>']
def c_code_cache_version(self): def c_code_cache_version(self):
return
# raise this whenever modifying any of the support_code_files # raise this whenever modifying any of the support_code_files
return (0, 21) return (0, 22)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of # REMEMBER TO RAISE c_code_cache_version when changing any of
...@@ -594,14 +595,18 @@ class GpuCorrMM(GpuOp): ...@@ -594,14 +595,18 @@ class GpuCorrMM(GpuOp):
out, = out_ out, = out_
dx = self.subsample[0] dx = self.subsample[0]
dy = self.subsample[1] dy = self.subsample[1]
border_mode = self.border_mode
sub = sub.copy() sub = sub.copy()
pad = self.pad pad = self.pad
if self.border_mode == "valid":
bmode = 1
else:
assert self.border_mode == "full"
bmode = 0
sub.update(locals()) sub.update(locals())
return """ return """
//Mandatory args //Mandatory args
const char *mode_str = "%(border_mode)s"; int mode = %(bmode)s;
//Optional args //Optional args
int dx = %(dx)s; int dx = %(dx)s;
...@@ -610,21 +615,6 @@ class GpuCorrMM(GpuOp): ...@@ -610,21 +615,6 @@ class GpuCorrMM(GpuOp):
CudaNdarray * img = %(img)s; CudaNdarray * img = %(img)s;
CudaNdarray * kern = %(kern)s; CudaNdarray * kern = %(kern)s;
CudaNdarray * out2 = NULL; CudaNdarray * out2 = NULL;
int mode;
if (strcmp(mode_str, "full") == 0)
{
mode = 0;
}
else if (strcmp(mode_str, "valid") == 0)
{
mode = 1;
}
else
{
PyErr_SetString(PyExc_ValueError,
"mode must be one of 'full' or 'valid'");
%(fail)s;
}
//TODO: Send self.pad, stride, etc //TODO: Send self.pad, stride, etc
int out_dim[4]; int out_dim[4];
...@@ -842,7 +832,7 @@ class GpuConv(GpuOp): ...@@ -842,7 +832,7 @@ class GpuConv(GpuOp):
def c_code_cache_version(self): def c_code_cache_version(self):
# raise this whenever modifying any of the support_code_files # raise this whenever modifying any of the support_code_files
return (0, 21) return (0, 22)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of # REMEMBER TO RAISE c_code_cache_version when changing any of
...@@ -857,11 +847,15 @@ class GpuConv(GpuOp): ...@@ -857,11 +847,15 @@ class GpuConv(GpuOp):
out, = out_ out, = out_
dx = self.subsample[0] dx = self.subsample[0]
dy = self.subsample[1] dy = self.subsample[1]
border_mode = self.border_mode
version = self.version version = self.version
verbose = self.verbose verbose = self.verbose
sub = sub.copy() sub = sub.copy()
max_threads_dim0 = self.max_threads_dim0 max_threads_dim0 = self.max_threads_dim0
if self.border_mode == "valid":
bmode = 1
else:
assert self.border_mode == "full"
bmode = 0
if max_threads_dim0 is None: if max_threads_dim0 is None:
raise NotImplementedError("GpuConv.c_code should not be called " raise NotImplementedError("GpuConv.c_code should not be called "
"directly. It should be called by " "directly. It should be called by "
...@@ -870,7 +864,7 @@ class GpuConv(GpuOp): ...@@ -870,7 +864,7 @@ class GpuConv(GpuOp):
sub.update(locals()) sub.update(locals())
return """ return """
//Mandatory args //Mandatory args
const char *mode_str = "%(border_mode)s"; int mode = %(bmode)s;
//Optional args //Optional args
int version = %(version)s; int version = %(version)s;
...@@ -878,21 +872,6 @@ class GpuConv(GpuOp): ...@@ -878,21 +872,6 @@ class GpuConv(GpuOp):
int dx = %(dx)s; int dx = %(dx)s;
int dy = %(dy)s; int dy = %(dy)s;
int mode;
if (strcmp(mode_str, "full") == 0)
{
mode = ConvMode_FULL;
}
else if (strcmp(mode_str, "valid") == 0)
{
mode = ConvMode_VALID;
}
else
{
PyErr_SetString(PyExc_ValueError,
"mode must be one of 'full' or 'valid'");
return NULL;
}
// TODO, make out be decref before we alloc out2! // TODO, make out be decref before we alloc out2!
CudaNdarray * out2 = (CudaNdarray *)CudaNdarray_Conv(%(img)s, %(kern)s, CudaNdarray * out2 = (CudaNdarray *)CudaNdarray_Conv(%(img)s, %(kern)s,
......
...@@ -182,7 +182,8 @@ CudaNdarray* corrMM(const CudaNdarray *input, ...@@ -182,7 +182,8 @@ CudaNdarray* corrMM(const CudaNdarray *input,
); );
if (status != CUBLAS_STATUS_SUCCESS) { if (status != CUBLAS_STATUS_SUCCESS) {
std::cerr << "!!!! CUBLAS error in GpuCorrMM\n"; std::cerr << "!!!! CUBLAS error: ";
std::cerr << cublasGetErrorString(status) << "\n";
} }
} }
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论