提交 ce1a77f2 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #4950 from abergeron/part1

Mixed
...@@ -893,6 +893,14 @@ If you pass a function name to the ``__init__()`` method of the ...@@ -893,6 +893,14 @@ If you pass a function name to the ``__init__()`` method of the
types for the argument is dependant on the Types (that is types for the argument is dependant on the Types (that is
theano Types) of your inputs and outputs. theano Types) of your inputs and outputs.
* You can specify the number of inputs and outputs for your op
by setting the `_cop_num_inputs` and `_cop_num_outputs`
attributes on your op. The main function will always be
called with that number of arguments, using NULL to fill in
for missing values at the end. This can be used if your op
has a variable number of inputs or outputs, but with a fixed
maximum.
For example, the main C function of an op that takes two TensorTypes For example, the main C function of an op that takes two TensorTypes
(which has ``PyArrayObject *`` as its C type) as inputs and returns (which has ``PyArrayObject *`` as its C type) as inputs and returns
both their sum and the difference between them would have four both their sum and the difference between them would have four
......
...@@ -1434,7 +1434,15 @@ class COp(Op): ...@@ -1434,7 +1434,15 @@ class COp(Op):
# Generate an string containing the arguments sent to the external C # Generate an string containing the arguments sent to the external C
# function. The argstring will be of format : # function. The argstring will be of format :
# "input0, input1, input2, &output0, &output1" # "input0, input1, input2, &output0, &output1"
return ", ".join(list(inp) + ["&%s" % o for o in out]) inp = list(inp)
numi = getattr(self, '_cop_num_inputs', len(inp))
while len(inp) < numi:
inp.append('NULL')
out = ["&%s" % o for o in out]
numo = getattr(self, '_cop_num_outputs', len(out))
while len(out) < numo:
out.append('NULL')
return ", ".join(inp + out)
def get_c_macros(self, node, name, check_input=None): def get_c_macros(self, node, name, check_input=None):
define_template = "#define %s %s" define_template = "#define %s %s"
......
...@@ -11,15 +11,6 @@ static inline int cudnnGetVersion() { ...@@ -11,15 +11,6 @@ static inline int cudnnGetVersion() {
} }
#endif #endif
#include <assert.h>
// If needed, define element of the V4 interface in terms of elements of
// previous versions
#if defined(CUDNN_VERSION) && CUDNN_VERSION < 4000
#define CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING 5
#define CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING 3
#endif
#endif #endif
...@@ -11,22 +11,20 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) { ...@@ -11,22 +11,20 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
case GA_DOUBLE: case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE; dt = CUDNN_DATA_DOUBLE;
break; break;
#if CUDNN_VERSION > 3000
case GA_HALF: case GA_HALF:
dt = CUDNN_DATA_HALF; dt = CUDNN_DATA_HALF;
break; break;
#endif
default: default:
PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensorNd"); PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensorNd");
return -1; return -1;
} }
ds = gpuarray_get_elsize(var->ga.typecode); ds = gpuarray_get_elsize(var->ga.typecode);
int strs[5], dims[5], default_stride = 1; int strs[8], dims[8], default_stride = 1;
unsigned int nd = PyGpuArray_NDIM(var); unsigned int nd = PyGpuArray_NDIM(var);
if (nd > 5) { if (nd > 8) {
PyErr_SetString(PyExc_TypeError, "Tensor of more than 5d"); PyErr_SetString(PyExc_TypeError, "Tensor of more than 8d");
return -1; return -1;
} }
...@@ -38,7 +36,15 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) { ...@@ -38,7 +36,15 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
dims[i] = PyGpuArray_DIM(var, i); dims[i] = PyGpuArray_DIM(var, i);
} }
cudnnStatus_t err = cudnnSetTensorNdDescriptor(desc, dt, nd, dims, strs); /* Tensors can't be smaller than 3d for cudnn so we pad the
* descriptor if they are */
for (unsigned int i = nd; i < 3; i++) {
strs[i] = 1;
dims[i] = 1;
}
cudnnStatus_t err = cudnnSetTensorNdDescriptor(desc, dt, nd < 3 ? 3 : nd,
dims, strs);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"Could not set tensorNd descriptor: %s", "Could not set tensorNd descriptor: %s",
...@@ -48,6 +54,22 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) { ...@@ -48,6 +54,22 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
return 0; return 0;
} }
/* Allocate a cudnn tensor descriptor and populate it from `var`.
 *
 * On success, returns 0 and *desc holds a valid descriptor owned by the
 * caller (release with cudnnDestroyTensorDescriptor).  On failure, sets a
 * Python error, destroys any partially-created descriptor, and returns -1.
 */
static int c_make_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t *desc) {
  cudnnStatus_t err = cudnnCreateTensorDescriptor(desc);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError,
                 "Could not create tensor descriptor: %s",
                 cudnnGetErrorString(err));
    return -1;
  }
  /* Filling failed: don't leak the descriptor we just created. */
  if (c_set_tensorNd(var, *desc) != 0) {
    cudnnDestroyTensorDescriptor(*desc);
    return -1;
  }
  return 0;
}
static int static int
c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) { c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
cudnnDataType_t dt; cudnnDataType_t dt;
...@@ -65,21 +87,19 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) { ...@@ -65,21 +87,19 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
case GA_DOUBLE: case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE; dt = CUDNN_DATA_DOUBLE;
break; break;
#if CUDNN_VERSION > 3000
case GA_HALF: case GA_HALF:
dt = CUDNN_DATA_HALF; dt = CUDNN_DATA_HALF;
break; break;
#endif
default: default:
PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_filter"); PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_filter");
return -1; return -1;
} }
int dims[5]; int dims[8];
unsigned int nd = PyGpuArray_NDIM(var); unsigned int nd = PyGpuArray_NDIM(var);
if (nd > 5) { if (nd > 8) {
PyErr_SetString(PyExc_TypeError, "Tensor of more than 5d"); PyErr_SetString(PyExc_TypeError, "Tensor of more than 8d");
return -1; return -1;
} }
...@@ -88,6 +108,13 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) { ...@@ -88,6 +108,13 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
dims[i] = PyGpuArray_DIM(var, i); dims[i] = PyGpuArray_DIM(var, i);
} }
/* Filters can't be less than 3d so we pad */
for (unsigned int i = nd; i < 3; i++)
dims[i] = 1;
if (nd < 3)
nd = 3;
#if CUDNN_VERSION >= 5000 #if CUDNN_VERSION >= 5000
err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims); err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims);
#else #else
...@@ -103,6 +130,22 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) { ...@@ -103,6 +130,22 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
return 0; return 0;
} }
/* Allocate a cudnn filter descriptor and populate it from `var`.
 *
 * On success, returns 0 and *desc holds a valid descriptor owned by the
 * caller (release with cudnnDestroyFilterDescriptor).  On failure, sets a
 * Python error, destroys any partially-created descriptor, and returns -1.
 */
static int c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t *desc) {
  cudnnStatus_t err;
  err = cudnnCreateFilterDescriptor(desc);
  if (err != CUDN N_STATUS_SUCCESS) {
    /* Fixed: message previously said "tensor descriptor" (copy-paste from
     * c_make_tensorNd); this function creates a filter descriptor. */
    PyErr_Format(PyExc_RuntimeError,
                 "Could not create filter descriptor: %s",
                 cudnnGetErrorString(err));
    return -1;
  }
  /* Filling failed: don't leak the descriptor we just created. */
  if (c_set_filter(var, *desc) != 0) {
    cudnnDestroyFilterDescriptor(*desc);
    return -1;
  }
  return 0;
}
#section init_code #section init_code
setup_ext_cuda(); setup_ext_cuda();
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论