提交 0a162810 — 作者: Arnaud Bergeron

Fix problems found in testing.

上级 dba4873c
...@@ -118,30 +118,38 @@ dnn_available.msg = None ...@@ -118,30 +118,38 @@ dnn_available.msg = None
def c_set_tensor4d(var, desc, err, fail): def c_set_tensor4d(var, desc, err, fail):
return """ return """
{ {
cudnnDataType_t dt;
size_t ds;
switch (%(var)s->ga.typecode) {
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
break;
case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE;
break;
case GA_HALF:
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensor4d");
return -1;
}
ds = gpuarray_get_elsize(%(var)s->ga.typecode);
%(err)s = cudnnSetTensor4dDescriptorEx( %(err)s = cudnnSetTensor4dDescriptorEx(
%(desc)s, CUDNN_DATA_FLOAT, %(desc)s, dt,
PyGpuArray_DIMS(%(var)s)[0], PyGpuArray_DIMS(%(var)s)[0],
PyGpuArray_DIMS(%(var)s)[1], PyGpuArray_DIMS(%(var)s)[1],
PyGpuArray_DIMS(%(var)s)[2], PyGpuArray_DIMS(%(var)s)[2],
PyGpuArray_DIMS(%(var)s)[3], PyGpuArray_DIMS(%(var)s)[3],
PyGpuArray_STRIDES(%(var)s)[0], PyGpuArray_STRIDES(%(var)s)[0] / ds,
PyGpuArray_STRIDES(%(var)s)[1], PyGpuArray_STRIDES(%(var)s)[1] / ds,
PyGpuArray_STRIDES(%(var)s)[2], PyGpuArray_STRIDES(%(var)s)[2] / ds,
PyGpuArray_STRIDES(%(var)s)[3]); PyGpuArray_STRIDES(%(var)s)[3] / ds);
if (%(err)s != CUDNN_STATUS_SUCCESS) { if (%(err)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"could not set tensor4d descriptor: %%s" "could not set tensor4d descriptor: %%s",
"shapes=%%d %%d %%d %%d strides=%%d %%d %%d %%d", cudnnGetErrorString(%(err)s));
cudnnGetErrorString(%(err)s),
PyGpuArray_DIMS(%(var)s)[0],
PyGpuArray_DIMS(%(var)s)[1],
PyGpuArray_DIMS(%(var)s)[2],
PyGpuArray_DIMS(%(var)s)[3],
PyGpuArray_STRIDES(%(var)s)[0],
PyGpuArray_STRIDES(%(var)s)[1],
PyGpuArray_STRIDES(%(var)s)[2],
PyGpuArray_STRIDES(%(var)s)[3]);
%(fail)s %(fail)s
} }
} }
...@@ -160,9 +168,9 @@ class DnnBase(COp): ...@@ -160,9 +168,9 @@ class DnnBase(COp):
COp.__init__(self, "dnn_base.c") COp.__init__(self, "dnn_base.c")
def c_headers(self): def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h', return ['cudnn.h', 'cudnn_helper.h', 'gpuarray_helper.h',
'gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/util.h',
'gpuarray_api.h'] 'gpuarray_api.h', 'numpy_compat.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include(), return [os.path.dirname(__file__), pygpu.get_include(),
...@@ -953,7 +961,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -953,7 +961,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
%(out)s_dims[3] = (PyGpuArray_DIMS(%(input)s)[3] + (hpad*2) - wsY) / strideY + 1; %(out)s_dims[3] = (PyGpuArray_DIMS(%(input)s)[3] + (hpad*2) - wsY) / strideY + 1;
if (theano_prep_output(&%(out)s, 4, %(out)s_dims, %(input)s->ga.typecode, if (theano_prep_output(&%(out)s, 4, %(out)s_dims, %(input)s->ga.typecode,
GA_C_ORDER) != 0) { GA_C_ORDER, pygpu_default_context()) != 0) {
%(fail)s %(fail)s
} }
...@@ -1008,7 +1016,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -1008,7 +1016,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
return [[1], [0]] return [[1], [0]]
def c_code_cache_version(self): def c_code_cache_version(self):
return (6, version()) return (7, version())
class GpuDnnPoolGrad(DnnBase): class GpuDnnPoolGrad(DnnBase):
...@@ -1125,7 +1133,7 @@ if (!GpuArray_IS_C_CONTIGUOUS(&%(input_grad)s->ga)) { ...@@ -1125,7 +1133,7 @@ if (!GpuArray_IS_C_CONTIGUOUS(&%(input_grad)s->ga)) {
%(fail)s %(fail)s
} }
if (!GpuArray_IS_C_CONTIGUOUS(%(output)s)) { if (!GpuArray_IS_C_CONTIGUOUS(&%(output)s->ga)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"GpuDnnPoolGrad: Only contiguous outputs are supported."); "GpuDnnPoolGrad: Only contiguous outputs are supported.");
%(fail)s %(fail)s
...@@ -1134,8 +1142,8 @@ if (!GpuArray_IS_C_CONTIGUOUS(%(output)s)) { ...@@ -1134,8 +1142,8 @@ if (!GpuArray_IS_C_CONTIGUOUS(%(output)s)) {
%(set_in)s %(set_in)s
if (theano_prep_output(&%(output_grad)s, PyGpuArray_NDIM(%(output)s), if (theano_prep_output(&%(output_grad)s, PyGpuArray_NDIM(%(output)s),
PyGpuArray_DIMS(%(output)s, %(output)s->ga.typecode, PyGpuArray_DIMS(%(output)s), %(output)s->ga.typecode,
GA_C_ORDER)) != 0) GA_C_ORDER, pygpu_default_context()) != 0)
{ {
%(fail)s %(fail)s
} }
...@@ -1168,29 +1176,8 @@ _handle, ...@@ -1168,29 +1176,8 @@ _handle,
#endif #endif
if (err%(name)s != CUDNN_STATUS_SUCCESS) { if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"GpuDnnPoolGrad: error doing operation: %%s. " "GpuDnnPoolGrad: error doing operation: %%s.",
"input.shape=(%%d, %%d, %%d, %%d) " cudnnGetErrorString(err%(name)s));
"input_grad.shape=(%%d, %%d, %%d, %%d) "
"output.shape=(%%d, %%d, %%d, %%d) "
"output_grad.shape=(%%d, %%d, %%d, %%d)",
cudnnGetErrorString(err%(name)s),
PyGpuArray_DIMS(%(input)s)[0],
PyGpuArray_DIMS(%(input)s)[1],
PyGpuArray_DIMS(%(input)s)[2],
PyGpuArray_DIMS(%(input)s)[3],
PyGpuArray_DIMS(%(input_grad)s)[0],
PyGpuArray_DIMS(%(input_grad)s)[1],
PyGpuArray_DIMS(%(input_grad)s)[2],
PyGpuArray_DIMS(%(input_grad)s)[3],
PyGpuArray_DIMS(%(output)s)[0],
PyGpuArray_DIMS(%(output)s)[1],
PyGpuArray_DIMS(%(output)s)[2],
PyGpuArray_DIMS(%(output)s)[3],
PyGpuArray_DIMS(%(output_grad)s)[0],
PyGpuArray_DIMS(%(output_grad)s)[1],
PyGpuArray_DIMS(%(output_grad)s)[2],
PyGpuArray_DIMS(%(output_grad)s)[3]
);
%(fail)s %(fail)s
} }
""" % dict(output_grad=out_grad, desc=desc, """ % dict(output_grad=out_grad, desc=desc,
...@@ -1363,7 +1350,7 @@ if (%(mode)d == 1) ...@@ -1363,7 +1350,7 @@ if (%(mode)d == 1)
result += """ result += """
if (theano_prep_output(&%(outs)s, PyGpuArray_NDIM(%(ins)s), if (theano_prep_output(&%(outs)s, PyGpuArray_NDIM(%(ins)s),
PyGpuArray_DIMS(%(ins)s), %(ins)s->ga.typecode, PyGpuArray_DIMS(%(ins)s), %(ins)s->ga.typecode,
GA_C_ORDER) != 0) GA_C_ORDER, pygpu_default_context()) != 0)
{ {
%(fail)s %(fail)s
} }
......
...@@ -4,6 +4,7 @@ static cudnnHandle_t _handle = NULL; ...@@ -4,6 +4,7 @@ static cudnnHandle_t _handle = NULL;
static int static int
c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) { c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
cudnnDataType_t dt; cudnnDataType_t dt;
size_t ds;
switch (var->ga.typecode) { switch (var->ga.typecode) {
case GA_FLOAT: case GA_FLOAT:
dt = CUDNN_DATA_FLOAT; dt = CUDNN_DATA_FLOAT;
...@@ -18,25 +19,17 @@ c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) { ...@@ -18,25 +19,17 @@ c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensor4d"); PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensor4d");
return -1; return -1;
} }
ds = gpuarray_get_elsize(var->ga.typecode);
cudnnStatus_t err = cudnnSetTensor4dDescriptorEx( cudnnStatus_t err = cudnnSetTensor4dDescriptorEx(
desc, dt, desc, dt,
PyGpuArray_DIM(var, 0), PyGpuArray_DIM(var, 1), PyGpuArray_DIM(var, 0), PyGpuArray_DIM(var, 1),
PyGpuArray_DIM(var, 2), PyGpuArray_DIM(var, 3), PyGpuArray_DIM(var, 2), PyGpuArray_DIM(var, 3),
PyGpuArray_STRIDE(var, 0), PyGpuArray_STRIDE(var, 1), PyGpuArray_STRIDE(var, 0) / ds, PyGpuArray_STRIDE(var, 1) / ds,
PyGpuArray_STRIDE(var, 2), PyGpuArray_STRIDE(var, 3)); PyGpuArray_STRIDE(var, 2) / ds, PyGpuArray_STRIDE(var, 3) / ds);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"Could not set tensor4d descriptor: %s" "Could not set tensor4d descriptor: %s",
"shapes=%d %d %d %d strides=%d %d %d %d", cudnnGetErrorString(err));
cudnnGetErrorString(err),
PyGpuArray_DIMS(var)[0],
PyGpuArray_DIMS(var)[1],
PyGpuArray_DIMS(var)[2],
PyGpuArray_DIMS(var)[3],
PyGpuArray_STRIDES(var)[0],
PyGpuArray_STRIDES(var)[1],
PyGpuArray_STRIDES(var)[2],
PyGpuArray_STRIDES(var)[3]);
return -1; return -1;
} }
return 0; return 0;
...@@ -45,7 +38,7 @@ c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) { ...@@ -45,7 +38,7 @@ c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
static int static int
c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) { c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
cudnnDataType_t dt; cudnnDataType_t dt;
if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"Only contiguous filters (kernels) are supported."); "Only contiguous filters (kernels) are supported.");
return -1; return -1;
...@@ -70,13 +63,8 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) { ...@@ -70,13 +63,8 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
PyGpuArray_DIMS(var)[2], PyGpuArray_DIMS(var)[3]); PyGpuArray_DIMS(var)[2], PyGpuArray_DIMS(var)[3]);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"Could not set filter descriptor: %s." "Could not set filter descriptor: %s.",
" dims= %d %d %d %d", cudnnGetErrorString(err));
cudnnGetErrorString(err),
PyGpuArray_DIMS(var)[0],
PyGpuArray_DIMS(var)[1],
PyGpuArray_DIMS(var)[2],
PyGpuArray_DIMS(var)[3]);
return -1; return -1;
} }
return 0; return 0;
......
...@@ -42,7 +42,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -42,7 +42,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
Py_INCREF(*output); Py_INCREF(*output);
#else #else
if (theano_prep_output(output, PyGpuArray_NDIM(om), PyGpuArray_DIMS(om), if (theano_prep_output(output, PyGpuArray_NDIM(om), PyGpuArray_DIMS(om),
om->ga.typecode, GA_C_ORDER) != 0) om->ga.typecode, GA_C_ORDER,
pygpu_default_context()) != 0)
return 1; return 1;
if (beta != 0.0 && pygpu_move(*output, om)) if (beta != 0.0 && pygpu_move(*output, om))
return 1; return 1;
...@@ -54,6 +55,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -54,6 +55,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
{ {
size_t worksize; size_t worksize;
gpudata *workspace; gpudata *workspace;
PyGpuContextObject *c;
err = cudnnGetConvolutionForwardWorkspaceSize(_handle, err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
APPLY_SPECIFIC(input), APPLY_SPECIFIC(input),
...@@ -75,8 +77,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -75,8 +77,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
* to place a nice get_work_mem() function in. * to place a nice get_work_mem() function in.
*/ */
if (worksize != 0) { if (worksize != 0) {
workspace = pygpu_default_context->ops->buffer_alloc( c = pygpu_default_context();
pygpu_default_context->ctx, worksize, NULL, 0, NULL); workspace = c->ops->buffer_alloc(c->ctx, worksize, NULL, 0, NULL);
if (workspace == NULL) { if (workspace == NULL) {
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
"Could not allocate working memory"); "Could not allocate working memory");
...@@ -93,9 +95,10 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -93,9 +95,10 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
worksize == 0 ? NULL : *(void **)workspace, worksize, worksize == 0 ? NULL : *(void **)workspace, worksize,
beta_p, beta_p,
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*output)); APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*output));
}
pygpu_default_context->ops->buffer_release(workspace); if (worksize != 0)
c->ops->buffer_release(workspace);
}
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "GpuDnnConv: error doing operation: %s", PyErr_Format(PyExc_RuntimeError, "GpuDnnConv: error doing operation: %s",
......
...@@ -21,7 +21,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -21,7 +21,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1) if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1; return 1;
switch (input->ga.typecode) { switch (im->ga.typecode) {
case GA_DOUBLE: case GA_DOUBLE:
alpha_p = (void *)α alpha_p = (void *)α
beta_p = (void *)β beta_p = (void *)β
...@@ -41,7 +41,8 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -41,7 +41,8 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
Py_INCREF(*input); Py_INCREF(*input);
#else #else
if (theano_prep_output(input, PyGpuArray_NDIM(im), PyGpuArray_DIMS(im), if (theano_prep_output(input, PyGpuArray_NDIM(im), PyGpuArray_DIMS(im),
im->ga.typecode, GA_C_ORDER) != 0) im->ga.typecode, GA_C_ORDER,
pygpu_default_context()) != 0)
return 1; return 1;
if (beta != 0.0 && pygpu_move(*input, im)) if (beta != 0.0 && pygpu_move(*input, im))
return 1; return 1;
......
...@@ -41,7 +41,8 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -41,7 +41,8 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
Py_INCREF(*kerns); Py_INCREF(*kerns);
#else #else
if (theano_prep_output(kerns, PyGpuArray_NDIM(km), PyGpuArray_DIMS(km), if (theano_prep_output(kerns, PyGpuArray_NDIM(km), PyGpuArray_DIMS(km),
km->ga.typecode, GA_C_ORDER) != 0) km->ga.typecode, GA_C_ORDER,
pygpu_default_context()) != 0)
return 1; return 1;
if (beta != 0.0 && pygpu_move(*kerns, km)) if (beta != 0.0 && pygpu_move(*kerns, km))
return 1; return 1;
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论