cuda: use cnmem in one more place, the cuBLAS BatchedDotOp (device_malloc/device_free instead of cudaMalloc/cudaFree)

3e40d56a · Frederic · 4e70bd89 · 3e40d56a
--- a/theano/sandbox/cuda/blas.py
+++ b/theano/sandbox/cuda/blas.py
@@ -137,13 +137,9 @@ class BatchedDotOp(GpuOp):
                host_z[i] = host_z[i - 1] + z_stride;
            }

-            err1 = cudaMalloc((void **)&gpu_x, ptr_array_size);
+            gpu_x = (float **) device_malloc(ptr_array_size);

-            if (err1 != cudaSuccess)
-            {
-                CLEANUP();
-                PyErr_Format(PyExc_RuntimeError,
-                             "%%s", "cudaMalloc failure");
+            if (gpu_x == NULL){
                %(fail)s;
            }

@@ -195,7 +191,7 @@ class BatchedDotOp(GpuOp):
            do                                          \
            {                                           \
                if (host_x) free (host_x);              \
-                if (gpu_x) cudaFree(gpu_x);             \
+                if (gpu_x) device_free(gpu_x);          \
            } while (0)
        """

@@ -213,6 +209,9 @@ class BatchedDotOp(GpuOp):

        return rval

+    def c_code_cache_version(self):  # version tuple for this op's generated C code (reading based on the method name -- confirm)
+        return (1,)  # NOTE(review): presumably must be bumped whenever the C template of this op changes -- confirm
+
 batched_dot = BatchedDotOp()

 class GpuDot22(GpuOp):