Commit 46c83387 — authored by carriepl, committed by Frederic

Add precision param to GpuDnnConvDesc

上级 5a015a8d
...@@ -29,7 +29,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp, ...@@ -29,7 +29,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
return -1; return -1;
} }
err = cudnnSetConvolutionNdDescriptor(*desc, NB_DIMS, pad, strides, upscale, err = cudnnSetConvolutionNdDescriptor_v3(*desc, NB_DIMS, pad, strides,
CONV_MODE); upscale, CONV_MODE, PRECISION);
return 0; return 0;
} }
...@@ -107,6 +107,23 @@ static cudnnStatus_t cudnnConvolutionBackwardFilter_v3( ...@@ -107,6 +107,23 @@ static cudnnStatus_t cudnnConvolutionBackwardFilter_v3(
gradData); gradData);
} }
// Starting in V3, the cudnnSetConvolutionNdDescriptor has an additional
// parameter that determines the data type in which to do the computation.
// For versions older than V3, we need to define an alias for that function
// that will take the additional parameter as input but ignore it.
static inline cudnnStatus_t cudnnSetConvolutionNdDescriptor_v3(
cudnnConvolutionDescriptor_t convDesc,
int arrayLength,
int padA[],
int filterStrideA[]
int upscaleA[],
cudnnConvolutionMode_t mode,
cudnn_dataType_t dataType)
return cudnnSetConvolutionNdDescriptor(convDesc, arrayLength, padA,
filterStrideA, upscaleA, mode);
#endif #endif
#endif #endif
...@@ -241,7 +241,7 @@ class GpuDnnConvDesc(COp): ...@@ -241,7 +241,7 @@ class GpuDnnConvDesc(COp):
""" """
__props__ = ('border_mode', 'subsample', 'conv_mode') __props__ = ('border_mode', 'subsample', 'conv_mode', 'precision')
def c_headers(self): def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h'] return ['cudnn.h', 'cudnn_helper.h']
...@@ -258,7 +258,8 @@ class GpuDnnConvDesc(COp): ...@@ -258,7 +258,8 @@ class GpuDnnConvDesc(COp):
def do_constant_folding(self, node): def do_constant_folding(self, node):
return False return False
def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv'): def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv',
precision=None):
COp.__init__(self, ["conv_desc.c"], "APPLY_SPECIFIC(conv_desc)") COp.__init__(self, ["conv_desc.c"], "APPLY_SPECIFIC(conv_desc)")
if isinstance(border_mode, int): if isinstance(border_mode, int):
...@@ -278,6 +279,13 @@ class GpuDnnConvDesc(COp): ...@@ -278,6 +279,13 @@ class GpuDnnConvDesc(COp):
assert conv_mode in ('conv', 'cross') assert conv_mode in ('conv', 'cross')
self.conv_mode = conv_mode self.conv_mode = conv_mode
if precision is None:
precision = theano.config.dnn.conv.precision
if precision == 'floatX':
precision = theano.config.floatX
assert precision in ['float16', 'float32', 'float64']
self.precision = precision
def make_node(self, kern_shape): def make_node(self, kern_shape):
if kern_shape.type.ndim != 1 or kern_shape.type.dtype != 'int64': if kern_shape.type.ndim != 1 or kern_shape.type.dtype != 'int64':
raise TypeError('kern must be 1D shape tensor') raise TypeError('kern must be 1D shape tensor')
...@@ -315,11 +323,20 @@ class GpuDnnConvDesc(COp): ...@@ -315,11 +323,20 @@ class GpuDnnConvDesc(COp):
else: else:
sub2 = '0' sub2 = '0'
if self.precision == 'float16':
precision = 'CUDNN_DATA_HALF'
elif self.precision == 'float32':
precision = 'CUDNN_DATA_FLOAT'
else:
assert self.precision == 'float64'
precision = 'CUDNN_DATA_DOUBLE'
return [('NB_DIMS', str(len(self.subsample))), return [('NB_DIMS', str(len(self.subsample))),
('BORDER_MODE', bmode), ('BORDER_MODE', bmode),
('PAD_0', pad0), ('PAD_1', pad1), ('PAD_2', pad2), ('PAD_0', pad0), ('PAD_1', pad1), ('PAD_2', pad2),
('CONV_MODE', conv_flag), ('CONV_MODE', conv_flag),
('SUB_0', sub0), ('SUB_1', sub1), ('SUB_2', sub2)] ('SUB_0', sub0), ('SUB_1', sub1), ('SUB_2', sub2),
('PRECISION', precision)]
def c_code_cache_version(self): def c_code_cache_version(self):
return (super(GpuDnnConvDesc, self).c_code_cache_version(), version()) return (super(GpuDnnConvDesc, self).c_code_cache_version(), version())
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论