提交 0ed16db1 作者: carriepl 提交者: Frederic

Add new precision parameter to GpuDnnConvDesc (cuda backend)

上级 3914bf65
...@@ -248,7 +248,7 @@ class GpuDnnConvDesc(GpuOp): ...@@ -248,7 +248,7 @@ class GpuDnnConvDesc(GpuOp):
""" """
__props__ = ('border_mode', 'subsample', 'conv_mode') __props__ = ('border_mode', 'subsample', 'conv_mode', 'precision')
def c_headers(self): def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h'] return ['cudnn.h', 'cudnn_helper.h']
...@@ -265,7 +265,8 @@ class GpuDnnConvDesc(GpuOp): ...@@ -265,7 +265,8 @@ class GpuDnnConvDesc(GpuOp):
def do_constant_folding(self, node): def do_constant_folding(self, node):
return False return False
def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv'): def __init__(self, border_mode, subsample=(1, 1), conv_mode='conv',
precision=None):
if isinstance(border_mode, int): if isinstance(border_mode, int):
border_mode = (border_mode,) * len(subsample) border_mode = (border_mode,) * len(subsample)
if isinstance(border_mode, tuple): if isinstance(border_mode, tuple):
...@@ -283,6 +284,13 @@ class GpuDnnConvDesc(GpuOp): ...@@ -283,6 +284,13 @@ class GpuDnnConvDesc(GpuOp):
assert conv_mode in ('conv', 'cross') assert conv_mode in ('conv', 'cross')
self.conv_mode = conv_mode self.conv_mode = conv_mode
if precision is None:
precision = theano.config.dnn.conv.precision
if precision == 'floatX':
precision = theano.config.floatX
assert precision in ['float16', 'float32', 'float64']
self.precision = precision
def make_node(self, img_shape, kern_shape): def make_node(self, img_shape, kern_shape):
if img_shape.type.ndim != 1 or img_shape.type.dtype != 'int64': if img_shape.type.ndim != 1 or img_shape.type.dtype != 'int64':
raise TypeError('img must be 1D shape tensor') raise TypeError('img must be 1D shape tensor')
...@@ -321,6 +329,14 @@ class GpuDnnConvDesc(GpuOp): ...@@ -321,6 +329,14 @@ class GpuDnnConvDesc(GpuOp):
subsample_str = ", ".join([str(s) for s in self.subsample]) subsample_str = ", ".join([str(s) for s in self.subsample])
upscale_str = ", ".join(["1"] * nb_dim) upscale_str = ", ".join(["1"] * nb_dim)
if self.precision == 'float16':
precision = 'CUDNN_DATA_HALF'
elif self.precision == 'float32':
precision = 'CUDNN_DATA_FLOAT'
else:
assert self.precision == 'float64'
precision = 'CUDNN_DATA_DOUBLE'
return """ return """
{ {
cudnnStatus_t err; cudnnStatus_t err;
...@@ -350,7 +366,7 @@ class GpuDnnConvDesc(GpuOp): ...@@ -350,7 +366,7 @@ class GpuDnnConvDesc(GpuOp):
%(desc)s, %(desc)s,
%(nb_dim)d, %(nb_dim)d,
pad, subsample, upscale, pad, subsample, upscale,
%(conv_flag)s %(conv_flag)s, %(precision)s
); );
#else #else
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: CUDNN_VERSION must be >= 30"); PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: CUDNN_VERSION must be >= 30");
...@@ -364,10 +380,10 @@ class GpuDnnConvDesc(GpuOp): ...@@ -364,10 +380,10 @@ class GpuDnnConvDesc(GpuOp):
""" % dict(name=name, img_shape=img_shape, kern_shape=kern_shape, desc=desc, """ % dict(name=name, img_shape=img_shape, kern_shape=kern_shape, desc=desc,
bmode=bmode, conv_flag=conv_flag, fail=sub['fail'], bmode=bmode, conv_flag=conv_flag, fail=sub['fail'],
pad_str=pad_str, subsample_str=subsample_str, pad_str=pad_str, subsample_str=subsample_str,
upscale_str=upscale_str, nb_dim=nb_dim) upscale_str=upscale_str, nb_dim=nb_dim, precision=precision)
def c_code_cache_version(self): def c_code_cache_version(self):
return (2, version()) return (3, version())
# scalar constants # scalar constants
_zero = constant(numpy.asarray(0.0, dtype='float32')) _zero = constant(numpy.asarray(0.0, dtype='float32'))
......
Markdown 格式
0%
您已将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论