提交 260fc9a7 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #4258 from lamblin/fix_debugmode_dnn

Fixes for debugmode in dnn and gpuarray
...@@ -1322,9 +1322,16 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -1322,9 +1322,16 @@ class GpuDnnPoolDesc(GpuOp):
if self.pad != (0, 0) and version() == -1: if self.pad != (0, 0) and version() == -1:
raise RuntimeError("CuDNN pooling with padding requires CuDNN v2") raise RuntimeError("CuDNN pooling with padding requires CuDNN v2")
return Apply(self, [], node = Apply(self, [],
[CDataType("cudnnPoolingDescriptor_t", [CDataType("cudnnPoolingDescriptor_t",
freefunc="cudnnDestroyPoolingDescriptor")()]) freefunc="cudnnDestroyPoolingDescriptor")()])
# DebugMode cannot compare the values of CDataType variables, so by
# default it returns False all the time. To prevent DebugMode from
# complaining because of the MergeOptimizer, we make this variable
# always compare to True.
out = node.outputs[0]
out.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
return node
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
desc, = outputs desc, = outputs
......
...@@ -412,6 +412,13 @@ def test_pooling3d(): ...@@ -412,6 +412,13 @@ def test_pooling3d():
if not cuda.dnn.dnn_available() or cuda.dnn.version() < (3000, 3000): if not cuda.dnn.dnn_available() or cuda.dnn.version() < (3000, 3000):
raise SkipTest(cuda.dnn.dnn_available.msg) raise SkipTest(cuda.dnn.dnn_available.msg)
# For max pooling, pool3d2d explicitly pads the input with
# -inf. Because of this, the compilation mode for the function
# that uses pool3d2d should not check for infinite values, or
# it will falsely believe there is an error in the graph.
mode_without_gpu2 = mode_without_gpu.including()
mode_without_gpu2.check_isfinite = False
# 'average_exc_pad' is disabled for versions < 4004 # 'average_exc_pad' is disabled for versions < 4004
if cuda.dnn.version() < (4004, 4004): if cuda.dnn.version() < (4004, 4004):
modes = ('max', 'average_inc_pad') modes = ('max', 'average_inc_pad')
...@@ -447,13 +454,6 @@ def test_pooling3d(): ...@@ -447,13 +454,6 @@ def test_pooling3d():
strides=(stride, stride, stride), strides=(stride, stride, stride),
pad=pad, pool_func=func) pad=pad, pool_func=func)
# For max pooling, pool3d2d explicitly pads the input with
# -inf. Because of this, the compilation mode for the function
# that uses pool3d2d should not check for infinite values, or
# it will falsely believe there is an error in the graph.
mode_without_gpu2 = mode_without_gpu.including()
mode_without_gpu2.check_isfinite = False
f1 = theano.function([x], out1, mode=mode_with_gpu) f1 = theano.function([x], out1, mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPool) assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f1.maker.fgraph.apply_nodes]) for node in f1.maker.fgraph.apply_nodes])
...@@ -512,7 +512,7 @@ def test_pooling3d(): ...@@ -512,7 +512,7 @@ def test_pooling3d():
strides=(stride, stride, stride), strides=(stride, stride, stride),
pad=pad, pool_func=func) pad=pad, pool_func=func)
fc = theano.function([x], theano.grad(out.sum(), x), fc = theano.function([x], theano.grad(out.sum(), x),
mode=mode_without_gpu) mode=mode_without_gpu2)
c_out = fc(data) c_out = fc(data)
utt.assert_allclose(c_out, g_out) utt.assert_allclose(c_out, g_out)
......
...@@ -996,6 +996,8 @@ class GpuJoin(HideC, Join): ...@@ -996,6 +996,8 @@ class GpuJoin(HideC, Join):
def perform(self, node, axis_and_tensors, out_, ctx): def perform(self, node, axis_and_tensors, out_, ctx):
out, = out_ out, = out_
axis = int(axis_and_tensors[0]) axis = int(axis_and_tensors[0])
if axis < 0:
axis += axis_and_tensors[1].ndim
tensors = axis_and_tensors[1:] tensors = axis_and_tensors[1:]
out[0] = pygpu.concatenate(tensors, axis=axis, context=ctx).astype( out[0] = pygpu.concatenate(tensors, axis=axis, context=ctx).astype(
node.outputs[0].dtype) node.outputs[0].dtype)
......
...@@ -287,7 +287,8 @@ class GpuDot22(BlasOp): ...@@ -287,7 +287,8 @@ class GpuDot22(BlasOp):
def perform(self, node, inputs, outputs): def perform(self, node, inputs, outputs):
x, y = inputs x, y = inputs
out = pygpu.empty((x.shape[0], y.shape[1]), dtype=x.dtype) out = pygpu.empty((x.shape[0], y.shape[1]), dtype=x.dtype,
context=x.context)
outputs[0][0] = blas.gemm(1., x, y, 0., out, outputs[0][0] = blas.gemm(1., x, y, 0., out,
overwrite_c=True) overwrite_c=True)
......
...@@ -307,9 +307,16 @@ class GpuDnnConvDesc(COp): ...@@ -307,9 +307,16 @@ class GpuDnnConvDesc(COp):
if kern_shape.type.ndim != 1 or kern_shape.type.dtype != 'int64': if kern_shape.type.ndim != 1 or kern_shape.type.dtype != 'int64':
raise TypeError('kern must be 1D shape tensor') raise TypeError('kern must be 1D shape tensor')
return Apply(self, [kern_shape], node = Apply(self, [kern_shape],
[CDataType("cudnnConvolutionDescriptor_t", [CDataType("cudnnConvolutionDescriptor_t",
freefunc="cudnnDestroyConvolutionDescriptor")()]) freefunc="cudnnDestroyConvolutionDescriptor")()])
# DebugMode cannot compare the values of CDataType variables, so by
# default it returns False all the time. To prevent DebugMode from
# complaining because of the MergeOptimizer, we make this variable
# always compare to True.
out = node.outputs[0]
out.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
return node
def get_op_params(self): def get_op_params(self):
pad0 = '0' pad0 = '0'
...@@ -998,9 +1005,16 @@ class GpuDnnPoolDesc(Op): ...@@ -998,9 +1005,16 @@ class GpuDnnPoolDesc(Op):
self.pad = (0, 0) self.pad = (0, 0)
def make_node(self): def make_node(self):
return Apply(self, [], node = Apply(self, [],
[CDataType("cudnnPoolingDescriptor_t", [CDataType("cudnnPoolingDescriptor_t",
freefunc="cudnnDestroyPoolingDescriptor")()]) freefunc="cudnnDestroyPoolingDescriptor")()])
# DebugMode cannot compare the values of CDataType variables, so by
# default it returns False all the time. To prevent DebugMode from
# complaining because of the MergeOptimizer, we make this variable
# always compare to True.
out = node.outputs[0]
out.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
return node
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
desc, = outputs desc, = outputs
......
...@@ -462,6 +462,10 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -462,6 +462,10 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
} // END NESTED SCOPE } // END NESTED SCOPE
""" % locals() """ % locals()
def perform(self, node, inp, out, ctx):
# Disable the perform method from the CPU version
Op.perform(self, node, inp, out, ctx)
@op_lifter([Images2Neibs]) @op_lifter([Images2Neibs])
def use_gpu_images2neibs(node, context_name): def use_gpu_images2neibs(node, context_name):
......
...@@ -3884,11 +3884,7 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3884,11 +3884,7 @@ class T_Join_and_Split(unittest.TestCase):
got = f(-2) got = f(-2)
assert numpy.allclose(got, want) assert numpy.allclose(got, want)
try: self.assertRaises((IndexError, OverflowError), f, -3)
got = f(-3)
assert False
except IndexError:
pass
def test_join_matrixC_negative_axis(self): def test_join_matrixC_negative_axis(self):
"""constant join negative axis""" """constant join negative axis"""
...@@ -3920,11 +3916,7 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3920,11 +3916,7 @@ class T_Join_and_Split(unittest.TestCase):
got = f() got = f()
assert numpy.allclose(got, want) assert numpy.allclose(got, want)
try: self.assertRaises((IndexError, OverflowError), join, -3, a, b)
s = join(-3, a, b)
assert False
except IndexError:
pass
utt.verify_grad(lambda a, b: join(-1, a, b), [v, 2 * v], utt.verify_grad(lambda a, b: join(-1, a, b), [v, 2 * v],
mode=self.mode) mode=self.mode)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论