提交 0f146f60 authored 作者: Frederic's avatar Frederic

Fix optimization crash and test crash due to new gpu conv default.

上级 b75b37c3
...@@ -1123,11 +1123,19 @@ def local_gpu_conv(node): ...@@ -1123,11 +1123,19 @@ def local_gpu_conv(node):
if theano.sandbox.cuda.dnn.dnn_available(): if theano.sandbox.cuda.dnn.dnn_available():
repl = local_gpu_conv_legacy.transform(node) repl = local_gpu_conv_legacy.transform(node)
if repl: if repl:
if isinstance(repl[0].owner.op, GpuConv):
n = repl[0].owner
add_transfer = False
else:
n = repl[0].owner.inputs[0].owner n = repl[0].owner.inputs[0].owner
assert isinstance(n.op, GpuConv) assert isinstance(n.op, GpuConv)
add_transfer = True
ret = theano.sandbox.cuda.dnn.local_conv_dnn.transform(n) ret = theano.sandbox.cuda.dnn.local_conv_dnn.transform(n)
if ret: if ret:
if add_transfer:
return [host_from_gpu(ret[0])] return [host_from_gpu(ret[0])]
else:
return ret
# If dnn isn't avail, the local_gpu_conv_legacy will introduce the # If dnn isn't avail, the local_gpu_conv_legacy will introduce the
# legacy opt. Then the local_conv_gemm will convert it to gemm # legacy opt. Then the local_conv_gemm will convert it to gemm
# opt. # opt.
......
...@@ -747,6 +747,10 @@ def test_dnn_subsample(): ...@@ -747,6 +747,10 @@ def test_dnn_subsample():
class TestConv2DGPU(unittest.TestCase): class TestConv2DGPU(unittest.TestCase):
conv_ops = (cuda.blas.GpuConv,
cuda.dnn.GpuDnnConvBase,
cuda.blas.BaseGpuCorrMM)
def test_logical_shapes(self): def test_logical_shapes(self):
seed_rng() seed_rng()
for stride in range(1, 4): for stride in range(1, 4):
...@@ -773,7 +777,7 @@ class TestConv2DGPU(unittest.TestCase): ...@@ -773,7 +777,7 @@ class TestConv2DGPU(unittest.TestCase):
func = theano.function([a, A], image_estimate, mode=theano_mode) func = theano.function([a, A], image_estimate, mode=theano_mode)
#theano.printing.debugprint(func,) #theano.printing.debugprint(func,)
assert any([isinstance(node.op, theano.sandbox.cuda.blas.GpuConv) assert any([isinstance(node.op, self.conv_ops)
for node in func.maker.fgraph.toposort()]) for node in func.maker.fgraph.toposort()])
a_in = numpy.random.randn(*featshp).astype("float32") a_in = numpy.random.randn(*featshp).astype("float32")
......
...@@ -396,7 +396,11 @@ def build_conv_nnet2_classif(use_gpu, isize, ksize, n_batch, ...@@ -396,7 +396,11 @@ def build_conv_nnet2_classif(use_gpu, isize, ksize, n_batch,
if use_gpu: if use_gpu:
# Check that GpuConv is used # Check that GpuConv is used
topo = train.maker.fgraph.toposort() topo = train.maker.fgraph.toposort()
assert len([n for n in topo if isinstance(n.op, tcn.blas.GpuConv)]) > 0 conv_ops = (tcn.blas.GpuConv,
tcn.dnn.GpuDnnConvBase,
tcn.blas.BaseGpuCorrMM)
assert len([n for n in topo if isinstance(n.op, conv_ops)]) > 0
shape_target = (n_batch, n_out) shape_target = (n_batch, n_out)
return train, params, shape_img, shape_target, mode return train, params, shape_img, shape_target, mode
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论