提交 b0a20106 作者: Pascal Lamblin

Merge pull request #2999 from nouiz/crash_test

[CRASH,TESTS] Fix crash recently introduced and fix tests
...@@ -34,7 +34,7 @@ _logger = logging.getLogger(_logger_name) ...@@ -34,7 +34,7 @@ _logger = logging.getLogger(_logger_name)
def as_cuda_ndarray_variable(x): def as_cuda_ndarray_variable(x):
if x.owner: if getattr(x, 'owner', None):
if isinstance(x.owner.op, HostFromGpu): if isinstance(x.owner.op, HostFromGpu):
return x.owner.inputs[0] return x.owner.inputs[0]
elif (isinstance(x.owner.op, GpuFromHost) and elif (isinstance(x.owner.op, GpuFromHost) and
......
...@@ -1963,6 +1963,7 @@ gpu_elemwise_alloc = gof.local_optimizer([GpuElemwise])( ...@@ -1963,6 +1963,7 @@ gpu_elemwise_alloc = gof.local_optimizer([GpuElemwise])(
tensor.opt.local_elemwise_alloc_op(GpuElemwise, GpuAlloc, GpuDimShuffle) tensor.opt.local_elemwise_alloc_op(GpuElemwise, GpuAlloc, GpuDimShuffle)
) )
register_opt()(gpu_elemwise_alloc) register_opt()(gpu_elemwise_alloc)
register_opt()(tensor.opt.local_useless_elemwise) # needed by gpu_elemwise_alloc
tensor.opt.register_specialize_device(gpu_elemwise_alloc) tensor.opt.register_specialize_device(gpu_elemwise_alloc)
......
...@@ -67,8 +67,8 @@ class TestBatchedDot(TestCase): ...@@ -67,8 +67,8 @@ class TestBatchedDot(TestCase):
z_test = numpy.sum(a[:,:,:,None]*b[:,None,:,:],axis=-2) z_test = numpy.sum(a[:,:,:,None]*b[:,None,:,:],axis=-2)
assert numpy.allclose(z0, z_test) unittest_tools.assert_allclose(z0, z_test)
assert numpy.allclose(z1, z_test) unittest_tools.assert_allclose(z1, z_test)
cmp((5,4,3), (5,3,2)) cmp((5,4,3), (5,3,2))
cmp((5,3,3), (5,3,3)) cmp((5,3,3), (5,3,3))
...@@ -108,7 +108,7 @@ class TestBatchedDot(TestCase): ...@@ -108,7 +108,7 @@ class TestBatchedDot(TestCase):
self.assertRaises(RuntimeError, fail, (5,4,3), (5,2,2)) self.assertRaises(RuntimeError, fail, (5,4,3), (5,2,2))
def test_batched_dot_gradient(self): def test_batched_dot_gradient(self):
theano.tests.unittest_tools.verify_grad( unittest_tools.verify_grad(
batched_dot, batched_dot,
[numpy.random.randn(5,7,2).astype(numpy.float32), [numpy.random.randn(5,7,2).astype(numpy.float32),
numpy.random.randn(5,2,6).astype(numpy.float32)], numpy.random.randn(5,2,6).astype(numpy.float32)],
...@@ -161,18 +161,18 @@ def test_dot22scalar(): ...@@ -161,18 +161,18 @@ def test_dot22scalar():
[a, b], [a, b],
tensor.dot(a, b) * numpy.asarray(4, 'float32')) tensor.dot(a, b) * numpy.asarray(4, 'float32'))
t = f.maker.fgraph.toposort() t = f.maker.fgraph.toposort()
assert any([isinstance(n.op, tcn.blas.GpuGemm) for n in t]) assert any([isinstance(n.op, tcn.blas.GpuDot22Scalar) for n in t])
assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty) # assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
for n in t]) # for n in t])
assert numpy.allclose(f(av, bv), f2(av, bv)) assert numpy.allclose(f(av, bv), f2(av, bv))
f = theano.function([a, b, scalar], tensor.dot(a, b) * scalar, f = theano.function([a, b, scalar], tensor.dot(a, b) * scalar,
mode=mode_with_gpu) mode=mode_with_gpu)
f2 = theano.function([a, b, scalar], tensor.dot(a, b) * scalar) f2 = theano.function([a, b, scalar], tensor.dot(a, b) * scalar)
t = f.maker.fgraph.toposort() t = f.maker.fgraph.toposort()
assert any([isinstance(n.op, tcn.blas.GpuGemm) for n in t]) assert any([isinstance(n.op, tcn.blas.GpuDot22Scalar) for n in t])
assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty) # assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
for n in t]) # for n in t])
assert numpy.allclose(f(av, bv, 0.5), f2(av, bv, 0.5)) assert numpy.allclose(f(av, bv, 0.5), f2(av, bv, 0.5))
f = theano.function([a, b, scalar], f = theano.function([a, b, scalar],
......
...@@ -202,13 +202,6 @@ class Test_local_elemwise_alloc(test_opt.Test_local_elemwise_alloc): ...@@ -202,13 +202,6 @@ class Test_local_elemwise_alloc(test_opt.Test_local_elemwise_alloc):
if elem.op is not None]) == count if elem.op is not None]) == count
) )
def _verify_assert_count(self, f, count):
assert(
sum([isinstance(elem.op, tensor.opt.Assert)
for elem in f.maker.fgraph.toposort()
if elem.op is not None]) == count
)
def test_alloc_memset_0(): def test_alloc_memset_0():
i = tensor.iscalar() i = tensor.iscalar()
......
...@@ -24,15 +24,13 @@ from .fp16_help import write_w ...@@ -24,15 +24,13 @@ from .fp16_help import write_w
def as_gpuarray_variable(x): def as_gpuarray_variable(x):
# This is needed to lower the number of useless transfer if getattr(x, 'owner', None):
# introduced during optimization. This speed up optimization and if isinstance(x.owner.op, HostFromGpu):
# "canonicalize" the graph, so it make easier making some
# optimization.
if (hasattr(x, 'fgraph') and
len(x.clients) == 1 and
x.owner and
isinstance(x.owner.op, HostFromGpu)):
return x.owner.inputs[0] return x.owner.inputs[0]
elif (isinstance(x.owner.op, GpuFromHost) and
x.owner.inputs[0].owner and
isinstance(x.owner.inputs[0].owner.op, HostFromGpu)):
return x.owner.inputs[0].owner.inputs[0]
if hasattr(x, '_as_GpuArrayVariable'): if hasattr(x, '_as_GpuArrayVariable'):
return x._as_GpuArrayVariable() return x._as_GpuArrayVariable()
# TODO we need to have the cuda -> gpu path taken care of. # TODO we need to have the cuda -> gpu path taken care of.
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论