提交 b0a20106 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #2999 from nouiz/crash_test

[CRASH,TESTS] Fix crash recently introduced and fix tests
......@@ -34,7 +34,7 @@ _logger = logging.getLogger(_logger_name)
def as_cuda_ndarray_variable(x):
if x.owner:
if getattr(x, 'owner', None):
if isinstance(x.owner.op, HostFromGpu):
return x.owner.inputs[0]
elif (isinstance(x.owner.op, GpuFromHost) and
......
......@@ -1963,6 +1963,7 @@ gpu_elemwise_alloc = gof.local_optimizer([GpuElemwise])(
tensor.opt.local_elemwise_alloc_op(GpuElemwise, GpuAlloc, GpuDimShuffle)
)
register_opt()(gpu_elemwise_alloc)
register_opt()(tensor.opt.local_useless_elemwise) # needed by gpu_elemwise_alloc
tensor.opt.register_specialize_device(gpu_elemwise_alloc)
......
......@@ -67,8 +67,8 @@ class TestBatchedDot(TestCase):
z_test = numpy.sum(a[:,:,:,None]*b[:,None,:,:],axis=-2)
assert numpy.allclose(z0, z_test)
assert numpy.allclose(z1, z_test)
unittest_tools.assert_allclose(z0, z_test)
unittest_tools.assert_allclose(z1, z_test)
cmp((5,4,3), (5,3,2))
cmp((5,3,3), (5,3,3))
......@@ -108,7 +108,7 @@ class TestBatchedDot(TestCase):
self.assertRaises(RuntimeError, fail, (5,4,3), (5,2,2))
def test_batched_dot_gradient(self):
theano.tests.unittest_tools.verify_grad(
unittest_tools.verify_grad(
batched_dot,
[numpy.random.randn(5,7,2).astype(numpy.float32),
numpy.random.randn(5,2,6).astype(numpy.float32)],
......@@ -161,18 +161,18 @@ def test_dot22scalar():
[a, b],
tensor.dot(a, b) * numpy.asarray(4, 'float32'))
t = f.maker.fgraph.toposort()
assert any([isinstance(n.op, tcn.blas.GpuGemm) for n in t])
assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
for n in t])
assert any([isinstance(n.op, tcn.blas.GpuDot22Scalar) for n in t])
# assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
# for n in t])
assert numpy.allclose(f(av, bv), f2(av, bv))
f = theano.function([a, b, scalar], tensor.dot(a, b) * scalar,
mode=mode_with_gpu)
f2 = theano.function([a, b, scalar], tensor.dot(a, b) * scalar)
t = f.maker.fgraph.toposort()
assert any([isinstance(n.op, tcn.blas.GpuGemm) for n in t])
assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
for n in t])
assert any([isinstance(n.op, tcn.blas.GpuDot22Scalar) for n in t])
# assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
# for n in t])
assert numpy.allclose(f(av, bv, 0.5), f2(av, bv, 0.5))
f = theano.function([a, b, scalar],
......
......@@ -202,13 +202,6 @@ class Test_local_elemwise_alloc(test_opt.Test_local_elemwise_alloc):
if elem.op is not None]) == count
)
def _verify_assert_count(self, f, count):
assert(
sum([isinstance(elem.op, tensor.opt.Assert)
for elem in f.maker.fgraph.toposort()
if elem.op is not None]) == count
)
def test_alloc_memset_0():
i = tensor.iscalar()
......
......@@ -24,15 +24,13 @@ from .fp16_help import write_w
def as_gpuarray_variable(x):
# This is needed to lower the number of useless transfer
# introduced during optimization. This speed up optimization and
# "canonicalize" the graph, so it make easier making some
# optimization.
if (hasattr(x, 'fgraph') and
len(x.clients) == 1 and
x.owner and
isinstance(x.owner.op, HostFromGpu)):
return x.owner.inputs[0]
if getattr(x, 'owner', None):
if isinstance(x.owner.op, HostFromGpu):
return x.owner.inputs[0]
elif (isinstance(x.owner.op, GpuFromHost) and
x.owner.inputs[0].owner and
isinstance(x.owner.inputs[0].owner.op, HostFromGpu)):
return x.owner.inputs[0].owner.inputs[0]
if hasattr(x, '_as_GpuArrayVariable'):
return x._as_GpuArrayVariable()
# TODO we need to have the cuda -> gpu path taken care of.
......
Markdown 格式
0%
您正在添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论