Merge pull request #2999 from nouiz/crash_test

[CRASH,TESTS] Fix a recently introduced crash and fix tests

Merge pull request #2999 from nouiz/crash_test
b0a20106 · Pascal Lamblin · e2775418 · e3c4c093 · b0a20106 · b0a20106
--- a/theano/sandbox/cuda/basic_ops.py
+++ b/theano/sandbox/cuda/basic_ops.py
@@ -34,7 +34,7 @@ _logger = logging.getLogger(_logger_name)
 def as_cuda_ndarray_variable(x):
-    if x.owner:
+    if getattr(x, 'owner', None):
        if isinstance(x.owner.op, HostFromGpu):
            return x.owner.inputs[0]
        elif (isinstance(x.owner.op, GpuFromHost) and

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -1963,6 +1963,7 @@ gpu_elemwise_alloc = gof.local_optimizer([GpuElemwise])(
    tensor.opt.local_elemwise_alloc_op(GpuElemwise, GpuAlloc, GpuDimShuffle)
 )
 register_opt()(gpu_elemwise_alloc)
+register_opt()(tensor.opt.local_useless_elemwise) # needed by gpu_elemwise_alloc
 tensor.opt.register_specialize_device(gpu_elemwise_alloc)

--- a/theano/sandbox/cuda/tests/test_blas.py
+++ b/theano/sandbox/cuda/tests/test_blas.py
@@ -67,8 +67,8 @@ class TestBatchedDot(TestCase):
            z_test = numpy.sum(a[:,:,:,None]*b[:,None,:,:],axis=-2)
-            assert numpy.allclose(z0, z_test)
+            unittest_tools.assert_allclose(z0, z_test)
-            assert numpy.allclose(z1, z_test)
+            unittest_tools.assert_allclose(z1, z_test)
        cmp((5,4,3), (5,3,2))
        cmp((5,3,3), (5,3,3))
@@ -108,7 +108,7 @@ class TestBatchedDot(TestCase):
        self.assertRaises(RuntimeError, fail, (5,4,3), (5,2,2))
    def test_batched_dot_gradient(self):
-        theano.tests.unittest_tools.verify_grad(
+        unittest_tools.verify_grad(
            batched_dot,
            [numpy.random.randn(5,7,2).astype(numpy.float32),
             numpy.random.randn(5,2,6).astype(numpy.float32)],
@@ -161,18 +161,18 @@ def test_dot22scalar():
                [a, b],
                tensor.dot(a, b) * numpy.asarray(4, 'float32'))
        t = f.maker.fgraph.toposort()
-        assert any([isinstance(n.op, tcn.blas.GpuGemm) for n in t])
+        assert any([isinstance(n.op, tcn.blas.GpuDot22Scalar) for n in t])
-        assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
+#        assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
-                    for n in t])
+#                    for n in t])
        assert numpy.allclose(f(av, bv), f2(av, bv))
        f = theano.function([a, b, scalar], tensor.dot(a, b) * scalar,
                            mode=mode_with_gpu)
        f2 = theano.function([a, b, scalar], tensor.dot(a, b) * scalar)
        t = f.maker.fgraph.toposort()
-        assert any([isinstance(n.op, tcn.blas.GpuGemm) for n in t])
+        assert any([isinstance(n.op, tcn.blas.GpuDot22Scalar) for n in t])
-        assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
+#        assert any([isinstance(n.op, tcn.basic_ops.GpuAllocEmpty)
-                    for n in t])
+#                    for n in t])
        assert numpy.allclose(f(av, bv, 0.5), f2(av, bv, 0.5))
        f = theano.function([a, b, scalar],

--- a/theano/sandbox/cuda/tests/test_opt.py
+++ b/theano/sandbox/cuda/tests/test_opt.py
@@ -202,13 +202,6 @@ class Test_local_elemwise_alloc(test_opt.Test_local_elemwise_alloc):
                 if elem.op is not None]) == count
        )
-    def _verify_assert_count(self, f, count):
-        assert(
-            sum([isinstance(elem.op, tensor.opt.Assert)
-                 for elem in f.maker.fgraph.toposort()
-                 if elem.op is not None]) == count
-        )
 def test_alloc_memset_0():
    i = tensor.iscalar()

--- a/theano/sandbox/gpuarray/basic_ops.py
+++ b/theano/sandbox/gpuarray/basic_ops.py
@@ -24,15 +24,13 @@ from .fp16_help import write_w
 def as_gpuarray_variable(x):
-    # This is needed to lower the number of useless transfer
+    if getattr(x, 'owner', None):
-    # introduced during optimization.  This speed up optimization and
+        if isinstance(x.owner.op, HostFromGpu):
-    # "canonicalize" the graph, so it make easier making some
-    # optimization.
-    if (hasattr(x, 'fgraph') and
-        len(x.clients) == 1 and
-        x.owner and
-        isinstance(x.owner.op, HostFromGpu)):
            return x.owner.inputs[0]
+        elif (isinstance(x.owner.op, GpuFromHost) and
+              x.owner.inputs[0].owner and
+              isinstance(x.owner.inputs[0].owner.op, HostFromGpu)):
+            return x.owner.inputs[0].owner.inputs[0]
    if hasattr(x, '_as_GpuArrayVariable'):
        return x._as_GpuArrayVariable()
    # TODO we need to have the cuda -> gpu path taken care of.