Commit 7fcbb026 authored by Frederic

[CRASH] Fix for ops whose make_thunk does not say whether they are lazy or not.

Add a tutorial test about it.
Parent 14dad2b2
...@@ -886,6 +886,10 @@ class VM_Linker(link.LocalLinker): ...@@ -886,6 +886,10 @@ class VM_Linker(link.LocalLinker):
storage_map, storage_map,
compute_map, compute_map,
no_recycling)) no_recycling))
if not hasattr(thunks[-1], 'lazy'):
# We don't want all ops maker to think about lazy Ops.
# So if they didn't specify that its lazy or not, it isn't.
thunks[-1].lazy = False
except Exception, e: except Exception, e:
e.args = ("The following error happened while" e.args = ("The following error happened while"
" compiling the node", node, "\n") + e.args " compiling the node", node, "\n") + e.args
......
...@@ -2,11 +2,14 @@ ...@@ -2,11 +2,14 @@
""" """
import os, shutil, unittest import os, shutil, unittest
from nose.plugins.skip import SkipTest
import numpy
from numpy import array
import theano import theano
import theano.tensor as T import theano.tensor as T
from theano import function from theano import function
import numpy
from numpy import array
from theano import config from theano import config
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
...@@ -972,6 +975,61 @@ class T_using_gpu(unittest.TestCase): ...@@ -972,6 +975,61 @@ class T_using_gpu(unittest.TestCase):
assert not numpy.any([isinstance(x.op, T.Elemwise) assert not numpy.any([isinstance(x.op, T.Elemwise)
for x in f.maker.fgraph.toposort()]) for x in f.maker.fgraph.toposort()])
def test_using_gpu_pycudaop(self):
    """Test a user-defined Op whose make_thunk launches a raw PyCUDA kernel.

    Builds an Op that doubles a float32 matrix on the GPU, compiles it
    into a Theano function, and checks the result against NumPy.
    Skips when pycuda or the cuda backend is unavailable.
    """
    # Check pycuda availability BEFORE importing pycuda.compiler.
    # Importing SourceModule first (as the original did) raises an
    # ImportError when pycuda is missing, instead of the intended SkipTest.
    import theano.misc.pycuda_init
    if not theano.misc.pycuda_init.pycuda_available:
        raise SkipTest("Pycuda not installed. Skip test of theano op"
                       " with pycuda code.")
    # Single import of the cuda backend; the original imported the same
    # module twice under two names (`cuda` and `cuda_ndarray`).
    import theano.sandbox.cuda as cuda
    if not cuda.cuda_available:
        raise SkipTest('Optional package cuda disabled')
    from pycuda.compiler import SourceModule

    class PyCUDADoubleOp(theano.Op):
        # The Op is stateless, so any two instances are interchangeable:
        # equality and hash are based on the type alone.
        def __eq__(self, other):
            return type(self) == type(other)

        def __hash__(self):
            return hash(type(self))

        def __str__(self):
            return self.__class__.__name__

        def make_node(self, inp):
            # Force a contiguous float32 CudaNdarray input: the raw
            # kernel below assumes a dense layout.
            inp = cuda.basic_ops.gpu_contiguous(
                cuda.basic_ops.as_cuda_ndarray_variable(inp))
            assert inp.dtype == "float32"
            return theano.Apply(self, [inp], [inp.type()])

        def make_thunk(self, node, storage_map, _, _2):
            # Compile the CUDA kernel once, at thunk-creation time.
            mod = SourceModule("""
__global__ void my_fct(float * i0, float * o0, int size) {
int i = blockIdx.x*blockDim.x + threadIdx.x;
if(i<size){
o0[i] = i0[i]*2;
}
}""")
            pycuda_fct = mod.get_function("my_fct")
            inputs = [storage_map[v] for v in node.inputs]
            outputs = [storage_map[v] for v in node.outputs]

            def thunk():
                z = outputs[0]
                # Reuse the previous output buffer when the shape
                # matches; otherwise allocate a fresh one.
                if z[0] is None or z[0].shape != inputs[0][0].shape:
                    z[0] = cuda.CudaNdarray.zeros(inputs[0][0].shape)
                # One thread per element, 512 threads per block.
                grid = (int(numpy.ceil(inputs[0][0].size / 512.)), 1)
                pycuda_fct(inputs[0][0], z[0],
                           numpy.intc(inputs[0][0].size),
                           block=(512, 1, 1), grid=grid)
            return thunk

    x = theano.tensor.fmatrix()
    f = theano.function([x], PyCUDADoubleOp()(x))
    xv = numpy.ones((4, 5), dtype="float32")
    assert numpy.allclose(f(xv), xv * 2)
# Used in T_fibby # Used in T_fibby
class Fibby(theano.Op): class Fibby(theano.Op):
......
Markdown formatting supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Finish editing this comment first!
Register or sign in to post a comment