提交 5890e98e authored 作者: lamblin's avatar lamblin

Merge pull request #580 from nouiz/gpu_setsubtensor

Gpu setsubtensor
...@@ -9,6 +9,13 @@ Bug fixes ...@@ -9,6 +9,13 @@ Bug fixes
(both in Python and Cython) since April 2011. (Pascal L.) (both in Python and Cython) since April 2011. (Pascal L.)
* In Sparse sandbox, fix the grad of theano.sparse.sandbox.sp.row_scale. * In Sparse sandbox, fix the grad of theano.sparse.sandbox.sp.row_scale.
It did not return the right number of elements. (Frederic B.) It did not return the right number of elements. (Frederic B.)
* set_subtensor(x[int vector], new_value), when moved to the GPU,
was transformed into inc_subtensor on the GPU. Now we have a slow
GPU implementation.
Note: set_subtensor(x[slice[,...]], new_value) was working correctly
in all cases, as well as inc_subtensor(*, *).
Note2: If your code has this behavior, we print a warning by default.
(Frederic B.)
Documentation Documentation
* Added in the tutorial documentation on how to extend Theano. * Added in the tutorial documentation on how to extend Theano.
...@@ -81,6 +88,8 @@ Crash Fix ...@@ -81,6 +88,8 @@ Crash Fix
element-wise fusion optimization when upcasting some inputs to element-wise fusion optimization when upcasting some inputs to
float32 (to compute them on the GPU). float32 (to compute them on the GPU).
(Frederic B., reported by Sander Dieleman) (Frederic B., reported by Sander Dieleman)
* GpuReshape in some particular cases when the input is not contiguous
(Frederic B., reported by Sander Dieleman)
* GpuSoftmaxWithBias with shape (0, N) with N > 1. * GpuSoftmaxWithBias with shape (0, N) with N > 1.
(Frédéric B., reported by Razvan P.) (Frédéric B., reported by Razvan P.)
* Fix crash under 64-bit Windows, when taking subtensors of the form a[n:] * Fix crash under 64-bit Windows, when taking subtensors of the form a[n:]
...@@ -89,6 +98,7 @@ Crash Fix ...@@ -89,6 +98,7 @@ Crash Fix
dimensions, which could typically result in optimization crashes (Olivier D.) dimensions, which could typically result in optimization crashes (Olivier D.)
* Fixed crash when concatenating some arrays with specific broadcasting * Fixed crash when concatenating some arrays with specific broadcasting
patterns (Olivier D.) patterns (Olivier D.)
* Work around a known issue with nvcc 4.1 on MacOS X. (Graham Taylor)
============= =============
Release Notes Release Notes
......
...@@ -315,6 +315,13 @@ AddConfigVar('warn.subtensor_merge_bug', ...@@ -315,6 +315,13 @@ AddConfigVar('warn.subtensor_merge_bug',
BoolParam(warn_default('0.5')), BoolParam(warn_default('0.5')),
in_c_key=False) in_c_key=False)
AddConfigVar('warn.gpu_set_subtensor1',
"Warn if previous versions of Theano (before 0.6) could have given "
"incorrect results when moving to the gpu "
"set_subtensor(x[int vector], new_value)",
BoolParam(warn_default('0.6')),
in_c_key=False)
AddConfigVar('compute_test_value', AddConfigVar('compute_test_value',
("If 'True', Theano will run each op at graph build time, using " ("If 'True', Theano will run each op at graph build time, using "
"Constants, SharedVariables and the tag 'test_value' as inputs " "Constants, SharedVariables and the tag 'test_value' as inputs "
......
...@@ -82,17 +82,24 @@ class InputToGpuOptimizer(Optimizer): ...@@ -82,17 +82,24 @@ class InputToGpuOptimizer(Optimizer):
def apply(self, env): def apply(self, env):
for input in env.inputs: for input in env.inputs:
if not isinstance(input.type, CudaNdarrayType): if isinstance(input.type, CudaNdarrayType):
try: return
new_input = host_from_gpu(gpu_from_host(input))
if new_input.type == input.type: # This happen frequently as we do 2 pass of the gpu optimizations
env.replace_validate(input, new_input, if (len(input.clients) == 1 and
"InputToGpuOptimizer") input.clients[0][0].op == gpu_from_host):
except TypeError, e: return
#as we currently only support float32, this can fail.
#Using try except make that we won't need try:
pass new_input = host_from_gpu(gpu_from_host(input))
if new_input.type == input.type:
env.replace_validate(input, new_input,
"InputToGpuOptimizer")
except TypeError, e:
#as we currently only support float32, this can fail.
#Using try except make that we won't need
pass
# we register it before all other gpu optimizer to be sure that the input # we register it before all other gpu optimizer to be sure that the input
# are on the gpu. # are on the gpu.
...@@ -753,11 +760,11 @@ def local_gpu_advanced_incsubtensor1(node): ...@@ -753,11 +760,11 @@ def local_gpu_advanced_incsubtensor1(node):
warnings.warn( warnings.warn(
'Although your current code is fine, please note that ' 'Although your current code is fine, please note that '
'Theano versions prior to 0.6 (more specifically, ' 'Theano versions prior to 0.6 (more specifically, '
'prior to commit XXXX on DATE) may have ' 'prior to commit d2240bddd on March 29, 2012) may have '
'yielded an incorrect result. To remove this warning, ' 'yielded an incorrect result. To remove this warning, '
'either set the `warn.gpu_set_subtensor1` config ' 'either set the `warn.gpu_set_subtensor1` config '
'option to False, or `warn.ignore_bug_before` to at ' 'option to False, or `warn.ignore_bug_before` to at '
'least \'0.6\'.') 'least \'0.6\'.', stacklevel=1)
if set_instead_of_inc: if set_instead_of_inc:
return return
...@@ -787,7 +794,7 @@ def local_gpu_advanced_incsubtensor1(node): ...@@ -787,7 +794,7 @@ def local_gpu_advanced_incsubtensor1(node):
warnings.warn( warnings.warn(
'Although your current code is fine, please note that ' 'Although your current code is fine, please note that '
'Theano versions prior to 0.6 (more specifically, ' 'Theano versions prior to 0.6 (more specifically, '
'prior to commit XXXX on DATE) may have ' 'prior to commit d2240bddd on March 29, 2012) may have '
'yielded an incorrect result. To remove this warning, ' 'yielded an incorrect result. To remove this warning, '
'either set the `warn.gpu_set_subtensor1` config ' 'either set the `warn.gpu_set_subtensor1` config '
'option to False, or `warn.ignore_bug_before` to at ' 'option to False, or `warn.ignore_bug_before` to at '
......
...@@ -2100,23 +2100,20 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -2100,23 +2100,20 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
return super(T_subtensor, self).__init__(name) return super(T_subtensor, self).__init__(name)
def function(self, inputs, outputs, accept_inplace=False, def function(self, inputs, outputs, accept_inplace=False,
op=None, mode=None, N=1): op=None, mode=None, N=1, N_fast=None):
""" wrapper around theano.function that also check the output """ wrapper around theano.function that also check the output
:param N: the number of op expected in the toposort :param N: the number of op expected in the toposort
if tuple of length 2, (expected if fast_compile, if tuple of length 2, (expected if fast_compile,
if not fast_compile) if not fast_compile)
""" """
if isinstance(N, tuple): if self.fast_compile and N_fast is not None:
assert len(N) == 2 N = N_fast
if self.fast_compile:
N = N[0]
else:
N = N[1]
if mode is None: if mode is None:
mode = self.mode mode = self.mode
if op is None: if op is None:
op = self.sub op = self.sub
f = theano.function(inputs, outputs, mode=mode, f = theano.function(inputs, outputs, mode=mode,
accept_inplace=accept_inplace) accept_inplace=accept_inplace)
self.assertFunctionContainsClassN(f, op, N) self.assertFunctionContainsClassN(f, op, N)
...@@ -2694,7 +2691,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -2694,7 +2691,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
if idx is idxs[0]: if idx is idxs[0]:
f = self.function([], [gn.shape, n[idx_].shape], f = self.function([], [gn.shape, n[idx_].shape],
op=ops, op=ops,
N=(2, 0)) N=0, N_fast=2)
f() f()
def test_wrong_exception_regression(self): def test_wrong_exception_regression(self):
...@@ -2747,7 +2744,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -2747,7 +2744,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
data = numpy.asarray(data, dtype=self.dtype) data = numpy.asarray(data, dtype=self.dtype)
n = self.shared(data) n = self.shared(data)
t = n[idx] t = n[idx]
f = self.function([], t.shape, op=self.ops, N=(1, 0)) f = self.function([], t.shape, op=self.ops, N=0, N_fast=1)
val = f() val = f()
self.assertTrue(numpy.allclose(val, data[idx].shape)) self.assertTrue(numpy.allclose(val, data[idx].shape))
...@@ -2850,6 +2847,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -2850,6 +2847,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
data_copy[idx] = inc_num data_copy[idx] = inc_num
else: else:
data_copy[idx] += inc_num data_copy[idx] += inc_num
data_var = theano.In(data_var, mutable=True)
# Remember data for the Theano function (see below). # Remember data for the Theano function (see below).
all_inputs_var += [data_var, idx_var, inc_var] all_inputs_var += [data_var, idx_var, inc_var]
all_inputs_num += [data_num, idx_num, inc_num] all_inputs_num += [data_num, idx_num, inc_num]
...@@ -2869,9 +2868,16 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -2869,9 +2868,16 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
assert (data_num == data_num_init).all() assert (data_num == data_num_init).all()
# Actual test (we compile a single Theano function to make it faster). # Actual test (we compile a single Theano function to make it faster).
f = self.function(all_inputs_var, all_outputs_var, orig_warn = theano.config.warn.gpu_set_subtensor1
accept_inplace=True, op=self.adv_incsub1, try:
N=len(all_outputs_var)) theano.config.warn.gpu_set_subtensor1 = False
f = self.function(all_inputs_var, all_outputs_var,
accept_inplace=True,
op=self.adv_incsub1,
N=len(all_outputs_var))
finally:
theano.config.warn.gpu_set_subtensor1 = orig_warn
f_outs = f(*all_inputs_num) f_outs = f(*all_inputs_num)
assert len(f_outs) == len(all_outputs_num) assert len(f_outs) == len(all_outputs_num)
for f_out, output_num in izip(f_outs, all_outputs_num): for f_out, output_num in izip(f_outs, all_outputs_num):
......
...@@ -93,7 +93,8 @@ class TestOptimizationMixin(object): ...@@ -93,7 +93,8 @@ class TestOptimizationMixin(object):
def assertFunctionContains(self, f, op, min=1, max=sys.maxint): def assertFunctionContains(self, f, op, min=1, max=sys.maxint):
toposort = f.maker.env.toposort() toposort = f.maker.env.toposort()
matches = [node for node in toposort if node.op == op] matches = [node for node in toposort if node.op == op]
assert (min <= len(matches) <= max), (toposort, matches, str(op), min, max) assert (min <= len(matches) <= max), (toposort, matches,
str(op), len(matches), min, max)
def assertFunctionContains0(self, f, op): def assertFunctionContains0(self, f, op):
return self.assertFunctionContains(f, op, min=0, max=0) return self.assertFunctionContains(f, op, min=0, max=0)
...@@ -104,6 +105,15 @@ class TestOptimizationMixin(object): ...@@ -104,6 +105,15 @@ class TestOptimizationMixin(object):
def assertFunctionContainsN(self, f, op, N): def assertFunctionContainsN(self, f, op, N):
return self.assertFunctionContains(f, op, min=N, max=N) return self.assertFunctionContains(f, op, min=N, max=N)
def assertFunctionContainsClass(self, f, op, min=1, max=sys.maxint):
toposort = f.maker.env.toposort()
matches = [node for node in toposort if isinstance(node.op, op)]
assert (min <= len(matches) <= max), (toposort, matches,
str(op), len(matches), min, max)
def assertFunctionContainsClassN(self, f, op, N):
return self.assertFunctionContainsClass(f, op, min=N, max=N)
def SkipTest(self, msg='Skip this test'): def SkipTest(self, msg='Skip this test'):
raise SkipTest(msg) raise SkipTest(msg)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论