提交 7e4b6196 — 作者: lamblin

Merge pull request #1297 from nouiz/fixes

Fixes
......@@ -136,28 +136,33 @@ class NVCC_compiler(object):
flags.append("-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS")
flags.append("-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS")
# We compile cuda_ndarray.cu during import.
# We should not add device properties at that time.
# As the device is not selected yet!
# TODO: compile cuda_ndarray when we bind to a GPU?
import theano.sandbox.cuda
if hasattr(theano.sandbox, 'cuda'):
n = theano.sandbox.cuda.use.device_number
if n is None:
_logger.warn("We try to get compilation arguments for CUDA"
" code, but the GPU device is not initialized."
" This is probably caused by an Op that work on"
" the GPU that don't inherit from GpuOp."
" We Initialize the GPU now.")
theano.sandbox.cuda.use("gpu",
force=True,
default_to_move_computation_to_gpu=False,
move_shared_float32_to_gpu=False,
enable_cuda=False)
# If the user didn't specify architecture flags add them
if not any(['-arch=sm_' in f for f in flags]):
# We compile cuda_ndarray.cu during import.
# We should not add device properties at that time.
# As the device is not selected yet!
# TODO: re-compile cuda_ndarray when we bind to a GPU?
import theano.sandbox.cuda
if hasattr(theano.sandbox, 'cuda'):
n = theano.sandbox.cuda.use.device_number
if n is None:
_logger.warn(
"We try to get compilation arguments for CUDA"
" code, but the GPU device is not initialized."
" This is probably caused by an Op that work on"
" the GPU that don't inherit from GpuOp."
" We Initialize the GPU now.")
theano.sandbox.cuda.use(
"gpu",
force=True,
default_to_move_computation_to_gpu=False,
move_shared_float32_to_gpu=False,
enable_cuda=False)
n = theano.sandbox.cuda.use.device_number
p = theano.sandbox.cuda.device_properties(n)
flags.append('-arch=sm_' + str(p['major']) +
str(p['minor']))
p = theano.sandbox.cuda.device_properties(n)
flags.append('-arch=sm_' + str(p['major']) + str(p['minor']))
return flags
@staticmethod
......
......@@ -3417,9 +3417,9 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
utt.verify_grad(fct, [data])
# Test the grad of the grad (e.i. AdvancedIncSubtensor1.grad)
def fct(t):
def fct2(t):
return grad(sum(t[idx_]), t)
utt.verify_grad(fct, [data])
utt.verify_grad(fct2, [data])
# Test shape of AdvancedIncSubtensor1 and AdvancedSubtensor1
if not self.fast_compile:
......@@ -5151,10 +5151,11 @@ class T_reshape(unittest.TestCase):
assert numpy.all(f_sub(a_val, b_val) == [2, 3])
def test_reshape_long_in_shape(self):
v = vector('v')
v = dvector('v')
r = v.reshape((v.shape[0], 1L))
print r.eval({v: numpy.arange(5.)})
assert numpy.allclose(r.eval({v: numpy.arange(5.)}).T, numpy.arange(5.))
assert numpy.allclose(r.eval({v: numpy.arange(5.)}).T,
numpy.arange(5.))
def test_bad_shape(self):
a = matrix('a')
......@@ -5709,7 +5710,7 @@ class TestPermuteRowElements(unittest.TestCase):
out_val = permute(input_val, p_val)
# The same permutation should be applied to every row of the input matrix.
out_bis = numpy.asarray([row[p_val] for row in input_val])
out_bis = numpy.asarray([r[p_val] for r in input_val])
assert numpy.all(out_val == out_bis)
# Verify gradient
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论