Merge pull request #2746 from nouiz/shared

fix gh-855.

Merge pull request #2746 from nouiz/shared
45a84389 · abergeron · 893a4549 · 24cedf24 · 45a84389 · 45a84389
--- a/theano/gof/tests/test_vm.py
+++ b/theano/gof/tests/test_vm.py
@@ -2,17 +2,12 @@ import gc
 import sys
 import time
 import unittest
-try:
-    import line_profiler
-except ImportError:
-    pass

 from nose.plugins.skip import SkipTest
 import numpy

 from theano import function
 from theano.gof import vm
-from theano.gof import link
 from theano.gof import OpWiseCLinker
 from theano.compile import Mode

@@ -36,14 +31,15 @@ class TestCallbacks(unittest.TestCase):
    def test_callback(self):
        a, b, c = tensor.scalars('abc')
        f = function([a, b, c], (a + b) + c,
-                mode=Mode(
-                    optimizer=None,
-                    linker=vm.VM_Linker(callback=self.callback)))
+                     mode=Mode(
+                         optimizer=None,
+                         linker=vm.VM_Linker(callback=self.callback)))

        f(1, 2, 3)
        assert sum(self.n_callbacks.values()) == len(f.maker.fgraph.toposort())
        f(1, 2, 3)
-        assert sum(self.n_callbacks.values()) == len(f.maker.fgraph.toposort()) * 2
+        assert (sum(self.n_callbacks.values()) ==
+                len(f.maker.fgraph.toposort()) * 2)

    def test_callback_with_ifelse(self):
        a, b, c = tensor.scalars('abc')
@@ -88,8 +84,8 @@ def test_speed():
        t_b = t3 - t2

        print "%s takes %f s/Kop" % (
-                'numpy',
-                (1000*(t_b-t_a) / (steps_b - steps_a)))
+            'numpy',
+            (1000 * (t_b - t_a) / (steps_b - steps_a)))

    def time_linker(name, linker):
        steps_a = 5
@@ -99,13 +95,9 @@ def test_speed():
        b = build_graph(x, steps_b)

        f_a = function([x], a,
-                       mode=Mode(optimizer=None, linker=linker()),
-                       #profile='f_a speed test %s'%name,
-        )
+                       mode=Mode(optimizer=None, linker=linker()))
        f_b = function([x], b,
-                       mode=Mode(optimizer=None, linker=linker()),
-                       #profile='f_b speed test %s'%name,
-        )
+                       mode=Mode(optimizer=None, linker=linker()))

        f_a([2.0, 3.0])
        t0 = time.time()
@@ -151,14 +143,10 @@ def test_speed_lazy():

        f_a = function([x], a,
                       mode=Mode(optimizer=None,
-                                 linker=linker()),
-                       #profile='f_a lazy ifelse %s'%name,
-        )
+                                 linker=linker()))
        f_b = function([x], b,
                       mode=Mode(optimizer=None,
-                                 linker=linker()),
-                       #profile='f_b lazy ifelse %s'%name,
-        )
+                                 linker=linker()))

        f_a([2.0])
        t0 = time.time()
@@ -175,8 +163,8 @@ def test_speed_lazy():
        t_b = t3 - t2

        print "%s takes %f s/Kop" % (
-                name,
-                (1000*(t_b-t_a) / (steps_b - steps_a)))
+            name,
+            (1000*(t_b-t_a) / (steps_b - steps_a)))

    time_linker('vmLinker', vm.VM_Linker)
    time_linker('vmLinker_nogc', lambda: vm.VM_Linker(allow_gc=False))
@@ -220,6 +208,7 @@ if run_memory_usage_tests:
            c = sys.getrefcount(n)
            a = cuda.CudaNdarray(n)
            assert c == sys.getrefcount(n)
+            del a
            if not i % 1000:
                print '.',
                print gc.collect(),
@@ -258,20 +247,20 @@ if run_memory_usage_tests:
        def build_graph(x, depth=5):
            z = x
            for d in range(depth):
-                z = ifelse(z > 0, -z, z)
+                z = ifelse(z.mean() > 0.5, -z, z)
            return z

        def time_linker(name, linker):
            steps_a = 10
-            x = tensor.vector()
+            x = tensor.dvector()
            a = build_graph(x, steps_a)

            f_a = function([x], a,
                           mode=Mode(optimizer=None,
                                     linker=linker()))
-
-            for i in xrange(100000):
-                f_a([2.0])
+            inp = numpy.random.rand(1000000)
+            for i in xrange(100):
+                f_a(inp)
            if 0:  # this doesn't seem to work, prints 0 for everything
                import resource
                pre = resource.getrusage(resource.RUSAGE_SELF)
@@ -279,9 +268,12 @@ if run_memory_usage_tests:
                print pre.ru_ixrss, post.ru_ixrss
                print pre.ru_idrss, post.ru_idrss
                print pre.ru_maxrss, post.ru_maxrss
-
+        print 1
        time_linker('vmLinker_C',
                    lambda: vm.VM_Linker(allow_gc=False, use_cloop=True))
+        print 2
+        time_linker('vmLinker',
+                    lambda: vm.VM_Linker(allow_gc=False, use_cloop=False))

    def test_no_leak_many_call_nonlazy():
        # Verify no memory leaks when calling a function a lot of times
@@ -297,18 +289,21 @@ if run_memory_usage_tests:

        def time_linker(name, linker):
            steps_a = 10
-            x = tensor.vector()
+            x = tensor.dvector()
            a = build_graph(x, steps_a)

            f_a = function([x], a,
                           mode=Mode(optimizer=None,
                                     linker=linker()))
-
-            for i in xrange(500000):
-                f_a([2.0])
-
+            inp = numpy.random.rand(1000000)
+            for i in xrange(500):
+                f_a(inp)
+        print 1
        time_linker('vmLinker_C',
                    lambda: vm.VM_Linker(allow_gc=False, use_cloop=True))
+        print 2
+        time_linker('vmLinker',
+                    lambda: vm.VM_Linker(allow_gc=False, use_cloop=False))


 class RunOnce(theano.Op):

--- a/theano/sandbox/cuda/tests/test_basic_ops.py
+++ b/theano/sandbox/cuda/tests/test_basic_ops.py
@@ -878,7 +878,7 @@ def test_gpujoin_assert_cndas():
        c = cuda.basic_ops.gpu_join(1, a)
        # can't "assert False" here, as we want the assertion
        # error from gpu_join
-    except AssertionError:
+    except TypeError:
        assert True
        return


--- a/theano/sandbox/gpuarray/tests/test_basic_ops.py
+++ b/theano/sandbox/gpuarray/tests/test_basic_ops.py
@@ -42,6 +42,7 @@ from ..basic_ops import (
    gpu_alloc, GpuAlloc,
    gpu_from_cuda,
    cuda_from_gpu, HostFromGpu,
+    GpuContiguous,
    GpuFromHost, GpuReshape,
    gpu_join, GpuJoin, GpuSplit, GpuEye, gpu_contiguous)
 from ..subtensor import GpuSubtensor
@@ -332,10 +333,13 @@ def test_gpu_contiguous():
    a = T.fmatrix('a')
    i = T.iscalar('i')
    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
-    f = theano.function([a, i], gpu_contiguous(a[::i]),
+    # The reshape is needed; otherwise the subtensor is done on the CPU
+    # to transfer less data.
+    f = theano.function([a, i], gpu_contiguous(a.reshape((5, 4))[::i]),
                        mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any([isinstance(node.op, GpuSubtensor) for node in topo])
+    assert any([isinstance(node.op, GpuContiguous) for node in topo])
    assert f(a_val, 1).flags.c_contiguous
    assert f(a_val, 2).flags.c_contiguous
    assert f(a_val, 2).flags.c_contiguous

--- a/theano/tensor/sharedvar.py
+++ b/theano/tensor/sharedvar.py
@@ -59,11 +59,16 @@ class ScalarSharedVariable(_tensor_py_operators, SharedVariable):


 @shared_constructor
-def scalar_constructor(value, name=None, strict=False, allow_downcast=None):
+def scalar_constructor(value, name=None, strict=False, allow_downcast=None,
+                       borrow=False):
    """SharedVariable constructor for scalar values. Default: int64 or float64.

    :note: We implement this using 0-d tensors for now.

+    :note: We ignore the borrow parameter as we convert ``value`` to an
+      ndarray (this is a new object). This respects the semantics of
+      borrow, as it is a hint to Theano that we can reuse it.
+
    """
    if not isinstance(value, (numpy.number, float, int, complex)):
        raise TypeError()

--- a/theano/tensor/sort.py
+++ b/theano/tensor/sort.py
@@ -61,7 +61,7 @@ class SortOp(theano.Op):
        inp_grad = theano.gradient.grad_not_implemented(
            self, 0, axis,
            "Currently, we only implement the gradient on sort for vector"
-            " and matrix (and axis is None)")
+            " and matrix (and axis is None or 0)")
        if a.ndim == 1:
            idx = argsort(*inputs, kind=self.kind, order=self.order)
 #            rev_idx = numpy.where(idx[None, :]==numpy.arange(5)[:,None])[1]

--- a/theano/tensor/tests/test_sharedvar.py
+++ b/theano/tensor/tests/test_sharedvar.py
@@ -617,3 +617,9 @@ test_shared_options = makeSharedTester(
    cast_value_=numpy.asarray,
    op_by_matrix_=False,
    name='test_shared_options')
+
+
+def test_scalar_shared_options():
+    # Simple test to make sure we do not lose that functionality.
+    theano.shared(value=0., name='lk', borrow=True)
+    theano.shared(value=numpy.float32(0.), name='lk', borrow=True)
--- a/theano/tests/test_flake8.py
+++ b/theano/tests/test_flake8.py
@@ -320,7 +320,6 @@ whitelist_flake8 = [
    "gof/tests/test_destroyhandler.py",
    "gof/tests/test_opt.py",
    "gof/tests/test_lazy.py",
-    "gof/tests/test_vm.py",
    "gof/tests/test_toolbox.py",
    "gof/tests/test_link.py",
    "gof/tests/test_fg.py",

--- a/theano/tests/test_rop.py
+++ b/theano/tests/test_rop.py
@@ -68,11 +68,11 @@ class RopLop_checker(unittest.TestCase):
        self.x = tensor.vector('x')
        self.v = tensor.vector('v')
        self.rng = numpy.random.RandomState(utt.fetch_seed())
-        self.in_shape = (5 + self.rng.randint(30),)
+        self.in_shape = (5 + self.rng.randint(3),)
        self.mx = tensor.matrix('mx')
        self.mv = tensor.matrix('mv')
-        self.mat_in_shape = (5 + self.rng.randint(30),
-                             5 + self.rng.randint(30))
+        self.mat_in_shape = (5 + self.rng.randint(3),
+                             5 + self.rng.randint(3))

    def check_nondiff_rop(self, y):
        """ If your op is not differentiable(so you can't define Rop)