提交 e9328fdd authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #2706 from abergeron/merge_test_fixes

Merge test fixes
...@@ -945,18 +945,18 @@ class TestAlloc(theano.tensor.tests.test_basic.TestAlloc): ...@@ -945,18 +945,18 @@ class TestAlloc(theano.tensor.tests.test_basic.TestAlloc):
dtype = "float32" dtype = "float32"
mode = mode_with_gpu mode = mode_with_gpu
shared = staticmethod(cuda.shared_constructor) shared = staticmethod(cuda.shared_constructor)
allocs = [B.GpuAlloc, B.GpuAlloc, tensor.Alloc] allocs = [B.GpuAlloc(), B.GpuAlloc(), tensor.Alloc()]
class T_Join_and_Split(theano.tensor.tests.test_basic.T_Join_and_Split): class T_Join_and_Split(theano.tensor.tests.test_basic.T_Join_and_Split):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
self.mode = mode_with_gpu.excluding('constant_folding') self.mode = mode_with_gpu.excluding('constant_folding')
self.join_op = cuda.GpuJoin self.join_op = cuda.GpuJoin()
# No gpu split. # No gpu split.
self.split_op = tensor.Split self.split_op_class = tensor.Split
# No Make vector on the gpu, Join used instead # No Make vector on the gpu, Join used instead
self.make_vector_op = cuda.GpuJoin self.make_vector_op = cuda.GpuJoin()
self.floatX = "float32" self.floatX = "float32"
# In FAST_COMPILE mode, we force the FAST_RUN mode for optimization. # In FAST_COMPILE mode, we force the FAST_RUN mode for optimization.
self.hide_error = theano.config.mode not in ['DebugMode', 'DEBUG_MODE'] self.hide_error = theano.config.mode not in ['DebugMode', 'DEBUG_MODE']
......
...@@ -7,9 +7,10 @@ import theano ...@@ -7,9 +7,10 @@ import theano
import theano.tensor as T import theano.tensor as T
from theano.tensor import TensorType from theano.tensor import TensorType
from theano.tensor.basic import alloc from theano.tensor.basic import alloc
from theano.tensor.tests.test_basic import (
rand, safe_make_node, T_reshape, T_Join_and_Split # Don't import test classes otherwise they get tested as part of the file
) from theano.tensor.tests import test_basic
from theano.tensor.tests.test_basic import rand, safe_make_node
from theano.tests.unittest_tools import SkipTest from theano.tests.unittest_tools import SkipTest
from numpy.testing.noseclasses import KnownFailureTest from numpy.testing.noseclasses import KnownFailureTest
...@@ -304,7 +305,7 @@ class TestAlloc(theano.tensor.tests.test_basic.TestAlloc): ...@@ -304,7 +305,7 @@ class TestAlloc(theano.tensor.tests.test_basic.TestAlloc):
dtype = "float32" dtype = "float32"
mode = mode_with_gpu mode = mode_with_gpu
shared = staticmethod(gpuarray_shared_constructor) shared = staticmethod(gpuarray_shared_constructor)
allocs = [GpuAlloc, GpuAlloc, T.Alloc] allocs = [GpuAlloc(), GpuAlloc(), T.Alloc()]
def test_shape(): def test_shape():
...@@ -340,33 +341,32 @@ def test_gpu_contiguous(): ...@@ -340,33 +341,32 @@ def test_gpu_contiguous():
assert f(a_val, 2).flags.c_contiguous assert f(a_val, 2).flags.c_contiguous
class G_reshape(T_reshape): class G_reshape(test_basic.T_reshape):
def shortDescription(self): def shortDescription(self):
return None return None
def __init__(self, name): def __init__(self, name):
T_reshape.__init__(self, name, test_basic.T_reshape.__init__(
shared=gpuarray_shared_constructor, self, name,
op=GpuReshape, shared=gpuarray_shared_constructor,
mode=mode_with_gpu, op=GpuReshape,
# avoid errors with limited devices mode=mode_with_gpu,
# dtype='float32', ignore_topo=(HostFromGpu, GpuFromHost,
ignore_topo=(HostFromGpu, GpuFromHost, theano.compile.DeepCopyOp,
theano.compile.DeepCopyOp, theano.sandbox.gpuarray.elemwise.GpuElemwise,
theano.sandbox.gpuarray.elemwise.GpuElemwise, theano.tensor.opt.Shape_i,
theano.tensor.opt.Shape_i, theano.tensor.opt.MakeVector))
theano.tensor.opt.MakeVector))
assert self.op == GpuReshape assert self.op == GpuReshape
class G_Join_and_Split(T_Join_and_Split): class G_Join_and_Split(test_basic.T_Join_and_Split):
def setUp(self): def setUp(self):
super(G_Join_and_Split, self).setUp() super(G_Join_and_Split, self).setUp()
self.mode = mode_with_gpu.excluding('constant_folding') self.mode = mode_with_gpu.excluding('constant_folding')
self.join_op = GpuJoin self.join_op = GpuJoin()
self.split_op = GpuSplit self.split_op_class = GpuSplit
# Use join instead of MakeVector since there is no MakeVector on GPU # Use join instead of MakeVector since there is no MakeVector on GPU
self.make_vector_op = GpuJoin self.make_vector_op = GpuJoin()
# this is to avoid errors with limited devices # this is to avoid errors with limited devices
self.floatX = 'float32' self.floatX = 'float32'
self.hide_error = theano.config.mode not in ['DebugMode', 'DEBUG_MODE'] self.hide_error = theano.config.mode not in ['DebugMode', 'DEBUG_MODE']
......
...@@ -400,208 +400,12 @@ def get_valid_shapes(): ...@@ -400,208 +400,12 @@ def get_valid_shapes():
return shapes return shapes
def test_valid_0_2():
    """Run the 'valid'-mode GPU convolution checks for kernel versions 0 and 2.

    Shapes the kernels cannot launch (too many output columns for one CUDA
    block, more than one input channel, or data that would overflow the
    16KB shared-memory budget) are filtered out before calling exec_conv.
    """
    seed_rng()
    version = [0, 2]
    verbose = 0
    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    kept = []
    for ishape, kshape, subshape, istride, kstride in get_valid_shapes():
        # Output spatial size of a 'valid' convolution: input - kernel + 1.
        oshape = [ishape[0], kshape[0]] + list(
            numpy.asarray(ishape[2:]) -
            numpy.asarray(kshape[2:]) +
            numpy.asarray([1, 1]))
        too_wide = oshape[3] > device_prop['maxThreadsDim0']
        multi_channel = ishape[1] > 1
        # 4-byte elements; ~150 bytes are kept free out of 16KB shared mem.
        over_smem = ((numpy.prod(ishape[2:]) + numpy.prod(kshape[2:])) * 4 >
                     (16 * 1024 - 150))
        if not (too_wide or multi_channel or over_smem) and subshape == (1, 1):
            kept.append((ishape, kshape, subshape, istride, kstride))

    exec_conv(version, kept, verbose, random, 'valid',
              print_=print_, ones=ones, rtol=1.1e-5)
def test_valid_1_3_11_12():
    """Run the 'valid'-mode GPU convolution checks for kernel versions
    1, 3, 11 and 12.

    Unlike versions 0/2, multi-channel inputs are allowed; shapes are only
    dropped when the output row is too wide for a CUDA block or the data
    would overflow the 16KB shared-memory budget.
    """
    seed_rng()
    version = [1, 3, 11, 12]
    verbose = 0
    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    kept = []
    for ishape, kshape, subshape, istride, kstride in get_valid_shapes():
        # Output spatial size of a 'valid' convolution: input - kernel + 1.
        oshape = [ishape[0], kshape[0]] + list(
            numpy.asarray(ishape[2:]) -
            numpy.asarray(kshape[2:]) +
            numpy.asarray([1, 1]))
        too_wide = oshape[3] > device_prop['maxThreadsDim0']
        # 4-byte elements; ~150 bytes are kept free out of 16KB shared mem.
        over_smem = ((numpy.prod(ishape[2:]) + numpy.prod(kshape[2:])) * 4 >
                     (16 * 1024 - 150))
        if not (too_wide or over_smem) and subshape == (1, 1):
            kept.append((ishape, kshape, subshape, istride, kstride))

    exec_conv(version, kept, verbose, random, 'valid',
              print_=print_, ones=ones, rtol=1.1e-5)
def test_valid_4():
    """Run the 'valid'-mode GPU convolution checks for kernel version 4.

    Version 4 only handles single-channel inputs and has its own
    shared-memory footprint (one image row per kernel row plus the whole
    kernel), so shapes are filtered accordingly.
    """
    seed_rng()
    version = [4]
    verbose = 0
    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    kept = []
    for ishape, kshape, subshape, istride, kstride in get_valid_shapes():
        # Output spatial size of a 'valid' convolution: input - kernel + 1.
        oshape = [ishape[0], kshape[0]] + list(
            numpy.asarray(ishape[2:]) -
            numpy.asarray(kshape[2:]) +
            numpy.asarray([1, 1]))
        too_wide = oshape[3] > device_prop['maxThreadsDim0']
        multi_channel = ishape[1] > 1
        # 4-byte elements; ~150 bytes are kept free out of 16KB shared mem.
        over_smem = ((kshape[2] * ishape[3] * 4 + numpy.prod(kshape[2:]) * 4) >
                     (16 * 1024 - 150))
        if not (too_wide or multi_channel or over_smem) and subshape == (1, 1):
            kept.append((ishape, kshape, subshape, istride, kstride))

    exec_conv(version, kept, verbose, random, 'valid',
              print_=print_, ones=ones, rtol=1.1e-5)
def test_valid_5():
    """Run the 'valid'-mode GPU convolution checks for kernel version 5.

    Shares version 4's shared-memory footprint filter but, unlike
    version 4, accepts multi-channel inputs.
    """
    seed_rng()
    version = [5]
    verbose = 0
    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    kept = []
    for ishape, kshape, subshape, istride, kstride in get_valid_shapes():
        # Output spatial size of a 'valid' convolution: input - kernel + 1.
        oshape = [ishape[0], kshape[0]] + list(
            numpy.asarray(ishape[2:]) -
            numpy.asarray(kshape[2:]) +
            numpy.asarray([1, 1]))
        too_wide = oshape[3] > device_prop['maxThreadsDim0']
        # 4-byte elements; ~150 bytes are kept free out of 16KB shared mem.
        over_smem = ((kshape[2] * ishape[3] * 4 + numpy.prod(kshape[2:]) * 4) >
                     (16 * 1024 - 150))
        if not (too_wide or over_smem) and subshape == (1, 1):
            kept.append((ishape, kshape, subshape, istride, kstride))

    exec_conv(version, kept, verbose, random, 'valid',
              print_=print_, ones=ones, rtol=1.1e-5)
def test_valid_7_8_13():
    """Run the 'valid'-mode GPU convolution checks for kernel versions
    7, 8 and 13.

    These kernels launch one thread per output pixel (rows * cols), so the
    block-size filter uses the full output plane, and the shared-memory
    filter accounts for both the input tile and a double output buffer.
    """
    seed_rng()
    shapes = get_valid_shapes()
    # Extra large case exercising the "new" lower shared-memory code path.
    shapes.append(((10, 30, 60, 60), (20, 30, 40, 40),
                   (1, 1), (1, 1), (1, 1)))
    version = [7, 8, 13]
    verbose = 0
    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    kept = []
    for ishape, kshape, subshape, istride, kstride in shapes:
        # Output spatial size of a 'valid' convolution: input - kernel + 1.
        oshape = [ishape[0], kshape[0]] + list(
            numpy.asarray(ishape[2:]) -
            numpy.asarray(kshape[2:]) +
            numpy.asarray([1, 1]))
        too_wide = oshape[2] * oshape[3] > device_prop['maxThreadsDim0']
        # 4-byte elements; ~150 bytes are kept free out of 16KB shared mem.
        over_smem = (max(numpy.prod(ishape[2:]) * 4 + 2 * kshape[3] * 4,
                         oshape[2] * oshape[3] * 4 * 2) > (16 * 1024 - 150))
        if not (too_wide or over_smem) and subshape == (1, 1):
            kept.append((ishape, kshape, subshape, istride, kstride))

    exec_conv(version, kept, verbose, random, 'valid',
              print_=print_, ones=ones, rtol=1.1e-5)
def test_valid_9_10():
    """Run the 'valid'-mode GPU convolution checks for kernel versions
    9 and 10.
    """
    seed_rng()
    version = [9, 10]
    verbose = 0
    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    kept = []
    for ishape, kshape, subshape, istride, kstride in get_valid_shapes():
        # Output spatial size of a 'valid' convolution: input - kernel + 1.
        oshape = [ishape[0], kshape[0]] + list(
            numpy.asarray(ishape[2:]) -
            numpy.asarray(kshape[2:]) +
            numpy.asarray([1, 1]))
        too_wide = oshape[3] > device_prop['maxThreadsDim0']
        # NOTE(review): kshape[3] is scaled by 4 bytes but ishape[3] is not
        # — possibly intentional for these kernels' smem layout; verify
        # against the kernel source before changing.
        over_smem = (kshape[3] * 4 + ishape[3]) > (16 * 1024 - 150)
        if not (too_wide or over_smem) and subshape == (1, 1):
            kept.append((ishape, kshape, subshape, istride, kstride))

    exec_conv(version, kept, verbose, random, 'valid',
              print_=print_, ones=ones, rtol=1.1e-5)
def test_valid(): def test_valid():
seed_rng() seed_rng()
shapes = get_valid_shapes() shapes = get_valid_shapes()
# shapes=shapes[400:426] version = [-1]
# I put -1 in case we forget to add version in the test to.
# I put -2 to test the reference version.
version = [-2, -1, 6]
verbose = 0 verbose = 0
# version=[1]
random = True random = True
print_ = False print_ = False
...@@ -666,10 +470,8 @@ def test_full(): ...@@ -666,10 +470,8 @@ def test_full():
, ((2, 4, 1050, 13), (3, 4, 10, 11), (1, 1), (1, 1), (1, 1)) , ((2, 4, 1050, 13), (3, 4, 10, 11), (1, 1), (1, 1), (1, 1))
] ]
# shapes=shapes[:277] version = [-1]
version = [-2, -1, 0, 1, 2, 3, 4, 5]
verbose = 0 verbose = 0
# version=[4]
random = True random = True
exec_conv(version, shapes, verbose, random, 'full') exec_conv(version, shapes, verbose, random, 'full')
...@@ -689,9 +491,8 @@ def test_subsample(): ...@@ -689,9 +491,8 @@ def test_subsample():
shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 1)) shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 1))
shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 2)) shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 2))
# We put only the version that implement the subsample to make the test faster. version_valid = [-1]
version_valid = [-2, -1, 1, 3, 11, 12] version_full = [-1]
version_full = [-2, -1]
verbose = 0 verbose = 0
random = True random = True
print_ = False print_ = False
......
...@@ -12,6 +12,3 @@ class T_GpuImages2Neibs(test_neighbours.T_Images2Neibs): ...@@ -12,6 +12,3 @@ class T_GpuImages2Neibs(test_neighbours.T_Images2Neibs):
mode = mode_with_gpu mode = mode_with_gpu
op = GpuImages2Neibs op = GpuImages2Neibs
dtypes = ['int64', 'float32', 'float64'] dtypes = ['int64', 'float32', 'float64']
if __name__ == '__main__':
unittest.main()
...@@ -3,8 +3,7 @@ import numpy ...@@ -3,8 +3,7 @@ import numpy
import theano import theano
from theano import tensor from theano import tensor
from theano.compile import DeepCopyOp from theano.compile import DeepCopyOp
from theano.tensor.tests import test_subtensor
from theano.tensor.tests.test_subtensor import T_subtensor
from ..basic_ops import HostFromGpu, GpuFromHost from ..basic_ops import HostFromGpu, GpuFromHost
from ..subtensor import (GpuIncSubtensor, GpuSubtensor, from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
...@@ -15,21 +14,22 @@ from .test_basic_ops import mode_with_gpu ...@@ -15,21 +14,22 @@ from .test_basic_ops import mode_with_gpu
class G_subtensor(T_subtensor): class G_subtensor(test_subtensor.T_subtensor):
def shortDescription(self): def shortDescription(self):
return None return None
def __init__(self, name): def __init__(self, name):
T_subtensor.__init__(self, name, test_subtensor.T_subtensor.__init__(
shared=gpuarray_shared_constructor, self, name,
sub=GpuSubtensor, shared=gpuarray_shared_constructor,
inc_sub=GpuIncSubtensor, sub=GpuSubtensor,
adv_incsub1=GpuAdvancedIncSubtensor1, inc_sub=GpuIncSubtensor,
mode=mode_with_gpu, adv_incsub1=GpuAdvancedIncSubtensor1,
# avoid errors with limited devices mode=mode_with_gpu,
dtype='float32', # avoid errors with limited devices
ignore_topo=(HostFromGpu, GpuFromHost, dtype='float32',
DeepCopyOp)) ignore_topo=(HostFromGpu, GpuFromHost,
DeepCopyOp))
# GPU opt can't run in fast_compile only. # GPU opt can't run in fast_compile only.
self.fast_compile = False self.fast_compile = False
assert self.sub == GpuSubtensor assert self.sub == GpuSubtensor
......
...@@ -3539,7 +3539,7 @@ class Join(Op): ...@@ -3539,7 +3539,7 @@ class Join(Op):
split_gz = [split_gz] split_gz = [split_gz]
# Split.make_node isn't always able to infer the right # Split.make_node isn't always able to infer the right
# broadcast. As the grad need to keep the information, # broadcast. As the grad need to keep the information,
# readd it if needed. # read it if needed.
split_gz = [patternbroadcast(g, t.broadcastable) split_gz = [patternbroadcast(g, t.broadcastable)
for t, g in zip(tensors, split_gz)] for t, g in zip(tensors, split_gz)]
rval = rval + split_gz rval = rval + split_gz
......
...@@ -2549,8 +2549,7 @@ compile.optdb.register('local_inplace_setsubtensor', ...@@ -2549,8 +2549,7 @@ compile.optdb.register('local_inplace_setsubtensor',
def local_inplace_incsubtensor1(node): def local_inplace_incsubtensor1(node):
""" also work for GpuAdvancedIncSubtensor1 """ """ also work for GpuAdvancedIncSubtensor1 """
if isinstance(node.op, AdvancedIncSubtensor1) and not node.op.inplace: if isinstance(node.op, AdvancedIncSubtensor1) and not node.op.inplace:
new_op = node.op.__class__( new_op = node.op.clone_inplace()
inplace=True, set_instead_of_inc=node.op.set_instead_of_inc)
new_node = new_op(*node.inputs) new_node = new_op(*node.inputs)
return [new_node] return [new_node]
return False return False
...@@ -5258,7 +5257,8 @@ for i in xrange(1,len(p64)): print i, 64[i]-p64[i-1] ...@@ -5258,7 +5257,8 @@ for i in xrange(1,len(p64)): print i, 64[i]-p64[i-1]
# ############### # ###############
# # Loop fusion # # # Loop fusion #
# ############### # ###############
def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 1024): def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 1024,
maker=None):
""" """
We parametrize it to make it work for Elemwise and GpuElemwise op. We parametrize it to make it work for Elemwise and GpuElemwise op.
...@@ -5277,6 +5277,9 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 1024): ...@@ -5277,6 +5277,9 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 1024):
enough that if we hit it, I'm not sure it enough that if we hit it, I'm not sure it
will affect performance. will affect performance.
""" """
if maker is None:
def maker(node, scalar_op):
return OP(scalar_op)
def local_fuse(node): def local_fuse(node):
""" """
As part of specialization, we fuse two consecutive elemwise Ops of the As part of specialization, we fuse two consecutive elemwise Ops of the
...@@ -5458,7 +5461,7 @@ your code will run correctly, but may be slower.""") ...@@ -5458,7 +5461,7 @@ your code will run correctly, but may be slower.""")
# create the new node. # create the new node.
# Do not call make_node to have test_value # Do not call make_node to have test_value
n = OP(C)(*inputs).owner n = maker(node, C)(*inputs).owner
assert len(n.outputs) == 1 assert len(n.outputs) == 1
assert node.outputs[0].dtype == n.outputs[0].dtype assert node.outputs[0].dtype == n.outputs[0].dtype
......
...@@ -1600,23 +1600,13 @@ def _sum_grad_over_bcasted_dims(x, gx): ...@@ -1600,23 +1600,13 @@ def _sum_grad_over_bcasted_dims(x, gx):
class AdvancedSubtensor1(Op): class AdvancedSubtensor1(Op):
"""Implement x[ilist] where ilist is a vector of integers.""" """Implement x[ilist] where ilist is a vector of integers."""
# sparse_grad doesn't go in here since it only affects the output
# of the grad() method.
__props__ = ()
def __init__(self, sparse_grad=False): def __init__(self, sparse_grad=False):
self.sparse_grad = sparse_grad self.sparse_grad = sparse_grad
def __hash__(self):
return hash(type(self))
def __eq__(self, other):
# Don't check the sparse_grad attribute as
# This don't change the output of this op
# So we want the merge optimier to merge two op
# that differ from there sparse_grad attribute.
return type(self) == type(other)
def __str__(self):
return self.__class__.__name__
def make_node(self, x, ilist): def make_node(self, x, ilist):
x_ = theano.tensor.as_tensor_variable(x) x_ = theano.tensor.as_tensor_variable(x)
ilist_ = theano.tensor.as_tensor_variable(ilist) ilist_ = theano.tensor.as_tensor_variable(ilist)
...@@ -1794,19 +1784,18 @@ advanced_subtensor1 = AdvancedSubtensor1() ...@@ -1794,19 +1784,18 @@ advanced_subtensor1 = AdvancedSubtensor1()
class AdvancedIncSubtensor1(Op): class AdvancedIncSubtensor1(Op):
"""Increments a subtensor using advanced slicing (list of index)""" """Increments a subtensor using advanced slicing (list of index)"""
__props__ = ('inplace', 'set_instead_of_inc')
def __init__(self, inplace=False, set_instead_of_inc=False): def __init__(self, inplace=False, set_instead_of_inc=False):
self.inplace = inplace self.inplace = inplace
self.set_instead_of_inc = set_instead_of_inc self.set_instead_of_inc = set_instead_of_inc
if inplace: if inplace:
self.destroy_map = {0: [0]} self.destroy_map = {0: [0]}
def __hash__(self): def clone_inplace(self):
return hash((type(self), self.inplace, self.set_instead_of_inc)) return self.__class__(
inplace=True,
def __eq__(self, other): set_instead_of_inc=self.set_instead_of_inc)
return (type(self) == type(other)
and self.inplace == other.inplace
and self.set_instead_of_inc == other.set_instead_of_inc)
def __str__(self): def __str__(self):
if self.inplace: if self.inplace:
......
...@@ -2099,7 +2099,7 @@ class TestAlloc(unittest.TestCase): ...@@ -2099,7 +2099,7 @@ class TestAlloc(unittest.TestCase):
dtype = config.floatX dtype = config.floatX
mode = mode_opt mode = mode_opt
shared = staticmethod(theano.shared) shared = staticmethod(theano.shared)
allocs = [tensor.Alloc] * 3 allocs = [tensor.Alloc()] * 3
def setUp(self): def setUp(self):
self.rng = numpy.random.RandomState(seed=utt.fetch_seed()) self.rng = numpy.random.RandomState(seed=utt.fetch_seed())
...@@ -2131,13 +2131,13 @@ class TestAlloc(unittest.TestCase): ...@@ -2131,13 +2131,13 @@ class TestAlloc(unittest.TestCase):
#<= is needed as the GPU currently don't implement #<= is needed as the GPU currently don't implement
# AdvancedIncSubtensor. When this is the case it can be # AdvancedIncSubtensor. When this is the case it can be
# replaced with ==. # replaced with ==.
assert numpy.sum([isinstance(node.op, alloc) assert numpy.sum([isinstance(node.op, type(alloc))
for node in topo_obj]) <= 1 for node in topo_obj]) <= 1
topo_grad = fgrad.maker.fgraph.toposort() topo_grad = fgrad.maker.fgraph.toposort()
# print subtensor # print subtensor
# theano.printing.debugprint(fgrad) # theano.printing.debugprint(fgrad)
assert numpy.sum([isinstance(node.op, alloc) assert numpy.sum([isinstance(node.op, type(alloc))
for node in topo_grad]) == n_alloc, ( for node in topo_grad]) == n_alloc, (
alloc, subtensor, n_alloc, topo_grad) alloc, subtensor, n_alloc, topo_grad)
fobj(test_params) fobj(test_params)
...@@ -2148,46 +2148,51 @@ class TestAlloc(unittest.TestCase): ...@@ -2148,46 +2148,51 @@ class TestAlloc(unittest.TestCase):
for alloc in self.allocs: for alloc in self.allocs:
# The output is the result of the alloc operation, # The output is the result of the alloc operation,
# we do not want it to be constant-folded # we do not want it to be constant-folded
out = alloc()(val, 50, 60) out = alloc(val, 50, 60)
f = theano.function([], out) f = theano.function([], out, mode=self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert numpy.sum([isinstance(node.op, alloc) assert numpy.sum([isinstance(node.op, type(alloc))
for node in topo]) == 1 for node in topo]) == 1
assert not isinstance(topo[0].op, DeepCopyOp) assert not isinstance(topo[0].op, DeepCopyOp)
def test_ones(self): def test_ones(self):
for shp in [[], 1, [1], [1, 2], [1, 2, 3]]: for shp in [[], 1, [1], [1, 2], [1, 2, 3]]:
ones = theano.function([], [tensor.ones(shp)]) ones = theano.function([], [tensor.ones(shp)], mode=self.mode)
assert numpy.allclose(ones(), numpy.ones(shp)) assert numpy.allclose(ones(), numpy.ones(shp))
# scalar doesn't have to be provided as input # scalar doesn't have to be provided as input
x = scalar() x = scalar()
shp = [] shp = []
ones_scalar = theano.function([], [tensor.ones(x.shape)]) ones_scalar = theano.function([], [tensor.ones(x.shape)],
mode=self.mode)
assert numpy.allclose(ones_scalar(), numpy.ones(shp)) assert numpy.allclose(ones_scalar(), numpy.ones(shp))
for (typ, shp) in [(vector, [3]), (matrix, [3, 4])]: for (typ, shp) in [(vector, [3]), (matrix, [3, 4])]:
x = typ() x = typ()
ones_tensor = theano.function([x], [tensor.ones(x.shape)]) ones_tensor = theano.function([x], [tensor.ones(x.shape)],
mode=self.mode)
inp = numpy.zeros(shp, dtype=config.floatX) inp = numpy.zeros(shp, dtype=config.floatX)
assert numpy.allclose(ones_tensor(inp), assert numpy.allclose(ones_tensor(inp),
numpy.ones(shp)) numpy.ones(shp))
def test_zeros(self): def test_zeros(self):
for shp in [[], 1, [1], [1, 2], [1, 2, 3]]: for shp in [[], 1, [1], [1, 2], [1, 2, 3]]:
zeros = theano.function([], [tensor.zeros(shp)]) zeros = theano.function([], [tensor.zeros(shp)],
mode=self.mode)
assert numpy.allclose(zeros(), numpy.zeros(shp)) assert numpy.allclose(zeros(), numpy.zeros(shp))
# scalar doesn't have to be provided as input # scalar doesn't have to be provided as input
x = scalar() x = scalar()
shp = [] shp = []
zeros_scalar = theano.function([], [tensor.zeros(x.shape)]) zeros_scalar = theano.function([], [tensor.zeros(x.shape)],
mode=self.mode)
assert numpy.allclose(zeros_scalar(), numpy.zeros(shp)) assert numpy.allclose(zeros_scalar(), numpy.zeros(shp))
for (typ, shp) in [(vector, [3]), (matrix, [3, 4])]: for (typ, shp) in [(vector, [3]), (matrix, [3, 4])]:
x = typ() x = typ()
zeros_tensor = theano.function([x], [tensor.zeros(x.shape)]) zeros_tensor = theano.function([x], [tensor.zeros(x.shape)],
mode=self.mode)
inp = numpy.zeros(shp, dtype=config.floatX) inp = numpy.zeros(shp, dtype=config.floatX)
assert numpy.allclose(zeros_tensor(inp), assert numpy.allclose(zeros_tensor(inp),
numpy.zeros(shp)) numpy.zeros(shp))
...@@ -3187,9 +3192,9 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3187,9 +3192,9 @@ class T_Join_and_Split(unittest.TestCase):
self.mode = theano.compile.get_default_mode().excluding( self.mode = theano.compile.get_default_mode().excluding(
'constant_folding' 'constant_folding'
) )
self.join_op = Join self.join_op = Join()
self.split_op = Split self.split_op_class = Split
self.make_vector_op = opt.MakeVector self.make_vector_op = opt.MakeVector()
self.floatX = config.floatX self.floatX = config.floatX
self.hide_error = theano.config.mode not in ['DebugMode', self.hide_error = theano.config.mode not in ['DebugMode',
'DEBUG_MODE', 'DEBUG_MODE',
...@@ -3199,7 +3204,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3199,7 +3204,8 @@ class T_Join_and_Split(unittest.TestCase):
def eval_outputs_and_check_join(self, outputs): def eval_outputs_and_check_join(self, outputs):
f = theano.function([], outputs, self.mode) f = theano.function([], outputs, self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert [True for node in topo if isinstance(node.op, self.join_op)] assert [True for node in topo
if isinstance(node.op, type(self.join_op))]
variables = f() variables = f()
if isinstance(variables, (tuple, list)) and len(variables) == 1: if isinstance(variables, (tuple, list)) and len(variables) == 1:
return variables[0] return variables[0]
...@@ -3211,7 +3217,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3211,7 +3217,8 @@ class T_Join_and_Split(unittest.TestCase):
make_vector_op = self.make_vector_op make_vector_op = self.make_vector_op
f = theano.function([], outputs, self.mode) f = theano.function([], outputs, self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert [True for node in topo if isinstance(node.op, make_vector_op)] assert [True for node in topo
if isinstance(node.op, type(make_vector_op))]
variables = f() variables = f()
if isinstance(variables, (tuple, list)) and len(variables) == 1: if isinstance(variables, (tuple, list)) and len(variables) == 1:
return variables[0] return variables[0]
...@@ -3233,7 +3240,7 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3233,7 +3240,7 @@ class T_Join_and_Split(unittest.TestCase):
c = tensor._shared(numpy.asarray(3.0, dtype=self.floatX)) c = tensor._shared(numpy.asarray(3.0, dtype=self.floatX))
s = stack(a, b, c) s = stack(a, b, c)
want = numpy.array([1, 2, 3]) want = numpy.array([1, 2, 3])
out = self.eval_outputs_and_check_vector([s], opt.MakeVector) out = self.eval_outputs_and_check_vector([s], opt.MakeVector())
self.assertTrue((out == want).all()) self.assertTrue((out == want).all())
def test_stack_scalar(self): def test_stack_scalar(self):
...@@ -3259,7 +3266,7 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3259,7 +3266,7 @@ class T_Join_and_Split(unittest.TestCase):
self.assertTrue(numpy.all(val == [1, 2, 1, 2])) self.assertTrue(numpy.all(val == [1, 2, 1, 2]))
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len([n for n in topo if isinstance(n.op, opt.MakeVector)]) > 0 assert len([n for n in topo if isinstance(n.op, opt.MakeVector)]) > 0
assert len([n for n in topo if isinstance(n, self.join_op)]) == 0 assert len([n for n in topo if isinstance(n, type(self.join_op))]) == 0
assert f.maker.fgraph.outputs[0].dtype == self.floatX assert f.maker.fgraph.outputs[0].dtype == self.floatX
def test_stack_scalar_make_vector_dtype(self): def test_stack_scalar_make_vector_dtype(self):
...@@ -3273,7 +3280,7 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3273,7 +3280,7 @@ class T_Join_and_Split(unittest.TestCase):
self.assertTrue(numpy.all(val == [1, 2, 1, 2])) self.assertTrue(numpy.all(val == [1, 2, 1, 2]))
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len([n for n in topo if isinstance(n.op, opt.MakeVector)]) > 0 assert len([n for n in topo if isinstance(n.op, opt.MakeVector)]) > 0
assert len([n for n in topo if isinstance(n, self.join_op)]) == 0 assert len([n for n in topo if isinstance(n, type(self.join_op))]) == 0
assert f.maker.fgraph.outputs[0].dtype == 'int64' assert f.maker.fgraph.outputs[0].dtype == 'int64'
def test_stack_scalar_make_vector_constant(self): def test_stack_scalar_make_vector_constant(self):
...@@ -3289,7 +3296,7 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3289,7 +3296,7 @@ class T_Join_and_Split(unittest.TestCase):
self.assertTrue(numpy.all(val == [10, 1, 2, 3])) self.assertTrue(numpy.all(val == [10, 1, 2, 3]))
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len([n for n in topo if isinstance(n.op, opt.MakeVector)]) > 0 assert len([n for n in topo if isinstance(n.op, opt.MakeVector)]) > 0
assert len([n for n in topo if isinstance(n, self.join_op)]) == 0 assert len([n for n in topo if isinstance(n, type(self.join_op))]) == 0
assert f.maker.fgraph.outputs[0].dtype == 'int64' assert f.maker.fgraph.outputs[0].dtype == 'int64'
def test_stack_hessian(self): def test_stack_hessian(self):
...@@ -3459,8 +3466,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3459,8 +3466,8 @@ class T_Join_and_Split(unittest.TestCase):
out = self.eval_outputs_and_check_join([s]) out = self.eval_outputs_and_check_join([s])
self.assertTrue((out == want).all()) self.assertTrue((out == want).all())
assert (grad(s.sum(), b).eval() == 0).all() assert (numpy.asarray(grad(s.sum(), b).eval()) == 0).all()
assert (grad(s.sum(), a).eval() == 0).all() assert (numpy.asarray(grad(s.sum(), a).eval()) == 0).all()
def test_join_matrix1_using_vertical_stack(self): def test_join_matrix1_using_vertical_stack(self):
a = self.shared(numpy.array([[1, 2, 3], [4, 5, 6]], dtype=self.floatX)) a = self.shared(numpy.array([[1, 2, 3], [4, 5, 6]], dtype=self.floatX))
...@@ -3499,7 +3506,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3499,7 +3506,8 @@ class T_Join_and_Split(unittest.TestCase):
f = inplace_func([ax], [s], mode=self.mode) f = inplace_func([ax], [s], mode=self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert [True for node in topo if isinstance(node.op, self.join_op)] assert [True for node in topo
if isinstance(node.op, type(self.join_op))]
want = numpy.array([[.1, .2, .3], [.4, .5, .6], want = numpy.array([[.1, .2, .3], [.4, .5, .6],
[.1, .2, .3], [.4, .5, .6]]) [.1, .2, .3], [.4, .5, .6]])
...@@ -3540,25 +3548,26 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3540,25 +3548,26 @@ class T_Join_and_Split(unittest.TestCase):
a = self.shared(a_val, broadcastable=(False, False, True)) a = self.shared(a_val, broadcastable=(False, False, True))
b = self.shared(b_val, broadcastable=(True, False, True)) b = self.shared(b_val, broadcastable=(True, False, True))
c = self.join_op()(1, a, b) c = self.join_op(1, a, b)
assert c.type.broadcastable[0] and c.type.broadcastable[2] assert c.type.broadcastable[0] and c.type.broadcastable[2]
assert not c.type.broadcastable[1] assert not c.type.broadcastable[1]
# Opt can remplace the int by a Theano constant # Opt can remplace the int by a Theano constant
c = self.join_op()(theano.tensor.constant(1), a, b) c = self.join_op(theano.tensor.constant(1), a, b)
assert c.type.broadcastable[0] and c.type.broadcastable[2] assert c.type.broadcastable[0] and c.type.broadcastable[2]
assert not c.type.broadcastable[1] assert not c.type.broadcastable[1]
# In case futur opt insert other useless stuff # In case futur opt insert other useless stuff
c = self.join_op()(theano.tensor.cast(theano.tensor.constant(1), c = self.join_op(theano.tensor.cast(theano.tensor.constant(1),
dtype="int32"), dtype="int32"),
a, b) a, b)
assert c.type.broadcastable[0] and c.type.broadcastable[2] assert c.type.broadcastable[0] and c.type.broadcastable[2]
assert not c.type.broadcastable[1] assert not c.type.broadcastable[1]
f = function([], c, mode=self.mode) f = function([], c, mode=self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert [True for node in topo if isinstance(node.op, self.join_op)] assert [True for node in topo
if isinstance(node.op, type(self.join_op))]
f() f()
utt.verify_grad((lambda a, b: join(1, a, b)), [a_val, b_val], rng=rng, utt.verify_grad((lambda a, b: join(1, a, b)), [a_val, b_val], rng=rng,
...@@ -3580,12 +3589,13 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3580,12 +3589,13 @@ class T_Join_and_Split(unittest.TestCase):
a = self.shared(a_val, broadcastable=(False, False, True)) a = self.shared(a_val, broadcastable=(False, False, True))
b = self.shared(b_val, broadcastable=(True, False, True)) b = self.shared(b_val, broadcastable=(True, False, True))
c = self.join_op()(0, a, b) c = self.join_op(0, a, b)
assert not c.type.broadcastable[0] assert not c.type.broadcastable[0]
f = function([], c, mode=self.mode) f = function([], c, mode=self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert [True for node in topo if isinstance(node.op, self.join_op)] assert [True for node in topo
if isinstance(node.op, type(self.join_op))]
f() f()
utt.verify_grad((lambda a, b: join(0, a, b)), [a_val, b_val], rng=rng, utt.verify_grad((lambda a, b: join(0, a, b)), [a_val, b_val], rng=rng,
...@@ -3596,7 +3606,7 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3596,7 +3606,7 @@ class T_Join_and_Split(unittest.TestCase):
rng.rand(3, 4, 1).astype(self.floatX)) rng.rand(3, 4, 1).astype(self.floatX))
a = TensorType(dtype=self.floatX, broadcastable=[0, 0, 1])() a = TensorType(dtype=self.floatX, broadcastable=[0, 0, 1])()
b = TensorType(dtype=self.floatX, broadcastable=[1, 0, 1])() b = TensorType(dtype=self.floatX, broadcastable=[1, 0, 1])()
c = join(0, a, b) c = self.join_op(0, a, b)
f = function([a, b], c, mode=self.mode) f = function([a, b], c, mode=self.mode)
bad_b_val = rng.rand(3, 4, 1).astype(self.floatX) bad_b_val = rng.rand(3, 4, 1).astype(self.floatX)
self.assertRaises(TypeError, f, a_val, bad_b_val) self.assertRaises(TypeError, f, a_val, bad_b_val)
...@@ -3613,12 +3623,13 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3613,12 +3623,13 @@ class T_Join_and_Split(unittest.TestCase):
a = self.shared(a_val, broadcastable=(True, False, True)) a = self.shared(a_val, broadcastable=(True, False, True))
b = self.shared(b_val, broadcastable=(True, False, True)) b = self.shared(b_val, broadcastable=(True, False, True))
c = self.join_op()(0, a, b) c = self.join_op(0, a, b)
assert not c.type.broadcastable[0] assert not c.type.broadcastable[0]
f = function([], c, mode=self.mode) f = function([], c, mode=self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert [True for node in topo if isinstance(node.op, self.join_op)] assert [True for node in topo
if isinstance(node.op, type(self.join_op))]
f() f()
utt.verify_grad((lambda a, b: join(0, a, b)), [a_val, b_val], rng=rng, utt.verify_grad((lambda a, b: join(0, a, b)), [a_val, b_val], rng=rng,
...@@ -3630,7 +3641,7 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3630,7 +3641,7 @@ class T_Join_and_Split(unittest.TestCase):
rng = numpy.random.RandomState(seed=utt.fetch_seed()) rng = numpy.random.RandomState(seed=utt.fetch_seed())
a_val = rng.rand(1, 4, 1).astype(self.floatX) a_val = rng.rand(1, 4, 1).astype(self.floatX)
a = self.shared(a_val, broadcastable=(True, False, True)) a = self.shared(a_val, broadcastable=(True, False, True))
b = self.join_op()(0, a) b = self.join_op(0, a)
assert b.type.broadcastable[0] assert b.type.broadcastable[0]
assert b.type.broadcastable[2] assert b.type.broadcastable[2]
assert not b.type.broadcastable[1] assert not b.type.broadcastable[1]
...@@ -3638,8 +3649,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3638,8 +3649,8 @@ class T_Join_and_Split(unittest.TestCase):
f = function([], b, mode=self.mode) f = function([], b, mode=self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
if theano.config.mode != 'FAST_COMPILE': if theano.config.mode != 'FAST_COMPILE':
assert not [True for node in topo if isinstance( assert not [True for node in topo
node.op, self.join_op)] if isinstance(node.op, type(self.join_op))]
f() f()
utt.verify_grad((lambda a: join(0, a)), [a_val], rng=rng, utt.verify_grad((lambda a: join(0, a)), [a_val], rng=rng,
...@@ -3657,19 +3668,20 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3657,19 +3668,20 @@ class T_Join_and_Split(unittest.TestCase):
c = TensorType(dtype=self.floatX, broadcastable=[1, 0, 0, 0, 0, 0])() c = TensorType(dtype=self.floatX, broadcastable=[1, 0, 0, 0, 0, 0])()
d = TensorType(dtype=self.floatX, broadcastable=[1, 0, 1, 1, 0, 1])() d = TensorType(dtype=self.floatX, broadcastable=[1, 0, 1, 1, 0, 1])()
e = TensorType(dtype=self.floatX, broadcastable=[1, 0, 1, 0, 0, 1])() e = TensorType(dtype=self.floatX, broadcastable=[1, 0, 1, 0, 0, 1])()
f = join(0, a, b, c, d, e) f = self.join_op(0, a, b, c, d, e)
fb = f.type.broadcastable fb = f.type.broadcastable
assert not fb[0] and fb[1] and fb[2] and fb[3] and not fb[4] and fb[5] assert not fb[0] and fb[1] and fb[2] and fb[3] and not fb[4] and fb[5]
g = join(1, a, b, c, d, e) g = self.join_op(1, a, b, c, d, e)
gb = g.type.broadcastable gb = g.type.broadcastable
assert gb[0] and not gb[1] and gb[2] and gb[3] and not gb[4] and gb[5] assert gb[0] and not gb[1] and gb[2] and gb[3] and not gb[4] and gb[5]
h = join(4, a, b, c, d, e) h = self.join_op(4, a, b, c, d, e)
hb = h.type.broadcastable hb = h.type.broadcastable
assert hb[0] and hb[1] and hb[2] and hb[3] and not hb[4] and hb[5] assert hb[0] and hb[1] and hb[2] and hb[3] and not hb[4] and hb[5]
f = function([a, b, c, d, e], f, mode=self.mode) f = function([a, b, c, d, e], f, mode=self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert [True for node in topo if isinstance(node.op, self.join_op)] assert [True for node in topo
if isinstance(node.op, type(self.join_op))]
rng = numpy.random.RandomState(seed=utt.fetch_seed()) rng = numpy.random.RandomState(seed=utt.fetch_seed())
a_val = rng.rand(1, 1, 1, 1, 2, 1).astype(self.floatX) a_val = rng.rand(1, 1, 1, 1, 2, 1).astype(self.floatX)
...@@ -3710,7 +3722,7 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3710,7 +3722,7 @@ class T_Join_and_Split(unittest.TestCase):
dtype=self.floatX) dtype=self.floatX)
# Test dim 0 # Test dim 0
z = join(0, x1, x2, x3) z = self.join_op(0, x1, x2, x3)
f = theano.function([x1, x2, x3], z.shape, mode=self.mode) f = theano.function([x1, x2, x3], z.shape, mode=self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
...@@ -3719,10 +3731,10 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3719,10 +3731,10 @@ class T_Join_and_Split(unittest.TestCase):
if theano.config.mode != 'FAST_COMPILE': if theano.config.mode != 'FAST_COMPILE':
for node in f.maker.fgraph.toposort(): for node in f.maker.fgraph.toposort():
assert not isinstance(node.op, tensor.Join) assert not isinstance(node.op, type(self.join_op))
# Test dim 1 # Test dim 1
z = join(1, x1, x2, x3) z = self.join_op(1, x1, x2, x3)
f = theano.function([x1, x2, x3], z.shape, mode=self.mode) f = theano.function([x1, x2, x3], z.shape, mode=self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
...@@ -3731,7 +3743,7 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3731,7 +3743,7 @@ class T_Join_and_Split(unittest.TestCase):
if theano.config.mode != 'FAST_COMPILE': if theano.config.mode != 'FAST_COMPILE':
for node in topo: for node in topo:
assert not isinstance(node.op, tensor.Join) assert not isinstance(node.op, type(self.join_op))
# Test hide error # Test hide error
if not self.hide_error: if not self.hide_error:
...@@ -3757,8 +3769,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3757,8 +3769,8 @@ class T_Join_and_Split(unittest.TestCase):
f = function([], Tout, mode=self.mode) f = function([], Tout, mode=self.mode)
out = f() out = f()
if theano.config.mode != 'FAST_COMPILE': if theano.config.mode != 'FAST_COMPILE':
assert [True for node in f.maker.fgraph.toposort() if isinstance( assert [True for node in f.maker.fgraph.toposort()
node.op, self.join_op)] if isinstance(node.op, type(self.join_op))]
assert numpy.allclose(out, assert numpy.allclose(out,
numpy.concatenate([T_shared.get_value(), numpy.concatenate([T_shared.get_value(),
T_shared.get_value()])) T_shared.get_value()]))
...@@ -3767,14 +3779,14 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3767,14 +3779,14 @@ class T_Join_and_Split(unittest.TestCase):
rng = numpy.random.RandomState(seed=utt.fetch_seed()) rng = numpy.random.RandomState(seed=utt.fetch_seed())
v = self.shared(rng.rand(4).astype(self.floatX)) v = self.shared(rng.rand(4).astype(self.floatX))
m = self.shared(rng.rand(4, 4).astype(self.floatX)) m = self.shared(rng.rand(4, 4).astype(self.floatX))
self.assertRaises(TypeError, self.join_op(), 0, v, m) self.assertRaises(TypeError, self.join_op, 0, v, m)
def test_split_0elem(self): def test_split_0elem(self):
rng = numpy.random.RandomState(seed=utt.fetch_seed()) rng = numpy.random.RandomState(seed=utt.fetch_seed())
m = self.shared(rng.rand(4, 6).astype(self.floatX)) m = self.shared(rng.rand(4, 6).astype(self.floatX))
o = self.split_op(2)(m, 0, [4, 0]) o = self.split_op_class(2)(m, 0, [4, 0])
f = function([], o, mode=self.mode) f = function([], o, mode=self.mode)
assert any([isinstance(node.op, self.split_op) assert any([isinstance(node.op, self.split_op_class)
for node in f.maker.fgraph.toposort()]) for node in f.maker.fgraph.toposort()])
o1, o2 = f() o1, o2 = f()
assert numpy.allclose(o1, m.get_value(borrow=True)) assert numpy.allclose(o1, m.get_value(borrow=True))
...@@ -3783,9 +3795,9 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3783,9 +3795,9 @@ class T_Join_and_Split(unittest.TestCase):
def test_split_neg(self): def test_split_neg(self):
rng = numpy.random.RandomState(seed=utt.fetch_seed()) rng = numpy.random.RandomState(seed=utt.fetch_seed())
m = self.shared(rng.rand(4, 6).astype(self.floatX)) m = self.shared(rng.rand(4, 6).astype(self.floatX))
o = self.split_op(2)(m, 0, [5, -1]) o = self.split_op_class(2)(m, 0, [5, -1])
f = function([], o, mode=self.mode) f = function([], o, mode=self.mode)
assert any([isinstance(node.op, self.split_op) assert any([isinstance(node.op, self.split_op_class)
for node in f.maker.fgraph.toposort()]) for node in f.maker.fgraph.toposort()])
self.assertRaises(ValueError, f) self.assertRaises(ValueError, f)
......
...@@ -1472,14 +1472,14 @@ class BaseGemv(object): ...@@ -1472,14 +1472,14 @@ class BaseGemv(object):
x_v = x_v.astype("float32") x_v = x_v.astype("float32")
y_v = y_v.astype("float32") y_v = y_v.astype("float32")
alpha = T.dscalar('a') alpha = T.dscalar('alpha')
a = T.fmatrix('w') a = self.shared(a_v)
x = T.fvector('v') x = self.shared(x_v)
y = T.fvector('t') y = self.shared(y_v)
rval = T.dot(a, x) * alpha + y rval = T.dot(a, x) * alpha + y
f = theano.function([a, x, y, alpha], rval, mode=self.mode) f = theano.function([alpha], rval, mode=self.mode)
# this function is currently optimized so that the gemv is # this function is currently optimized so that the gemv is
# done inplace on a temporarily allocated-buffer, which is # done inplace on a temporarily allocated-buffer, which is
# then scaled by alpha and to t with a fused elemwise. # then scaled by alpha and to t with a fused elemwise.
...@@ -1491,7 +1491,7 @@ class BaseGemv(object): ...@@ -1491,7 +1491,7 @@ class BaseGemv(object):
assert node.outputs[0].dtype == 'float32' assert node.outputs[0].dtype == 'float32'
assert n_gemvs == 1, n_gemvs assert n_gemvs == 1, n_gemvs
self.assertFunctionContains1(f, self.gemv_inplace) self.assertFunctionContains1(f, self.gemv_inplace)
f(a_v, x_v, y_v, alpha_v) f(alpha_v)
class TestSgemv(TestCase, BaseGemv, unittest_tools.TestOptimizationMixin): class TestSgemv(TestCase, BaseGemv, unittest_tools.TestOptimizationMixin):
......
...@@ -27,6 +27,7 @@ def FunctionGraph(i, o): ...@@ -27,6 +27,7 @@ def FunctionGraph(i, o):
class test_DimShuffle(unittest_tools.InferShapeTester): class test_DimShuffle(unittest_tools.InferShapeTester):
op = DimShuffle op = DimShuffle
type = TensorType
def with_linker(self, linker): def with_linker(self, linker):
for xsh, shuffle, zsh in [((2, 3), (1, 'x', 0), (3, 1, 2)), for xsh, shuffle, zsh in [((2, 3), (1, 'x', 0), (3, 1, 2)),
...@@ -40,12 +41,12 @@ class test_DimShuffle(unittest_tools.InferShapeTester): ...@@ -40,12 +41,12 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
((1, 1, 1), (), ()), ((1, 1, 1), (), ()),
((1,), ('x', 'x'), (1, 1))]: ((1,), ('x', 'x'), (1, 1))]:
ib = [(entry == 1) for entry in xsh] ib = [(entry == 1) for entry in xsh]
x = TensorType('float64', ib)('x') x = self.type('float64', ib)('x')
e = self.op(ib, shuffle)(x) e = self.op(ib, shuffle)(x)
f = copy(linker).accept(FunctionGraph([x], [e])).make_function() f = copy(linker).accept(FunctionGraph([x], [e])).make_function()
assert f(numpy.ones(xsh)).shape == zsh assert f(numpy.ones(xsh)).shape == zsh
# test that DimShuffle.infer_shape work correctly # test that DimShuffle.infer_shape work correctly
x = TensorType('float64', ib)('x') x = self.type('float64', ib)('x')
e = self.op(ib, shuffle)(x) e = self.op(ib, shuffle)(x)
f = copy(linker).accept(FunctionGraph([x], f = copy(linker).accept(FunctionGraph([x],
[e.shape])).make_function() [e.shape])).make_function()
...@@ -53,12 +54,12 @@ class test_DimShuffle(unittest_tools.InferShapeTester): ...@@ -53,12 +54,12 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
# Test when we drop a axis that is not broadcastable # Test when we drop a axis that is not broadcastable
ib = [False, True, False] ib = [False, True, False]
x = TensorType('float64', ib)('x') x = self.type('float64', ib)('x')
self.assertRaises(ValueError, self.op, ib, shuffle) self.assertRaises(ValueError, self.op, ib, shuffle)
# Test when we drop a axis that don't have shape 1 # Test when we drop a axis that don't have shape 1
ib = [True, True, False] ib = [True, True, False]
x = TensorType('float64', ib)('x') x = self.type('float64', ib)('x')
e = self.op(ib, (1, 2))(x) e = self.op(ib, (1, 2))(x)
f = copy(linker).accept(FunctionGraph([x], [e.shape])).make_function() f = copy(linker).accept(FunctionGraph([x], [e.shape])).make_function()
self.assertRaises(TypeError, f, numpy.ones((2, 1, 4))) self.assertRaises(TypeError, f, numpy.ones((2, 1, 4)))
...@@ -66,7 +67,7 @@ class test_DimShuffle(unittest_tools.InferShapeTester): ...@@ -66,7 +67,7 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
# Test that we can't take a dimensions multiple time # Test that we can't take a dimensions multiple time
xsh, shuffle, zsh = ((1, 1, 4), (0, 1, 2, 0), (1, 4)) xsh, shuffle, zsh = ((1, 1, 4), (0, 1, 2, 0), (1, 4))
ib = [False, True, False] ib = [False, True, False]
x = TensorType('float64', ib)('x') x = self.type('float64', ib)('x')
self.assertRaises(ValueError, DimShuffle, ib, shuffle) self.assertRaises(ValueError, DimShuffle, ib, shuffle)
def test_perform(self): def test_perform(self):
...@@ -89,7 +90,7 @@ class test_DimShuffle(unittest_tools.InferShapeTester): ...@@ -89,7 +90,7 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
((1, 1, 1), ()), ((1, 1, 1), ()),
((1,), ('x', 'x'))]: ((1,), ('x', 'x'))]:
ib = [(entry == 1) for entry in xsh] ib = [(entry == 1) for entry in xsh]
adtens = TensorType('float64', ib)('x') adtens = self.type('float64', ib)('x')
adtens_val = numpy.ones(xsh) adtens_val = numpy.ones(xsh)
self._compile_and_check([adtens], self._compile_and_check([adtens],
[self.op(ib, shuffle)(adtens)], [self.op(ib, shuffle)(adtens)],
...@@ -97,7 +98,7 @@ class test_DimShuffle(unittest_tools.InferShapeTester): ...@@ -97,7 +98,7 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
warn=False) warn=False)
def test_too_big_rank(self): def test_too_big_rank(self):
x = tensor.dscalar() x = self.type('float64', broadcastable=())()
y = x.dimshuffle(('x',) * (numpy.MAXDIMS + 1)) y = x.dimshuffle(('x',) * (numpy.MAXDIMS + 1))
self.assertRaises(ValueError, y.eval, {x: 0}) self.assertRaises(ValueError, y.eval, {x: 0})
...@@ -328,6 +329,7 @@ class test_CAReduce(unittest_tools.InferShapeTester): ...@@ -328,6 +329,7 @@ class test_CAReduce(unittest_tools.InferShapeTester):
((), None), ((), None),
((), ()) ((), ())
] ]
type = TensorType
def with_linker(self, linker, scalar_op=scalar.add, dtype="floatX", def with_linker(self, linker, scalar_op=scalar.add, dtype="floatX",
pre_scalar_op=None, pre_scalar_op=None,
...@@ -335,7 +337,7 @@ class test_CAReduce(unittest_tools.InferShapeTester): ...@@ -335,7 +337,7 @@ class test_CAReduce(unittest_tools.InferShapeTester):
for xsh, tosum in self.cases: for xsh, tosum in self.cases:
if dtype == "floatX": if dtype == "floatX":
dtype = theano.config.floatX dtype = theano.config.floatX
x = TensorType(dtype, [(entry == 1) for entry in xsh])('x') x = self.type(dtype, [(entry == 1) for entry in xsh])('x')
d = {} d = {}
if pre_scalar_op is not None: if pre_scalar_op is not None:
d = {"pre_scalar_op": pre_scalar_op} d = {"pre_scalar_op": pre_scalar_op}
...@@ -438,7 +440,7 @@ class test_CAReduce(unittest_tools.InferShapeTester): ...@@ -438,7 +440,7 @@ class test_CAReduce(unittest_tools.InferShapeTester):
if test_nan: if test_nan:
try: try:
self.assertTrue( self.assertTrue(
theano.tensor.TensorType.values_eq(f(xv), zv), self.type.values_eq(f(xv), zv),
(f(xv), zv)) (f(xv), zv))
except NotImplementedError: except NotImplementedError:
# GpuCAReduce don't implement all cases when size is 0 # GpuCAReduce don't implement all cases when size is 0
...@@ -453,7 +455,7 @@ class test_CAReduce(unittest_tools.InferShapeTester): ...@@ -453,7 +455,7 @@ class test_CAReduce(unittest_tools.InferShapeTester):
# GpuCAReduce don't implement all cases when size is 0 # GpuCAReduce don't implement all cases when size is 0
assert xv.size == 0 assert xv.size == 0
x = TensorType(dtype, [(entry == 1) for entry in xsh])('x') x = self.type(dtype, [(entry == 1) for entry in xsh])('x')
if tensor_op is None: if tensor_op is None:
e = self.op(scalar_op, axis=tosum)(x) e = self.op(scalar_op, axis=tosum)(x)
else: else:
...@@ -538,7 +540,7 @@ class test_CAReduce(unittest_tools.InferShapeTester): ...@@ -538,7 +540,7 @@ class test_CAReduce(unittest_tools.InferShapeTester):
if dtype is None: if dtype is None:
dtype = theano.config.floatX dtype = theano.config.floatX
for xsh, tosum in self.cases: for xsh, tosum in self.cases:
x = TensorType(dtype, [(entry == 1) for entry in xsh])('x') x = self.type(dtype, [(entry == 1) for entry in xsh])('x')
if pre_scalar_op is not None: if pre_scalar_op is not None:
x = pre_scalar_op(x) x = pre_scalar_op(x)
if tosum is None: if tosum is None:
......
...@@ -49,6 +49,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -49,6 +49,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
adv_incsub1=tensor.AdvancedIncSubtensor1, adv_incsub1=tensor.AdvancedIncSubtensor1,
mode=None, mode=None,
dtype=theano.config.floatX, dtype=theano.config.floatX,
type=tensor.TensorType,
ignore_topo=DeepCopyOp): ignore_topo=DeepCopyOp):
self.shared = shared self.shared = shared
self.sub = sub self.sub = sub
...@@ -59,6 +60,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -59,6 +60,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
mode = theano.compile.mode.get_default_mode() mode = theano.compile.mode.get_default_mode()
self.mode = mode self.mode = mode
self.dtype = dtype self.dtype = dtype
self.type = type
self.ignore_topo = ignore_topo self.ignore_topo = ignore_topo
self.fast_compile = theano.config.mode == 'FAST_COMPILE' self.fast_compile = theano.config.mode == 'FAST_COMPILE'
self.ops = (sub, inc_sub, adv_sub1, adv_incsub1) self.ops = (sub, inc_sub, adv_sub1, adv_incsub1)
...@@ -88,8 +90,10 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -88,8 +90,10 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
Subtensor.debug = False Subtensor.debug = False
utt.seed_rng() utt.seed_rng()
def eval_output_and_check(self, t, list=False): def eval_output_and_check(self, t, list=False, mode=None):
f = inplace_func([], t, mode=self.mode) if mode is None:
mode = self.mode
f = inplace_func([], t, mode=mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
topo_ = [node for node in topo if not isinstance(node.op, topo_ = [node for node in topo if not isinstance(node.op,
self.ignore_topo)] self.ignore_topo)]
...@@ -167,12 +171,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -167,12 +171,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
n = self.shared(numpy.ones((), dtype=self.dtype)) n = self.shared(numpy.ones((), dtype=self.dtype))
t = self.sub([])(n) t = self.sub([])(n)
self.assertTrue(isinstance(t.owner.op, Subtensor)) self.assertTrue(isinstance(t.owner.op, Subtensor))
mode = self.mode self.eval_output_and_check(
self.mode = mode.excluding("local_useless_subtensor") t, mode=self.mode.excluding("local_useless_subtensor"))
try:
self.eval_output_and_check(t)
finally:
self.mode = mode
def test1_err_invalid(self): def test1_err_invalid(self):
n = self.shared(numpy.ones(1, dtype=self.dtype)) n = self.shared(numpy.ones(1, dtype=self.dtype))
...@@ -885,16 +885,14 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -885,16 +885,14 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
Test increment and set with broadcast Test increment and set with broadcast
""" """
X = tensor.matrix(dtype=self.dtype) X = self.shared(numpy.ones((9, 9)).astype(self.dtype))
y = set_subtensor(X[1::, 1::], 0) y = set_subtensor(X[1::, 1::], 0)
f = self.function([X], [y], f = self.function([], [y],
op=self.inc_sub, op=self.inc_sub,
N=1) N=1)
out = f()
x_ = numpy.ones((9, 9)) res = numpy.ones((9, 9))
out = f(x_.astype('float32'))
res = x_.copy()
res[1::, 1::] = 0 res[1::, 1::] = 0
assert numpy.allclose(out, res) assert numpy.allclose(out, res)
...@@ -925,9 +923,9 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -925,9 +923,9 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
# Symbolic variable to be incremented. # Symbolic variable to be incremented.
# We create a new one every time in order not to # We create a new one every time in order not to
# have duplicated variables in the function's inputs # have duplicated variables in the function's inputs
data_var = tensor.tensor( data_var = self.type(
broadcastable=[False] * data_n_dims, broadcastable=[False] * data_n_dims,
dtype=self.dtype) dtype=self.dtype)()
# Symbolic variable with rows to be incremented. # Symbolic variable with rows to be incremented.
idx_var = theano.tensor.vector(dtype='int64') idx_var = theano.tensor.vector(dtype='int64')
n_to_inc = rng.randint(data_shape[0]) n_to_inc = rng.randint(data_shape[0])
...@@ -935,9 +933,9 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -935,9 +933,9 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
idx_num = rng.randint(0, data_shape[0], n_to_inc) idx_num = rng.randint(0, data_shape[0], n_to_inc)
idx_num = idx_num.astype('int64') idx_num = idx_num.astype('int64')
# Symbolic variable with increment value. # Symbolic variable with increment value.
inc_var = tensor.tensor( inc_var = self.type(
broadcastable=[False] * inc_n_dims, broadcastable=[False] * inc_n_dims,
dtype=self.dtype) dtype=self.dtype)()
# Trick for the case where `inc_shape` is the same as # Trick for the case where `inc_shape` is the same as
# `data_shape`: what we actually want is the first # `data_shape`: what we actually want is the first
# shape element to be equal to the number of rows to # shape element to be equal to the number of rows to
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论