Fix test_elemwise and make it pass.

14539bc2 · Arnaud Bergeron · 3372bb22 · 14539bc2 · 14539bc2
--- a/theano/sandbox/gpuarray/elemwise.py
+++ b/theano/sandbox/gpuarray/elemwise.py
@@ -37,7 +37,7 @@ def make_argument(v, name):
        return ArrayArg(numpy.dtype(v.type.dtype), name)
-def ensure_allocated(storage, shape, dtype):
+def ensure_allocated(storage, shape, dtype, ctx):
    odat = storage[0]
    if odat is not None:
        if odat.shape != shape:
@@ -45,7 +45,7 @@ def ensure_allocated(storage, shape, dtype):
            # we have to allocate output storage.
            odat = None
    if odat is None:
-        odat = pygpu.empty(shape, dtype=dtype)
+        odat = pygpu.empty(shape, dtype=dtype, context=ctx)
    storage[0] = odat
    return odat
@@ -401,7 +401,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
            """ % locals()
        return str(code)
-    def perform(self, node, inputs, output_storage):
+    def perform(self, node, inputs, output_storage, ctx):
        # Try to reuse the kernel from a previous call to hopefully
        # avoid recompiling
        if not hasattr(node, '_cache_elemwise_k'):
@@ -422,7 +422,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
            if n in self.inplace_pattern:
                stor[0] = inputs[self.inplace_pattern[n]]
            else:
-                args.append(ensure_allocated(stor, out_shape, out.type.dtype))
+                args.append(ensure_allocated(stor, out_shape, out.type.dtype, ctx))
        node._cache_elemwise_k(*args, broadcast=True)
        if config.gpuarray.sync:
@@ -2633,7 +2633,6 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
    Too slow for now as it only has a Python interface.
    """
    def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None):
        if not hasattr(scalar_op, 'identity'):
            raise ValueError("No identity on scalar op")
@@ -2647,10 +2646,12 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
        return "GpuReduce{%s}%s" % (self.scalar_op, ax)
    def make_node(self, input):
+        ctx_name = infer_context_name(input)
        res = CAReduceDtype.make_node(self, input)
-        input = as_gpuarray_variable(input)
+        input = as_gpuarray_variable(input, ctx_name)
        otype = GpuArrayType(dtype=res.outputs[0].dtype,
-                             broadcastable=res.outputs[0].broadcastable)
+                             broadcastable=res.outputs[0].broadcastable,
+                             context_name=ctx_name)
        if res.op.axis is not None:
            redux = []
@@ -2662,6 +2663,9 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
        return Apply(res.op, [input], [otype()])
+    def get_context(self, node):
+        return node.outputs[0].type.context
    def make_thunk(self, node, storage_map, compute_map, no_recycling):
        # cache the kernel object
        self.get_kernel_cache(node)
@@ -2887,7 +2891,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
                               arguments=[make_argument(node.inputs[0], 'a')],
                               init_nd=node.inputs[0].ndim)
-    def perform(self, node, inp, out):
+    def perform(self, node, inp, out, ctx):
        input, = inp
        output, = out
@@ -2901,6 +2905,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
                copy=False, dtype=node.outputs[0].type.dtype)
        else:
            output[0] = pygpu.gpuarray.array(input, copy=True,
-                                             dtype=node.outputs[0].type.dtype)
+                                             dtype=node.outputs[0].type.dtype,
+                                             context=ctx)
 # To allow reloading old pickled files
 GpuCAReduce = GpuCAReduceCPY
--- a/theano/sandbox/gpuarray/tests/test_elemwise.py
+++ b/theano/sandbox/gpuarray/tests/test_elemwise.py
@@ -4,20 +4,19 @@ import theano
 from theano import scalar, gof
 from theano.tests.unittest_tools import SkipTest, assert_allclose
-from theano.tensor.tests.test_elemwise import (test_Broadcast, test_DimShuffle,
+from theano.tensor.tests import test_elemwise
-                                               test_CAReduce, T_reduce_dtype)
-from .config import mode_with_gpu
+from .config import mode_with_gpu, test_ctx_name
 from .test_basic_ops import rand_gpuarray
 from ..elemwise import (GpuElemwise, GpuDimShuffle,
                        GpuCAReduceCuda, GpuCAReduceCPY)
-from ..type import GpuArrayType
+from ..type import GpuArrayType, get_context
 from pygpu import ndgpuarray as gpuarray
 # This is actually a test for GpuElemwise
-class test_gpu_Broadcast(test_Broadcast):
+class test_gpu_Broadcast(test_elemwise.test_Broadcast):
    op = GpuElemwise
    type = GpuArrayType
    cop = GpuElemwise
@@ -26,8 +25,7 @@ class test_gpu_Broadcast(test_Broadcast):
    linkers = [gof.PerformLinker, gof.CLinker]
    def setUp(self):
-        dev = theano.sandbox.gpuarray.init_dev.device
+        if get_context(test_ctx_name).kind != 'cuda':
-        if not dev.startswith('cuda'):
            self.linkers = [gof.PerformLinker]
    def rand_val(self, shp):
@@ -37,14 +35,12 @@ class test_gpu_Broadcast(test_Broadcast):
        return rand_gpuarray(*shp, **dict(cls=gpuarray))
    def test_c(self):
-        dev = theano.sandbox.gpuarray.init_dev.device
+        if get_context(test_ctx_name).kind != 'cuda':
-        if not dev.startswith('cuda'):
            raise SkipTest("Cuda specific tests")
        super(test_gpu_Broadcast, self).test_c()
    def test_c_inplace(self):
-        dev = theano.sandbox.gpuarray.init_dev.device
+        if get_context(test_ctx_name).kind != 'cuda':
-        if not dev.startswith('cuda'):
            raise SkipTest("Cuda specific tests")
        super(test_gpu_Broadcast, self).test_c_inplace()
@@ -52,8 +48,7 @@ class test_gpu_Broadcast(test_Broadcast):
 def test_elemwise_pow():
    # Test that GpuElemwise(pow) can compile with any combination of integer
    # or float input dtype.
-    dev = theano.sandbox.gpuarray.init_dev.device
+    if get_context(test_ctx_name).kind != 'cuda':
-    if not dev.startswith('cuda'):
        raise SkipTest("Cuda specific tests")
    dtypes = ["uint8", "uint16", "uint32", "uint64",
@@ -78,11 +73,11 @@ def test_elemwise_pow():
            assert_allclose(out, expected_out)
-class test_GpuDimShuffle(test_DimShuffle):
+class test_GpuDimShuffle(test_elemwise.test_DimShuffle):
    op = GpuDimShuffle
-class test_GpuCAReduceCPY(test_CAReduce):
+class test_GpuCAReduceCPY(test_elemwise.test_CAReduce):
    dtypes = ["float32"]
    bin_dtypes = ["uint8", "int8"]
    op = GpuCAReduceCPY
@@ -210,12 +205,11 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
    def setUp(self):
        super(test_GpuCAReduceCuda, self).setUp()
-        dev = theano.sandbox.gpuarray.init_dev.device
+        if get_context(test_ctx_name).kind != 'cuda':
-        if not dev.startswith('cuda'):
            raise SkipTest("Cuda specific tests")
-class T_gpureduce_dtype(T_reduce_dtype):
+class T_gpureduce_dtype(test_elemwise.T_reduce_dtype):
    mode = mode_with_gpu.excluding('local_cut_useless_reduce')
    op = GpuCAReduceCuda
    # Currently we don't support reduction on 0 axis
@@ -226,8 +220,7 @@ class T_gpureduce_dtype(T_reduce_dtype):
              'float32', 'float64']
    def setUp(self):
-        dev = theano.sandbox.gpuarray.init_dev.device
+        if get_context(test_ctx_name).kind != 'cuda':
-        if not dev.startswith('cuda'):
            raise SkipTest("Cuda specific tests")