Commit b7bc0916, authored by Frederic

Make a GpuOp and make all op that use the GPU inherit from it.

This allow to make sure the test for the driver was executed.
Parent: 8333fe82
......@@ -7,9 +7,9 @@ from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable, host_from_gp
from theano.misc import strutil
from theano.tensor.nnet.Conv3D import Conv3D
from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda import CudaNdarrayType
from theano.sandbox.cuda import CudaNdarrayType, GpuOp
class GpuConv3D(theano.Op):
class GpuConv3D(GpuOp):
""" GPU implementation of Conv3D """
def __eq__(self, other):
......
......@@ -8,11 +8,12 @@ from theano.misc import strutil
from theano.tensor.nnet.ConvGrad3D import ConvGrad3D
from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda import CudaNdarrayType, HostFromGpu, host_from_gpu
from theano.sandbox.cuda import (CudaNdarrayType, HostFromGpu,
host_from_gpu, GpuOp)
class GpuConvGrad3D(theano.Op):
class GpuConvGrad3D(GpuOp):
""" GPU version of gradient of ConvGrad3D with respect to W """
def make_node(self, V, d, WShape, dCdH):
......
......@@ -9,10 +9,11 @@ from theano.gof import local_optimizer
from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable
from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda import CudaNdarrayType, HostFromGpu, host_from_gpu
from theano.sandbox.cuda import (CudaNdarrayType, HostFromGpu,
host_from_gpu, GpuOp)
class GpuConvTransp3D(theano.Op):
class GpuConvTransp3D(GpuOp):
""" The gpu version of ConvTransp3D """
def __eq__(self,other):
return type(self) == type(other)
......
......@@ -173,7 +173,7 @@ if cuda_available:
shared_constructor = float32_shared_constructor
import basic_ops
from basic_ops import (GpuFromHost, HostFromGpu, GpuElemwise,
from basic_ops import (GpuOp, GpuFromHost, HostFromGpu, GpuElemwise,
GpuDimShuffle, GpuSum, GpuReshape, GpuContiguous,
GpuSubtensor, GpuIncSubtensor,
GpuAdvancedSubtensor1, GpuAdvancedIncSubtensor1,
......
......@@ -33,7 +33,20 @@ def as_cuda_array(obj):
else:
raise TypeError("Don't know how to cast to a CudaNdarray object")
class HostFromGpu(Op):
class GpuOp(Op):
    """Base class for every Theano Op that executes on the GPU.

    Ops inherit from this class (instead of plain ``Op``) so that the CUDA
    driver check is guaranteed to have been executed before any GPU thunk
    is built: ``make_thunk`` lazily initializes the device when nothing
    has selected one yet (see this commit's message).
    """

    def make_thunk(self, node, storage_map, compute_map, no_recycling):
        # `device_number` is an attribute stored on the `use` callable
        # itself; `None` means no GPU device has been initialized yet.
        if theano.sandbox.cuda.use.device_number is None:
            # Initialize the device, but conservatively: do not move any
            # computation or shared float32 variables to the GPU, and do
            # not flip the global cuda-enabled flag — only make sure the
            # driver/device is actually usable.
            theano.sandbox.cuda.use("gpu",
                                    force=True,
                                    default_to_move_computation_to_gpu=False,
                                    move_shared_float32_to_gpu=False,
                                    enable_cuda=False)
        # Delegate the actual thunk construction to the parent Op.
        return super(GpuOp, self).make_thunk(node, storage_map,
                                             compute_map, no_recycling)
class HostFromGpu(GpuOp):
"""
Implement the transfer from gpu to the cpu.
"""
......@@ -65,7 +78,7 @@ class HostFromGpu(Op):
return xshp
host_from_gpu = HostFromGpu()
class GpuFromHost(Op):
class GpuFromHost(GpuOp):
"""
Implement the transfer from cpu to the gpu.
"""
......@@ -98,7 +111,8 @@ class GpuFromHost(Op):
return xshp
gpu_from_host = GpuFromHost()
class GpuElemwise(Op):
class GpuElemwise(GpuOp):
"""
Implement a generic elemwise on the gpu.
"""
......@@ -208,7 +222,7 @@ class GpuElemwise(Op):
def c_code_cache_version(self):
return self.src_generator.cache_version
class GpuDimShuffle(Op):
class GpuDimShuffle(GpuOp):
"""
Implement DimShuffle on the gpu.
"""
......@@ -397,7 +411,7 @@ class GpuDimShuffle(Op):
def c_code_cache_version(self):
return (1,0)
class GpuSum(Op):
class GpuSum(GpuOp):
"""GpuSum is a Reduction along some dimensions by summation.
The dimensions along which to sum is specified by the `reduce_mask` that you pass to the
......@@ -1717,7 +1731,7 @@ class GpuSum(Op):
""" %locals()
return sio.getvalue()
class GpuReshape(tensor.Reshape):
class GpuReshape(tensor.Reshape, GpuOp):
"""
Implement Reshape on the gpu.
"""
......@@ -1733,7 +1747,7 @@ class GpuReshape(tensor.Reshape):
', should be %i' % (len(shp), self.ndim), shp)
out[0] = x.reshape(tuple(shp))
class GpuSubtensor(tensor.Subtensor):
class GpuSubtensor(tensor.Subtensor, GpuOp):
"""
Implement subtensor on the gpu.
"""
......@@ -1764,7 +1778,7 @@ class GpuSubtensor(tensor.Subtensor):
cdata = cdata[0]
out[0] = x.__getitem__(cdata)
class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1):
class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
"""
Implement AdvancedSubtensor1 on the gpu.
"""
......@@ -1790,7 +1804,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1):
o[j] = x[i]
out[0] = o
class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1):
class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
"""
Implement AdvancedIncSubtensor1 on the gpu.
"""
......@@ -1818,7 +1832,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1):
# CudaNdarray_Subscript() don't support Advanced slicing.
# so we use the parent version that loop on each indices.
class GpuIncSubtensor(tensor.IncSubtensor):
class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
Implement IncSubtensor on the gpu.
"""
......@@ -1828,7 +1842,7 @@ class GpuIncSubtensor(tensor.IncSubtensor):
rval = tensor.IncSubtensor.make_node(self, x, y, *inputs)
return Apply(self, [x,y]+rval.inputs[2:], [x.type()])
class GpuFlatten(tensor.Flatten):
class GpuFlatten(tensor.Flatten, GpuOp):
"""
Implement Flatten on the gpu.
"""
......@@ -1839,7 +1853,7 @@ class GpuFlatten(tensor.Flatten):
out_type = CudaNdarrayType(broadcastable=host_out_broadcastable)
return Apply(self, [x], [out_type()])
class GpuShape(tensor.Shape):
class GpuShape(tensor.Shape, GpuOp):
"""
Implement Shape on the gpu.
"""
......@@ -1847,7 +1861,7 @@ class GpuShape(tensor.Shape):
return Apply(self, [x], [tensor.lvector()])
gpu_shape = GpuShape()
class GpuJoin(tensor.Join):
class GpuJoin(tensor.Join, GpuOp):
"""
Implement Join on the gpu.
"""
......@@ -1924,7 +1938,7 @@ class GpuJoin(tensor.Join):
gpu_join = GpuJoin()
class GpuAlloc(Op):
class GpuAlloc(GpuOp):
"""
Implement Alloc on the gpu.
"""
......@@ -2023,7 +2037,7 @@ class GpuAlloc(Op):
gpu_alloc = GpuAlloc()
class GpuContiguous(Op):
class GpuContiguous(GpuOp):
"""
Always return a c contiguous output. Copy the input only if it is
not already c contiguous.
......
......@@ -4,8 +4,9 @@ import StringIO, os
import cuda_ndarray.cuda_ndarray as cuda
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp
class GpuDot22(Op):
class GpuDot22(GpuOp):
"""
Implement dot(2d, 2d) on the gpu.
"""
......@@ -76,7 +77,7 @@ class GpuDot22(Op):
""" % locals()
gpu_dot22 = GpuDot22()
class GpuDot22Scalar(Op):
class GpuDot22Scalar(GpuOp):
"""
Implement dot(2d, 2d) * scalar on the gpu.
"""
......@@ -155,7 +156,7 @@ class GpuDot22Scalar(Op):
""" % locals()
gpu_dot22scalar = GpuDot22Scalar()
class GpuGemm(Op):
class GpuGemm(GpuOp):
"""
implement the gemm on the gpu.
......@@ -257,7 +258,7 @@ class GpuGemm(Op):
gpu_gemm_no_inplace = GpuGemm(inplace=False)
gpu_gemm_inplace = GpuGemm(inplace=True)
class GpuGemv(Op):
class GpuGemv(GpuOp):
"""
implement gemv on the gpu.
......@@ -348,7 +349,7 @@ class GpuGemv(Op):
gpu_gemv_no_inplace = GpuGemv(inplace=False)
gpu_gemv_inplace = GpuGemv(inplace=True)
class GpuGer(Op):
class GpuGer(GpuOp):
"""
implement ger on the gpu.
......@@ -439,7 +440,7 @@ class GpuGer(Op):
gpu_ger_no_inplace = GpuGer(inplace=False)
gpu_ger_inplace = GpuGer(inplace=True)
class GpuOuter(Op):
class GpuOuter(GpuOp):
""" Implement outer on the gpu."""
def make_node(self, x, y):
# we suppose type checking has been done, but make sure.
......@@ -532,7 +533,7 @@ gpu_outer = GpuOuter()
##
# Not really a BLAS operation, but whatever.
#
class GpuConv(Op):
class GpuConv(GpuOp):
"""
Implement the batched and stacked 2d convolution on the gpu.
"""
......@@ -698,7 +699,7 @@ class GpuConv(Op):
"""%sub
class GpuDownsampleFactorMax(Op):
class GpuDownsampleFactorMax(GpuOp):
"""
Implement downsample with max on the gpu.
"""
......@@ -858,7 +859,7 @@ class GpuDownsampleFactorMax(Op):
}
""" % locals()
class GpuDownsampleFactorMaxGrad(Op):
class GpuDownsampleFactorMaxGrad(GpuOp):
"""
Implement the grad of downsample with max on the gpu.
"""
......
......@@ -3,11 +3,12 @@ from theano import tensor, scalar
import StringIO
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.kernel_codegen import nvcc_kernel, inline_reduce_max, inline_reduce_sum, inline_softmax
class GpuCrossentropySoftmaxArgmax1HotWithBias (Op):
class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
"""
Implement CrossentropySoftmaxArgmax1HotWithBias on the gpu.
"""
......@@ -180,7 +181,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (Op):
gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1HotWithBias()
class GpuCrossentropySoftmax1HotWithBiasDx (Op):
class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp):
"""
Implement CrossentropySoftmax1HotWithBiasDx on the gpu.
"""
......@@ -302,7 +303,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
gpu_crossentropy_softmax_1hot_with_bias_dx = GpuCrossentropySoftmax1HotWithBiasDx()
class GpuSoftmax (Op):
class GpuSoftmax (GpuOp):
"""
Implement Softmax on the gpu.
"""
......@@ -400,7 +401,7 @@ class GpuSoftmax (Op):
gpu_softmax = GpuSoftmax()
class GpuSoftmaxWithBias (Op):
class GpuSoftmaxWithBias (GpuOp):
"""
Implement SoftmaxWithBias on the gpu.
"""
......
......@@ -10,7 +10,7 @@ __contact__ = "theano-dev@googlegroups.com"
import sys
import numpy
import theano.gof
from theano.sandbox.cuda import CudaNdarrayType
from theano.sandbox.cuda import CudaNdarrayType, GpuOp
from theano.tensor import (get_vector_length, cast, opt)
from theano.compile import optdb
from theano.gof import local_optimizer, Variable
......@@ -19,7 +19,7 @@ from theano.gof import local_optimizer, Variable
config = theano.config
class CURAND_Base(theano.gof.Op):
class CURAND_Base(GpuOp):
""" Base class for a random number generator implemented in CURAND.
The random number generator itself is an opaque reference managed by
......
......@@ -5,7 +5,7 @@ from theano.gof import local_optimizer
from theano.sandbox.cuda import cuda_available
if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType
from theano.sandbox.cuda import CudaNdarrayType, GpuOp
from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host
from theano.sandbox.cuda.opt import register_opt
......@@ -120,7 +120,7 @@ class MultinomialFromUniform(Op):
""" % locals()
class GpuMultinomialFromUniform(MultinomialFromUniform):
class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
"""
The output is transposed compared to MultinomialFromUniform.
We must insert a Transpose op after it.
......
......@@ -5,7 +5,7 @@ from theano.gof import local_optimizer
from theano.sandbox.cuda import cuda_available
if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType
from theano.sandbox.cuda import CudaNdarrayType, GpuOp
from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host
from theano.sandbox.cuda.opt import register_opt as register_gpu_opt
......@@ -292,7 +292,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
# This is work in progress
class GpuImages2Neibs(Images2Neibs):
class GpuImages2Neibs(Images2Neibs, GpuOp):
def __init__(self, mode='valid'):
if mode not in ['valid', 'wrap_centered']:
raise NotImplementedError("Only the mode valid and wrap_centered"
......
......@@ -20,7 +20,10 @@ import multinomial
from theano.sandbox.cuda import cuda_available, cuda_enabled
if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType, float32_shared_constructor
from theano.sandbox.cuda import (CudaNdarrayType,
float32_shared_constructor,
GpuOp)
def mulmod(a, b, c, m):
r = numpy.int32((numpy.int64(a)*b + c) % m)
......@@ -372,7 +375,7 @@ class mrg_uniform(mrg_uniform_base):
def c_code_cache_version(self):
return (1,)
class GPU_mrg_uniform(mrg_uniform_base):
class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
#GPU VERSION
@classmethod
......
Markdown is supported
0%
You are about to add 0 people to this discussion. Please proceed with caution.
Finish editing this message first!
Register or sign in to post a comment.