提交 90f94595 authored 作者: notoraptor's avatar notoraptor

Partially factorize GpuKernelBase.get_params().

Add a default GpuKernelBase.get_params() that retrieves the GPU context from the type of the node's first input, and factorize get_params() for all sub-classes that retrieve the context this way. get_params() must still be overridden by any subclass that obtains the context differently. Extend the GpuKernelBase interface to allow sub-classes to use parameter types other than gpu_context_type (especially ParamsType).
上级 71792827
......@@ -11,7 +11,7 @@ from theano.gradient import grad_undefined
from theano.tensor.basic import (
Alloc, AllocEmpty, alloc_validate_shape, Join, Split)
from theano.gof import HideC, COp
from theano.gof import HideC, COp, ParamsType
from theano.gof.utils import MethodNotDefined
from collections import deque
......@@ -293,6 +293,37 @@ class GpuKernelBase(object):
"""
params_type = gpu_context_type
def get_params(self, node):
    """Default ``get_params``: the GPU context of the first input's type.

    Suitable for most sub-classes.  Must necessarily be overridden by any
    sub-class that uses a ParamsType (see GpuCumOp,
    GpuAdvancedIncSubtensor1_dev20 for examples).
    """
    assert (self.params_type is gpu_context_type
            and node.inputs
            and isinstance(node.inputs[0].type, GpuArrayType))
    first_input = node.inputs[0]
    return first_input.type.context
def get_gpu_context(self, node):
    """Retrieve the GPU context for *node*.

    Private helper: use this instead of calling ``self.get_params(node)``
    directly, since ``get_params`` may be overridden by sub-classes.
    """
    params_type = self.params_type
    if not (isinstance(params_type, ParamsType) and
            params_type.has_type(gpu_context_type)):
        # Plain case: params IS the GPU context.
        assert params_type is gpu_context_type
        return self.get_params(node)
    # ParamsType case: the context is stored as a field of the Params
    # object returned by the (overridden) get_params().
    field_name = params_type.get_field(gpu_context_type)
    params = self.get_params(node)
    return getattr(params, field_name)
def get_gpu_context_c_name(self, params_c_name):
    """Return the C expression naming the GPU context variable.

    Private helper: use this instead of ``sub['params']`` directly, as
    params may not be a GPU context (e.g. for sub-classes using a
    ParamsType).
    """
    params_type = self.params_type
    if isinstance(params_type, ParamsType) and params_type.has_type(gpu_context_type):
        # Context is a field of the C params struct.
        field = params_type.get_field(gpu_context_type)
        return "(%s->%s)" % (params_c_name, field)
    assert params_type is gpu_context_type
    return params_c_name
def gpu_kernels(self, node, name):
"""
This is the method to override. This should return an iterable
......@@ -397,7 +428,7 @@ int {sname}(unsigned int _nd, size_t *_n, size_t _shared, {args}) {{
flags=k._get_c_flags(), fail=fail, ctx=ctx)
def c_init_code_struct(self, node, name, sub):
ctx = sub['params']
ctx = self.get_gpu_context_c_name(sub['params'])
kernels = self.gpu_kernels(node, name)
inits_0 = '\n'.join(self._generate_zeros(k) for k in kernels)
inits = '\n'.join(self._generate_kernel_init(k, sub['fail'], ctx)
......@@ -432,7 +463,7 @@ int {sname}(unsigned int _nd, size_t *_n, size_t _shared, {args}) {{
The node that we need the cache version for.
"""
return (8, self.get_params(node).bin_id)
return (8, self.get_gpu_context(node).bin_id)
def forward_string_meth(name):
......@@ -470,6 +501,7 @@ class CGpuKernelBase(COp, GpuKernelBase):
kernel_re = re.compile(r'^#kernel ([a-zA-Z_].*?)$', re.MULTILINE)
get_params = GpuKernelBase.get_params
c_support_code_apply = forward_string_meth('c_support_code_apply')
c_support_code_struct = forward_string_meth('c_support_code_struct')
c_init_code_struct = forward_string_meth('c_init_code_struct')
......
......@@ -545,9 +545,6 @@ class BaseGpuCorrMM(CGpuKernelBase):
flops *= inputs[1] * filters[0] * inputs[0]
return flops
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
def c_headers(self):
    # gpuarray_helper.h is project-local; the others ship with libgpuarray.
    headers = ["<gpuarray/array.h>", "<gpuarray/blas.h>"]
    headers.append("gpuarray_helper.h")
    return headers
......@@ -1142,9 +1139,6 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
flops *= inputs[1] * filters[0] * inputs[0]
return flops
def get_params(self, node):
    """Op parameter is the GPU context of input 0."""
    input_type = node.inputs[0].type
    return input_type.context
def c_headers(self):
    """Headers needed by the generated C code."""
    gpuarray_headers = ["<gpuarray/array.h>", "<gpuarray/blas.h>"]
    return gpuarray_headers + ["gpuarray_helper.h"]
......
......@@ -618,9 +618,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
ret.outputs[0].type.broadcastable,
context_name=x.type.context_name)()])
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
def perform(self, node, inp, out, ctx):
    # Delegate to the base theano.Op.perform implementation.
    theano.Op.perform(self, node, inp, out, ctx)
......
......@@ -43,9 +43,6 @@ class GpuCumOp(GpuKernelBase, Op):
def c_header_dirs(self):
    # Local headers (e.g. gpuarray_helper.h) live alongside this module.
    here = os.path.dirname(__file__)
    return [here]
def get_params(self, node):
    """Op parameter is the GPU context of input 0."""
    input_type = node.inputs[0].type
    return input_type.context
def make_node(self, x):
assert x.type.dtype == 'float32', "Only float32 supported for GpuCumOp"
......
......@@ -46,9 +46,6 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
dtype=ten4.type.dtype,
context_name=ten4.type.context_name)()])
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
def c_code_cache_version(self):
    # Bump whenever the generated C code changes.
    cache_version = (11,)
    return cache_version
......
......@@ -42,9 +42,6 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
am = y_idx.type()
return Apply(self, [x, b, y_idx], [nll, sm, am])
def get_params(self, node):
    """Op parameter is the GPU context of input 0."""
    input_type = node.inputs[0].type
    return input_type.context
def c_headers(self):
    """Headers needed by the generated C code."""
    headers = ['<numpy_compat.h>', '<gpuarray/types.h>']
    headers.append('gpuarray_helper.h')
    return headers
......@@ -294,9 +291,6 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
y_idx = as_gpuarray_variable(y_idx, ctx_name)
return Apply(self, [dnll, sm, y_idx], [sm.type()])
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
def c_code_cache_version(self):
    # Bump whenever the generated C code changes.
    cache_version = (12,)
    return cache_version
......@@ -501,9 +495,6 @@ class GpuSoftmax(GpuKernelBase, Op):
x = as_gpuarray_variable(x, infer_context_name(x))
return Apply(self, [x], [x.type()])
def get_params(self, node):
    """Op parameter is the GPU context of input 0."""
    input_type = node.inputs[0].type
    return input_type.context
def infer_shape(self, node, shape):
    # Output has the same shape as the input.
    return shape
......@@ -700,9 +691,6 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
b = as_gpuarray_variable(b, ctx_name)
return Apply(self, [x, b], [x.type()])
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
def infer_shape(self, node, shape):
    # Single output, shaped like the first input.
    return [shape[0]]
......
......@@ -74,9 +74,6 @@ class GpuPool(CGpuKernelBase):
return Apply(self, [inp, ws, stride, pad], [inp.type()])
def get_params(self, node):
    """Op parameter is the GPU context of input 0."""
    input_type = node.inputs[0].type
    return input_type.context
def get_op_params(self):
ignore_border = int(self.ignore_border)
max_pool = int(self.mode == 'max')
......@@ -194,9 +191,6 @@ class GpuMaxPoolGrad(CGpuKernelBase):
return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()])
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
def infer_shape(self, node, in_shapes):
    # Single output, shaped like the first input (inp).
    return [in_shapes[0]]
......@@ -273,9 +267,6 @@ class GpuAveragePoolGrad(CGpuKernelBase):
return Apply(self, [inp, out_grad, ws, stride, pad], [inp.type()])
def get_params(self, node):
    """Op parameter is the GPU context of input 0."""
    input_type = node.inputs[0].type
    return input_type.context
def get_op_params(self):
inc_pad = int(self.mode == 'average_inc_pad')
sum_mode = int(self.mode == 'sum')
......@@ -355,9 +346,6 @@ class GpuDownsampleFactorMaxGradGrad(CGpuKernelBase):
return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()])
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
def infer_shape(self, node, in_shapes):
    # Single output, shaped like the second input (out).
    return [in_shapes[1]]
......@@ -434,9 +422,6 @@ class GpuMaxPoolRop(CGpuKernelBase):
return Apply(self, [inp, eval_point, ws, stride, pad], [eval_point.type()])
def get_params(self, node):
    """Op parameter is the GPU context of input 0."""
    input_type = node.inputs[0].type
    return input_type.context
def get_op_params(self):
    # Forward ignore_border to the C code as a 0/1 IGNORE_BORDER macro.
    return [('IGNORE_BORDER', int(self.ignore_border))]
......
......@@ -39,9 +39,6 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
[rstate, size],
[rstate.type(), output_type])
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
@classmethod
def new(cls, rstate, ndim, dtype, size):
v_size = as_tensor_variable(size)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论