Commit 6187a1fa authored by Pascal Lamblin, committed by GitHub

Merge pull request #5927 from notoraptor/simplify-and-prepare-gpukernelbase-for-paramstype

Partially factorize GpuKernelBase.get_params() and configure it to use ParamsType.
...@@ -11,7 +11,7 @@ from theano.gradient import grad_undefined ...@@ -11,7 +11,7 @@ from theano.gradient import grad_undefined
from theano.tensor.basic import ( from theano.tensor.basic import (
Alloc, AllocEmpty, alloc_validate_shape, Join, Split) Alloc, AllocEmpty, alloc_validate_shape, Join, Split)
from theano.gof import HideC, COp from theano.gof import HideC, COp, ParamsType
from theano.gof.utils import MethodNotDefined from theano.gof.utils import MethodNotDefined
from collections import deque from collections import deque
...@@ -293,6 +293,36 @@ class GpuKernelBase(object): ...@@ -293,6 +293,36 @@ class GpuKernelBase(object):
""" """
params_type = gpu_context_type params_type = gpu_context_type
def get_params(self, node):
    """Return the GPU context of the node's first input.

    Default implementation, suitable for most sub-classes.  Any
    subclass that uses a ParamsType must override this method.
    """
    # Sanity-check that the default applies: the op must use the plain
    # gpu_context_type as params and take at least one GpuArray input.
    assert self.params_type is gpu_context_type
    assert node.inputs
    first_input = node.inputs[0]
    assert isinstance(first_input.type, GpuArrayType)
    return first_input.type.context
def get_gpu_context(self, node):
    """Return the GPU context for ``node``, whatever params the op uses.

    Private helper used instead of calling ``self.get_params(node)``
    directly, because ``get_params`` may be overridden to return a
    Params object rather than a raw GPU context.
    """
    params_type = self.params_type
    if isinstance(params_type, ParamsType) and params_type.has_type(gpu_context_type):
        # The op wraps its parameters in a ParamsType: fetch the Params
        # object (get_params() should have been overridden) and pull the
        # GPU context out of the field that holds it.
        field_name = params_type.get_field(gpu_context_type)
        params_object = self.get_params(node)
        return getattr(params_object, field_name)
    # Otherwise the params must be the GPU context itself.
    assert params_type is gpu_context_type
    return self.get_params(node)
def get_gpu_context_c_name(self, params_c_name):
    """Return the C expression that names the GPU context variable.

    Private helper used instead of ``sub['params']`` directly, since the
    params may not themselves be a GPU context (e.g. for sub-classes
    that use a ParamsType).
    """
    params_type = self.params_type
    if isinstance(params_type, ParamsType) and params_type.has_type(gpu_context_type):
        # The context lives in a field of the params struct in C.
        field_name = params_type.get_field(gpu_context_type)
        return "(%s->%s)" % (params_c_name, field_name)
    # Otherwise the params variable *is* the GPU context.
    assert params_type is gpu_context_type
    return params_c_name
def gpu_kernels(self, node, name): def gpu_kernels(self, node, name):
""" """
This is the method to override. This should return an iterable This is the method to override. This should return an iterable
...@@ -397,7 +427,7 @@ int {sname}(unsigned int _nd, size_t *_n, size_t _shared, {args}) {{ ...@@ -397,7 +427,7 @@ int {sname}(unsigned int _nd, size_t *_n, size_t _shared, {args}) {{
flags=k._get_c_flags(), fail=fail, ctx=ctx) flags=k._get_c_flags(), fail=fail, ctx=ctx)
def c_init_code_struct(self, node, name, sub): def c_init_code_struct(self, node, name, sub):
ctx = sub['params'] ctx = self.get_gpu_context_c_name(sub['params'])
kernels = self.gpu_kernels(node, name) kernels = self.gpu_kernels(node, name)
inits_0 = '\n'.join(self._generate_zeros(k) for k in kernels) inits_0 = '\n'.join(self._generate_zeros(k) for k in kernels)
inits = '\n'.join(self._generate_kernel_init(k, sub['fail'], ctx) inits = '\n'.join(self._generate_kernel_init(k, sub['fail'], ctx)
...@@ -432,7 +462,7 @@ int {sname}(unsigned int _nd, size_t *_n, size_t _shared, {args}) {{ ...@@ -432,7 +462,7 @@ int {sname}(unsigned int _nd, size_t *_n, size_t _shared, {args}) {{
The node that we need the cache version for. The node that we need the cache version for.
""" """
return (8, self.get_params(node).bin_id) return (8, self.get_gpu_context(node).bin_id)
def forward_string_meth(name): def forward_string_meth(name):
...@@ -470,6 +500,7 @@ class CGpuKernelBase(COp, GpuKernelBase): ...@@ -470,6 +500,7 @@ class CGpuKernelBase(COp, GpuKernelBase):
kernel_re = re.compile(r'^#kernel ([a-zA-Z_].*?)$', re.MULTILINE) kernel_re = re.compile(r'^#kernel ([a-zA-Z_].*?)$', re.MULTILINE)
# Reuse the base-class default: params are the GPU context of the
# first input (see GpuKernelBase.get_params).
get_params = GpuKernelBase.get_params
c_support_code_apply = forward_string_meth('c_support_code_apply') c_support_code_apply = forward_string_meth('c_support_code_apply')
c_support_code_struct = forward_string_meth('c_support_code_struct') c_support_code_struct = forward_string_meth('c_support_code_struct')
c_init_code_struct = forward_string_meth('c_init_code_struct') c_init_code_struct = forward_string_meth('c_init_code_struct')
......
...@@ -545,9 +545,6 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -545,9 +545,6 @@ class BaseGpuCorrMM(CGpuKernelBase):
flops *= inputs[1] * filters[0] * inputs[0] flops *= inputs[1] * filters[0] * inputs[0]
return flops return flops
def get_params(self, node):
    """Return the GPU context of this op's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
def c_headers(self): def c_headers(self):
return ["<gpuarray/array.h>", "<gpuarray/blas.h>", "gpuarray_helper.h"] return ["<gpuarray/array.h>", "<gpuarray/blas.h>", "gpuarray_helper.h"]
...@@ -1142,9 +1139,6 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1142,9 +1139,6 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
flops *= inputs[1] * filters[0] * inputs[0] flops *= inputs[1] * filters[0] * inputs[0]
return flops return flops
def get_params(self, node):
    """Return the GPU context of this op's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
def c_headers(self): def c_headers(self):
return ["<gpuarray/array.h>", "<gpuarray/blas.h>", "gpuarray_helper.h"] return ["<gpuarray/array.h>", "<gpuarray/blas.h>", "gpuarray_helper.h"]
......
...@@ -618,9 +618,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -618,9 +618,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
ret.outputs[0].type.broadcastable, ret.outputs[0].type.broadcastable,
context_name=x.type.context_name)()]) context_name=x.type.context_name)()])
def get_params(self, node):
    """Return the GPU context of this op's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
def perform(self, node, inp, out, ctx): def perform(self, node, inp, out, ctx):
theano.Op.perform(self, node, inp, out, ctx) theano.Op.perform(self, node, inp, out, ctx)
......
...@@ -43,9 +43,6 @@ class GpuCumOp(GpuKernelBase, Op): ...@@ -43,9 +43,6 @@ class GpuCumOp(GpuKernelBase, Op):
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [os.path.dirname(__file__)]
def get_params(self, node):
    """Return the GPU context of this op's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
def make_node(self, x): def make_node(self, x):
assert x.type.dtype == 'float32', "Only float32 supported for GpuCumOp" assert x.type.dtype == 'float32', "Only float32 supported for GpuCumOp"
......
...@@ -46,9 +46,6 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -46,9 +46,6 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
dtype=ten4.type.dtype, dtype=ten4.type.dtype,
context_name=ten4.type.context_name)()]) context_name=ten4.type.context_name)()])
def get_params(self, node):
    """Return the GPU context of this op's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
def c_code_cache_version(self): def c_code_cache_version(self):
return (11,) return (11,)
......
...@@ -42,9 +42,6 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op): ...@@ -42,9 +42,6 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
am = y_idx.type() am = y_idx.type()
return Apply(self, [x, b, y_idx], [nll, sm, am]) return Apply(self, [x, b, y_idx], [nll, sm, am])
def get_params(self, node):
    """Return the GPU context of this op's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
def c_headers(self): def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray/types.h>', 'gpuarray_helper.h'] return ['<numpy_compat.h>', '<gpuarray/types.h>', 'gpuarray_helper.h']
...@@ -294,9 +291,6 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op): ...@@ -294,9 +291,6 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
y_idx = as_gpuarray_variable(y_idx, ctx_name) y_idx = as_gpuarray_variable(y_idx, ctx_name)
return Apply(self, [dnll, sm, y_idx], [sm.type()]) return Apply(self, [dnll, sm, y_idx], [sm.type()])
def get_params(self, node):
    """Return the GPU context of this op's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
def c_code_cache_version(self): def c_code_cache_version(self):
return (12,) return (12,)
...@@ -501,9 +495,6 @@ class GpuSoftmax(GpuKernelBase, Op): ...@@ -501,9 +495,6 @@ class GpuSoftmax(GpuKernelBase, Op):
x = as_gpuarray_variable(x, infer_context_name(x)) x = as_gpuarray_variable(x, infer_context_name(x))
return Apply(self, [x], [x.type()]) return Apply(self, [x], [x.type()])
def get_params(self, node):
    """Return the GPU context of this op's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return shape return shape
...@@ -700,9 +691,6 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op): ...@@ -700,9 +691,6 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
b = as_gpuarray_variable(b, ctx_name) b = as_gpuarray_variable(b, ctx_name)
return Apply(self, [x, b], [x.type()]) return Apply(self, [x, b], [x.type()])
def get_params(self, node):
    """Return the GPU context of this op's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[0]] return [shape[0]]
......
...@@ -74,9 +74,6 @@ class GpuPool(CGpuKernelBase): ...@@ -74,9 +74,6 @@ class GpuPool(CGpuKernelBase):
return Apply(self, [inp, ws, stride, pad], [inp.type()]) return Apply(self, [inp, ws, stride, pad], [inp.type()])
def get_params(self, node):
    """Return the GPU context of this op's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
def get_op_params(self): def get_op_params(self):
ignore_border = int(self.ignore_border) ignore_border = int(self.ignore_border)
max_pool = int(self.mode == 'max') max_pool = int(self.mode == 'max')
...@@ -194,9 +191,6 @@ class GpuMaxPoolGrad(CGpuKernelBase): ...@@ -194,9 +191,6 @@ class GpuMaxPoolGrad(CGpuKernelBase):
return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()]) return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()])
def get_params(self, node):
    """Return the GPU context of this op's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
def infer_shape(self, node, in_shapes): def infer_shape(self, node, in_shapes):
return [in_shapes[0]] return [in_shapes[0]]
...@@ -273,9 +267,6 @@ class GpuAveragePoolGrad(CGpuKernelBase): ...@@ -273,9 +267,6 @@ class GpuAveragePoolGrad(CGpuKernelBase):
return Apply(self, [inp, out_grad, ws, stride, pad], [inp.type()]) return Apply(self, [inp, out_grad, ws, stride, pad], [inp.type()])
def get_params(self, node):
    """Return the GPU context of this op's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
def get_op_params(self): def get_op_params(self):
inc_pad = int(self.mode == 'average_inc_pad') inc_pad = int(self.mode == 'average_inc_pad')
sum_mode = int(self.mode == 'sum') sum_mode = int(self.mode == 'sum')
...@@ -355,9 +346,6 @@ class GpuDownsampleFactorMaxGradGrad(CGpuKernelBase): ...@@ -355,9 +346,6 @@ class GpuDownsampleFactorMaxGradGrad(CGpuKernelBase):
return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()]) return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()])
def get_params(self, node):
    """Return the GPU context of this op's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
def infer_shape(self, node, in_shapes): def infer_shape(self, node, in_shapes):
return [in_shapes[1]] return [in_shapes[1]]
...@@ -434,9 +422,6 @@ class GpuMaxPoolRop(CGpuKernelBase): ...@@ -434,9 +422,6 @@ class GpuMaxPoolRop(CGpuKernelBase):
return Apply(self, [inp, eval_point, ws, stride, pad], [eval_point.type()]) return Apply(self, [inp, eval_point, ws, stride, pad], [eval_point.type()])
def get_params(self, node):
    """Return the GPU context of this op's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
def get_op_params(self): def get_op_params(self):
ignore_border = int(self.ignore_border) ignore_border = int(self.ignore_border)
return [('IGNORE_BORDER', ignore_border)] return [('IGNORE_BORDER', ignore_border)]
......
...@@ -39,9 +39,6 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -39,9 +39,6 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
[rstate, size], [rstate, size],
[rstate.type(), output_type]) [rstate.type(), output_type])
def get_params(self, node):
    """Return the GPU context of this op's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
@classmethod @classmethod
def new(cls, rstate, ndim, dtype, size): def new(cls, rstate, ndim, dtype, size):
v_size = as_tensor_variable(size) v_size = as_tensor_variable(size)
......
Markdown format
0%
You added 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment