提交 90f94595 authored 作者: notoraptor's avatar notoraptor

Partially factorize GpuKernelBase.get_params().

Add a default GpuKernelBase.get_params() that retrieves the GPU context from the type of the node's first input, and factorize get_params() for all sub-classes that retrieve the context this way. get_params() must still be overridden by any subclass that obtains the context differently. Extend the GpuKernelBase interface to allow sub-classes to use parameter types other than gpu_context_type (especially ParamsType).
上级 71792827
......@@ -11,7 +11,7 @@ from theano.gradient import grad_undefined
from theano.tensor.basic import (
Alloc, AllocEmpty, alloc_validate_shape, Join, Split)
from theano.gof import HideC, COp
from theano.gof import HideC, COp, ParamsType
from theano.gof.utils import MethodNotDefined
from collections import deque
......@@ -293,6 +293,37 @@ class GpuKernelBase(object):
"""
params_type = gpu_context_type
def get_params(self, node):
    """Default ``get_params``: the GPU context of the first input's type.

    Suitable for most sub-classes.  Must necessarily be overridden by any
    sub-class that uses a ParamsType (see GpuCumOp,
    GpuAdvancedIncSubtensor1_dev20 for examples).
    """
    assert (self.params_type is gpu_context_type
            and node.inputs
            and isinstance(node.inputs[0].type, GpuArrayType))
    first_input = node.inputs[0]
    return first_input.type.context
def get_gpu_context(self, node):
    """Retrieve the GPU context for *node*.

    Private helper: use this instead of calling ``self.get_params(node)``
    directly, since ``get_params`` may be overridden by sub-classes.
    """
    params_type = self.params_type
    if not (isinstance(params_type, ParamsType) and
            params_type.has_type(gpu_context_type)):
        # Plain case: params IS the GPU context.
        assert params_type is gpu_context_type
        return self.get_params(node)
    # ParamsType case: the context is stored as a field of the Params
    # object returned by the (overridden) get_params().
    field_name = params_type.get_field(gpu_context_type)
    params = self.get_params(node)
    return getattr(params, field_name)
def get_gpu_context_c_name(self, params_c_name):
    """Return the C expression naming the GPU context variable.

    Private helper: use this instead of ``sub['params']`` directly, as
    params may not be a GPU context (e.g. for sub-classes using a
    ParamsType).
    """
    params_type = self.params_type
    if isinstance(params_type, ParamsType) and params_type.has_type(gpu_context_type):
        # Context is a field of the C params struct.
        field = params_type.get_field(gpu_context_type)
        return "(%s->%s)" % (params_c_name, field)
    assert params_type is gpu_context_type
    return params_c_name
def gpu_kernels(self, node, name):
"""
This is the method to override. This should return an iterable
......@@ -397,7 +428,7 @@ int {sname}(unsigned int _nd, size_t *_n, size_t _shared, {args}) {{
flags=k._get_c_flags(), fail=fail, ctx=ctx)
def c_init_code_struct(self, node, name, sub):
ctx = sub['params']
ctx = self.get_gpu_context_c_name(sub['params'])
kernels = self.gpu_kernels(node, name)
inits_0 = '\n'.join(self._generate_zeros(k) for k in kernels)
inits = '\n'.join(self._generate_kernel_init(k, sub['fail'], ctx)
......@@ -432,7 +463,7 @@ int {sname}(unsigned int _nd, size_t *_n, size_t _shared, {args}) {{
The node that we need the cache version for.
"""
return (8, self.get_params(node).bin_id)
return (8, self.get_gpu_context(node).bin_id)
def forward_string_meth(name):
......@@ -470,6 +501,7 @@ class CGpuKernelBase(COp, GpuKernelBase):
kernel_re = re.compile(r'^#kernel ([a-zA-Z_].*?)$', re.MULTILINE)
get_params = GpuKernelBase.get_params
c_support_code_apply = forward_string_meth('c_support_code_apply')
c_support_code_struct = forward_string_meth('c_support_code_struct')
c_init_code_struct = forward_string_meth('c_init_code_struct')
......
......@@ -545,9 +545,6 @@ class BaseGpuCorrMM(CGpuKernelBase):
flops *= inputs[1] * filters[0] * inputs[0]
return flops
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
def c_headers(self):
    # gpuarray_helper.h is project-local; the others ship with libgpuarray.
    headers = ["<gpuarray/array.h>", "<gpuarray/blas.h>"]
    headers.append("gpuarray_helper.h")
    return headers
......@@ -1142,9 +1139,6 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
flops *= inputs[1] * filters[0] * inputs[0]
return flops
def get_params(self, node):
    """Op parameter is the GPU context of input 0."""
    input_type = node.inputs[0].type
    return input_type.context
def c_headers(self):
    """Headers needed by the generated C code."""
    gpuarray_headers = ["<gpuarray/array.h>", "<gpuarray/blas.h>"]
    return gpuarray_headers + ["gpuarray_helper.h"]
......
......@@ -618,9 +618,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
ret.outputs[0].type.broadcastable,
context_name=x.type.context_name)()])
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
def perform(self, node, inp, out, ctx):
    # Delegate to the base theano.Op.perform implementation.
    theano.Op.perform(self, node, inp, out, ctx)
......
......@@ -43,9 +43,6 @@ class GpuCumOp(GpuKernelBase, Op):
def c_header_dirs(self):
    # Local headers (e.g. gpuarray_helper.h) live alongside this module.
    here = os.path.dirname(__file__)
    return [here]
def get_params(self, node):
    """Op parameter is the GPU context of input 0."""
    input_type = node.inputs[0].type
    return input_type.context
def make_node(self, x):
assert x.type.dtype == 'float32', "Only float32 supported for GpuCumOp"
......
......@@ -46,9 +46,6 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
dtype=ten4.type.dtype,
context_name=ten4.type.context_name)()])
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
def c_code_cache_version(self):
    # Bump whenever the generated C code changes.
    cache_version = (11,)
    return cache_version
......
......@@ -42,9 +42,6 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
am = y_idx.type()
return Apply(self, [x, b, y_idx], [nll, sm, am])
def get_params(self, node):
    """Op parameter is the GPU context of input 0."""
    input_type = node.inputs[0].type
    return input_type.context
def c_headers(self):
    """Headers needed by the generated C code."""
    headers = ['<numpy_compat.h>', '<gpuarray/types.h>']
    headers.append('gpuarray_helper.h')
    return headers
......@@ -294,9 +291,6 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
y_idx = as_gpuarray_variable(y_idx, ctx_name)
return Apply(self, [dnll, sm, y_idx], [sm.type()])
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
def c_code_cache_version(self):
    # Bump whenever the generated C code changes.
    cache_version = (12,)
    return cache_version
......@@ -501,9 +495,6 @@ class GpuSoftmax(GpuKernelBase, Op):
x = as_gpuarray_variable(x, infer_context_name(x))
return Apply(self, [x], [x.type()])
def get_params(self, node):
    """Op parameter is the GPU context of input 0."""
    input_type = node.inputs[0].type
    return input_type.context
def infer_shape(self, node, shape):
    # Output has the same shape as the input.
    return shape
......@@ -700,9 +691,6 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
b = as_gpuarray_variable(b, ctx_name)
return Apply(self, [x, b], [x.type()])
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
def infer_shape(self, node, shape):
    # Single output, shaped like the first input.
    return [shape[0]]
......
......@@ -74,9 +74,6 @@ class GpuPool(CGpuKernelBase):
return Apply(self, [inp, ws, stride, pad], [inp.type()])
def get_params(self, node):
    """Op parameter is the GPU context of input 0."""
    input_type = node.inputs[0].type
    return input_type.context
def get_op_params(self):
ignore_border = int(self.ignore_border)
max_pool = int(self.mode == 'max')
......@@ -194,9 +191,6 @@ class GpuMaxPoolGrad(CGpuKernelBase):
return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()])
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
def infer_shape(self, node, in_shapes):
    # Single output, shaped like the first input (inp).
    return [in_shapes[0]]
......@@ -273,9 +267,6 @@ class GpuAveragePoolGrad(CGpuKernelBase):
return Apply(self, [inp, out_grad, ws, stride, pad], [inp.type()])
def get_params(self, node):
    """Op parameter is the GPU context of input 0."""
    input_type = node.inputs[0].type
    return input_type.context
def get_op_params(self):
inc_pad = int(self.mode == 'average_inc_pad')
sum_mode = int(self.mode == 'sum')
......@@ -355,9 +346,6 @@ class GpuDownsampleFactorMaxGradGrad(CGpuKernelBase):
return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()])
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
def infer_shape(self, node, in_shapes):
    # Single output, shaped like the second input (out).
    return [in_shapes[1]]
......@@ -434,9 +422,6 @@ class GpuMaxPoolRop(CGpuKernelBase):
return Apply(self, [inp, eval_point, ws, stride, pad], [eval_point.type()])
def get_params(self, node):
    """Op parameter is the GPU context of input 0."""
    input_type = node.inputs[0].type
    return input_type.context
def get_op_params(self):
    # Forward ignore_border to the C code as a 0/1 IGNORE_BORDER macro.
    return [('IGNORE_BORDER', int(self.ignore_border))]
......
......@@ -39,9 +39,6 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
[rstate, size],
[rstate.type(), output_type])
def get_params(self, node):
    """Return the GPU context taken from the first input's type."""
    first_input = node.inputs[0]
    return first_input.type.context
@classmethod
def new(cls, rstate, ndim, dtype, size):
v_size = as_tensor_variable(size)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论