提交 a6e0ff7a，作者：Arnaud Bergeron

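Use the new name for the op context, which is now called params: `context_type` → `params_type`, `get_context` → `get_params`, `sub['context']` → `sub['params']`, and `CONTEXT` → `PARAMS` in the C sources.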

上级 51c981b0
...@@ -169,7 +169,7 @@ class Kernel(object): ...@@ -169,7 +169,7 @@ class Kernel(object):
class GpuKernelBase(object): class GpuKernelBase(object):
context_type = gpu_context_type params_type = gpu_context_type
def gpu_kernels(self, node, name): def gpu_kernels(self, node, name):
""" """
...@@ -214,7 +214,7 @@ class GpuKernelBase(object): ...@@ -214,7 +214,7 @@ class GpuKernelBase(object):
def c_support_code_apply(self, node, name): def c_support_code_apply(self, node, name):
kernels = self.gpu_kernels(node, name) kernels = self.gpu_kernels(node, name)
ctx = self.get_context(node) ctx = self.get_params(node)
bins = '\n'.join(self._generate_kernel_bin(k, ctx) for k in kernels) bins = '\n'.join(self._generate_kernel_bin(k, ctx) for k in kernels)
codes = '\n'.join(self._generate_kernel_code(k) for k in kernels) codes = '\n'.join(self._generate_kernel_code(k) for k in kernels)
return '\n'.join([bins, codes]) return '\n'.join([bins, codes])
...@@ -248,7 +248,7 @@ class GpuKernelBase(object): ...@@ -248,7 +248,7 @@ class GpuKernelBase(object):
flags=k._get_c_flags(), fail=fail, ctx=ctx) flags=k._get_c_flags(), fail=fail, ctx=ctx)
def c_init_code_struct(self, node, name, sub): def c_init_code_struct(self, node, name, sub):
ctx = sub['context'] ctx = sub['params']
kernels = self.gpu_kernels(node, name) kernels = self.gpu_kernels(node, name)
inits_0 = '\n'.join(self._generate_zeros(k) for k in kernels) inits_0 = '\n'.join(self._generate_zeros(k) for k in kernels)
inits = '\n'.join(self._generate_kernel_init(k, sub['fail'], ctx) inits = '\n'.join(self._generate_kernel_init(k, sub['fail'], ctx)
...@@ -269,7 +269,7 @@ class GpuKernelBase(object): ...@@ -269,7 +269,7 @@ class GpuKernelBase(object):
return (self.c_code_cache_version(), self.kernel_version(node)) return (self.c_code_cache_version(), self.kernel_version(node))
def kernel_version(self, node): def kernel_version(self, node):
return (3, node.get_context().bin_id) return (3, self.get_params(node).bin_id)
class HostFromGpu(Op): class HostFromGpu(Op):
...@@ -351,7 +351,7 @@ host_from_gpu = HostFromGpu() ...@@ -351,7 +351,7 @@ host_from_gpu = HostFromGpu()
class GpuFromHost(Op): class GpuFromHost(Op):
__props__ = ('context_name',) __props__ = ('context_name',)
_f16_ok = True _f16_ok = True
context_type = gpu_context_type params_type = gpu_context_type
def __init__(self, context_name): def __init__(self, context_name):
self.context_name = context_name self.context_name = context_name
...@@ -366,7 +366,7 @@ class GpuFromHost(Op): ...@@ -366,7 +366,7 @@ class GpuFromHost(Op):
context_name=self.context_name, context_name=self.context_name,
dtype=x.dtype)()]) dtype=x.dtype)()])
def get_context(self, node): def get_params(self, node):
return get_context(self.context_name) return get_context(self.context_name)
def perform(self, node, inp, out, ctx): def perform(self, node, inp, out, ctx):
...@@ -404,7 +404,7 @@ class GpuFromHost(Op): ...@@ -404,7 +404,7 @@ class GpuFromHost(Op):
if (%(out)s == NULL) { if (%(out)s == NULL) {
%(fail)s %(fail)s
} }
""" % {'name': name, 'inp': inputs[0], 'ctx': sub['context'], """ % {'name': name, 'inp': inputs[0], 'ctx': sub['params'],
'out': outputs[0], 'fail': sub['fail']} 'out': outputs[0], 'fail': sub['fail']}
def c_code_cache_version(self): def c_code_cache_version(self):
...@@ -414,7 +414,7 @@ class GpuFromHost(Op): ...@@ -414,7 +414,7 @@ class GpuFromHost(Op):
class GpuToGpu(Op): class GpuToGpu(Op):
__props__ = ('context_name',) __props__ = ('context_name',)
_f16_ok = True _f16_ok = True
context_type = gpu_context_type params_type = gpu_context_type
def __init__(self, context_name): def __init__(self, context_name):
self.context_name = context_name self.context_name = context_name
...@@ -429,7 +429,7 @@ class GpuToGpu(Op): ...@@ -429,7 +429,7 @@ class GpuToGpu(Op):
context_name=self.context_name, context_name=self.context_name,
dtype=x.dtype)()]) dtype=x.dtype)()])
def get_context(self, node): def get_params(self, node):
return get_context(self.context_name) return get_context(self.context_name)
def perform(self, node, inp, out, ctx): def perform(self, node, inp, out, ctx):
...@@ -454,7 +454,7 @@ class GpuToGpu(Op): ...@@ -454,7 +454,7 @@ class GpuToGpu(Op):
if (%(out)s == NULL) { if (%(out)s == NULL) {
%(fail)s %(fail)s
} }
""" % {'inp': inputs[0], 'ctx': sub['context'], """ % {'inp': inputs[0], 'ctx': sub['params'],
'out': outputs[0], 'fail': sub['fail']} 'out': outputs[0], 'fail': sub['fail']}
def c_code_cache_version(self): def c_code_cache_version(self):
...@@ -476,13 +476,13 @@ class GpuAlloc(HideC, Alloc): ...@@ -476,13 +476,13 @@ class GpuAlloc(HideC, Alloc):
__props__ = ('memset_0', 'context_name') __props__ = ('memset_0', 'context_name')
_f16_ok = True _f16_ok = True
context_type = gpu_context_type params_type = gpu_context_type
def __init__(self, context_name, memset_0=False): def __init__(self, context_name, memset_0=False):
self.context_name = context_name self.context_name = context_name
self.memset_0 = memset_0 self.memset_0 = memset_0
def get_context(self, node): def get_params(self, node):
return get_context(self.context_name) return get_context(self.context_name)
def __str__(self): def __str__(self):
...@@ -580,7 +580,7 @@ class GpuAlloc(HideC, Alloc): ...@@ -580,7 +580,7 @@ class GpuAlloc(HideC, Alloc):
%(fail)s %(fail)s
} }
} }
""" % dict(name=name, ndim=ndim, zz=zz, vv=vv, ctx=sub['context'], """ % dict(name=name, ndim=ndim, zz=zz, vv=vv, ctx=sub['params'],
fail=sub['fail'], memset_0=memset_0) fail=sub['fail'], memset_0=memset_0)
if config.gpuarray.sync: if config.gpuarray.sync:
...@@ -625,13 +625,13 @@ class GpuAlloc(HideC, Alloc): ...@@ -625,13 +625,13 @@ class GpuAlloc(HideC, Alloc):
class GpuAllocEmpty(HideC, Alloc): class GpuAllocEmpty(HideC, Alloc):
__props__ = ('dtype', 'context_name') __props__ = ('dtype', 'context_name')
_f16_ok = True _f16_ok = True
context_type = gpu_context_type params_type = gpu_context_type
def __init__(self, dtype, context_name): def __init__(self, dtype, context_name):
self.dtype = dtype self.dtype = dtype
self.context_name = context_name self.context_name = context_name
def get_context(self, node): def get_params(self, node):
return get_context(self.context_name) return get_context(self.context_name)
def make_node(self, *shape): def make_node(self, *shape):
...@@ -677,7 +677,7 @@ if (theano_prep_output(&%(zz)s, %(ndim)s, shape, %(type)s, GA_C_ORDER, ...@@ -677,7 +677,7 @@ if (theano_prep_output(&%(zz)s, %(ndim)s, shape, %(type)s, GA_C_ORDER,
%(fail)s %(fail)s
} }
""" % dict(zz=zz, ndim=ndim, type=gpuarray.dtype_to_typecode(self.dtype), """ % dict(zz=zz, ndim=ndim, type=gpuarray.dtype_to_typecode(self.dtype),
fail=fail, ctx=sub['context'])) fail=fail, ctx=sub['params']))
return ''.join(code) return ''.join(code)
...@@ -884,7 +884,7 @@ class GpuReshape(HideC, tensor.Reshape): ...@@ -884,7 +884,7 @@ class GpuReshape(HideC, tensor.Reshape):
class GpuJoin(HideC, Join): class GpuJoin(HideC, Join):
_f16_ok = True _f16_ok = True
context_type = gpu_context_type params_type = gpu_context_type
def make_node(self, axis, *tensors): def make_node(self, axis, *tensors):
node = Join.make_node(self, axis, *tensors) node = Join.make_node(self, axis, *tensors)
...@@ -899,7 +899,7 @@ class GpuJoin(HideC, Join): ...@@ -899,7 +899,7 @@ class GpuJoin(HideC, Join):
dtype=node.outputs[0].dtype, dtype=node.outputs[0].dtype,
context_name=ctx_name)()]) context_name=ctx_name)()])
def get_context(self, node): def get_params(self, node):
return node.outputs[0].type.context return node.outputs[0].type.context
def perform(self, node, axis_and_tensors, out_, ctx): def perform(self, node, axis_and_tensors, out_, ctx):
...@@ -947,7 +947,7 @@ if (%(out)s == NULL) ...@@ -947,7 +947,7 @@ if (%(out)s == NULL)
%(fail)s %(fail)s
""" % dict(n=len(inputs[1:]), fail=sub['fail'], out=out_[0], """ % dict(n=len(inputs[1:]), fail=sub['fail'], out=out_[0],
axis=inputs[0], copy_inputs_to_list='\n'.join(copy_to_list), axis=inputs[0], copy_inputs_to_list='\n'.join(copy_to_list),
restype=restype, ctx=sub['context']) restype=restype, ctx=sub['params'])
gpu_join = GpuJoin() gpu_join = GpuJoin()
...@@ -973,7 +973,7 @@ class GpuEye(GpuKernelBase, Op): ...@@ -973,7 +973,7 @@ class GpuEye(GpuKernelBase, Op):
self.dtype = dtype self.dtype = dtype
self.context_name = context_name self.context_name = context_name
def get_context(self, node): def get_params(self, node):
return get_context(self.context_name) return get_context(self.context_name)
def make_node(self, n, m, k): def make_node(self, n, m, k):
...@@ -1018,7 +1018,7 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) { ...@@ -1018,7 +1018,7 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) {
n, m = inp n, m = inp
z, = out z, = out
fail = sub['fail'] fail = sub['fail']
ctx = sub['context'] ctx = sub['params']
typecode = pygpu.gpuarray.dtype_to_typecode(self.dtype) typecode = pygpu.gpuarray.dtype_to_typecode(self.dtype)
sync = bool(config.gpuarray.sync) sync = bool(config.gpuarray.sync)
kname = self.gpu_kernels(node, name)[0].objvar kname = self.gpu_kernels(node, name)[0].objvar
......
...@@ -135,7 +135,7 @@ class GpuConv(GpuKernelBase, gof.Op): ...@@ -135,7 +135,7 @@ class GpuConv(GpuKernelBase, gof.Op):
out = GpuArrayType(img.dtype, broadcastable, context_name=ctx_name)() out = GpuArrayType(img.dtype, broadcastable, context_name=ctx_name)()
return gof.Apply(self, [img, kern], [out]) return gof.Apply(self, [img, kern], [out])
def get_context(self, node): def get_params(self, node):
return node.inputs[0].type.context return node.inputs[0].type.context
def flops(self, inputs, outputs): def flops(self, inputs, outputs):
......
...@@ -133,9 +133,9 @@ class DnnBase(COp): ...@@ -133,9 +133,9 @@ class DnnBase(COp):
# dnn does not know about broadcasting, so we do not need to assert # dnn does not know about broadcasting, so we do not need to assert
# the input broadcasting pattern. # the input broadcasting pattern.
check_broadcast = False check_broadcast = False
context_type = gpu_context_type params_type = gpu_context_type
def get_context(self, node): def get_params(self, node):
return node.outputs[0].type.context return node.outputs[0].type.context
def __init__(self, files=None, c_func=None): def __init__(self, files=None, c_func=None):
......
...@@ -107,14 +107,14 @@ cudnnHandle_t APPLY_SPECIFIC(_handle); ...@@ -107,14 +107,14 @@ cudnnHandle_t APPLY_SPECIFIC(_handle);
#section init_code_struct #section init_code_struct
{ {
cuda_enter(CONTEXT->ctx); cuda_enter(PARAMS->ctx);
cudnnStatus_t err; cudnnStatus_t err;
APPLY_SPECIFIC(_handle) = NULL; APPLY_SPECIFIC(_handle) = NULL;
if ((err = cudnnCreate(&APPLY_SPECIFIC(_handle))) != CUDNN_STATUS_SUCCESS) { if ((err = cudnnCreate(&APPLY_SPECIFIC(_handle))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not create cuDNN handle: %s", PyErr_Format(PyExc_RuntimeError, "could not create cuDNN handle: %s",
cudnnGetErrorString(err)); cudnnGetErrorString(err));
cuda_exit(CONTEXT->ctx); cuda_exit(PARAMS->ctx);
FAIL; FAIL;
} }
cuda_exit(CONTEXT->ctx); cuda_exit(PARAMS->ctx);
} }
...@@ -101,7 +101,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise): ...@@ -101,7 +101,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
return node return node
def get_context(self, node): def get_params(self, node):
return node.inputs[0].type.context return node.inputs[0].type.context
def generate_kernel(self, node, nodename): def generate_kernel(self, node, nodename):
...@@ -173,7 +173,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise): ...@@ -173,7 +173,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
("npy_float64", "ga_double"), ("npy_float64", "ga_double"),
]: ]:
kop = kop.replace(npy, ga) kop = kop.replace(npy, ga)
return ElemwiseKernel(self.get_context(node), inps + outs, kop, return ElemwiseKernel(self.get_params(node), inps + outs, kop,
preamble=support_code) preamble=support_code)
def c_headers(self): def c_headers(self):
...@@ -222,7 +222,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise): ...@@ -222,7 +222,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
fail = sub["fail"] fail = sub["fail"]
initial_dims = ','.join('1' for i in xrange(nd)) initial_dims = ','.join('1' for i in xrange(nd))
opname = str(self.scalar_op) opname = str(self.scalar_op)
ctx = sub['context'] ctx = sub['params']
# check that all inputs have valid dimensions # check that all inputs have valid dimensions
emitted_inames = {} emitted_inames = {}
...@@ -650,7 +650,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -650,7 +650,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
ret.outputs[0].type.broadcastable, ret.outputs[0].type.broadcastable,
context_name=x.type.context_name)()]) context_name=x.type.context_name)()])
def get_context(self, node): def get_params(self, node):
return node.inputs[0].type.context return node.inputs[0].type.context
def perform(self, node, inp, out, ctx): def perform(self, node, inp, out, ctx):
...@@ -683,7 +683,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -683,7 +683,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
inp = ['fake_input_name_%d' % i for i in xrange(len(inputs))] inp = ['fake_input_name_%d' % i for i in xrange(len(inputs))]
out = ['fake_output_name_%d' % i for i in xrange(len(node.outputs))] out = ['fake_output_name_%d' % i for i in xrange(len(node.outputs))]
sub = {'fail': 'fake failure code', 'context': 'fake context'} sub = {'fail': 'fake failure code', 'params': 'fake context'}
try: try:
self.c_code(node, name, inp, out, sub) self.c_code(node, name, inp, out, sub)
...@@ -711,7 +711,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -711,7 +711,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
sio = StringIO() sio = StringIO()
fail = sub['fail'] fail = sub['fail']
ctx = sub['context'] ctx = sub['params']
# check input # check input
print(""" print("""
...@@ -2664,7 +2664,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2664,7 +2664,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
return Apply(res.op, [input], [otype()]) return Apply(res.op, [input], [otype()])
def get_context(self, node): def get_params(self, node):
return node.outputs[0].type.context return node.outputs[0].type.context
def make_thunk(self, node, storage_map, compute_map, no_recycling): def make_thunk(self, node, storage_map, compute_map, no_recycling):
...@@ -2776,7 +2776,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2776,7 +2776,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
} }
} }
""" % dict(output=output, nd_out=nd_out, fail=sub['fail'], """ % dict(output=output, nd_out=nd_out, fail=sub['fail'],
ctx=sub['context'], ctx=sub['params'],
out_type=dtype_to_typecode(node.outputs[0].type.dtype)) out_type=dtype_to_typecode(node.outputs[0].type.dtype))
else: else:
code += """ code += """
...@@ -2788,7 +2788,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2788,7 +2788,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
%(fail)s %(fail)s
} }
} }
""" % dict(output=output, fail=sub['fail'], ctx=sub['context'], """ % dict(output=output, fail=sub['fail'], ctx=sub['params'],
out_type=dtype_to_typecode(node.outputs[0].type.dtype)) out_type=dtype_to_typecode(node.outputs[0].type.dtype))
if acc_dtype != node.outputs[0].type.dtype: if acc_dtype != node.outputs[0].type.dtype:
...@@ -2796,7 +2796,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2796,7 +2796,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
tmp = pygpu_empty(%(output)s->ga.nd, %(output)s->ga.dimensions, tmp = pygpu_empty(%(output)s->ga.nd, %(output)s->ga.dimensions,
%(acc_type)s, GA_C_ORDER, %(ctx)s, Py_None); %(acc_type)s, GA_C_ORDER, %(ctx)s, Py_None);
if (!tmp) %(fail)s if (!tmp) %(fail)s
""" % dict(output=output, fail=sub['fail'], ctx=sub['context'], """ % dict(output=output, fail=sub['fail'], ctx=sub['params'],
acc_type=dtype_to_typecode(acc_dtype)) acc_type=dtype_to_typecode(acc_dtype))
else: else:
code += """ code += """
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
/* Why do we need this? */ /* Why do we need this? */
size_t dim = 2048 * 32; size_t dim = 2048 * 32;
rand_buf = pygpu_empty(1, &dim, GA_UINT, GA_C_ORDER, CONTEXT, rand_buf = pygpu_empty(1, &dim, GA_UINT, GA_C_ORDER, PARAMS,
Py_None); Py_None);
if (rand_buf == NULL) { if (rand_buf == NULL) {
FAIL; FAIL;
......
...@@ -41,7 +41,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -41,7 +41,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
dtype=ten4.type.dtype, dtype=ten4.type.dtype,
context_name=ten4.type.context_name)()]) context_name=ten4.type.context_name)()])
def get_context(self, node): def get_params(self, node):
return node.inputs[0].type.context return node.inputs[0].type.context
def c_code_cache_version(self): def c_code_cache_version(self):
...@@ -250,7 +250,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -250,7 +250,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
ten4, neib_shape, neib_step = inp ten4, neib_shape, neib_step = inp
z, = out z, = out
fail = sub['fail'] fail = sub['fail']
ctx = sub['context'] ctx = sub['params']
mode = self.mode mode = self.mode
err_check = """ err_check = """
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
......
...@@ -43,7 +43,7 @@ def ensure_float(val, name): ...@@ -43,7 +43,7 @@ def ensure_float(val, name):
class Gemm16(COp): class Gemm16(COp):
__props__ = ('relu', 'inplace') __props__ = ('relu', 'inplace')
_f16_ok = True _f16_ok = True
context_type = gpu_context_type params_type = gpu_context_type
KERN_NAMES = ('nn_128x128', 'nn_128x64', 'nn_128x32', KERN_NAMES = ('nn_128x128', 'nn_128x64', 'nn_128x32',
'nn_vec_128x128', 'nn_vec_128x64', 'nn_vec_128x32', 'nn_vec_128x128', 'nn_vec_128x64', 'nn_vec_128x32',
'tn_128x128', 'tn_128x64', 'tn_128x32', 'tn_128x128', 'tn_128x64', 'tn_128x32',
...@@ -75,7 +75,7 @@ class Gemm16(COp): ...@@ -75,7 +75,7 @@ class Gemm16(COp):
return Apply(self, [C, alpha, A, B, beta], [C.type()]) return Apply(self, [C, alpha, A, B, beta], [C.type()])
def get_context(self, node): def get_params(self, node):
return node.inputs[0].type.context return node.inputs[0].type.context
def c_headers(self): def c_headers(self):
...@@ -128,7 +128,7 @@ if (GpuKernel_init(&k_%(name)s, c->ops, c->ctx, 1, &bcode, &sz, ...@@ -128,7 +128,7 @@ if (GpuKernel_init(&k_%(name)s, c->ops, c->ctx, 1, &bcode, &sz,
codel.append("memset(&k_{0}, 0, sizeof(GpuKernel));".format(name)) codel.append("memset(&k_{0}, 0, sizeof(GpuKernel));".format(name))
codel.append("const char *bcode;") codel.append("const char *bcode;")
codel.append("size_t sz;") codel.append("size_t sz;")
codel.append("PyGpuContextObject *c = %s;" % (sub['context'],)) codel.append("PyGpuContextObject *c = %s;" % (sub['params'],))
codel.append("int types[13] = {GA_BUFFER, GA_BUFFER, GA_BUFFER, " codel.append("int types[13] = {GA_BUFFER, GA_BUFFER, GA_BUFFER, "
"GA_BUFFER, GA_INT, GA_INT, GA_INT, GA_INT, GA_INT, " "GA_BUFFER, GA_INT, GA_INT, GA_INT, GA_INT, GA_INT, "
"GA_INT, GA_FLOAT, GA_FLOAT, GA_INT};") "GA_INT, GA_FLOAT, GA_FLOAT, GA_INT};")
......
...@@ -41,7 +41,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op): ...@@ -41,7 +41,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
am = y_idx.type() am = y_idx.type()
return Apply(self, [x, b, y_idx], [nll, sm, am]) return Apply(self, [x, b, y_idx], [nll, sm, am])
def get_context(self, node): def get_params(self, node):
return node.inputs[0].type.context return node.inputs[0].type.context
def c_headers(self): def c_headers(self):
...@@ -169,7 +169,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op): ...@@ -169,7 +169,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
dtype_am = node.outputs[2].dtype dtype_am = node.outputs[2].dtype
classname = self.__class__.__name__ classname = self.__class__.__name__
fail = sub['fail'] fail = sub['fail']
ctx = sub['context'] ctx = sub['params']
k_var = "k_xent_sm_1hot_bias_%(nodename)s" % locals() k_var = "k_xent_sm_1hot_bias_%(nodename)s" % locals()
err_check = """ err_check = """
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
...@@ -322,7 +322,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op): ...@@ -322,7 +322,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
y_idx = as_gpuarray_variable(y_idx, ctx_name) y_idx = as_gpuarray_variable(y_idx, ctx_name)
return Apply(self, [dnll, sm, y_idx], [sm.type()]) return Apply(self, [dnll, sm, y_idx], [sm.type()])
def get_context(self, node): def get_params(self, node):
return node.inputs[0].type.context return node.inputs[0].type.context
def c_code_cache_version(self): def c_code_cache_version(self):
...@@ -347,7 +347,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op): ...@@ -347,7 +347,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
dnll, sm, y_idx = inp dnll, sm, y_idx = inp
dx, = out dx, = out
fail = sub['fail'] fail = sub['fail']
ctx = sub['context'] ctx = sub['params']
k_var = "kCrossEntropySoftmax1HotWithBiasDx_" + nodename k_var = "kCrossEntropySoftmax1HotWithBiasDx_" + nodename
err_check = """ err_check = """
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
...@@ -528,7 +528,7 @@ class GpuSoftmax(GpuKernelBase, Op): ...@@ -528,7 +528,7 @@ class GpuSoftmax(GpuKernelBase, Op):
x = as_gpuarray_variable(x, infer_context_name(x)) x = as_gpuarray_variable(x, infer_context_name(x))
return Apply(self, [x], [x.type()]) return Apply(self, [x], [x.type()])
def get_context(self, node): def get_params(self, node):
return node.inputs[0].type.context return node.inputs[0].type.context
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
...@@ -552,7 +552,7 @@ class GpuSoftmax(GpuKernelBase, Op): ...@@ -552,7 +552,7 @@ class GpuSoftmax(GpuKernelBase, Op):
x, = inp x, = inp
z, = out z, = out
fail = sub['fail'] fail = sub['fail']
ctx = sub['context'] ctx = sub['params']
err_check = """ err_check = """
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, fmt_str, msg); PyErr_Format(PyExc_RuntimeError, fmt_str, msg);
...@@ -727,7 +727,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op): ...@@ -727,7 +727,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
b = as_gpuarray_variable(b, ctx_name) b = as_gpuarray_variable(b, ctx_name)
return Apply(self, [x, b], [x.type()]) return Apply(self, [x, b], [x.type()])
def get_context(self, node): def get_params(self, node):
return node.inputs[0].type.context return node.inputs[0].type.context
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
...@@ -753,7 +753,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op): ...@@ -753,7 +753,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
x, b = inp x, b = inp
z, = out z, = out
fail = sub['fail'] fail = sub['fail']
ctx = sub['context'] ctx = sub['params']
err_check = """ err_check = """
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, fmt_str, msg); PyErr_Format(PyExc_RuntimeError, fmt_str, msg);
......
...@@ -202,7 +202,7 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor): ...@@ -202,7 +202,7 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
op.create_iadd_node(ret) op.create_iadd_node(ret)
return ret return ret
def get_context(self, node): def get_params(self, node):
return node.outputs[0].type.context return node.outputs[0].type.context
def create_iadd_node(self, node): def create_iadd_node(self, node):
...@@ -609,7 +609,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1): ...@@ -609,7 +609,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
return gof.Apply(self, [x_, y_, ilist_], [x_.type()]) return gof.Apply(self, [x_, y_, ilist_], [x_.type()])
def get_context(self, node): def get_params(self, node):
return node.outputs[0].type.context return node.outputs[0].type.context
def perform(self, node, inp, out, ctx): def perform(self, node, inp, out, ctx):
...@@ -626,7 +626,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1): ...@@ -626,7 +626,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
return [os.path.dirname(__file__)] return [os.path.dirname(__file__)]
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
ctx = self.get_context(node) ctx = self.get_params(node)
if ctx.kind != 'cuda': if ctx.kind != 'cuda':
raise NotImplementedError("cuda only") raise NotImplementedError("cuda only")
if (self.set_instead_of_inc or if (self.set_instead_of_inc or
......
...@@ -771,7 +771,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -771,7 +771,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
# GpuArray version # GpuArray version
_f16_ok = True _f16_ok = True
def get_context(self, node): def get_params(self, node):
return node.inputs[0].type.context return node.inputs[0].type.context
@classmethod @classmethod
...@@ -1014,7 +1014,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -1014,7 +1014,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
""" % locals() """ % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (7, self.GpuKernelBase_version) return (7,)
def guess_n_streams(size, warn=False): def guess_n_streams(size, warn=False):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论