提交 4232a21c authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add a test for CGpuKernelBase.

上级 766ecebc
import numpy
from six.moves import xrange
import theano
from theano import tensor, config, Apply
from theano.gradient import grad_undefined
from .config import mode_with_gpu, test_ctx_name
from ..basic_ops import CGpuKernelBase
from ..type import GpuArrayType, get_context
from pygpu.gpuarray import dtype_to_typecode
class TSTGpuEye(CGpuKernelBase):
    """
    Eye-like op for the GPU, used to exercise CGpuKernelBase.

    The op takes two scalar inputs ``n`` and ``m`` and declares a single
    non-broadcastable 2-d ``GpuArrayType`` output of dtype ``self.dtype``.
    The C implementation is not in this class: the file name
    ``tstgpueye.c`` and the entry point ``APPLY_SPECIFIC(tstgpueye)`` are
    handed to ``CGpuKernelBase.__init__``.
    """
    __props__ = ('dtype', 'context_name')
    _f16_ok = True

    def __init__(self, dtype=None, context_name=None):
        # A missing dtype falls back to the configured floatX.
        self.dtype = config.floatX if dtype is None else dtype
        self.context_name = context_name
        CGpuKernelBase.__init__(self, ['tstgpueye.c'],
                                'APPLY_SPECIFIC(tstgpueye)')

    def get_params(self, node):
        """Return the context object looked up from ``self.context_name``."""
        return get_context(self.context_name)

    def c_headers(self):
        """Return the gpuarray headers required by the C code."""
        return ['<gpuarray/types.h>', '<gpuarray/kernel.h>']

    def make_node(self, n, m):
        """Build the Apply node for scalar inputs ``n`` and ``m``."""
        n = tensor.as_tensor_variable(n)
        m = tensor.as_tensor_variable(m)
        # Both inputs must be 0-d; checked in argument order.
        for scalar in (n, m):
            assert scalar.ndim == 0
        out_type = GpuArrayType(dtype=self.dtype,
                                broadcastable=(False, False),
                                context_name=self.context_name)
        return Apply(self, [n, m], [out_type()])

    def infer_shape(self, node, in_shapes):
        """Return the output shape, which is simply the two inputs."""
        first, second = node.inputs[0], node.inputs[1]
        return [[first, second]]

    def grad(self, inp, grads):
        """Return an undefined gradient for each of the two inputs."""
        undefined = []
        for position in xrange(2):
            undefined.append(grad_undefined(self, position, inp[position]))
        return undefined

    def get_op_params(self):
        """Return the ``TYPECODE`` definition derived from ``self.dtype``."""
        typecode = dtype_to_typecode(self.dtype)
        return [('TYPECODE', str(typecode))]
def test_cgpukernelbase():
    """Run TSTGpuEye for a 4 x 5 int32 output and compare with numpy.eye."""
    eye_op = TSTGpuEye(dtype='int32', context_name=test_ctx_name)
    compiled = theano.function([], eye_op(4, 5), mode=mode_with_gpu)
    result = numpy.asarray(compiled())
    expected = numpy.eye(4, 5, dtype='int32')
    assert (result == expected).all()
#section kernels
#kernel eye : *, size, size :
/*
 * Store 1 at a[i*m + i] for every i below min(n, m), i.e. on the main
 * diagonal of a buffer indexed with a row stride of m.  No other element
 * is written by this kernel.
 *
 * Each work item starts at LID_0 and advances by LDIM_0 (macros supplied
 * by the kernel framework; presumably the local index and local size).
 */
KERNEL void eye(GLOBAL_MEM DTYPE_o0 *a, ga_size n, ga_size m) {
  ga_size diag_len = m;
  ga_size pos;

  /* The diagonal is as long as the smaller of the two dimensions. */
  if (n < m)
    diag_len = n;

  for (pos = LID_0; pos < diag_len; pos += LDIM_0)
    a[pos * m + pos] = 1;
}
#section support_code_struct
/*
 * Host-side implementation of the op: allocates a 2-d GPU array of shape
 * (n, m) with pygpu_zeros and launches the k_eye kernel on it.
 *
 * n, m : host arrays; only their first element is read, giving the
 *        first and second output dimension respectively.
 * z    : output slot.  Whatever it currently holds is released and
 *        replaced by the newly allocated array.
 * ctx  : GPU context passed to pygpu_zeros for the allocation.
 *
 * Returns 0 on success and -1 on failure.  When the kernel launch fails,
 * a Python RuntimeError describing the failure is set here.  When the
 * allocation fails, no error is set here (presumably pygpu_zeros has
 * already set one -- confirm).
 */
int APPLY_SPECIFIC(tstgpueye)(PyArrayObject *n, PyArrayObject *m,
                              PyGpuArrayObject **z, PyGpuContextObject *ctx) {
  size_t dims[2] = {0, 0};
  size_t ls, gs;
  void *args[3];
  int err;

  /* Read the requested shape from the two scalar inputs. */
  dims[0] = ((DTYPE_INPUT_0 *)PyArray_DATA(n))[0];
  dims[1] = ((DTYPE_INPUT_1 *)PyArray_DATA(m))[0];

  /* Drop any previous output before allocating the new one. */
  Py_XDECREF(*z);
  *z = pygpu_zeros(2, dims,
                   TYPECODE,
                   GA_C_ORDER,
                   ctx, Py_None);
  if (*z == NULL)
    return -1;

  /* Kernel arguments, in the order declared by the eye kernel. */
  args[0] = (*z)->ga.data;
  args[1] = &dims[0];
  args[2] = &dims[1];

  /*
   * NOTE(review): assuming ls is the local size and gs the grid size,
   * every one of the 256 groups walks the whole diagonal with a single
   * work item; redundant but harmless, since all of them write the same
   * value.  Confirm against the GpuKernel_call documentation before
   * changing.
   */
  ls = 1;
  gs = 256;
  err = GpuKernel_call(&k_eye, 1, &ls, &gs, 0, args);
  if (err != GA_NO_ERROR) {
    /* Report both dimensions as "name=value" pairs. */
    PyErr_Format(PyExc_RuntimeError,
                 "gpuarray error: kEye: %s. n=%lu, m=%lu.",
                 GpuKernel_error(&k_eye, err),
                 (unsigned long)dims[0], (unsigned long)dims[1]);
    return -1;
  }
  return 0;
}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论