Commit f5ceb43d authored by Arnaud Bergeron

Some fixes from tests.

Parent 175d3b15
......@@ -243,10 +243,13 @@ class GpuKernelBase(object):
cleanups = '\n'.join(self._generate_kernel_cleanup(k) for k in kernels)
return cleanups
def _GpuKernelBase_version(self):
return (3,)
# This is a shorthand for if your op only has a fixed version
# You can reimplement it, but make sure to call kernel_version()
def c_code_cache_version_apply(self, node):
return (self.c_code_cache_version(), self.kernel_version(node))
GpuKernelBase_version = property(_GpuKernelBase_version)
def kernel_version(self, node):
return (3, node.get_context().bin_id)
class HostFromGpu(Op):
......@@ -1044,4 +1047,4 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) {
return s
def c_code_cache_version(self):
return (5, self.GpuKernelBase_version)
return (5,)
......@@ -1408,14 +1408,16 @@ def local_softmax_dnn(node):
@local_optimizer([GpuElemwise])
def local_log_softmax_dnn(node):
# This looks for GpuDnnSoftmax so we know that we have cudnn.
if version() < 3000:
# No log-softmax before cudnn v3
return
if (isinstance(node.op, GpuElemwise) and
isinstance(node.op.scalar_op, Log) and
node.inputs[0].owner and
isinstance(node.inputs[0].owner.op, GpuDnnSoftmax) and
len(node.inputs[0].clients) == 1):
# Don't move this call to version outside the condition, it
# needs to be here.
if version() < 3000:
# No log-softmax before cudnn v3
return
softmax_node = node.inputs[0].owner
new_softmax = GpuDnnSoftmax('log', softmax_node.op.mode)
return [new_softmax(softmax_node.inputs[0])]
......
......@@ -776,7 +776,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
Py_XDECREF(%(z)s);
%(z)s = pygpu_empty(%(nd_out)s, new_dims,
%(out_typecode)s, GA_C_ORDER,
pygpu_default_context(), Py_None);
%(ctx)s, Py_None);
if (NULL == %(z)s)
{
PyErr_Format(PyExc_RuntimeError, "Failed to allocate output");
......@@ -1896,7 +1896,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
""" % locals(), file=sio)
def c_code_cache_version_apply(self, node):
version = [17] # the version corresponding to the c code in this Op
version = [18] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend...
scalar_node = Apply(
......@@ -1906,6 +1906,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
version.extend(self.scalar_op.c_code_cache_version_apply(scalar_node))
for i in node.inputs + node.outputs:
version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version())
version.extend(self.kernel_version(node))
if all(version):
return tuple(version)
else:
......
......@@ -14,7 +14,8 @@ PyGpuArrayObject *rand_buf;
int gemm16(PyGpuArrayObject *C, float alpha,
PyGpuArrayObject *A, PyGpuArrayObject *B,
float beta, PyGpuArrayObject **out) {
float beta, PyGpuArrayObject **out,
PyGpuContextObject *c) {
PyGpuArrayObject *_A = NULL;
PyGpuArrayObject *_B = NULL;
GpuKernel *gk;
......
......@@ -145,7 +145,7 @@ if (GpuKernel_init(&k_%(name)s, c->ops, c->ctx, 1, &bcode, &sz,
@opt.register_opt()
@opt.op_lifter([tensor.Dot])
def local_dot_to_gemm16(node):
def local_dot_to_gemm16(node, ctx_name):
if nerv is None:
return
A = node.inputs[0]
......@@ -153,7 +153,6 @@ def local_dot_to_gemm16(node):
if (A.ndim == 2 and B.ndim == 2 and
A.dtype == 'float16' and B.dtype == 'float16'):
fgraph = node.inputs[0].fgraph
ctx_name = infer_context_name(A, B)
C = GpuAllocEmpty(dtype='float16', context_name=ctx_name)(
shape_i(A, 0, fgraph), shape_i(B, 1, fgraph))
return Gemm16()(C, 1.0, A, B, 0.0)
......
......@@ -969,7 +969,7 @@ def local_scan_to_gpua(node, context_name):
context_name=context_name)
nw_op = scan_op.Scan(scan_ins, scan_outs, info,
typebuild=typebuild).make_node(*nw_ins)
typeConstructor=typebuild).make_node(*nw_ins)
return nw_op.outputs
......
......@@ -320,7 +320,8 @@ def inplace_allocempty(op, idx):
if (alloc.owner and
isinstance(alloc.owner.op, GpuAllocEmpty) and
len(alloc.clients) > 1):
alloc_op = GpuAllocEmpty(alloc.owner.op.dtype)
alloc_op = GpuAllocEmpty(alloc.owner.op.dtype,
alloc.owner.op.context_name)
inputs[idx] = alloc_op(*alloc.owner.inputs)
return maker(node, inputs)
return opt
......
......@@ -57,7 +57,8 @@ def rand_gpuarray(*shape, **kwargs):
cls = kwargs.pop('cls', None)
if len(kwargs) != 0:
raise TypeError('Unexpected argument %s', list(kwargs.keys())[0])
return gpuarray.array(r, dtype=dtype, cls=cls)
return gpuarray.array(r, dtype=dtype, cls=cls,
context=get_context(test_ctx_name))
def makeTester(name, op, gpu_op, cases, checks=None, mode_gpu=mode_with_gpu,
......
......@@ -14,8 +14,8 @@ from theano import tensor
from theano.tests.unittest_tools import seed_rng
# We let that import do the init of the back-end if needed.
from .config import mode_with_gpu
from ..type import GpuArrayType
from .config import mode_with_gpu, test_ctx_name
from ..type import GpuArrayType, get_context
from ..conv import GpuConv
from theano.sandbox.gpuarray import dnn
......@@ -28,7 +28,7 @@ try:
except ImportError:
pass
gftensor4 = GpuArrayType('float32', [False] * 4)
gftensor4 = GpuArrayType('float32', [False] * 4, context_name=test_ctx_name)
def py_conv_valid_numpy(img, kern):
......@@ -135,8 +135,8 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
numpy.prod(ishape)).reshape(ishape), dtype='float32') + 1
npy_kern = -(theano._asarray(numpy.arange(
numpy.prod(kshape)).reshape(kshape), dtype='float32') + 1)
img = pygpu.array(npy_img)
kern = pygpu.array(npy_kern)
img = pygpu.array(npy_img, context=get_context(test_ctx_name))
kern = pygpu.array(npy_kern, context=get_context(test_ctx_name))
# we take the stride after the transfer as we make c_contiguous
# data on the GPU.
......
......@@ -116,7 +116,7 @@ class test_GpuCAReduceCPY(test_elemwise.test_CAReduce):
def test_infer_shape(self):
for dtype in self.dtypes:
super(test_GpuCAReduceCPY, self).test_infer_shape(self, dtype)
super(test_GpuCAReduceCPY, self).test_infer_shape(dtype)
class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
......@@ -195,7 +195,7 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
op = GpuCAReduceCuda
reds = [scalar.add, scalar.mul,
scalar.maximum, scalar.minimum]
pre_scalar_op = scalar.sqr
pre_scalar_op = None
def test_perform(self):
return
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment