提交 f86e273b authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Fix the Cuda transfer ops and test them. Also since the test will always fail…

Fix the Cuda transfer ops and test them. Also, since the test will always fail unless initialized in a very specific way, mark it as potentially failing so as not to alarm users.
上级 dfa8305f
......@@ -298,7 +298,7 @@ class GpuFromCuda(Op):
for (unsigned int i = 0; i < %(in)s->nd; i++) {
%(name)sdims[i] = (size_t)CudaNdarray_HOST_DIMS(%(in)s)[i];
%(name)sstr[i] = (ssize_t)CudaNdarray_HOST_STRIDES(%(in)s)[i];
%(name)sstr[i] = (ssize_t)CudaNdarray_HOST_STRIDES(%(in)s)[i]*4;
}
Py_XDECREF(%(out)s);
......@@ -311,7 +311,7 @@ class GpuFromCuda(Op):
%(name)sdata = cuda_make_buf(GpuArray_default_context()->ctx,
(CUdeviceptr)%(in)s->devdata,
(size_t)%(in)s->data_allocated);
((size_t)%(in)s->data_allocated)*4);
if (%(name)sdata == NULL) {
Py_DECREF(%(out)s);
free(%(name)sdims);
......@@ -336,7 +336,7 @@ class GpuFromCuda(Op):
'fail': sub['fail']}
def c_code_cache_version(self):
return (0,)
return (1,)
gpu_from_cuda = GpuFromCuda()
......@@ -411,33 +411,45 @@ class CudaFromGpu(Op):
def c_support_code(self):
return """
CUcontext (*cuda_get_ctx)(void *ctx) = compyte_get_extension('cuda_get_ctx');
CUdeviceptr (*cuda_get_ptr)(gpudata *g) = compyte_get_extension('cuda_get_ptr');
size_t (*cuda_get_sz)(gpudata *g) = compyte_get_extension('cuda_get_sz');
CUcontext (*cuda_get_ctx)(void *ctx);
CUdeviceptr (*cuda_get_ptr)(gpudata *g);
"""
def c_code(self, node, name, input, output, sub):
def c_init_code(self):
return ['cuda_get_ctx = (CUcontext (*)(void *ctx))compyte_get_extension("cuda_get_ctx");',
'cuda_get_ptr = (CUdeviceptr (*)(gpudata *g))compyte_get_extension("cuda_get_ptr");']
def c_code(self, node, name, inputs, outputs, sub):
return """
int err = 0, i;
int %(name)serr = 0, %(name)si;
CUcontext %(name)scur;
cuCtxGetCurrent(&%(name)scur);
if (%(name)scur != cuda_get_ctx(GpuArray_default_context()->ctx)) {
PyErr_SetString(PyExc_ValueError, "Ambient cuda context is not the same as output context.");
%(fail)s
}
Py_XDECREF(%(out)s);
%(out)s = (CudaNdarray *)CudaNdarray_new_nd(%(inp)s->nd);
%(out)s = (CudaNdarray *)CudaNdarray_new_nd(%(inp)s->ga.nd);
if (!%(out)s) {
%(fail)s
}
for (i = 0; i < %(inp)s->nd; i++) {
CudaNdarray_set_dim(%(out)s, i, %(inp)s->dimensions[i]);
CudaNdarray_set_stride(%(out)s, i, %(inp)s->strides[i]);
for (%(name)si = 0; %(name)si < %(inp)s->ga.nd; %(name)si++) {
CudaNdarray_set_dim(%(out)s, %(name)si, %(inp)s->ga.dimensions[%(name)si]);
CudaNdarray_set_stride(%(out)s, %(name)si, %(inp)s->ga.strides[%(name)si]/4);
}
err = CudaNdarray_set_device_data(%(out),
(float *)(((char *)cuda_get_ptr(%(inp)s.ga->data))+%(inp).ga.offset),
%(name)serr = CudaNdarray_set_device_data(%(out)s,
(float *)(((char *)cuda_get_ptr(%(inp)s->ga.data))+%(inp)s->ga.offset),
(PyObject *)%(inp)s);
if (err) {
if (%(name)serr) {
%(fail)s
}
""" % {'inp': inputs[0], 'out': output[0], 'fail': sub['fail']}
""" % {'name': name, 'inp': inputs[0], 'out': outputs[0],
'fail': sub['fail']}
def c_code_cache_version(self):
return (0,)
return (1,)
cuda_from_gpu = CudaFromGpu()
......
import unittest
from itertools import izip
from copy import copy, deepcopy
from nose.plugins.skip import SkipTest
import numpy
import theano
import theano.tensor as T
from theano.compile import DeepCopyOp
from theano.tensor.tests.test_basic import safe_make_node
from theano.tests.unittest_tools import SkipTest
from numpy.testing.noseclasses import KnownFailureTest
import theano.sandbox.gpuarray
if theano.sandbox.gpuarray.pygpu is None:
......@@ -33,6 +34,21 @@ else:
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpuarray')
def may_fail(msg, EClass):
    """Decorator for tests that only pass under very specific conditions.

    If the wrapped test raises an exception of class (or tuple of
    classes) `EClass`, it is converted into a KnownFailureTest carrying
    `msg`, so the test run is not reported as a hard failure.  Any
    other exception propagates unchanged.
    """
    def test_decorator(f):
        # Accept and forward arguments, and return f's result, so the
        # decorator also works on parameterized tests and generators.
        def wrapper(*args, **kwargs):
            try:
                return f(*args, **kwargs)
            # `except ... as e` is valid on Python 2.6+ and 3; catching
            # EClass directly replaces the broad `except Exception`
            # followed by an isinstance() check.
            except EClass as e:
                raise KnownFailureTest(msg, e)
        # Preserve the test's identity so nose discovers/reports it
        # under the original name.
        wrapper.__name__ = f.__name__
        wrapper.__doc__ = f.__doc__
        return wrapper
    return test_decorator
def inplace_func(inputs, outputs, mode=None, allow_input_downcast=False,
on_unused_input='raise', name=None):
if mode is None:
......@@ -231,6 +247,8 @@ def test_transfer_strided():
assert numpy.all(fv == av)
@may_fail("Op fails if both contexts are not the same and it's rare "
"that the tests will be run this way", ValueError)
def test_transfer_cuda_gpu():
import theano.sandbox.cuda as cuda_ndarray
if cuda_ndarray.cuda_available == False:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论