提交 9b7d22e7，作者：vdumoulin

Merge pull request #2 from nouiz/vdumoulin-new_backend

Fix the final stuff.
...@@ -122,6 +122,12 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): ...@@ -122,6 +122,12 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
itemsize_am = numpy.dtype(node.outputs[2].dtype).itemsize itemsize_am = numpy.dtype(node.outputs[2].dtype).itemsize
x, b, y_idx = inp x, b, y_idx = inp
nll, sm, am = out nll, sm, am = out
dtype_x = node.inputs[0].dtype
dtype_b = node.inputs[1].dtype
dtype_y_idx = node.inputs[2].dtype
dtype_nll = node.outputs[0].dtype
dtype_sm = node.outputs[1].dtype
dtype_am = node.outputs[2].dtype
classname = self.__class__.__name__ classname = self.__class__.__name__
fail = sub['fail'] fail = sub['fail']
sio = StringIO() sio = StringIO()
...@@ -214,24 +220,24 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): ...@@ -214,24 +220,24 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
k_xent_sm_1hot_bias_%(nodename)s<<<n_blocks, n_threads, n_shared_bytes>>>( k_xent_sm_1hot_bias_%(nodename)s<<<n_blocks, n_threads, n_shared_bytes>>>(
PyGpuArray_DIMS(%(x)s)[0], PyGpuArray_DIMS(%(x)s)[0],
PyGpuArray_DIMS(%(x)s)[1], PyGpuArray_DIMS(%(x)s)[1],
(dtype_%(x)s*)(((char *)cuda_get_ptr(%(x)s->ga.data)) + (npy_%(dtype_x)s*)(((char *)cuda_get_ptr(%(x)s->ga.data)) +
%(x)s->ga.offset), %(x)s->ga.offset),
PyGpuArray_STRIDES(%(x)s)[0] / %(itemsize_x)s, PyGpuArray_STRIDES(%(x)s)[0] / %(itemsize_x)s,
PyGpuArray_STRIDES(%(x)s)[1] / %(itemsize_x)s, PyGpuArray_STRIDES(%(x)s)[1] / %(itemsize_x)s,
(dtype_%(b)s*)(((char *)cuda_get_ptr(%(b)s->ga.data)) + (npy_%(dtype_b)s*)(((char *)cuda_get_ptr(%(b)s->ga.data)) +
%(b)s->ga.offset), %(b)s->ga.offset),
PyGpuArray_STRIDES(%(b)s)[0] / %(itemsize_b)s, PyGpuArray_STRIDES(%(b)s)[0] / %(itemsize_b)s,
(dtype_%(y_idx)s*)(((char *)cuda_get_ptr(%(y_idx)s->ga.data)) + (npy_%(dtype_y_idx)s*)(((char *)cuda_get_ptr(%(y_idx)s->ga.data)) +
%(y_idx)s->ga.offset), %(y_idx)s->ga.offset),
PyGpuArray_STRIDES(%(y_idx)s)[0] / %(itemsize_y_idx)s, PyGpuArray_STRIDES(%(y_idx)s)[0] / %(itemsize_y_idx)s,
(dtype_%(nll)s*)(((char *)cuda_get_ptr(%(nll)s->ga.data)) + (npy_%(dtype_nll)s*)(((char *)cuda_get_ptr(%(nll)s->ga.data)) +
%(nll)s->ga.offset), %(nll)s->ga.offset),
PyGpuArray_STRIDES(%(nll)s)[0] / %(itemsize_nll)s, PyGpuArray_STRIDES(%(nll)s)[0] / %(itemsize_nll)s,
(dtype_%(sm)s*)(((char *)cuda_get_ptr(%(sm)s->ga.data)) + (npy_%(dtype_sm)s*)(((char *)cuda_get_ptr(%(sm)s->ga.data)) +
%(sm)s->ga.offset), %(sm)s->ga.offset),
PyGpuArray_STRIDES(%(sm)s)[0] / %(itemsize_sm)s, PyGpuArray_STRIDES(%(sm)s)[0] / %(itemsize_sm)s,
PyGpuArray_STRIDES(%(sm)s)[1] / %(itemsize_sm)s, PyGpuArray_STRIDES(%(sm)s)[1] / %(itemsize_sm)s,
(dtype_%(am)s*)(((char *)cuda_get_ptr(%(am)s->ga.data)) + (npy_%(dtype_am)s*)(((char *)cuda_get_ptr(%(am)s->ga.data)) +
%(am)s->ga.offset), %(am)s->ga.offset),
PyGpuArray_STRIDES(%(am)s)[0] / %(itemsize_am)s); PyGpuArray_STRIDES(%(am)s)[0] / %(itemsize_am)s);
cudaError_t err = cudaGetLastError(); cudaError_t err = cudaGetLastError();
...@@ -302,6 +308,10 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op): ...@@ -302,6 +308,10 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
itemsize_sm = numpy.dtype(node.inputs[1].dtype).itemsize itemsize_sm = numpy.dtype(node.inputs[1].dtype).itemsize
itemsize_y_idx = numpy.dtype(node.inputs[2].dtype).itemsize itemsize_y_idx = numpy.dtype(node.inputs[2].dtype).itemsize
itemsize_dx = numpy.dtype(node.outputs[0].dtype).itemsize itemsize_dx = numpy.dtype(node.outputs[0].dtype).itemsize
dtype_dnll = node.inputs[0].dtype
dtype_sm = node.inputs[1].dtype
dtype_y_idx = node.inputs[2].dtype
dtype_dx = node.outputs[0].dtype
dnll, sm, y_idx = inp dnll, sm, y_idx = inp
dx, = out dx, = out
fail = sub['fail'] fail = sub['fail']
...@@ -353,20 +363,20 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op): ...@@ -353,20 +363,20 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
PyGpuArray_DIMS(%(dx)s)[0], PyGpuArray_DIMS(%(dx)s)[0],
PyGpuArray_DIMS(%(dx)s)[1], PyGpuArray_DIMS(%(dx)s)[1],
(dtype_%(dnll)s*)(((char *)cuda_get_ptr(%(dnll)s->ga.data)) + (npy_%(dtype_dnll)s*)(((char *)cuda_get_ptr(%(dnll)s->ga.data)) +
%(dnll)s->ga.offset), %(dnll)s->ga.offset),
PyGpuArray_STRIDES(%(dnll)s)[0] / %(itemsize_dnll)s, PyGpuArray_STRIDES(%(dnll)s)[0] / %(itemsize_dnll)s,
(dtype_%(sm)s*)(((char *)cuda_get_ptr(%(sm)s->ga.data)) + (npy_%(dtype_sm)s*)(((char *)cuda_get_ptr(%(sm)s->ga.data)) +
%(sm)s->ga.offset), %(sm)s->ga.offset),
PyGpuArray_STRIDES(%(sm)s)[0] / %(itemsize_sm)s, PyGpuArray_STRIDES(%(sm)s)[0] / %(itemsize_sm)s,
PyGpuArray_STRIDES(%(sm)s)[1] / %(itemsize_sm)s, PyGpuArray_STRIDES(%(sm)s)[1] / %(itemsize_sm)s,
(dtype_%(y_idx)s*)(((char *)cuda_get_ptr(%(y_idx)s->ga.data)) + (npy_%(dtype_y_idx)s*)(((char *)cuda_get_ptr(%(y_idx)s->ga.data)) +
%(y_idx)s->ga.offset), %(y_idx)s->ga.offset),
PyGpuArray_STRIDES(%(y_idx)s)[0] / %(itemsize_y_idx)s, PyGpuArray_STRIDES(%(y_idx)s)[0] / %(itemsize_y_idx)s,
(dtype_%(dx)s*)(((char *)cuda_get_ptr(%(dx)s->ga.data)) + (npy_%(dtype_dx)s*)(((char *)cuda_get_ptr(%(dx)s->ga.data)) +
%(dx)s->ga.offset), %(dx)s->ga.offset),
PyGpuArray_STRIDES(%(dx)s)[0] / %(itemsize_dx)s, PyGpuArray_STRIDES(%(dx)s)[0] / %(itemsize_dx)s,
PyGpuArray_STRIDES(%(dx)s)[1] / %(itemsize_dx)s PyGpuArray_STRIDES(%(dx)s)[1] / %(itemsize_dx)s
......
...@@ -11,16 +11,9 @@ from theano.sandbox import gpuarray ...@@ -11,16 +11,9 @@ from theano.sandbox import gpuarray
if theano.sandbox.gpuarray.pygpu is None: if theano.sandbox.gpuarray.pygpu is None:
raise SkipTest("pygpu not installed") raise SkipTest("pygpu not installed")
import theano.sandbox.cuda as cuda_ndarray # We let that import do the init of the back-end if needed.
if cuda_ndarray.cuda_available and not theano.sandbox.gpuarray.pygpu_activated: from theano.sandbox.gpuarray.tests.test_basic_ops import (mode_with_gpu,
if not cuda_ndarray.use.device_number: mode_without_gpu)
#We should not enable all the use like the flag device=gpu,
#as many tests don't work in that setup.
cuda_ndarray.use('gpu',
default_to_move_computation_to_gpu=False,
move_shared_float32_to_gpu=False,
enable_cuda=False)
gpuarray.init_dev('cuda')
if not gpuarray.pygpu_activated: if not gpuarray.pygpu_activated:
raise SkipTest("pygpu disabled") raise SkipTest("pygpu disabled")
...@@ -29,13 +22,6 @@ from theano.sandbox.gpuarray.nnet import ( ...@@ -29,13 +22,6 @@ from theano.sandbox.gpuarray.nnet import (
GpuCrossentropySoftmaxArgmax1HotWithBias, GpuCrossentropySoftmaxArgmax1HotWithBias,
GpuCrossentropySoftmax1HotWithBiasDx) GpuCrossentropySoftmax1HotWithBiasDx)
if theano.config.mode == 'FAST_COMPILE':
mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpuarray').excluding('gpu')
mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpuarray')
else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpuarray').excluding('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpuarray')
def test_GpuCrossentropySoftmaxArgmax1HotWithBias(): def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
""" """
......
...@@ -138,12 +138,8 @@ class GpuArrayType(Type): ...@@ -138,12 +138,8 @@ class GpuArrayType(Type):
return numpy.dtype(self.dtype).itemsize return numpy.dtype(self.dtype).itemsize
def c_declare(self, name, sub): def c_declare(self, name, sub):
dtype = theano.tensor.TensorType(
dtype=self.dtype,
broadcastable=self.broadcastable).dtype_specs()[1]
return """ return """
PyGpuArrayObject *%(name)s; PyGpuArrayObject *%(name)s;
typedef %(dtype)s dtype_%(name)s;
""" % locals() """ % locals()
def c_init(self, name, sub): def c_init(self, name, sub):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请注册或者登录后发表评论