提交 a3102cd3 authored 作者: Vincent Dumoulin's avatar Vincent Dumoulin

Convert output creation to use pygpu_empty

上级 a034952e
...@@ -6,6 +6,11 @@ from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler ...@@ -6,6 +6,11 @@ from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
from theano.sandbox.cuda.kernel_codegen import (nvcc_kernel, from theano.sandbox.cuda.kernel_codegen import (nvcc_kernel,
inline_softmax, inline_softmax,
inline_softmax_fixed_shared) inline_softmax_fixed_shared)
try:
import pygpu
from pygpu import gpuarray, elemwise
except ImportError:
pass
class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
...@@ -90,10 +95,15 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): ...@@ -90,10 +95,15 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
} }
} }
CUdeviceptr (*cuda_get_ptr)(gpudata *g);
""" """
def c_init_code(self):
return ['cuda_get_ptr = (CUdeviceptr (*)(gpudata *g))compyte_get_extension("cuda_get_ptr");']
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
dtype = self.dtype dtype = self.dtype
typecode = pygpu.gpuarray.dtype_to_typecode(dtype)
x, b, y_idx = inp x, b, y_idx = inp
nll, sm, am = out nll, sm, am = out
classname = self.__class__.__name__ classname = self.__class__.__name__
...@@ -133,11 +143,12 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): ...@@ -133,11 +143,12 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
PyGpuArray_DIMS(%(y_idx)s)[0])) PyGpuArray_DIMS(%(y_idx)s)[0]))
{ {
Py_XDECREF(%(nll)s); Py_XDECREF(%(nll)s);
%(nll)s = (CudaNdarray*)CudaNdarray_NewDims(1, %(nll)s = pygpu_empty(1, PyGpuArray_DIMS(%(y_idx)s),
PyGpuArray_DIMS(%(y_idx)s)); %(typecode)s,
if(!%(nll)s) GA_C_ORDER,
{ pygpu_default_context(), Py_None);
%(fail)s; if (!%(nll)s) {
%(fail)s
} }
} }
if ((NULL == %(sm)s) if ((NULL == %(sm)s)
...@@ -147,8 +158,10 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): ...@@ -147,8 +158,10 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
PyGpuArray_DIMS(%(x)s)[1])) PyGpuArray_DIMS(%(x)s)[1]))
{ {
Py_XDECREF(%(sm)s); Py_XDECREF(%(sm)s);
%(sm)s = (CudaNdarray*) CudaNdarray_NewDims(2, %(sm)s = pygpu_empty(2, PyGpuArray_DIMS(%(x)s),
PyGpuArray_DIMS(%(x)s)); %(typecode)s,
GA_C_ORDER,
pygpu_default_context(), Py_None);
if(!%(sm)s) if(!%(sm)s)
{ {
PyErr_SetString(PyExc_MemoryError, PyErr_SetString(PyExc_MemoryError,
...@@ -162,8 +175,10 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): ...@@ -162,8 +175,10 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
PyGpuArray_DIMS(%(y_idx)s)[0])) PyGpuArray_DIMS(%(y_idx)s)[0]))
{ {
Py_XDECREF(%(am)s); Py_XDECREF(%(am)s);
%(am)s = (CudaNdarray*) CudaNdarray_NewDims(1, %(am)s = pygpu_empty(1, PyGpuArray_DIMS(%(y_idx)s),
PyGpuArray_DIMS(%(y_idx)s)); %(typecode)s,
GA_C_ORDER,
pygpu_default_context(), Py_None);
if(!%(am)s) if(!%(am)s)
{ {
PyErr_SetString(PyExc_MemoryError, PyErr_SetString(PyExc_MemoryError,
...@@ -180,22 +195,29 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): ...@@ -180,22 +195,29 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
int n_threads = 1; int n_threads = 1;
int n_shared_bytes = 0; //n_threads * sizeof(%(dtype)s); int n_shared_bytes = 0; //n_threads * sizeof(%(dtype)s);
k_xent_sm_1hot_bias<<<n_blocks, n_threads, n_shared_bytes>>>( k_xent_sm_1hot_bias<<<n_blocks, n_threads, n_shared_bytes>>>(
PyGpuArray_DIMS(%(x)s)[0], PyGpuArray_DIMS(%(x)s)[0],
PyGpuArray_DIMS(%(x)s)[1], PyGpuArray_DIMS(%(x)s)[1],
PyArray_DATA(%(x)s), (dtype_%(x)s*)(((char *)cuda_get_ptr(%(x)s->ga.data)) +
%(x)s->ga.offset);
PyGpuArray_STRIDES(%(x)s)[0], PyGpuArray_STRIDES(%(x)s)[0],
PyGpuArray_STRIDES(%(x)s)[1], PyGpuArray_STRIDES(%(x)s)[1],
PyArray_DATA(%(b)s), (dtype_%(b)s*)(((char *)cuda_get_ptr(%(b)s->ga.data)) +
%(b)s->ga.offset);
PyGpuArray_STRIDES(%(b)s)[0], PyGpuArray_STRIDES(%(b)s)[0],
PyArray_DATA(%(y_idx)s), (dtype_%(y_idx)s*)(((char *)cuda_get_ptr(%(y_idx)s->ga.data)) +
%(y_idx)s->ga.offset);
PyGpuArray_STRIDES(%(y_idx)s)[0], PyGpuArray_STRIDES(%(y_idx)s)[0],
PyArray_DATA(%(nll)s), (dtype_%(nll)s*)(((char *)cuda_get_ptr(%(nll)s->ga.data)) +
%(nll)s->ga.offset);
PyGpuArray_STRIDES(%(nll)s)[0], PyGpuArray_STRIDES(%(nll)s)[0],
PyArray_DATA(%(sm)s), (dtype_%(sm)s*)(((char *)cuda_get_ptr(%(sm)s->ga.data)) +
%(sm)s->ga.offset);
PyGpuArray_STRIDES(%(sm)s)[0], PyGpuArray_STRIDES(%(sm)s)[0],
PyGpuArray_STRIDES(%(sm)s)[1], PyGpuArray_STRIDES(%(sm)s)[1],
PyArray_DATA(%(am)s), (dtype_%(am)s*)(((char *)cuda_get_ptr(%(am)s->ga.data)) +
%(am)s->ga.offset);
PyGpuArray_STRIDES(%(am)s)[0]); PyGpuArray_STRIDES(%(am)s)[0]);
CNDA_THREAD_SYNC; CNDA_THREAD_SYNC;
cudaError_t err = cudaGetLastError(); cudaError_t err = cudaGetLastError();
...@@ -292,14 +314,12 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op): ...@@ -292,14 +314,12 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
PyGpuArray_DIMS(%(sm)s)[1])) PyGpuArray_DIMS(%(sm)s)[1]))
{ {
Py_XDECREF(%(dx)s); Py_XDECREF(%(dx)s);
%(dx)s = (CudaNdarray*)CudaNdarray_New(); %(dx)s = pygpu_empty(2, PyGpuArray_DIMS(%(sm)s),
if ((NULL == %(dx)s) %(typecode)s,
|| CudaNdarray_alloc_contiguous(%(dx)s, 2, GA_C_ORDER,
PyGpuArray_DIMS(%(sm)s))) pygpu_default_context(), Py_None);
{ if (!%(dx)s) {
Py_XDECREF(%(dx)s); %(fail)s
%(dx)s = NULL;
%(fail)s;
} }
} }
{ {
...@@ -312,17 +332,21 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op): ...@@ -312,17 +332,21 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
PyGpuArray_DIMS(%(dx)s)[0], PyGpuArray_DIMS(%(dx)s)[0],
PyGpuArray_DIMS(%(dx)s)[1], PyGpuArray_DIMS(%(dx)s)[1],
PyArray_DATA(%(dnll)s), (dtype_%(dnll)s*)(((char *)cuda_get_ptr(%(dnll)s->ga.data)) +
%(dnll)s->ga.offset);
PyGpuArray_STRIDES(%(dnll)s)[0], PyGpuArray_STRIDES(%(dnll)s)[0],
PyArray_DATA(%(sm)s), (dtype_%(sm)s*)(((char *)cuda_get_ptr(%(sm)s->ga.data)) +
%(sm)s->ga.offset);
PyGpuArray_STRIDES(%(sm)s)[0], PyGpuArray_STRIDES(%(sm)s)[0],
PyGpuArray_STRIDES(%(sm)s)[1], PyGpuArray_STRIDES(%(sm)s)[1],
PyArray_DATA(%(y_idx)s), (dtype_%(y_idx)s*)(((char *)cuda_get_ptr(%(y_idx)s->ga.data)) +
%(y_idx)s->ga.offset);
PyGpuArray_STRIDES(%(y_idx)s)[0], PyGpuArray_STRIDES(%(y_idx)s)[0],
PyArray_DATA(%(dx)s), (dtype_%(dx)s*)(((char *)cuda_get_ptr(%(dx)s->ga.data)) +
%(dx)s->ga.offset);
PyGpuArray_STRIDES(%(dx)s)[0], PyGpuArray_STRIDES(%(dx)s)[0],
PyGpuArray_STRIDES(%(dx)s)[1] PyGpuArray_STRIDES(%(dx)s)[1]
); );
...@@ -375,8 +399,13 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op): ...@@ -375,8 +399,13 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
} }
} }
} }
CUdeviceptr (*cuda_get_ptr)(gpudata *g);
""" % locals() """ % locals()
def c_init_code(self):
return ['cuda_get_ptr = (CUdeviceptr (*)(gpudata *g))compyte_get_extension("cuda_get_ptr");']
gpu_crossentropy_softmax_1hot_with_bias_dx = GpuCrossentropySoftmax1HotWithBiasDx() gpu_crossentropy_softmax_1hot_with_bias_dx = GpuCrossentropySoftmax1HotWithBiasDx()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论