Add gpuarray extension to obtain CUDA stream from context

上级 a5bf423a
...@@ -83,7 +83,7 @@ class GpuConnectionistTemporalClassification(gof.COp): ...@@ -83,7 +83,7 @@ class GpuConnectionistTemporalClassification(gof.COp):
def c_headers(self): def c_headers(self):
return ['ctc.h', 'numpy_compat.h', 'gpuarray_helper.h', 'gpuarray/types.h', return ['ctc.h', 'numpy_compat.h', 'gpuarray_helper.h', 'gpuarray/types.h',
'gpuarray_api.h', 'gpuarray/array.h', 'gpuarray/util.h'] 'gpuarray_api.h', 'gpuarray/array.h', 'gpuarray/util.h', 'gpuarray/extension.h']
def get_params(self, node): def get_params(self, node):
return node.inputs[0].type.context return node.inputs[0].type.context
......
...@@ -8,11 +8,14 @@ typedef struct ctc_context { ...@@ -8,11 +8,14 @@ typedef struct ctc_context {
int * label_lengths; int * label_lengths;
} ctc_context_t; } ctc_context_t;
void ctc_context_init(ctc_context_t * context) void ctc_context_init(ctc_context_t * context, PyGpuContextObject * gpu_context)
{ {
memset(&(context->options), 0, sizeof(struct ctcOptions)); memset(&(context->options), 0, sizeof(struct ctcOptions));
context->options.loc = CTC_GPU; context->options.loc = CTC_GPU;
context->options.stream = 0;
// Get CUDA function pointer to obtain stream
CUstream (*getstream_func_ptr)(void *) = (CUstream (*)(void *)) gpuarray_get_extension( "cuda_get_stream" );
context->options.stream = getstream_func_ptr(gpu_context->ctx);
context->workspace = NULL; context->workspace = NULL;
context->input_lengths = NULL; context->input_lengths = NULL;
...@@ -108,11 +111,11 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations, ...@@ -108,11 +111,11 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations,
PyArrayObject * in_input_lengths, PyArrayObject * in_input_lengths,
PyGpuArrayObject ** out_costs, PyGpuArrayObject ** out_costs,
PyGpuArrayObject ** out_gradients, PyGpuArrayObject ** out_gradients,
PyGpuContextObject * ctx) PyGpuContextObject * gpu_context)
{ {
ctc_context_t ctc_object; ctc_context_t ctc_object;
ctc_context_t * context = &ctc_object; ctc_context_t * context = &ctc_object;
ctc_context_init( context ); ctc_context_init( context, gpu_context );
float * activations = NULL; float * activations = NULL;
switch (in_activations->ga.typecode) switch (in_activations->ga.typecode)
...@@ -161,7 +164,7 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations, ...@@ -161,7 +164,7 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations,
Py_XDECREF( *out_costs ); Py_XDECREF( *out_costs );
*out_costs = pygpu_empty( 1, &cost_size, GA_FLOAT, GA_C_ORDER, *out_costs = pygpu_empty( 1, &cost_size, GA_FLOAT, GA_C_ORDER,
ctx, Py_None ); gpu_context, Py_None );
if ( NULL == *out_costs ) if ( NULL == *out_costs )
{ {
...@@ -199,7 +202,7 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations, ...@@ -199,7 +202,7 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations,
const size_t * activation_dims = PyGpuArray_DIMS( in_activations ); const size_t * activation_dims = PyGpuArray_DIMS( in_activations );
*out_gradients = pygpu_zeros( 3, activation_dims, GA_FLOAT, GA_C_ORDER, *out_gradients = pygpu_zeros( 3, activation_dims, GA_FLOAT, GA_C_ORDER,
ctx, Py_None ); gpu_context, Py_None );
if ( NULL == *out_gradients ) if ( NULL == *out_gradients )
{ {
...@@ -240,7 +243,7 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations, ...@@ -240,7 +243,7 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations,
} }
context->workspace = pygpu_empty(1, &gpu_workspace_size, GA_BYTE, context->workspace = pygpu_empty(1, &gpu_workspace_size, GA_BYTE,
GA_C_ORDER, ctx, Py_None ); GA_C_ORDER, gpu_context, Py_None );
if ( NULL == context->workspace ) if ( NULL == context->workspace )
{ {
...@@ -271,12 +274,12 @@ int APPLY_SPECIFIC(ctc_cost_gpu_no_grad)(PyGpuArrayObject * in_activations, ...@@ -271,12 +274,12 @@ int APPLY_SPECIFIC(ctc_cost_gpu_no_grad)(PyGpuArrayObject * in_activations,
PyArrayObject * in_labels, PyArrayObject * in_labels,
PyArrayObject * in_input_lengths, PyArrayObject * in_input_lengths,
PyGpuArrayObject ** out_costs, PyGpuArrayObject ** out_costs,
PyGpuContextObject * ctx) PyGpuContextObject * gpu_context)
{ {
return APPLY_SPECIFIC(ctc_cost_gpu)(in_activations, return APPLY_SPECIFIC(ctc_cost_gpu)(in_activations,
in_labels, in_labels,
in_input_lengths, in_input_lengths,
out_costs, out_costs,
NULL, NULL,
ctx); gpu_context);
} }
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论