提交 2d77ade5 作者: Arnaud Bergeron

Reuse output when possible for GpuDot22.

上级 a73b9210
import os.path
from theano import Op, Apply, config
from theano.compile import optdb
from theano.gof import local_optimizer, LocalOptGroup
from theano.tensor.blas import Dot22, Gemv, Gemm, Ger
from theano.tensor.opt import in2out
from .basic_ops import HideC, as_gpuarray_variable
try:
......@@ -239,12 +245,8 @@ class GpuDot22(BlasOp, Dot22):
dims[0] = PyGpuArray_DIMS(%(A)s)[0];
dims[1] = PyGpuArray_DIMS(%(B)s)[1];
Py_XDECREF(%(out)s);
%(out)s = pygpu_empty(2, dims,
%(typecode)s,
GA_C_ORDER,
pygpu_default_context(), Py_None);
if (!%(out)s) {
if (theano_prep_output(&%(out)s, 2, dims, %(typecode)s, GA_C_ORDER,
pygpu_default_context()))
%(fail)s
}
......@@ -265,17 +267,16 @@ class GpuDot22(BlasOp, Dot22):
def c_code_cache_version(self):
    """Return the C code cache version tuple for this op, currently ``(2,)``."""
    version = (2,)
    return version
def c_header_dirs(self):
    """Return the include directories needed to compile this op's C code.

    Takes the list returned by the parent class's ``c_header_dirs`` and
    appends the directory containing this module, so that headers shipped
    next to it (presumably ``gpuarray_helper.h`` -- confirm its location)
    can be found by the C compiler.

    The method was previously spelled ``c_header_dir``; that name does not
    match the ``c_header_dirs`` hook it delegates to on the parent class,
    so the extra directory would never have been picked up.
    """
    ret = super(GpuDot22, self).c_header_dirs()
    return ret + [os.path.dirname(__file__)]


# Keep the old (misspelled) name available for any existing caller.
c_header_dir = c_header_dirs
def c_headers(self):
    """Return the headers to include in this op's generated C code.

    Extends the parent class's header list with ``<numpy_compat.h>`` and
    the local ``"gpuarray_helper.h"``. The latter declares
    ``theano_prep_output``, which the op's C code calls.
    """
    ret = super(GpuDot22, self).c_headers()
    # A stale `return ret + ['<numpy_compat.h>']` used to sit above this
    # line, which made the helper header unreachable.
    return ret + ['<numpy_compat.h>', '"gpuarray_helper.h"']
gpu_dot22 = GpuDot22()
from theano.compile import optdb
from theano.gof import local_optimizer, LocalOptGroup
from theano.tensor.opt import in2out
@local_optimizer([gpugemv_no_inplace], inplace=True)
def local_inplace_gpuagemv(node):
if node.op == gpugemv_no_inplace:
......
#ifndef THEANO_GPUARRAY_HELPER
#define THEANO_GPUARRAY_HELPER
#include <string.h>
#include <pygpu_api.h>
static int theano_size_check(PyGpuArray *a, unsigned int nd,
const size_t *dims, int typecode) {
return (a->ga.nd == nd && a->ga.typecode == typecode &&
memcmp(a->dims, dims, nd * sizeof(size_t)) == 0);
}
/*
 * Make sure *out is an array with the requested shape and type, reusing
 * the current one when possible.
 *
 * out      - in/out pointer to the output array; may point to NULL
 * nd       - requested number of dimensions
 * dims     - requested size of each dimension (nd entries)
 * typecode - requested element typecode
 * ord      - memory order passed to pygpu_empty for a fresh allocation
 * c        - context passed to pygpu_empty for a fresh allocation
 *
 * If *out is non-NULL and already matches (see theano_size_check) it is
 * left untouched.  Otherwise the old reference is dropped and a new array
 * is allocated with pygpu_empty.
 *
 * Returns 0 on success and nonzero on failure, so callers can write
 *   if (theano_prep_output(...)) { fail }
 * On failure *out is NULL.
 */
static int theano_prep_output(PyGpuArrayObject **out, unsigned int nd,
                              const size_t *dims, int typecode, ga_order ord,
                              PyGpuContextObject *c) {
  if (*out != NULL &&
      theano_size_check(*out, nd, dims, typecode)) {
    /* Existing output can be reused as-is. */
    return 0;
  }

  Py_XDECREF(*out);
  *out = pygpu_empty(nd, dims, typecode, ord, c, Py_None);
  return (*out == NULL) ? 1 : 0;
}
#endif
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论