提交 7e3db169，作者：Arnaud Bergeron

Fix a bunch of errors in the logic for GpuDot22 output reuse.

上级 11251d36
...@@ -29,14 +29,15 @@ class BlasOp(HideC): ...@@ -29,14 +29,15 @@ class BlasOp(HideC):
def c_support_code(self): def c_support_code(self):
return """ return """
PyGpuArray *gpublas_try_copy(PyGpuArray *out, PyGpuArray *y) PyGpuArrayObject *gpublas_try_copy(PyGpuArrayObject *out,
PyGpuArrayObject *y) {
if (out && if (out &&
GpuArray_CHKFLAGS(&out->ga, GA_CARRAY) && GpuArray_CHKFLAGS(&out->ga, GA_CARRAY) &&
theano_size_check(out, PyGpuArray_NDIM(y), theano_size_check(out, PyGpuArray_NDIM(y),
PyGpuArray_DIMS(y), PyGpuArray_DIMS(y),
y->ga.typecode)) { y->ga.typecode)) {
if (pygpu_move(out, y)) { if (pygpu_move(out, y)) {
Py_XDECREF(%(out)s) Py_XDECREF(out);
return NULL; return NULL;
} }
} else { } else {
...@@ -264,7 +265,7 @@ class GpuDot22(BlasOp, Dot22): ...@@ -264,7 +265,7 @@ class GpuDot22(BlasOp, Dot22):
dims[1] = PyGpuArray_DIMS(%(B)s)[1]; dims[1] = PyGpuArray_DIMS(%(B)s)[1];
if (theano_prep_output(&%(out)s, 2, dims, %(typecode)s, GA_C_ORDER, if (theano_prep_output(&%(out)s, 2, dims, %(typecode)s, GA_C_ORDER,
pygpu_default_context())) pygpu_default_context())) {
%(fail)s %(fail)s
} }
...@@ -283,7 +284,7 @@ class GpuDot22(BlasOp, Dot22): ...@@ -283,7 +284,7 @@ class GpuDot22(BlasOp, Dot22):
return code return code
def c_code_cache_version(self): def c_code_cache_version(self):
return (2,) return (3,)
def c_header_dirs(self): def c_header_dirs(self):
ret = super(GpuDot22, self).c_header_dirs() ret = super(GpuDot22, self).c_header_dirs()
......
...@@ -2,12 +2,13 @@ ...@@ -2,12 +2,13 @@
#define THEANO_GPUARRAY_HELPER #define THEANO_GPUARRAY_HELPER
#include <string.h> #include <string.h>
#include <pygpu_api.h> #include <gpuarray_api.h>
#include <numpy_compat.h>
static int theano_size_check(PyGpuArray *a, unsigned int nd, static int theano_size_check(PyGpuArrayObject *a, unsigned int nd,
const size_t *dims, int typecode) { const size_t *dims, int typecode) {
return (a->ga.nd == nd && a->ga.typecode == typecode && return (a->ga.nd == nd && a->ga.typecode == typecode &&
memcmp(a->dims, dims, nd * sizeof(size_t)) == 0); memcmp(a->ga.dimensions, dims, nd * sizeof(size_t)) == 0);
} }
static int theano_prep_output(PyGpuArrayObject **out, unsigned int nd, static int theano_prep_output(PyGpuArrayObject **out, unsigned int nd,
...@@ -15,12 +16,12 @@ static int theano_prep_output(PyGpuArrayObject **out, unsigned int nd, ...@@ -15,12 +16,12 @@ static int theano_prep_output(PyGpuArrayObject **out, unsigned int nd,
PyGpuContextObject *c) { PyGpuContextObject *c) {
if (*out != NULL && if (*out != NULL &&
theano_size_check(*out, nd, dims, typecode)) { theano_size_check(*out, nd, dims, typecode)) {
return 1; return 0;
} }
Py_XDECREF(*out); Py_XDECREF(*out);
*out = pygpu_empty(nd, dims, typecode, ord, c, Py_None); *out = pygpu_empty(nd, dims, typecode, ord, c, Py_None);
return (*out == NULL)? 0 : 1; return (*out == NULL) ? 1 : 0;
} }
#endif #endif
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论