提交 03d0e784 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2856 from abergeron/lstm_fixes

Lstm fixes
import os.path
from theano import Op, Apply, config
from theano.compile import optdb
from theano.gof import local_optimizer, LocalOptGroup
from theano.tensor.blas import Dot22, Gemv, Gemm, Ger
from theano.tensor.opt import in2out
from .basic_ops import HideC, as_gpuarray_variable
try:
......@@ -13,14 +19,35 @@ except ImportError as e:
class BlasOp(HideC):
    """Base class for the gpuarray BLAS ops.

    Provides the shared C headers, include dirs, init code and the
    ``gpublas_try_copy()`` C helper used by GpuGemv/GpuGemm/GpuGer.
    """

    def c_headers(self):
        # NOTE: the stale pre-merge return (['<blas_api.h>'] only) was
        # removed; <numpy_compat.h> and <gpuarray_helper.h> are required
        # by the helper emitted in c_support_code().
        return ['<blas_api.h>', '<numpy_compat.h>', '<gpuarray_helper.h>']

    def c_header_dirs(self):
        # gpuarray_helper.h lives next to this module, hence dirname(__file__).
        return [pygpu.get_include(), os.path.dirname(__file__)]

    def c_init_code(self):
        return ['import_pygpu__blas();']

    def c_support_code(self):
        # gpublas_try_copy(out, y): make `out` hold a copy of `y`, reusing
        # `out`'s buffer when it is C-contiguous and already matches y's
        # shape and typecode; otherwise it releases `out` and allocates a
        # fresh copy.  Returns NULL (with `out` released) on error.
        return """
        PyGpuArrayObject *gpublas_try_copy(PyGpuArrayObject *out,
                                           PyGpuArrayObject *y) {
          if (out &&
              GpuArray_CHKFLAGS(&out->ga, GA_CARRAY) &&
              theano_size_check(out, PyGpuArray_NDIM(y),
                                PyGpuArray_DIMS(y),
                                y->ga.typecode)) {
            if (pygpu_move(out, y)) {
              Py_XDECREF(out);
              return NULL;
            }
          } else {
            Py_XDECREF(out);
            out = pygpu_copy(y, GA_ANY_ORDER);
          }
          return out;
        }
        """
class GpuGemv(BlasOp, Gemv):
def make_node(self, y, alpha, A, x, beta):
......@@ -44,21 +71,20 @@ class GpuGemv(BlasOp, Gemv):
beta=inp[4], fail=sub['fail'], name=name)
if self.inplace:
code = """
Py_XDECREF(%(out)s);
if (%(y)s->ga.strides[0] <= 0) {
%(out)s = pygpu_copy(%(y)s, GA_ANY_ORDER);
%(out)s = gpublas_try_copy(%(out)s, %(y)s);
if (%(out)s == NULL) {
%(fail)s
}
} else {
Py_XDECREF(%(out)s);
%(out)s = %(y)s;
Py_INCREF(%(out)s);
}
""" % vars
else:
code = """
Py_XDECREF(%(out)s);
%(out)s = pygpu_copy(%(y)s, GA_ANY_ORDER);
%(out)s = gpublas_try_copy(%(out)s, %(y)s);
if (%(out)s == NULL) {
%(fail)s
}
......@@ -79,7 +105,7 @@ class GpuGemv(BlasOp, Gemv):
return code
def c_code_cache_version(self):
    # Bumped to 3: the generated C code now reuses the preallocated
    # output through gpublas_try_copy().  The stale duplicate
    # `return (2,)` left by the merge made this bump unreachable.
    return (3,)
gpugemv_no_inplace = GpuGemv(inplace=False)
gpugemv_inplace = GpuGemv(inplace=True)
......@@ -107,13 +133,13 @@ class GpuGemm(BlasOp, Gemm):
beta=inp[4], fail=sub['fail'], name=name)
if self.inplace:
code = """
Py_XDECREF(%(out)s);
if (!GpuArray_ISONESEGMENT(&%(C)s->ga)) {
%(out)s = pygpu_copy(%(C)s, GA_ANY_ORDER);
%(out)s = gpublas_try_copy(%(out)s, %(C)s);
if (%(out)s == NULL) {
%(fail)s
}
} else {
Py_XDECREF(%(out)s);
%(out)s = %(C)s;
Py_INCREF(%(out)s);
}
......@@ -121,7 +147,7 @@ class GpuGemm(BlasOp, Gemm):
else:
code = """
Py_XDECREF(%(out)s);
%(out)s = pygpu_copy(%(C)s, GA_ANY_ORDER);
%(out)s = gpublas_try_copy(%(out)s, %(C)s);
if (%(out)s == NULL) {
%(fail)s
}
......@@ -142,7 +168,7 @@ class GpuGemm(BlasOp, Gemm):
return code
def c_code_cache_version(self):
    # Bumped to 3: the generated C code now reuses the preallocated
    # output through gpublas_try_copy().  The stale duplicate
    # `return (2,)` left by the merge made this bump unreachable.
    return (3,)
gpugemm_no_inplace = GpuGemm(inplace=False)
......@@ -171,21 +197,20 @@ class GpuGer(BlasOp, Ger):
fail=sub['fail'], name=name)
if self.destructive:
code = """
Py_XDECREF(%(out)s);
if (!GpuArray_ISONESEGMENT(&%(A)s->ga)) {
%(out)s = pygpu_copy(%(A)s, GA_ANY_ORDER);
%(out)s = gpublas_try_copy(%(out)s, %(A)s);
if (%(out)s == NULL) {
%(fail)s
}
} else {
Py_XDECREF(%(out)s);
%(out)s = %(A)s;
Py_INCREF(%(out)s);
}
""" % vars
else:
code = """
Py_XDECREF(%(out)s);
%(out)s = pygpu_copy(%(A)s, GA_ANY_ORDER);
%(out)s = gpublas_try_copy(%(out)s, %(A)s);
if (%(out)s == NULL) {
%(fail)s
}
......@@ -203,7 +228,7 @@ class GpuGer(BlasOp, Ger):
return code
def c_code_cache_version(self):
    # Bumped to 2: the generated C code now reuses the preallocated
    # output through gpublas_try_copy().  The stale duplicate
    # `return (1,)` left by the merge made this bump unreachable.
    return (2,)
gpuger_no_inplace = GpuGer(destructive=False)
......@@ -239,11 +264,8 @@ class GpuDot22(BlasOp, Dot22):
dims[0] = PyGpuArray_DIMS(%(A)s)[0];
dims[1] = PyGpuArray_DIMS(%(B)s)[1];
%(out)s = pygpu_empty(2, dims,
%(typecode)s,
GA_C_ORDER,
pygpu_default_context(), Py_None);
if (!%(out)s) {
if (theano_prep_output(&%(out)s, 2, dims, %(typecode)s, GA_C_ORDER,
pygpu_default_context())) {
%(fail)s
}
......@@ -262,19 +284,10 @@ class GpuDot22(BlasOp, Dot22):
return code
def c_code_cache_version(self):
    # Bumped to 3: output allocation now goes through theano_prep_output()
    # and <numpy_compat.h> is provided by the BlasOp base class, so the
    # old GpuDot22-specific c_headers() override was removed.
    return (3,)
gpu_dot22 = GpuDot22()
from theano.compile import optdb
from theano.gof import local_optimizer, LocalOptGroup
from theano.tensor.opt import in2out
@local_optimizer([gpugemv_no_inplace], inplace=True)
def local_inplace_gpuagemv(node):
if node.op == gpugemv_no_inplace:
......
#ifndef THEANO_GPUARRAY_HELPER
#define THEANO_GPUARRAY_HELPER
#include <string.h>
#include <gpuarray_api.h>
#include <numpy_compat.h>
/* Nonzero iff `a` already has exactly `nd` dimensions of sizes `dims`
   and element typecode `typecode`. */
static int theano_size_check(PyGpuArrayObject *a, unsigned int nd,
                             const size_t *dims, int typecode) {
  if (a->ga.nd != nd || a->ga.typecode != typecode)
    return 0;
  return memcmp(a->ga.dimensions, dims, nd * sizeof(size_t)) == 0;
}
/* Ensure *out is a GpuArray with `nd` dimensions of sizes `dims` and
   element type `typecode`, allocating a fresh array of order `ord` on
   context `c` when the existing one cannot be reused.
   Returns 0 on success, 1 on allocation failure (with *out left NULL). */
static int theano_prep_output(PyGpuArrayObject **out, unsigned int nd,
const size_t *dims, int typecode, ga_order ord,
PyGpuContextObject *c) {
/* Reuse the existing output buffer when it already matches. */
if (*out != NULL &&
theano_size_check(*out, nd, dims, typecode)) {
return 0;
}
/* Drop the old output (if any) and allocate a new, uninitialized one. */
Py_XDECREF(*out);
*out = pygpu_empty(nd, dims, typecode, ord, c, Py_None);
return (*out == NULL) ? 1 : 0;
}
#endif
......@@ -10,7 +10,7 @@ from ..elemwise import (GpuElemwise, GpuDimShuffle,
GpuCAReduceCuda, GpuCAReduceCPY)
from ..type import GpuArrayType
from pygpu.array import gpuarray
from pygpu import ndgpuarray as gpuarray
# This is actually a test for GpuElemwise
......
......@@ -40,7 +40,12 @@ class GpuArrayType(Type):
return "GpuArrayType(%s, %s)" % (self.dtype, self.broadcastable)
def filter(self, data, strict=False, allow_downcast=None):
if strict:
if (isinstance(data, gpuarray.GpuArray) and
data.typecode == self.typecode):
# This is just to make this condition not enter the
# following branches
pass
elif strict:
if not isinstance(data, gpuarray.GpuArray):
raise TypeError("%s expected a GpuArray object." % self,
data, type(data))
......@@ -50,13 +55,24 @@ class GpuArrayType(Type):
(self, self.typecode, self.dtype,
data.typecode, str(data.dtype)))
# fallthrough to ndim check
elif allow_downcast:
elif (allow_downcast or
(allow_downcast is None and
type(data) == float and
self.dtype == config.floatX)):
data = gpuarray.array(data, dtype=self.typecode, copy=False,
ndmin=len(self.broadcastable))
else:
if not hasattr(data, 'dtype'):
# This is to convert objects that don't have a dtype
# (like lists). We anticipate that the type below
# will match and we pass copy=False so it won't make a
# second object on the GPU.
data = gpuarray.array(data, copy=False)
up_dtype = scalar.upcast(self.dtype, data.dtype)
if up_dtype == self.dtype:
data = gpuarray.array(data, dtype=self.dtype, copy=False)
data = gpuarray.array(data, dtype=self.dtype,
copy=False)
else:
raise TypeError("%s cannot store a value of dtype %s "
"without risking loss of precision." %
......@@ -150,18 +166,15 @@ class GpuArrayType(Type):
def convert_variable(self, var):
    # Merge artifact fix: the diff left both the old and re-indented new
    # copies of the condition in place, which was not valid Python.
    #
    # Accept `var` when it is the same GpuArrayType subclass with the
    # same element typecode and ndim, and each dimension either matches
    # self's broadcastable pattern or is broadcastable in var; then
    # rebroadcast it to self's pattern.  Returns None otherwise.
    if (type(self) == type(var.type) and
            self.typecode == var.type.typecode and
            self.ndim == var.type.ndim and
            all(sb == ob or ob for sb, ob in zip(self.broadcastable,
                                                 var.type.broadcastable))):
        return theano.tensor.patternbroadcast(var, self.broadcastable)
def __hash__(self):
    """Hash on the element typecode combined with the broadcast pattern."""
    typecode_hash = hash(self.typecode)
    bcast_hash = hash(self.broadcastable)
    return typecode_hash ^ bcast_hash
def __str__(self):
    """Readable form, e.g. ``GpuArray<float32>``."""
    dtype_name = self.dtype
    return "GpuArray<%s>" % (dtype_name,)
def dtype_specs(self):
"""Return a tuple (python type, c type, numpy typenum) that corresponds
to self.dtype.
......@@ -250,9 +263,9 @@ class GpuArrayType(Type):
def c_headers(self):
    # We need arrayobject for the PyArrayDescr struct def
    # (even if we just use a pointer to it in a function def).
    # The stale pre-reflow duplicate of this return (left by the merge
    # as dead code after the first return) was removed.
    return ['<gpuarray/array.h>', '<gpuarray/kernel.h>',
            '<gpuarray/error.h>', '<gpuarray/buffer_blas.h>',
            '<numpy/arrayobject.h>', '<gpuarray_api.h>']
def c_header_dirs(self):
    """Include dirs for the pygpu and numpy headers."""
    include_dirs = [pygpu.get_include()]
    include_dirs.append(numpy.get_include())
    return include_dirs
......@@ -284,8 +297,9 @@ GpuArrayType.Variable = GpuArrayVariable
class GpuArraySignature(tensor.TensorConstantSignature):
    # Merge artifact fix: the diff left both the old trailing-comment
    # form and the new comment-above form; keep a single copy.
    # might do something better if we can run the sum on the GPU, but
    # for now this will suffice.
    pass
class GpuArrayConstant(_operators, Constant):
......@@ -312,7 +326,9 @@ class GpuArraySharedVariable(_operators, SharedVariable):
return numpy.asarray(self.container.value)
def set_value(self, value, borrow=False):
    # Merge artifact fix: the stale unconditional
    # `self.container.value = pygpu.gpuarray.array(value, copy=(not borrow))`
    # line preceding the merged code was removed -- it made a wasted copy
    # and assigned the container twice.
    #
    # Only copy when handed an existing GpuArray that we may not borrow;
    # other value types are converted by the container's filter.
    if isinstance(value, pygpu.gpuarray.GpuArray):
        value = pygpu.gpuarray.array(value, copy=(not borrow))
    self.container.value = value
def __getitem__(self, *args):
    # Delegate indexing to the shared _operators implementation so shared
    # variables index exactly like other gpuarray variables.
    return _operators.__getitem__(self, *args)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论