提交 03d0e784 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2856 from abergeron/lstm_fixes

Lstm fixes
import os.path
from theano import Op, Apply, config from theano import Op, Apply, config
from theano.compile import optdb
from theano.gof import local_optimizer, LocalOptGroup
from theano.tensor.blas import Dot22, Gemv, Gemm, Ger from theano.tensor.blas import Dot22, Gemv, Gemm, Ger
from theano.tensor.opt import in2out
from .basic_ops import HideC, as_gpuarray_variable from .basic_ops import HideC, as_gpuarray_variable
try: try:
...@@ -13,14 +19,35 @@ except ImportError as e: ...@@ -13,14 +19,35 @@ except ImportError as e:
class BlasOp(HideC): class BlasOp(HideC):
def c_headers(self): def c_headers(self):
return ['<blas_api.h>'] return ['<blas_api.h>', '<numpy_compat.h>', '<gpuarray_helper.h>']
def c_header_dirs(self): def c_header_dirs(self):
return [pygpu.get_include()] return [pygpu.get_include(), os.path.dirname(__file__)]
def c_init_code(self): def c_init_code(self):
return ['import_pygpu__blas();'] return ['import_pygpu__blas();']
# Return the C helper source shared by the BLAS ops' generated code.
#
# The returned string defines gpublas_try_copy(out, y), which produces an
# array holding y's contents while reusing `out` when possible:
#   * if `out` is non-NULL, has the GA_CARRAY flag set and passes
#     theano_size_check against y's ndim, dims and typecode, pygpu_move(out, y)
#     is called (presumably copying y's data into out -- confirm against the
#     pygpu API); a non-zero result releases `out` and yields NULL;
#   * otherwise `out` is released and replaced by pygpu_copy(y, GA_ANY_ORDER),
#     which may itself be NULL on failure.
# In every path the helper either hands `out` back or drops it with
# Py_XDECREF, so it must be given a reference the caller still owns (the
# caller must not Py_XDECREF `out` before the call).
def c_support_code(self):
return """
PyGpuArrayObject *gpublas_try_copy(PyGpuArrayObject *out,
PyGpuArrayObject *y) {
if (out &&
GpuArray_CHKFLAGS(&out->ga, GA_CARRAY) &&
theano_size_check(out, PyGpuArray_NDIM(y),
PyGpuArray_DIMS(y),
y->ga.typecode)) {
if (pygpu_move(out, y)) {
Py_XDECREF(out);
return NULL;
}
} else {
Py_XDECREF(out);
out = pygpu_copy(y, GA_ANY_ORDER);
}
return out;
}
"""
class GpuGemv(BlasOp, Gemv): class GpuGemv(BlasOp, Gemv):
def make_node(self, y, alpha, A, x, beta): def make_node(self, y, alpha, A, x, beta):
...@@ -44,21 +71,20 @@ class GpuGemv(BlasOp, Gemv): ...@@ -44,21 +71,20 @@ class GpuGemv(BlasOp, Gemv):
beta=inp[4], fail=sub['fail'], name=name) beta=inp[4], fail=sub['fail'], name=name)
if self.inplace: if self.inplace:
code = """ code = """
Py_XDECREF(%(out)s);
if (%(y)s->ga.strides[0] <= 0) { if (%(y)s->ga.strides[0] <= 0) {
%(out)s = pygpu_copy(%(y)s, GA_ANY_ORDER); %(out)s = gpublas_try_copy(%(out)s, %(y)s);
if (%(out)s == NULL) { if (%(out)s == NULL) {
%(fail)s %(fail)s
} }
} else { } else {
Py_XDECREF(%(out)s);
%(out)s = %(y)s; %(out)s = %(y)s;
Py_INCREF(%(out)s); Py_INCREF(%(out)s);
} }
""" % vars """ % vars
else: else:
code = """ code = """
Py_XDECREF(%(out)s); %(out)s = gpublas_try_copy(%(out)s, %(y)s);
%(out)s = pygpu_copy(%(y)s, GA_ANY_ORDER);
if (%(out)s == NULL) { if (%(out)s == NULL) {
%(fail)s %(fail)s
} }
...@@ -79,7 +105,7 @@ class GpuGemv(BlasOp, Gemv): ...@@ -79,7 +105,7 @@ class GpuGemv(BlasOp, Gemv):
return code return code
def c_code_cache_version(self): def c_code_cache_version(self):
return (2,) return (3,)
gpugemv_no_inplace = GpuGemv(inplace=False) gpugemv_no_inplace = GpuGemv(inplace=False)
gpugemv_inplace = GpuGemv(inplace=True) gpugemv_inplace = GpuGemv(inplace=True)
...@@ -107,13 +133,13 @@ class GpuGemm(BlasOp, Gemm): ...@@ -107,13 +133,13 @@ class GpuGemm(BlasOp, Gemm):
beta=inp[4], fail=sub['fail'], name=name) beta=inp[4], fail=sub['fail'], name=name)
if self.inplace: if self.inplace:
code = """ code = """
Py_XDECREF(%(out)s);
if (!GpuArray_ISONESEGMENT(&%(C)s->ga)) { if (!GpuArray_ISONESEGMENT(&%(C)s->ga)) {
%(out)s = pygpu_copy(%(C)s, GA_ANY_ORDER); %(out)s = gpublas_try_copy(%(out)s, %(C)s);
if (%(out)s == NULL) { if (%(out)s == NULL) {
%(fail)s %(fail)s
} }
} else { } else {
Py_XDECREF(%(out)s);
%(out)s = %(C)s; %(out)s = %(C)s;
Py_INCREF(%(out)s); Py_INCREF(%(out)s);
} }
...@@ -121,7 +147,7 @@ class GpuGemm(BlasOp, Gemm): ...@@ -121,7 +147,7 @@ class GpuGemm(BlasOp, Gemm):
else: else:
code = """ code = """
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
%(out)s = pygpu_copy(%(C)s, GA_ANY_ORDER); %(out)s = gpublas_try_copy(%(out)s, %(C)s);
if (%(out)s == NULL) { if (%(out)s == NULL) {
%(fail)s %(fail)s
} }
...@@ -142,7 +168,7 @@ class GpuGemm(BlasOp, Gemm): ...@@ -142,7 +168,7 @@ class GpuGemm(BlasOp, Gemm):
return code return code
def c_code_cache_version(self): def c_code_cache_version(self):
return (2,) return (3,)
gpugemm_no_inplace = GpuGemm(inplace=False) gpugemm_no_inplace = GpuGemm(inplace=False)
...@@ -171,21 +197,20 @@ class GpuGer(BlasOp, Ger): ...@@ -171,21 +197,20 @@ class GpuGer(BlasOp, Ger):
fail=sub['fail'], name=name) fail=sub['fail'], name=name)
if self.destructive: if self.destructive:
code = """ code = """
Py_XDECREF(%(out)s);
if (!GpuArray_ISONESEGMENT(&%(A)s->ga)) { if (!GpuArray_ISONESEGMENT(&%(A)s->ga)) {
%(out)s = pygpu_copy(%(A)s, GA_ANY_ORDER); %(out)s = gpublas_try_copy(%(out)s, %(A)s);
if (%(out)s == NULL) { if (%(out)s == NULL) {
%(fail)s %(fail)s
} }
} else { } else {
Py_XDECREF(%(out)s);
%(out)s = %(A)s; %(out)s = %(A)s;
Py_INCREF(%(out)s); Py_INCREF(%(out)s);
} }
""" % vars """ % vars
else: else:
code = """ code = """
Py_XDECREF(%(out)s); %(out)s = gpublas_try_copy(%(out)s, %(A)s);
%(out)s = pygpu_copy(%(A)s, GA_ANY_ORDER);
if (%(out)s == NULL) { if (%(out)s == NULL) {
%(fail)s %(fail)s
} }
...@@ -203,7 +228,7 @@ class GpuGer(BlasOp, Ger): ...@@ -203,7 +228,7 @@ class GpuGer(BlasOp, Ger):
return code return code
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (2,)
gpuger_no_inplace = GpuGer(destructive=False) gpuger_no_inplace = GpuGer(destructive=False)
...@@ -239,11 +264,8 @@ class GpuDot22(BlasOp, Dot22): ...@@ -239,11 +264,8 @@ class GpuDot22(BlasOp, Dot22):
dims[0] = PyGpuArray_DIMS(%(A)s)[0]; dims[0] = PyGpuArray_DIMS(%(A)s)[0];
dims[1] = PyGpuArray_DIMS(%(B)s)[1]; dims[1] = PyGpuArray_DIMS(%(B)s)[1];
%(out)s = pygpu_empty(2, dims, if (theano_prep_output(&%(out)s, 2, dims, %(typecode)s, GA_C_ORDER,
%(typecode)s, pygpu_default_context())) {
GA_C_ORDER,
pygpu_default_context(), Py_None);
if (!%(out)s) {
%(fail)s %(fail)s
} }
...@@ -262,19 +284,10 @@ class GpuDot22(BlasOp, Dot22): ...@@ -262,19 +284,10 @@ class GpuDot22(BlasOp, Dot22):
return code return code
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (3,)
def c_headers(self):
ret = super(GpuDot22, self).c_headers()
return ret + ['<numpy_compat.h>']
gpu_dot22 = GpuDot22() gpu_dot22 = GpuDot22()
from theano.compile import optdb
from theano.gof import local_optimizer, LocalOptGroup
from theano.tensor.opt import in2out
@local_optimizer([gpugemv_no_inplace], inplace=True) @local_optimizer([gpugemv_no_inplace], inplace=True)
def local_inplace_gpuagemv(node): def local_inplace_gpuagemv(node):
if node.op == gpugemv_no_inplace: if node.op == gpugemv_no_inplace:
......
#ifndef THEANO_GPUARRAY_HELPER
#define THEANO_GPUARRAY_HELPER
#include <string.h>
#include <gpuarray_api.h>
#include <numpy_compat.h>
/*
 * Tell whether array `a` already has the requested layout description.
 *
 * Returns 1 when a's rank equals `nd`, its typecode equals `typecode` and
 * its first `nd` dimension entries are byte-for-byte equal to `dims`;
 * returns 0 otherwise.  `a` is dereferenced unconditionally, so it must not
 * be NULL.
 */
static int theano_size_check(PyGpuArrayObject *a, unsigned int nd,
                             const size_t *dims, int typecode) {
  /* Cheap scalar comparisons first; the dimension compare only runs when
     the rank already matches, so both buffers hold at least nd entries. */
  if (a->ga.nd != nd)
    return 0;
  if (a->ga.typecode != typecode)
    return 0;
  return memcmp(a->ga.dimensions, dims, nd * sizeof(size_t)) == 0;
}
/*
 * Make sure *out refers to an array of the requested rank, dims and typecode.
 *
 * An existing *out that passes theano_size_check is kept untouched.  The
 * reuse test looks only at rank, dimensions and typecode; `ord` and `c` are
 * used solely when a fresh array has to be allocated.  Otherwise the old
 * reference (if any) is released and *out is replaced by the result of
 * pygpu_empty().
 *
 * Returns 0 when *out is usable afterwards, 1 when the allocation returned
 * NULL (in which case *out is NULL).
 */
static int theano_prep_output(PyGpuArrayObject **out, unsigned int nd,
                              const size_t *dims, int typecode, ga_order ord,
                              PyGpuContextObject *c) {
  PyGpuArrayObject *current = *out;

  if (current == NULL || !theano_size_check(current, nd, dims, typecode)) {
    /* Nothing reusable: drop the stale reference and allocate anew. */
    Py_XDECREF(current);
    *out = pygpu_empty(nd, dims, typecode, ord, c, Py_None);
    if (*out == NULL)
      return 1;
  }
  return 0;
}
#endif
...@@ -10,7 +10,7 @@ from ..elemwise import (GpuElemwise, GpuDimShuffle, ...@@ -10,7 +10,7 @@ from ..elemwise import (GpuElemwise, GpuDimShuffle,
GpuCAReduceCuda, GpuCAReduceCPY) GpuCAReduceCuda, GpuCAReduceCPY)
from ..type import GpuArrayType from ..type import GpuArrayType
from pygpu.array import gpuarray from pygpu import ndgpuarray as gpuarray
# This is actually a test for GpuElemwise # This is actually a test for GpuElemwise
......
...@@ -40,7 +40,12 @@ class GpuArrayType(Type): ...@@ -40,7 +40,12 @@ class GpuArrayType(Type):
return "GpuArrayType(%s, %s)" % (self.dtype, self.broadcastable) return "GpuArrayType(%s, %s)" % (self.dtype, self.broadcastable)
def filter(self, data, strict=False, allow_downcast=None): def filter(self, data, strict=False, allow_downcast=None):
if strict: if (isinstance(data, gpuarray.GpuArray) and
data.typecode == self.typecode):
# This is just to make this condition not enter the
# following branches
pass
elif strict:
if not isinstance(data, gpuarray.GpuArray): if not isinstance(data, gpuarray.GpuArray):
raise TypeError("%s expected a GpuArray object." % self, raise TypeError("%s expected a GpuArray object." % self,
data, type(data)) data, type(data))
...@@ -50,13 +55,24 @@ class GpuArrayType(Type): ...@@ -50,13 +55,24 @@ class GpuArrayType(Type):
(self, self.typecode, self.dtype, (self, self.typecode, self.dtype,
data.typecode, str(data.dtype))) data.typecode, str(data.dtype)))
# fallthrough to ndim check # fallthrough to ndim check
elif allow_downcast: elif (allow_downcast or
(allow_downcast is None and
type(data) == float and
self.dtype == config.floatX)):
data = gpuarray.array(data, dtype=self.typecode, copy=False, data = gpuarray.array(data, dtype=self.typecode, copy=False,
ndmin=len(self.broadcastable)) ndmin=len(self.broadcastable))
else: else:
if not hasattr(data, 'dtype'):
# This is to convert objects that don't have a dtype
# (like lists). We anticipate that the type below
# will match and we pass copy=False so it won't make a
# second object on the GPU.
data = gpuarray.array(data, copy=False)
up_dtype = scalar.upcast(self.dtype, data.dtype) up_dtype = scalar.upcast(self.dtype, data.dtype)
if up_dtype == self.dtype: if up_dtype == self.dtype:
data = gpuarray.array(data, dtype=self.dtype, copy=False) data = gpuarray.array(data, dtype=self.dtype,
copy=False)
else: else:
raise TypeError("%s cannot store a value of dtype %s " raise TypeError("%s cannot store a value of dtype %s "
"without risking loss of precision." % "without risking loss of precision." %
...@@ -150,18 +166,15 @@ class GpuArrayType(Type): ...@@ -150,18 +166,15 @@ class GpuArrayType(Type):
def convert_variable(self, var): def convert_variable(self, var):
if (type(self) == type(var.type) and if (type(self) == type(var.type) and
self.typecode == var.type.typecode and self.typecode == var.type.typecode and
self.ndim == var.type.ndim and self.ndim == var.type.ndim and
all(sb == ob or ob for sb, ob in zip(self.broadcastable, all(sb == ob or ob for sb, ob in zip(self.broadcastable,
var.type.broadcastable))): var.type.broadcastable))):
return theano.tensor.patternbroadcast(var, self.broadcastable) return theano.tensor.patternbroadcast(var, self.broadcastable)
def __hash__(self): def __hash__(self):
return (hash(self.typecode) ^ hash(self.broadcastable)) return (hash(self.typecode) ^ hash(self.broadcastable))
def __str__(self):
return "GpuArray<%s>" % (self.dtype,)
def dtype_specs(self): def dtype_specs(self):
"""Return a tuple (python type, c type, numpy typenum) that corresponds """Return a tuple (python type, c type, numpy typenum) that corresponds
to self.dtype. to self.dtype.
...@@ -250,9 +263,9 @@ class GpuArrayType(Type): ...@@ -250,9 +263,9 @@ class GpuArrayType(Type):
def c_headers(self): def c_headers(self):
# We need arrayobject for the PyArrayDescr struct def # We need arrayobject for the PyArrayDescr struct def
# (even if we just use a pointer to it in a function def) # (even if we just use a pointer to it in a function def)
return ['<gpuarray/array.h>', '<gpuarray/kernel.h>', '<gpuarray/error.h>', return ['<gpuarray/array.h>', '<gpuarray/kernel.h>',
'<gpuarray/buffer_blas.h>', '<numpy/arrayobject.h>', '<gpuarray/error.h>', '<gpuarray/buffer_blas.h>',
'<gpuarray_api.h>'] '<numpy/arrayobject.h>', '<gpuarray_api.h>']
def c_header_dirs(self): def c_header_dirs(self):
return [pygpu.get_include(), numpy.get_include()] return [pygpu.get_include(), numpy.get_include()]
...@@ -284,8 +297,9 @@ GpuArrayType.Variable = GpuArrayVariable ...@@ -284,8 +297,9 @@ GpuArrayType.Variable = GpuArrayVariable
class GpuArraySignature(tensor.TensorConstantSignature): class GpuArraySignature(tensor.TensorConstantSignature):
pass # might do something better if we can run the sum on the # might do something better if we can run the sum on the GPU, but
# GPU, but for now this will suffice. # for now this will suffice.
pass
class GpuArrayConstant(_operators, Constant): class GpuArrayConstant(_operators, Constant):
...@@ -312,7 +326,9 @@ class GpuArraySharedVariable(_operators, SharedVariable): ...@@ -312,7 +326,9 @@ class GpuArraySharedVariable(_operators, SharedVariable):
return numpy.asarray(self.container.value) return numpy.asarray(self.container.value)
def set_value(self, value, borrow=False): def set_value(self, value, borrow=False):
self.container.value = pygpu.gpuarray.array(value, copy=(not borrow)) if isinstance(value, pygpu.gpuarray.GpuArray):
value = pygpu.gpuarray.array(value, copy=(not borrow))
self.container.value = value
def __getitem__(self, *args): def __getitem__(self, *args):
return _operators.__getitem__(self, *args) return _operators.__getitem__(self, *args)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论