提交 3007bf79 authored 作者: Pascal Lamblin's avatar Pascal Lamblin 提交者: GitHub

Merge pull request #4915 from abergeron/dnn_rnn2

Cudnn RNN bindings.
...@@ -14,11 +14,9 @@ import theano.tensor as T ...@@ -14,11 +14,9 @@ import theano.tensor as T
def test_NanGuardMode(): def test_NanGuardMode():
""" # Tests if NanGuardMode is working by feeding in numpy.inf and numpy.nans
Tests if NanGuardMode is working by feeding in numpy.inf and numpy.nans # intentionally. A working implementation should be able to capture all
intentionally. A working implementation should be able to capture all # the abnormalties.
the abnormalties.
"""
x = T.matrix() x = T.matrix()
w = theano.shared(numpy.random.randn(5, 7).astype(theano.config.floatX)) w = theano.shared(numpy.random.randn(5, 7).astype(theano.config.floatX))
y = T.dot(x, w) y = T.dot(x, w)
......
...@@ -10,7 +10,7 @@ import sys ...@@ -10,7 +10,7 @@ import sys
import warnings import warnings
from functools import wraps from functools import wraps
from six import StringIO, PY3 from six import StringIO, PY3, iteritems
import theano import theano
from theano.compat import configparser as ConfigParser from theano.compat import configparser as ConfigParser
...@@ -91,37 +91,44 @@ theano_raw_cfg = ConfigParser.RawConfigParser() ...@@ -91,37 +91,44 @@ theano_raw_cfg = ConfigParser.RawConfigParser()
theano_raw_cfg.read(config_files) theano_raw_cfg.read(config_files)
def change_flags(**kwargs): class change_flags(object):
""" """
Use this as a decorator to change the value of Theano config variable. Use this as a decorator or context manager to change the value of
Theano config variables.
Useful during tests. Useful during tests.
""" """
def change_flags_exec(f): def __init__(self, **kwargs):
confs = dict()
for k in kwargs:
l = [v for v in theano.configparser._config_var_list
if v.fullname == k]
assert len(l) == 1
confs[k] = l[0]
self.confs = confs
self.new_vals = kwargs
def __call__(self, f):
@wraps(f) @wraps(f)
def inner(*args, **kwargs_): def res(*args, **kwargs):
old_val = {} with self:
for k in kwargs: return f(*args, **kwargs)
l = [v for v in theano.configparser._config_var_list return res
if v.fullname == k]
assert len(l) == 1 def __enter__(self):
old_val[k] = l[0].__get__(True, None) self.old_vals = {}
try: for k, v in iteritems(self.confs):
for k in kwargs: self.old_vals[k] = v.__get__(True, None)
l = [v for v in theano.configparser._config_var_list try:
if v.fullname == k] for k, v in iteritems(self.confs):
assert len(l) == 1 v.__set__(None, self.new_vals[k])
l[0].__set__(None, kwargs[k]) except:
return f(*args, **kwargs_) self.__exit__()
finally: raise
for k in kwargs:
l = [v for v in theano.configparser._config_var_list def __exit__(self, *args):
if v.fullname == k] for k, v in iteritems(self.confs):
assert len(l) == 1 v.__set__(None, self.old_vals[k])
l[0].__set__(None, old_val[k])
return inner
return change_flags_exec
def fetch_val_for_key(key, delete_key=False): def fetch_val_for_key(key, delete_key=False):
......
...@@ -696,6 +696,9 @@ class PureOp(object): ...@@ -696,6 +696,9 @@ class PureOp(object):
# Python implementation # # Python implementation #
######################### #########################
def L_op(self, inputs, outputs, output_grads):
return self.grad(inputs, output_grads)
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
""" """
This method is primarily used by tensor.Rop This method is primarily used by tensor.Rop
......
...@@ -14,6 +14,7 @@ import theano ...@@ -14,6 +14,7 @@ import theano
from theano.gof import utils from theano.gof import utils
from theano.gof.utils import MethodNotDefined, object2 from theano.gof.utils import MethodNotDefined, object2
from theano.gof import graph from theano.gof import graph
from theano.configparser import change_flags
######## ########
# Type # # Type #
...@@ -638,6 +639,8 @@ class CDataType(Type): ...@@ -638,6 +639,8 @@ class CDataType(Type):
have a `void` return and take a single pointer argument. have a `void` return and take a single pointer argument.
""" """
__props__ = ('ctype', 'freefunc', 'headers', 'header_dirs',
'libraries', 'lib_dirs', 'extra_support_code')
def __init__(self, ctype, freefunc=None, headers=None, header_dirs=None, def __init__(self, ctype, freefunc=None, headers=None, header_dirs=None,
libraries=None, lib_dirs=None, extra_support_code=""): libraries=None, lib_dirs=None, extra_support_code=""):
...@@ -647,42 +650,51 @@ class CDataType(Type): ...@@ -647,42 +650,51 @@ class CDataType(Type):
assert isinstance(freefunc, string_types) assert isinstance(freefunc, string_types)
self.freefunc = freefunc self.freefunc = freefunc
if headers is None: if headers is None:
headers = [] headers = ()
self.headers = headers self.headers = tuple(headers)
if header_dirs is None: if header_dirs is None:
header_dirs = [] header_dirs = ()
self.header_dirs = header_dirs self.header_dirs = tuple(header_dirs)
if libraries is None: if libraries is None:
libraries = [] libraries = ()
self.libraries = libraries self.libraries = tuple(libraries)
if lib_dirs is None: if lib_dirs is None:
lib_dirs = [] lib_dirs = ()
self.lib_dirs = lib_dirs self.lib_dirs = tuple(lib_dirs)
self.extra_support_code = extra_support_code self.extra_support_code = extra_support_code
self._fn = None self._fn = None
def __eq__(self, other):
return (type(self) == type(other) and
self.ctype == other.ctype and
self.freefunc == other.freefunc)
def __hash__(self):
return hash((type(self), self.ctype, self.freefunc))
def filter(self, data, strict=False, allow_downcast=None): def filter(self, data, strict=False, allow_downcast=None):
if data is not None and not isinstance(data, _cdata_type): if data is not None and not isinstance(data, _cdata_type):
raise TypeError("expected None or a PyCapsule") raise TypeError("expected None or a PyCapsule")
return data return data
def _get_func(self): def _get_func(self):
"""
Return a function that makes a value from an integer.
The integer value is assumed to be a valid pointer for the
type and no check is done to ensure that.
"""
from theano.scalar import get_scalar_type from theano.scalar import get_scalar_type
if self._fn is None: if self._fn is None:
v = get_scalar_type('int64')() with change_flags(compute_test_value='off'):
self._fn = theano.function([v], _make_cdata(self)(v), profile=False) v = get_scalar_type('int64')()
self._fn = theano.function([v], _make_cdata(self)(v),
profile=False)
return self._fn return self._fn
def make_value(self, ptr): def make_value(self, ptr):
"""
Make a value of this type.
Parameters
----------
ptr : int
Integer representation of a valid pointer value
"""
return self._get_func()(ptr) return self._get_func()(ptr)
def c_declare(self, name, sub, check_input=True): def c_declare(self, name, sub, check_input=True):
......
...@@ -26,7 +26,7 @@ except ImportError: ...@@ -26,7 +26,7 @@ except ImportError:
# This is for documentation not to depend on the availability of pygpu # This is for documentation not to depend on the availability of pygpu
from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant, from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
GpuArraySharedVariable, gpuarray_shared_constructor, GpuArraySharedVariable, gpuarray_shared_constructor,
reg_context, get_context, ContextNotDefined) reg_context, get_context, ContextNotDefined, _get_props)
from .basic_ops import as_gpuarray_variable from .basic_ops import as_gpuarray_variable
from . import fft, dnn, opt, nerv, extra_ops, multinomial from . import fft, dnn, opt, nerv, extra_ops, multinomial
...@@ -89,19 +89,24 @@ def init_dev(dev, name=None): ...@@ -89,19 +89,24 @@ def init_dev(dev, name=None):
(name, dev, context.devname), (name, dev, context.devname),
file=sys.stderr) file=sys.stderr)
pygpu_activated = True pygpu_activated = True
ctx_props = _get_props(name)
ctx_props['dev'] = dev
if dev.startswith('cuda'): if dev.startswith('cuda'):
try: if 'cudnn_version' not in ctx_props:
cudnn_version = dnn.version() try:
# 5200 should not print warning with cudnn 5.1 final. ctx_props['cudnn_version'] = dnn.version()
if cudnn_version >= 5200: # 5200 should not print warning with cudnn 5.1 final.
warnings.warn("Your cuDNN version is more recent than Theano." if ctx_props['cudnn_version'] >= 5200:
" If you see problems, try updating Theano or" warnings.warn("Your cuDNN version is more recent than "
" downgrading cuDNN to version 5.1.") "Theano. If you encounter problems, try "
if config.print_active_device: "updating Theano or downgrading cuDNN to "
print("Using cuDNN version %d on context %s" % "version 5.1.")
(cudnn_version, name), file=sys.stderr) if config.print_active_device:
except Exception: print("Using cuDNN version %d on context %s" %
pass (ctx_props['cudnn_version'], name), file=sys.stderr)
ctx_props['cudnn_handle'] = dnn._make_handle(context)
except Exception:
pass
# This maps things like 'cuda0' to the context object on that device. # This maps things like 'cuda0' to the context object on that device.
init_dev.devmap = {} init_dev.devmap = {}
......
差异被折叠。
...@@ -149,41 +149,3 @@ static int c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t *desc) { ...@@ -149,41 +149,3 @@ static int c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t *desc) {
#section init_code #section init_code
setup_ext_cuda(); setup_ext_cuda();
#section support_code_struct
PyGpuContextObject *ctx;
cudnnHandle_t APPLY_SPECIFIC(_handle);
#section init_code_struct
{
// We need to keep a reference here to have it available in the destructor.
ctx = PARAMS;
Py_INCREF(ctx);
cuda_enter(PARAMS->ctx);
cudnnStatus_t err;
APPLY_SPECIFIC(_handle) = NULL;
if ((err = cudnnCreate(&APPLY_SPECIFIC(_handle))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not create cuDNN handle: %s",
cudnnGetErrorString(err));
cuda_exit(PARAMS->ctx);
FAIL;
}
if ((err = cudnnSetStream(APPLY_SPECIFIC(_handle),
cuda_get_stream(PARAMS->ctx))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "Could not set cudnn stream: %s",
cudnnGetErrorString(err));
cuda_exit(PARAMS->ctx);
FAIL;
}
cuda_exit(PARAMS->ctx);
}
#section cleanup_code_struct
cuda_enter(ctx->ctx);
cudnnDestroy(APPLY_SPECIFIC(_handle));
cuda_exit(ctx->ctx);
Py_DECREF((PyObject *)ctx);
...@@ -3,7 +3,9 @@ ...@@ -3,7 +3,9 @@
int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
PyGpuArrayObject *bias, npy_float64 epsilon, PyGpuArrayObject *bias, npy_float64 epsilon,
PyGpuArrayObject **outp, PyGpuArrayObject **x_mean, PyGpuArrayObject **outp, PyGpuArrayObject **x_mean,
PyGpuArrayObject **x_invstd, PyGpuContextObject *c) { PyGpuArrayObject **x_invstd, cudnnHandle_t _handle) {
PyGpuContextObject *c = inp->context;
if (c_set_tensorNd(inp, bn_input) != 0) if (c_set_tensorNd(inp, bn_input) != 0)
return 1; return 1;
if (c_set_tensorNd(scale, bn_params) != 0) if (c_set_tensorNd(scale, bn_params) != 0)
...@@ -37,7 +39,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, ...@@ -37,7 +39,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
beta = (void *)&fbeta; beta = (void *)&fbeta;
} }
cudnnStatus_t err = cudnnBatchNormalizationForwardTraining( cudnnStatus_t err = cudnnBatchNormalizationForwardTraining(
APPLY_SPECIFIC(_handle), _handle,
MODE, MODE,
alpha, alpha,
beta, beta,
......
...@@ -24,7 +24,9 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp, ...@@ -24,7 +24,9 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp,
PyGpuArrayObject *scale, PyGpuArrayObject *x_mean, PyGpuArrayObject *scale, PyGpuArrayObject *x_mean,
PyGpuArrayObject *x_invstd, npy_float64 epsilon, PyGpuArrayObject *x_invstd, npy_float64 epsilon,
PyGpuArrayObject **dinp, PyGpuArrayObject **dscale, PyGpuArrayObject **dinp, PyGpuArrayObject **dscale,
PyGpuArrayObject **dbias, PyGpuContextObject *c) { PyGpuArrayObject **dbias, cudnnHandle_t _handle) {
PyGpuContextObject *c = inp->context;
if (c_set_tensorNd(inp, bn_input) != 0) if (c_set_tensorNd(inp, bn_input) != 0)
return 1; return 1;
if (c_set_tensorNd(doutp, bn_doutput) != 0) if (c_set_tensorNd(doutp, bn_doutput) != 0)
...@@ -66,7 +68,7 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp, ...@@ -66,7 +68,7 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp,
betaParam = (void *)&fbeta; betaParam = (void *)&fbeta;
} }
cudnnStatus_t err = cudnnBatchNormalizationBackward( cudnnStatus_t err = cudnnBatchNormalizationBackward(
APPLY_SPECIFIC(_handle), _handle,
MODE, MODE,
alphaData, alphaData,
betaData, betaData,
......
...@@ -3,7 +3,9 @@ ...@@ -3,7 +3,9 @@
int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
PyGpuArrayObject *bias, PyGpuArrayObject *est_mean, PyGpuArrayObject *bias, PyGpuArrayObject *est_mean,
PyGpuArrayObject *est_var, npy_float64 epsilon, PyGpuArrayObject *est_var, npy_float64 epsilon,
PyGpuArrayObject **outp, PyGpuContextObject *c) { PyGpuArrayObject **outp, cudnnHandle_t _handle) {
PyGpuContextObject *c = inp->context;
if (c_set_tensorNd(inp, bn_input) != 0) if (c_set_tensorNd(inp, bn_input) != 0)
return 1; return 1;
if (c_set_tensorNd(scale, bn_params) != 0) if (c_set_tensorNd(scale, bn_params) != 0)
...@@ -33,7 +35,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, ...@@ -33,7 +35,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
beta = (void *)&fbeta; beta = (void *)&fbeta;
} }
cudnnStatus_t err = cudnnBatchNormalizationForwardInference( cudnnStatus_t err = cudnnBatchNormalizationForwardInference(
APPLY_SPECIFIC(_handle), _handle,
MODE, MODE,
alpha, alpha,
beta, beta,
......
#section support_code
int dnn_dropout_desc(float dropout, unsigned long long seed,
PyGpuContextObject *c,
cudnnDropoutDescriptor_t *odesc,
PyGpuArrayObject **ostates,
cudnnHandle_t _handle) {
PyGpuArrayObject *states;
cudnnDropoutDescriptor_t desc;
size_t states_sz;
cudnnStatus_t err;
cuda_enter(c->ctx);
err = cudnnCreateDropoutDescriptor(&desc);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_RuntimeError, "Can't create dropout descriptor");
cuda_exit(c->ctx);
return -1;
}
/* Can't fail according to docs */
cudnnDropoutGetStatesSize(_handle, &states_sz);
states = pygpu_empty(1, &states_sz, GA_UBYTE, GA_C_ORDER, c, Py_None);
if (states == NULL) {
cudnnDestroyDropoutDescriptor(desc);
cuda_exit(c->ctx);
return -1;
}
err = cudnnSetDropoutDescriptor(desc, _handle, dropout,
PyGpuArray_DEV_DATA(states),
states_sz, seed);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_RuntimeError, "Can't set dropout descriptor");
Py_DECREF((PyObject *)states);
cudnnDestroyDropoutDescriptor(desc);
cuda_exit(c->ctx);
return -1;
}
cuda_exit(c->ctx);
*odesc = desc;
*ostates = states;
return 0;
}
#section support_code
int dnn_dropout_fwd(PyGpuArrayObject *x,
cudnnDropoutDescriptor_t *desc,
PyGpuArrayObject *state,
PyGpuArrayObject **y,
PyGpuArrayObject **ostate,
gpudata **reserve,
cudnnHandle_t _handle) {
PyGpuArrayContext *c = x->context;
cudnnTensorDescriptor_t xdesc;
cudnnTensorDescriptor_t ydesc;
gpudata *res;
size_t res_sz;
cudnnStatus_t err;
if (c_make_tensorNd(x, &xdesc))
return -1;
if (theano_prep_output(y, x->ga.nd, x->ga.dimensions, x->ga.typecode,
GA_C_ORDER, c)) {
cudnnDestroyTensorDescriptor(xdesc);
return -1;
}
if (c_make_tensorNd(y, &ydesc)) {
cudnnDestroyTensorDescriptor(xdesc);
return -1;
}
*ostate = state;
Py_INCREF((PyObject *)state);
/* This can't fail according to the docs */
err = cudnnDropoutGetReserveSpaceSize(desc, &res_sz);
res = gpudata_alloc(c->ctx, res_zs, NULL, 0, NULL);
if (res == NULL) {
cudnnDestroyTensorDescriptor(xdesc);
cudnnDestroyTensorDescriptor(ydesc);
PyErr_SetString(PyExc_RuntimeError, "Could not allocate reserve for dropout");
}
*reserve = res;
cuda_enter(c->ctx);
err = cudnnDropoutForward(_handle, desc, xdesc, PyGpuArray_DEV_DATA(x),
ydesc, PyGpuArray_DEV_DATA(y), *(void **)res,
res_sz);
cudnnDestroyTensorDescriptor(xdesc);
cudnnDestroyTensorDescriptor(ydesc);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not run dropout: %s",
cudnnGetErrorString(err));
cuda_exit(c->ctx);
return -1;
}
cuda_exit(c->ctx);
return 0;
}
...@@ -26,11 +26,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -26,11 +26,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
cudnnConvolutionDescriptor_t desc, cudnnConvolutionDescriptor_t desc,
double alpha, double beta, double alpha, double beta,
PyGpuArrayObject **output, PyGpuArrayObject **output,
PyGpuContextObject *c) { cudnnHandle_t _handle) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; PyGpuContextObject *c = input->context;
float af = alpha, bf = beta;
void *alpha_p; void *alpha_p;
void *beta_p; void *beta_p;
float af = alpha, bf = beta;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(kerns)[1]) { if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(kerns)[1]) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
...@@ -92,7 +93,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -92,7 +93,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
int count; int count;
cudnnConvolutionFwdAlgoPerf_t choice; cudnnConvolutionFwdAlgoPerf_t choice;
err = cudnnFindConvolutionForwardAlgorithm( err = cudnnFindConvolutionForwardAlgorithm(
APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns), _handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
desc, APPLY_SPECIFIC(output), 1, &count, &choice); desc, APPLY_SPECIFIC(output), 1, &count, &choice);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -115,7 +116,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -115,7 +116,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
} }
err = cudnnGetConvolutionForwardAlgorithm( err = cudnnGetConvolutionForwardAlgorithm(
APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns), _handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
desc, APPLY_SPECIFIC(output), desc, APPLY_SPECIFIC(output),
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, free, &algo); CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, free, &algo);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -198,7 +199,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -198,7 +199,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
{ {
size_t worksize; size_t worksize;
gpudata *workspace; gpudata *workspace;
err = cudnnGetConvolutionForwardWorkspaceSize(APPLY_SPECIFIC(_handle), err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
APPLY_SPECIFIC(input), APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(kerns),
desc, desc,
...@@ -211,7 +212,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -211,7 +212,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
// TODO: Print a warning // TODO: Print a warning
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
err = cudnnGetConvolutionForwardWorkspaceSize(APPLY_SPECIFIC(_handle), err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
APPLY_SPECIFIC(input), APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(kerns),
desc, desc,
...@@ -248,7 +249,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -248,7 +249,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
cuda_wait((*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnConvolutionForward( err = cudnnConvolutionForward(
APPLY_SPECIFIC(_handle), _handle,
alpha_p, alpha_p,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input), APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns), APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
......
...@@ -25,11 +25,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -25,11 +25,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
PyGpuArrayObject *im, PyGpuArrayObject *im,
cudnnConvolutionDescriptor_t desc, cudnnConvolutionDescriptor_t desc,
double alpha, double beta, PyGpuArrayObject **input, double alpha, double beta, PyGpuArrayObject **input,
PyGpuContextObject *c) { cudnnHandle_t _handle) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; PyGpuContextObject *c = kerns->context;
float af = alpha, bf = beta;
void *alpha_p; void *alpha_p;
void *beta_p; void *beta_p;
float af = alpha, bf = beta;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (PyGpuArray_DIMS(im)[1] != PyGpuArray_DIMS(kerns)[1]) { if (PyGpuArray_DIMS(im)[1] != PyGpuArray_DIMS(kerns)[1]) {
PyErr_SetString(PyExc_ValueError, "images and kernel must have the same " PyErr_SetString(PyExc_ValueError, "images and kernel must have the same "
...@@ -93,7 +94,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -93,7 +94,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cudnnConvolutionBwdDataAlgoPerf_t choice; cudnnConvolutionBwdDataAlgoPerf_t choice;
err = cudnnFindConvolutionBackwardDataAlgorithm( err = cudnnFindConvolutionBackwardDataAlgorithm(
APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output), desc, _handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output), desc,
APPLY_SPECIFIC(input), 1, &count, &choice); APPLY_SPECIFIC(input), 1, &count, &choice);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -116,7 +117,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -116,7 +117,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
} }
err = cudnnGetConvolutionBackwardDataAlgorithm( err = cudnnGetConvolutionBackwardDataAlgorithm(
APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output), _handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output),
desc, APPLY_SPECIFIC(input), desc, APPLY_SPECIFIC(input),
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, free, &algo); CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, free, &algo);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -193,7 +194,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -193,7 +194,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
gpudata *workspace; gpudata *workspace;
err = cudnnGetConvolutionBackwardDataWorkspaceSize( err = cudnnGetConvolutionBackwardDataWorkspaceSize(
APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output), desc, _handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output), desc,
APPLY_SPECIFIC(input), algo, &worksize); APPLY_SPECIFIC(input), algo, &worksize);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -218,7 +219,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -218,7 +219,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_wait((*input)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*input)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnConvolutionBackwardData( err = cudnnConvolutionBackwardData(
APPLY_SPECIFIC(_handle), _handle,
alpha_p, alpha_p,
APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns), APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output), APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output),
......
...@@ -25,11 +25,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -25,11 +25,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
PyGpuArrayObject *km, PyGpuArrayObject *km,
cudnnConvolutionDescriptor_t desc, cudnnConvolutionDescriptor_t desc,
double alpha, double beta, PyGpuArrayObject **kerns, double alpha, double beta, PyGpuArrayObject **kerns,
PyGpuContextObject *c) { cudnnHandle_t _handle) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; PyGpuContextObject *c = input->context;
float af = alpha, bf = beta;
void *alpha_p; void *alpha_p;
void *beta_p; void *beta_p;
float af = alpha, bf = beta;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(km)[1]) { if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(km)[1]) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
...@@ -93,7 +94,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -93,7 +94,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cudnnConvolutionBwdFilterAlgoPerf_t choice; cudnnConvolutionBwdFilterAlgoPerf_t choice;
err = cudnnFindConvolutionBackwardFilterAlgorithm( err = cudnnFindConvolutionBackwardFilterAlgorithm(
APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), desc, _handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), desc,
APPLY_SPECIFIC(kerns), 1, &count, &choice); APPLY_SPECIFIC(kerns), 1, &count, &choice);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -117,7 +118,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -117,7 +118,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
} }
err = cudnnGetConvolutionBackwardFilterAlgorithm( err = cudnnGetConvolutionBackwardFilterAlgorithm(
APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), _handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output),
desc, APPLY_SPECIFIC(kerns), desc, APPLY_SPECIFIC(kerns),
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, free, &algo); CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, free, &algo);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -181,7 +182,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -181,7 +182,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
gpudata *workspace; gpudata *workspace;
err = cudnnGetConvolutionBackwardFilterWorkspaceSize( err = cudnnGetConvolutionBackwardFilterWorkspaceSize(
APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), desc, _handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), desc,
APPLY_SPECIFIC(kerns), algo, &worksize); APPLY_SPECIFIC(kerns), algo, &worksize);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -205,7 +206,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -205,7 +206,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_wait((*kerns)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*kerns)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnConvolutionBackwardFilter( err = cudnnConvolutionBackwardFilter(
APPLY_SPECIFIC(_handle), _handle,
alpha_p, alpha_p,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input), APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output), APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output),
......
...@@ -42,9 +42,10 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, ...@@ -42,9 +42,10 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
PyArrayObject *stride, PyArrayObject *stride,
PyArrayObject *pad, PyArrayObject *pad,
PyGpuArrayObject **out, PyGpuArrayObject **out,
PyGpuContextObject *c) { cudnnHandle_t _handle) {
cudnnStatus_t err; PyGpuContextObject *c = img->context;
size_t dims[5]; size_t dims[5];
cudnnStatus_t err;
if (!GpuArray_IS_C_CONTIGUOUS(&img->ga)) { if (!GpuArray_IS_C_CONTIGUOUS(&img->ga)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported."); PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
...@@ -122,7 +123,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, ...@@ -122,7 +123,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
cuda_wait((*out)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*out)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnPoolingForward( err = cudnnPoolingForward(
APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(pool), _handle, APPLY_SPECIFIC(pool),
alpha, alpha,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(img), APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(img),
beta, beta,
......
...@@ -64,7 +64,8 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp, ...@@ -64,7 +64,8 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
PyArrayObject *stride, PyArrayObject *stride,
PyArrayObject *pad, PyArrayObject *pad,
PyGpuArrayObject **inp_grad, PyGpuArrayObject **inp_grad,
PyGpuContextObject *c) { cudnnHandle_t _handle) {
PyGpuContextObject *c = inp->context;
cudnnStatus_t err; cudnnStatus_t err;
if (!GpuArray_IS_C_CONTIGUOUS(&inp->ga)) { if (!GpuArray_IS_C_CONTIGUOUS(&inp->ga)) {
...@@ -153,7 +154,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp, ...@@ -153,7 +154,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
cuda_wait((*inp_grad)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*inp_grad)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnPoolingBackward( err = cudnnPoolingBackward(
APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(pool), _handle, APPLY_SPECIFIC(pool),
alpha, alpha,
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(out), APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(out),
APPLY_SPECIFIC(output_grad), PyGpuArray_DEV_DATA(out_grad), APPLY_SPECIFIC(output_grad), PyGpuArray_DEV_DATA(out_grad),
......
#section support_code
int dnn_rnn_desc(int hidden_size, int num_layers,
cudnnDropoutDescriptor_t ddesc,
int input_mode, int direction_mode, int rnn_mode,
int dtype, cudnnRNNDescriptor_t *odesc,
cudnnHandle_t _handle) {
cudnnRNNDescriptor_t desc;
cudnnDataType_t data_type;
cudnnStatus_t err;
switch (dtype) {
case GA_FLOAT:
data_type = CUDNN_DATA_FLOAT;
break;
case GA_DOUBLE:
data_type = CUDNN_DATA_DOUBLE;
break;
case GA_HALF:
data_type = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString(PyExc_ValueError, "Unsupported data type");
return -1;
}
err = cudnnCreateRNNDescriptor(&desc);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_RuntimeError, "Can't create RNN descriptor");
return -1;
}
err = cudnnSetRNNDescriptor(desc, hidden_size, num_layers, ddesc,
(cudnnRNNInputMode_t)input_mode,
(cudnnDirectionMode_t)direction_mode,
(cudnnRNNMode_t)rnn_mode, data_type);
if (err != CUDNN_STATUS_SUCCESS) {
cudnnDestroyRNNDescriptor(desc);
PyErr_SetString(PyExc_RuntimeError, "Can't set RNN descriptor");
return -1;
}
*odesc = desc;
return 0;
}
#section support_code
int dnn_rnn_fwd(cudnnRNNDescriptor_t desc,
PyGpuArrayObject *w, PyGpuArrayObject *x,
PyGpuArrayObject *hx, PyGpuArrayObject *cx,
gpudata **reserve, PyGpuArrayObject **y,
PyGpuArrayObject **hy, PyGpuArrayObject **cy,
cudnnHandle_t _handle) {
PyGpuContextObject *c = x->context;
cudnnTensorDescriptor_t xdesc = NULL;
cudnnTensorDescriptor_t hxdesc = NULL;
cudnnTensorDescriptor_t cxdesc = NULL;
cudnnTensorDescriptor_t ydesc = NULL;
cudnnTensorDescriptor_t hydesc = NULL;
cudnnTensorDescriptor_t cydesc = NULL;
cudnnFilterDescriptor_t wdesc = NULL;
cudnnTensorDescriptor_t *xl = NULL;
cudnnTensorDescriptor_t *yl = NULL;
gpudata *workspace = NULL;
size_t worksize, ressize;
size_t seqLength = PyGpuArray_DIM(x, 0);
size_t miniBatch = PyGpuArray_DIM(x, 1);
size_t inputSize = PyGpuArray_DIM(x, 2);
size_t hiddenSizeDir = PyGpuArray_DIM(hx, 2);
size_t shape[3];
int strs[3], dims[3];
cudnnStatus_t err;
cudnnDataType_t dt;
int res = -1;
switch (x->ga.typecode) {
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
break;
case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE;
break;
case GA_HALF:
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString(PyExc_TypeError, "Unsupported data type for x");
return -1;
}
// This is early to match the exit() in the fail label.
cuda_enter(c->ctx);
err = cudnnCreateTensorDescriptor(&xdesc);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not create xdesc: %s",
cudnnGetErrorString(err));
goto fail;
}
dims[0] = PyGpuArray_DIM(x, 1);
dims[1] = PyGpuArray_DIM(x, 2);
dims[2] = 1;
strs[0] = dims[1] * dims[2];
strs[1] = dims[2];
strs[2] = 1;
err = cudnnSetTensorNdDescriptor(xdesc, dt, 3, dims, strs);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not set xdesc: %s",
cudnnGetErrorString(err));
goto fail;
}
if (c_make_tensorNd(hx, &hxdesc) != 0)
goto fail;
if (cx != NULL)
if (c_make_tensorNd(cx, &cxdesc) != 0)
goto fail;
if (c_make_filter(w, &wdesc) != 0)
goto fail;
shape[0] = seqLength;
shape[1] = miniBatch;
shape[2] = hiddenSizeDir;
if (theano_prep_output(y, 3, shape, x->ga.typecode, GA_C_ORDER, c) != 0)
goto fail;
err = cudnnCreateTensorDescriptor(&ydesc);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not create ydesc: %s",
cudnnGetErrorString(err));
goto fail;
}
dims[0] = shape[1];
dims[1] = shape[2];
dims[2] = 1;
strs[0] = dims[2] * dims[1];
strs[1] = dims[2];
strs[2] = 1;
err = cudnnSetTensorNdDescriptor(ydesc, dt, 3, dims, strs);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not set ydesc: %s",
cudnnGetErrorString(err));
goto fail;
}
if (theano_prep_output(hy, 3, PyGpuArray_DIMS(hx),
hx->ga.typecode, GA_C_ORDER, c) != 0)
goto fail;
if (c_make_tensorNd(*hy, &hydesc) != 0)
goto fail;
if (cy != NULL) {
if (theano_prep_output(cy, 3, PyGpuArray_DIMS(cx),
cx->ga.typecode, GA_C_ORDER, c) != 0)
goto fail;
if (c_make_tensorNd(*cy, &cydesc) != 0)
goto fail;
}
xl = (cudnnTensorDescriptor_t *)calloc(sizeof(cudnnTensorDescriptor_t), seqLength);
if (xl == NULL) {
PyErr_NoMemory();
goto fail;
}
for (size_t i = 0; i < seqLength; i++)
xl[i] = xdesc;
yl = (cudnnTensorDescriptor_t *)calloc(sizeof(cudnnTensorDescriptor_t), seqLength);
if (yl == NULL) {
PyErr_NoMemory();
goto fail;
}
for (size_t i = 0; i < seqLength; i++)
yl[i] = ydesc;
err = cudnnGetRNNWorkspaceSize(_handle, desc, (int)seqLength,
xl, &worksize);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not get worksize: %s",
cudnnGetErrorString(err));
goto fail;
}
workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
if (workspace == NULL) {
PyErr_Format(PyExc_RuntimeError, "Could not allocate workspace");
goto fail;
}
err = cudnnGetRNNTrainingReserveSize(_handle, desc, (int)seqLength,
xl, &ressize);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not get reserve size: %s",
cudnnGetErrorString(err));
goto fail;
}
*reserve = gpudata_alloc(c->ctx, ressize, NULL, 0, NULL);
if (*reserve == NULL) {
PyErr_Format(PyExc_RuntimeError, "Could not allocate reserve");
goto fail;
}
err = cudnnRNNForwardTraining(_handle, desc, (int)seqLength,
xl, PyGpuArray_DEV_DATA(x),
hxdesc, PyGpuArray_DEV_DATA(hx),
cxdesc, cx ? PyGpuArray_DEV_DATA(cx) : NULL,
wdesc, PyGpuArray_DEV_DATA(w),
yl, PyGpuArray_DEV_DATA(*y),
hydesc, PyGpuArray_DEV_DATA(*hy),
cydesc, cy ? PyGpuArray_DEV_DATA(*cy) : NULL,
*(void **)workspace, worksize,
*(void **)(*reserve), ressize);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could run RNN: %s",
cudnnGetErrorString(err));
goto fail;
}
res = 0;
fail:
if (xdesc != NULL)
cudnnDestroyTensorDescriptor(xdesc);
if (hxdesc != NULL)
cudnnDestroyTensorDescriptor(hxdesc);
if (cxdesc != NULL)
cudnnDestroyTensorDescriptor(cxdesc);
if (wdesc != NULL)
cudnnDestroyFilterDescriptor(wdesc);
if (ydesc != NULL)
cudnnDestroyTensorDescriptor(ydesc);
if (hydesc != NULL)
cudnnDestroyTensorDescriptor(hydesc);
if (cydesc != NULL)
cudnnDestroyTensorDescriptor(cydesc);
free(xl);
free(yl);
if (workspace != NULL)
gpudata_release(workspace);
cuda_exit(c->ctx);
return res;
}
#section support_code
int dnn_rnn_gi(cudnnRNNDescriptor_t desc, npy_uint64 xshp,
PyGpuArrayObject *y, PyGpuArrayObject *dy,
PyGpuArrayObject *w, PyGpuArrayObject *hx,
gpudata *reserve, PyGpuArrayObject *cx,
PyGpuArrayObject *dhy, PyGpuArrayObject *dcy,
gpudata **oreserve, PyGpuArrayObject **dx,
PyGpuArrayObject **dhx, PyGpuArrayObject **dcx,
cudnnHandle_t _handle) {
PyGpuContextObject *c = y->context;
cudnnTensorDescriptor_t ydesc = NULL;
cudnnTensorDescriptor_t dhydesc = NULL;
cudnnTensorDescriptor_t dcydesc = NULL;
cudnnFilterDescriptor_t wdesc = NULL;
cudnnTensorDescriptor_t hxdesc = NULL;
cudnnTensorDescriptor_t cxdesc = NULL;
cudnnTensorDescriptor_t dxdesc = NULL;
cudnnTensorDescriptor_t dhxdesc = NULL;
cudnnTensorDescriptor_t dcxdesc = NULL;
cudnnTensorDescriptor_t *yl = NULL;
cudnnTensorDescriptor_t *dxl = NULL;
gpudata *workspace = NULL;
size_t worksize, ressize;
size_t seqLength = PyGpuArray_DIM(y, 0);
size_t miniBatch = PyGpuArray_DIM(y, 1);
size_t inputSize = xshp;
size_t shape[3];
int dims[3], strs[3];
cudnnStatus_t err;
cudnnDataType_t dt;
int res = -1;
switch (y->ga.typecode) {
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
break;
case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE;
break;
case GA_HALF:
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString(PyExc_TypeError, "Unsupported data type for y");
return -1;
}
cuda_enter(c->ctx);
err = cudnnCreateTensorDescriptor(&ydesc);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not create ydesc: %s",
cudnnGetErrorString(err));
goto fail;
}
/* We need to use the last two dimensions for this, this is not a typo */
dims[0] = PyGpuArray_DIM(y, 1);
dims[1] = PyGpuArray_DIM(y, 2);
dims[2] = 1;
strs[0] = dims[2] * dims[1];
strs[1] = dims[2];
strs[2] = 1;
err = cudnnSetTensorNdDescriptor(ydesc, dt, 3, dims, strs);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not set ydesc: %s",
cudnnGetErrorString(err));
goto fail;
}
if (dhy != NULL)
if (c_make_tensorNd(dhy, &dhydesc) != 0)
goto fail;
if (dcy != NULL)
if (c_make_tensorNd(dcy, &dcydesc) != 0)
goto fail;
if (c_make_filter(w, &wdesc) != 0)
goto fail;
if (c_make_tensorNd(hx, &hxdesc) != 0)
goto fail;
if (cx != NULL)
if (c_make_tensorNd(cx, &cxdesc) != 0)
goto fail;
shape[0] = seqLength;
shape[1] = miniBatch;
shape[2] = inputSize;
if (theano_prep_output(dx, 3, shape, y->ga.typecode, GA_C_ORDER, c) != 0)
goto fail;
err = cudnnCreateTensorDescriptor(&dxdesc);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not create dxdesc: %s",
cudnnGetErrorString(err));
goto fail;
}
/* Again not a typo, we need to use the last two dimensions */
dims[0] = shape[1];
dims[1] = shape[2];
dims[2] = 1;
strs[0] = dims[2] * dims[1];
strs[1] = dims[2];
strs[2] = 1;
err = cudnnSetTensorNdDescriptor(dxdesc, dt, 3, dims, strs);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not set dxdesc: %s",
cudnnGetErrorString(err));
goto fail;
}
if (theano_prep_output(dhx, 3, PyGpuArray_DIMS(hx), hx->ga.typecode,
GA_C_ORDER, c) != 0)
goto fail;
if (c_make_tensorNd(*dhx, &dhxdesc) != 0)
goto fail;
if (cx != NULL) {
if (theano_prep_output(dcx, 3, PyGpuArray_DIMS(cx), cx->ga.typecode,
GA_C_ORDER, c) != 0)
goto fail;
if (c_make_tensorNd(*dcx, &dcxdesc) != 0)
goto fail;
}
yl = (cudnnTensorDescriptor_t *)calloc(sizeof(cudnnTensorDescriptor_t), seqLength);
if (yl == NULL) {
PyErr_NoMemory();
goto fail;
}
for (size_t i = 0; i < seqLength; i++)
yl[i] = ydesc;
dxl = (cudnnTensorDescriptor_t *)calloc(sizeof(cudnnTensorDescriptor_t), seqLength);
if (dxl == NULL) {
PyErr_NoMemory();
goto fail;
}
for (size_t i = 0; i < seqLength; i++)
dxl[i] = dxdesc;
err = cudnnGetRNNWorkspaceSize(_handle, desc, (int)seqLength, dxl, &worksize);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not get worksize: %s",
cudnnGetErrorString(err));
goto fail;
}
workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
if (workspace == NULL) {
PyErr_Format(PyExc_RuntimeError, "Could not allocate workspace");
goto fail;
}
err = cudnnGetRNNTrainingReserveSize(_handle, desc, (int)seqLength,
dxl, &ressize);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not get reserve size: %s",
cudnnGetErrorString(err));
goto fail;
}
*oreserve = gpudata_alloc(c->ctx, ressize, NULL, 0, NULL);
if (*oreserve == NULL) {
PyErr_Format(PyExc_RuntimeError, "Could not allocate reserve");
goto fail;
}
if (gpudata_move(*oreserve, 0, reserve, 0, ressize) != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "could not copy reserve");
goto fail;
}
err = cudnnRNNBackwardData(_handle, desc, (int)seqLength,
yl, PyGpuArray_DEV_DATA(y),
/* y and dy are the same shape */
yl, PyGpuArray_DEV_DATA(dy),
dhydesc, dhy ? PyGpuArray_DEV_DATA(dhy) : NULL,
dcydesc, dcy ? PyGpuArray_DEV_DATA(dcy) : NULL,
wdesc, PyGpuArray_DEV_DATA(w),
hxdesc, PyGpuArray_DEV_DATA(hx),
cxdesc, cx ? PyGpuArray_DEV_DATA(cx) : NULL,
dxl, PyGpuArray_DEV_DATA(*dx),
dhxdesc, PyGpuArray_DEV_DATA(*dhx),
dcxdesc, dcx ? PyGpuArray_DEV_DATA(*dcx) : NULL,
*(void **)workspace, worksize,
*(void **)(*oreserve), ressize);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could run RNN grad inputs: %s",
cudnnGetErrorString(err));
goto fail;
}
res = 0;
fail:
if (ydesc != NULL)
cudnnDestroyTensorDescriptor(ydesc);
if (dhydesc != NULL)
cudnnDestroyTensorDescriptor(dhydesc);
if (dcydesc != NULL)
cudnnDestroyTensorDescriptor(dcydesc);
if (wdesc != NULL)
cudnnDestroyFilterDescriptor(wdesc);
if (hxdesc != NULL)
cudnnDestroyTensorDescriptor(hxdesc);
if (cxdesc != NULL)
cudnnDestroyTensorDescriptor(cxdesc);
if (dxdesc != NULL)
cudnnDestroyTensorDescriptor(dxdesc);
if (dhxdesc != NULL)
cudnnDestroyTensorDescriptor(dhxdesc);
if (dcxdesc != NULL)
cudnnDestroyTensorDescriptor(dcxdesc);
free(yl);
free(dxl);
if (workspace != NULL)
gpudata_release(workspace);
cuda_exit(c->ctx);
return res;
}
#section support_code
int dnn_rnn_gw(cudnnRNNDescriptor_t desc, npy_uint64 _wsize,
PyGpuArrayObject *x, PyGpuArrayObject *hx,
PyGpuArrayObject *y, gpudata *reserve,
PyGpuArrayObject **dw, cudnnHandle_t _handle) {
PyGpuContextObject *c = x->context;
cudnnTensorDescriptor_t xdesc = NULL;
cudnnTensorDescriptor_t hxdesc = NULL;
cudnnTensorDescriptor_t ydesc = NULL;
cudnnFilterDescriptor_t dwdesc = NULL;
cudnnTensorDescriptor_t *xl = NULL;
cudnnTensorDescriptor_t *yl = NULL;
gpudata *workspace = NULL;
size_t worksize, ressize;
size_t iters = PyGpuArray_DIM(x, 0);
size_t wsize = _wsize;
int dims[3], strs[3];
cudnnStatus_t err;
cudnnDataType_t dt;
int res = -1;
switch (x->ga.typecode) {
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
break;
case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE;
break;
case GA_HALF:
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString(PyExc_TypeError, "Unsupported data type for x");
return -1;
}
// This is early to match the exit() in the fail label.
cuda_enter(c->ctx);
err = cudnnCreateTensorDescriptor(&xdesc);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not create xdesc: %s",
cudnnGetErrorString(err));
goto fail;
}
/* We need to use the last two dimensions for this, this is not a typo */
dims[0] = PyGpuArray_DIM(x, 1);
dims[1] = PyGpuArray_DIM(x, 2);
dims[2] = 1;
strs[0] = dims[2] * dims[1];
strs[1] = dims[2];
strs[2] = 1;
err = cudnnSetTensorNdDescriptor(xdesc, dt, 3, dims, strs);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not set xdesc: %s",
cudnnGetErrorString(err));
goto fail;
}
if (c_make_tensorNd(hx, &hxdesc) != 0)
goto fail;
err = cudnnCreateTensorDescriptor(&ydesc);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not create ydesc: %s",
cudnnGetErrorString(err));
goto fail;
}
/* Again not a typo, we need to use the last two dimensions */
dims[0] = PyGpuArray_DIM(y, 1);
dims[1] = PyGpuArray_DIM(y, 2);
dims[2] = 1;
strs[0] = dims[2] * dims[1];
strs[1] = dims[2];
strs[2] = 1;
err = cudnnSetTensorNdDescriptor(ydesc, dt, 3, dims, strs);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not set ydesc: %s",
cudnnGetErrorString(err));
goto fail;
}
if (theano_prep_output(dw, 1, &wsize, x->ga.typecode, GA_C_ORDER, c) != 0)
goto fail;
GpuArray_memset(&(*dw)->ga, 0);
if (c_make_filter(*dw, &dwdesc) != 0)
goto fail;
xl = (cudnnTensorDescriptor_t *)calloc(sizeof(cudnnTensorDescriptor_t), iters);
if (xl == NULL) {
PyErr_NoMemory();
goto fail;
}
for (size_t i = 0; i < iters; i++)
xl[i] = xdesc;
yl = (cudnnTensorDescriptor_t *)calloc(sizeof(cudnnTensorDescriptor_t), iters);
if (yl == NULL) {
PyErr_NoMemory();
goto fail;
}
for (size_t i = 0; i < iters; i++)
yl[i] = ydesc;
err = cudnnGetRNNWorkspaceSize(_handle, desc, (int)iters,
xl, &worksize);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not get worksize: %s",
cudnnGetErrorString(err));
goto fail;
}
workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
if (workspace == NULL) {
PyErr_Format(PyExc_RuntimeError, "Could not allocate workspace");
goto fail;
}
err = cudnnGetRNNTrainingReserveSize(_handle, desc, (int)iters,
xl, &ressize);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not get reserve size: %s",
cudnnGetErrorString(err));
goto fail;
}
err = cudnnRNNBackwardWeights(_handle, desc, (int)iters,
xl, PyGpuArray_DEV_DATA(x),
hxdesc, PyGpuArray_DEV_DATA(hx),
yl, PyGpuArray_DEV_DATA(y),
*(void **)workspace, worksize,
dwdesc, PyGpuArray_DEV_DATA(*dw),
*(void **)reserve, ressize);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could run RNN grad weights: %s",
cudnnGetErrorString(err));
goto fail;
}
res = 0;
fail:
if (xdesc != NULL)
cudnnDestroyTensorDescriptor(xdesc);
if (hxdesc != NULL)
cudnnDestroyTensorDescriptor(hxdesc);
if (ydesc != NULL)
cudnnDestroyTensorDescriptor(ydesc);
if (dwdesc != NULL)
cudnnDestroyFilterDescriptor(dwdesc);
free(xl);
free(yl);
if (workspace != NULL)
gpudata_release(workspace);
cuda_exit(c->ctx);
return res;
}
#section support_code
int dnn_rnn_paramsize(cudnnRNNDescriptor_t desc,
PyArrayObject *isize,
npy_int32 typecode,
npy_uint64 *oparam_size,
cudnnHandle_t _handle) {
cudnnTensorDescriptor_t xdesc;
size_t param_size;
cudnnStatus_t err;
cudnnDataType_t dt;
int shape[3];
int strides[3];
if (PyArray_DIM(isize, 0) != 2) {
PyErr_SetString(PyExc_ValueError, "input_size should be of length two");
return -1;
}
switch (typecode) {
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
break;
case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE;
break;
case GA_HALF:
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString(PyExc_ValueError, "Unsupported data type");
return -1;
}
err = cudnnCreateTensorDescriptor(&xdesc);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_RuntimeError, "Could not create tensor descriptor");
return -1;
}
shape[0] = *(npy_uint64 *)PyArray_GETPTR1(isize, 0);
shape[1] = *(npy_uint64 *)PyArray_GETPTR1(isize, 1);
shape[2] = 1;
strides[0] = shape[2] * shape[1];
strides[1] = shape[2];
strides[2] = 1;
err = cudnnSetTensorNdDescriptor(xdesc, dt, 3, shape, strides);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "Could not set tensor descriptor: %s",
cudnnGetErrorString(err));
return -1;
}
err = cudnnGetRNNParamsSize(_handle, desc, xdesc, &param_size, dt);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_SetString(PyExc_RuntimeError, "Could not get parameter size");
return -1;
}
cudnnDestroyTensorDescriptor(xdesc);
*oparam_size = param_size;
return 0;
}
...@@ -35,7 +35,8 @@ if (APPLY_SPECIFIC(output) != NULL) ...@@ -35,7 +35,8 @@ if (APPLY_SPECIFIC(output) != NULL)
int APPLY_SPECIFIC(softmax)(PyGpuArrayObject *x, int APPLY_SPECIFIC(softmax)(PyGpuArrayObject *x,
PyGpuArrayObject **out, PyGpuArrayObject **out,
PyGpuContextObject *c) { cudnnHandle_t _handle) {
PyGpuContextObject *c = x->context;
cudnnStatus_t err; cudnnStatus_t err;
if (c_set_tensorNd(x, APPLY_SPECIFIC(input)) != 0) if (c_set_tensorNd(x, APPLY_SPECIFIC(input)) != 0)
...@@ -77,7 +78,7 @@ int APPLY_SPECIFIC(softmax)(PyGpuArrayObject *x, ...@@ -77,7 +78,7 @@ int APPLY_SPECIFIC(softmax)(PyGpuArrayObject *x,
cuda_wait((*out)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*out)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnSoftmaxForward( err = cudnnSoftmaxForward(
APPLY_SPECIFIC(_handle), _handle,
SOFTMAX_ALGO, SOFTMAX_ALGO,
SOFTMAX_MODE, SOFTMAX_MODE,
alpha, alpha,
......
...@@ -46,7 +46,8 @@ if (APPLY_SPECIFIC(dx) != NULL) ...@@ -46,7 +46,8 @@ if (APPLY_SPECIFIC(dx) != NULL)
int APPLY_SPECIFIC(softmax_grad)(PyGpuArrayObject *dy, int APPLY_SPECIFIC(softmax_grad)(PyGpuArrayObject *dy,
PyGpuArrayObject *sm, PyGpuArrayObject *sm,
PyGpuArrayObject **dx, PyGpuArrayObject **dx,
PyGpuContextObject *c) { cudnnHandle_t _handle) {
PyGpuContextObject *c = dy->context;
cudnnStatus_t err; cudnnStatus_t err;
if (c_set_tensorNd(dy, APPLY_SPECIFIC(dy)) != 0) if (c_set_tensorNd(dy, APPLY_SPECIFIC(dy)) != 0)
...@@ -91,7 +92,7 @@ int APPLY_SPECIFIC(softmax_grad)(PyGpuArrayObject *dy, ...@@ -91,7 +92,7 @@ int APPLY_SPECIFIC(softmax_grad)(PyGpuArrayObject *dy,
cuda_wait((*dx)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*dx)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnSoftmaxBackward( err = cudnnSoftmaxBackward(
APPLY_SPECIFIC(_handle), _handle,
SOFTMAX_ALGO, SOFTMAX_ALGO,
SOFTMAX_MODE, SOFTMAX_MODE,
alpha, alpha,
......
from __future__ import absolute_import, print_function, division
import theano
import theano.tensor as T
import numpy
class Model(object):
def __init__(self, name=""):
self.name = name
self.layers = []
self.params = []
self.other_updates = {}
def add_layer(self, layer):
self.layers.append(layer)
for p in layer.params:
self.params.append(p)
if hasattr(layer, 'other_updates'):
for y in layer.other_updates:
self.other_updates[y[0]] = y[1]
def get_params(self):
return self.params
def uniform(stdev, size):
"""uniform distribution with the given stdev and size"""
return numpy.random.uniform(
low=-stdev * numpy.sqrt(3),
high=stdev * numpy.sqrt(3),
size=size
).astype(theano.config.floatX)
def linear_transform_weights(input_dim, output_dim,
param_list=None, name=""):
"theano shared variable given input and output dimension"
weight_inialization = uniform(numpy.sqrt(2.0 / input_dim),
(input_dim, output_dim))
W = theano.shared(weight_inialization, name=name)
assert(param_list is not None)
param_list.append(W)
return W
def bias_weights(length, param_list=None, name=""):
"theano shared variable for bias unit, given length"
bias_initialization = numpy.zeros(length).astype(theano.config.floatX)
bias = theano.shared(
bias_initialization,
name=name
)
if param_list is not None:
param_list.append(bias)
return bias
class Layer(object):
'''Generic Layer Template which all layers should inherit'''
def __init__(self, name=""):
self.name = name
self.params = []
def get_params(self):
return self.params
class GRU(Layer):
def __init__(self, input_dim, output_dim, input_layer, s0=None, name=""):
'''Layers information'''
self.name = name
self.input_dim = input_dim
self.hidden_dim = output_dim
self.output_dim = output_dim
self.input_layer = input_layer
self.X = input_layer.output()
self.s0 = s0
self.params = []
'''Layers weights'''
'''self.params is passed so that any paramters could be appended to it'''
self.W_r = linear_transform_weights(input_dim, output_dim, param_list=self.params, name=name + ".W_r")
self.b_wr = bias_weights((output_dim,), param_list=self.params, name=name + ".b_wr")
self.W_i = linear_transform_weights(input_dim, output_dim, param_list=self.params, name=name + ".W_i")
self.b_wi = bias_weights((output_dim,), param_list=self.params, name=name + ".b_wi")
self.W_h = linear_transform_weights(input_dim, output_dim, param_list=self.params, name=name + ".W_h")
self.b_wh = bias_weights((output_dim,), param_list=self.params, name=name + ".b_wh")
self.R_r = linear_transform_weights(output_dim, output_dim, param_list=self.params, name=name + ".R_r")
self.b_rr = bias_weights((output_dim,), param_list=self.params, name=name + ".b_rr")
self.R_i = linear_transform_weights(output_dim, output_dim, param_list=self.params, name=name + ".R_i")
self.b_ru = bias_weights((output_dim,), param_list=self.params, name=name + ".b_ru")
self.R_h = linear_transform_weights(output_dim, output_dim, param_list=self.params, name=name + ".R_h")
self.b_rh = bias_weights((output_dim,), param_list=self.params, name=name + ".b_rh")
'''step through processed input to create output'''
def step(inp, s_prev):
i_t = T.nnet.sigmoid(
T.dot(inp, self.W_i) + T.dot(s_prev, self.R_i) + self.b_wi + self.b_ru)
r_t = T.nnet.sigmoid(
T.dot(inp, self.W_r) + T.dot(s_prev, self.R_r) + self.b_wr + self.b_rr)
h_hat_t = T.tanh(
T.dot(inp, self.W_h) + (r_t * (T.dot(s_prev, self.R_h) + self.b_rh)) + self.b_wh)
s_curr = ((1.0 - i_t) * h_hat_t) + (i_t * s_prev)
return s_curr
outputs_info = self.s0
states, updates = theano.scan(
fn=step,
sequences=[self.X],
outputs_info=outputs_info
)
self.Y = states
def output(self):
return self.Y
class LSTM(Layer):
def __init__(self, input_dim, output_dim, input_layer, s0=None, c0=None,
name=""):
'''Layers information'''
self.name = name
self.input_dim = input_dim
self.hidden_dim = output_dim
self.output_dim = output_dim
self.input_layer = input_layer
self.X = input_layer.output()
self.s0 = s0
self.c0 = c0
self.params = []
'''Layers weights'''
'''self.params is passed so that any paramters could be appended to it'''
self.W_i = linear_transform_weights(input_dim, output_dim, param_list=self.params, name=name + ".W_i")
self.b_wi = bias_weights((output_dim,), param_list=self.params, name=name + ".b_wi")
self.W_f = linear_transform_weights(input_dim, output_dim, param_list=self.params, name=name + ".W_f")
self.b_wf = bias_weights((output_dim,), param_list=self.params, name=name + ".b_wf")
self.W_c = linear_transform_weights(input_dim, output_dim, param_list=self.params, name=name + ".W_c")
self.b_wc = bias_weights((output_dim,), param_list=self.params, name=name + ".b_wc")
self.W_o = linear_transform_weights(input_dim, output_dim, param_list=self.params, name=name + ".W_o")
self.b_wo = bias_weights((output_dim,), param_list=self.params, name=name + ".b_wo")
self.R_i = linear_transform_weights(output_dim, output_dim, param_list=self.params, name=name + ".R_i")
self.b_ri = bias_weights((output_dim,), param_list=self.params, name=name + ".b_ri")
self.R_f = linear_transform_weights(output_dim, output_dim, param_list=self.params, name=name + ".R_f")
self.b_rf = bias_weights((output_dim,), param_list=self.params, name=name + ".b_rf")
self.R_c = linear_transform_weights(output_dim, output_dim, param_list=self.params, name=name + ".R_c")
self.b_rc = bias_weights((output_dim,), param_list=self.params, name=name + ".b_rc")
self.R_o = linear_transform_weights(output_dim, output_dim, param_list=self.params, name=name + ".R_o")
self.b_ro = bias_weights((output_dim,), param_list=self.params, name=name + ".b_ro")
'''step through processed input to create output'''
def step(x_t, h_tm1, c_tm1):
i_t = T.nnet.sigmoid(
T.dot(x_t, self.W_i) + T.dot(h_tm1, self.R_i) + self.b_wi + self.b_ri)
f_t = T.nnet.sigmoid(
T.dot(x_t, self.W_f) + T.dot(h_tm1, self.R_f) + self.b_wf + self.b_rf)
o_t = T.nnet.sigmoid(
T.dot(x_t, self.W_o) + T.dot(h_tm1, self.R_o) + self.b_ro + self.b_wo)
c_hat_t = T.tanh(
T.dot(x_t, self.W_c) + T.dot(h_tm1, self.R_c) + self.b_wc + self.b_rc)
c_t = f_t * c_tm1 + i_t * c_hat_t
h_t = o_t * T.tanh(c_t)
return h_t, c_t
outputs_info = [self.s0, self.c0]
states, updates = theano.scan(
fn=step,
sequences=[self.X],
outputs_info=outputs_info
)
self.Y = states[0]
self.C = states[1]
def output(self):
return self.Y
class FC(Layer):
def __init__(self, input_dim, output_dim, input_layer, name=""):
self.input_layer = input_layer
self.name = name
self.params = []
self.input_dim = input_dim
self.output_dim = output_dim
self.X = self.input_layer.output()
self.W = linear_transform_weights(input_dim, output_dim, param_list=self.params, name=name + ".W")
self.b = bias_weights((output_dim,), param_list=self.params, name=name + ".b")
def output(self):
return T.dot(self.X, self.W) + self.b
class WrapperLayer(Layer):
def __init__(self, X, name=""):
self.params = []
self.name = name
self.X = X
def output(self):
return self.X
...@@ -68,6 +68,7 @@ def reg_context(name, ctx): ...@@ -68,6 +68,7 @@ def reg_context(name, ctx):
if not isinstance(ctx, gpuarray.GpuContext): if not isinstance(ctx, gpuarray.GpuContext):
raise TypeError("context is not GpuContext") raise TypeError("context is not GpuContext")
_context_reg[name] = ctx _context_reg[name] = ctx
_props_map[ctx] = dict()
def get_context(name): def get_context(name):
...@@ -96,6 +97,26 @@ def list_contexts(): ...@@ -96,6 +97,26 @@ def list_contexts():
""" """
return _context_reg.keys() return _context_reg.keys()
# Mappings of properties to contexts. Please never use this if you
# can avoid it.
# This is basically a way to store "global" variables that depend on
# the context.
_props_map = {}
def _get_props(name):
ctx = get_context(name)
return _props_map[ctx]
def get_prop(name, k):
return _get_props(name)[k]
def set_prop(name, k, v):
_get_props(name)[k] = v
# Private method # Private method
def _name_for_ctx(ctx): def _name_for_ctx(ctx):
......
...@@ -1102,7 +1102,8 @@ def _populate_grad_dict(var_to_app_to_idx, ...@@ -1102,7 +1102,8 @@ def _populate_grad_dict(var_to_app_to_idx,
str(o_shape) + " on an output of shape " + str(o_shape) + " on an output of shape " +
str(g_shape)) str(g_shape))
input_grads = node.op.grad(inputs, new_output_grads) input_grads = node.op.L_op(inputs, node.outputs,
new_output_grads)
if input_grads is None: if input_grads is None:
raise TypeError("%s.grad returned NoneType, " raise TypeError("%s.grad returned NoneType, "
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论