提交 7438aa53 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Fix shapes for the bidirectional case, since the cuDNN documentation describes them incorrectly.

上级 c0b24762
...@@ -2183,7 +2183,7 @@ def _split_rnn_params(w, desc, layer, input_size, dtype, rnn_mode): ...@@ -2183,7 +2183,7 @@ def _split_rnn_params(w, desc, layer, input_size, dtype, rnn_mode):
class GpuDnnRNNOp(DnnBase): class GpuDnnRNNOp(DnnBase):
__props__ = () __props__ = ()
_cop_num_inputs = 5 _cop_num_inputs = 6
_cop_num_outputs = 4 _cop_num_outputs = 4
def __init__(self, rnn_mode, direction_mode): def __init__(self, rnn_mode, direction_mode):
...@@ -2208,7 +2208,7 @@ class GpuDnnRNNOp(DnnBase): ...@@ -2208,7 +2208,7 @@ class GpuDnnRNNOp(DnnBase):
w = as_gpuarray_variable(w, context_name) w = as_gpuarray_variable(w, context_name)
x = as_gpuarray_variable(x, context_name) x = as_gpuarray_variable(x, context_name)
hx = as_gpuarray_variable(hx, context_name) hx = as_gpuarray_variable(hx, context_name)
inputs = [desc, w, x, hx] inputs = [desc, as_i32(self.num_dirs), w, x, hx]
assert w.ndim == 1 assert w.ndim == 1
assert x.ndim == 3 # seqLength, minibatch, inputSize assert x.ndim == 3 # seqLength, minibatch, inputSize
assert hx.ndim == 3 # numLayers, minibatch, hiddenSize * bidi assert hx.ndim == 3 # numLayers, minibatch, hiddenSize * bidi
...@@ -2232,8 +2232,8 @@ class GpuDnnRNNOp(DnnBase): ...@@ -2232,8 +2232,8 @@ class GpuDnnRNNOp(DnnBase):
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def L_op(self, inputs, outputs, output_grads): def L_op(self, inputs, outputs, output_grads):
desc, w, x, hx = inputs[:4] desc, numDirs, w, x, hx = inputs[:5]
cx = inputs[4] if len(inputs) == 5 else None cx = inputs[5] if len(inputs) == 6 else None
reserve, y, hy = outputs[:3] reserve, y, hy = outputs[:3]
_, dy, dhy = output_grads[:3] _, dy, dhy = output_grads[:3]
dcy = output_grads[3] if len(output_grads) == 4 else None dcy = output_grads[3] if len(output_grads) == 4 else None
...@@ -2261,14 +2261,14 @@ class GpuDnnRNNOp(DnnBase): ...@@ -2261,14 +2261,14 @@ class GpuDnnRNNOp(DnnBase):
reserve2, dx, dhx = dinputs[:3] reserve2, dx, dhx = dinputs[:3]
dw = GpuDnnRNNGradWeights()( dw = GpuDnnRNNGradWeights()(
desc, x, hx, y, reserve2, w) desc, x, hx, y, reserve2, w)
res = [DisconnectedType()(), dw, dx, dhx] res = [DisconnectedType()(), DisconnectedType()(), dw, dx, dhx]
if cx is not None: if cx is not None:
res.append(dinputs[3]) # dcx res.append(dinputs[3]) # dcx
return res return res
def connection_pattern(self, node): def connection_pattern(self, node):
deconn = [[False] * len(node.outputs)] deconn = [[False] * len(node.outputs)] * 2
conn = [[True] * len(node.outputs)] * (len(node.inputs) - 1) conn = [[True] * len(node.outputs)] * (len(node.inputs) - 2)
return deconn + conn return deconn + conn
......
#section support_code #section support_code
int dnn_rnn_fwd(cudnnRNNDescriptor_t desc, int dnn_rnn_fwd(cudnnRNNDescriptor_t desc, uint32_t numDirs,
PyGpuArrayObject *w, PyGpuArrayObject *x, PyGpuArrayObject *w, PyGpuArrayObject *x,
PyGpuArrayObject *hx, PyGpuArrayObject *cx, PyGpuArrayObject *hx, PyGpuArrayObject *cx,
gpudata **reserve, PyGpuArrayObject **y, gpudata **reserve, PyGpuArrayObject **y,
...@@ -22,7 +22,7 @@ int dnn_rnn_fwd(cudnnRNNDescriptor_t desc, ...@@ -22,7 +22,7 @@ int dnn_rnn_fwd(cudnnRNNDescriptor_t desc,
size_t seqLength = PyGpuArray_DIM(x, 0); size_t seqLength = PyGpuArray_DIM(x, 0);
size_t miniBatch = PyGpuArray_DIM(x, 1); size_t miniBatch = PyGpuArray_DIM(x, 1);
size_t inputSize = PyGpuArray_DIM(x, 2); size_t inputSize = PyGpuArray_DIM(x, 2);
size_t hiddenSizeDir = PyGpuArray_DIM(hx, 2); size_t hiddenSize = PyGpuArray_DIM(hx, 2);
size_t shape[3]; size_t shape[3];
int strs[3], dims[3]; int strs[3], dims[3];
cudnnStatus_t err; cudnnStatus_t err;
...@@ -84,7 +84,7 @@ int dnn_rnn_fwd(cudnnRNNDescriptor_t desc, ...@@ -84,7 +84,7 @@ int dnn_rnn_fwd(cudnnRNNDescriptor_t desc,
shape[0] = seqLength; shape[0] = seqLength;
shape[1] = miniBatch; shape[1] = miniBatch;
shape[2] = hiddenSizeDir; shape[2] = hiddenSize * numDirs;
if (theano_prep_output(y, 3, shape, x->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(y, 3, shape, x->ga.typecode, GA_C_ORDER, c) != 0)
goto fail; goto fail;
......
...@@ -1580,6 +1580,55 @@ def test_dnn_rnn_gru(): ...@@ -1580,6 +1580,55 @@ def test_dnn_rnn_gru():
utt.assert_allclose(ref_grad_layer[j], g) utt.assert_allclose(ref_grad_layer[j], g)
def test_dnn_rnn_gru_bidi():
    """Forward and gradient smoke test for a bidirectional GRU RNNBlock.

    Builds a 3-layer bidirectional GRU through cuDNN, compiles the forward
    function, and checks that gradients w.r.t. the input, the initial
    hidden state and the packed parameter vector can be computed whether
    the cost depends on the output sequence, on the final hidden state,
    or on both.
    """
    # test params
    input_dim = 32
    hidden_dim = 16
    batch_size = 2
    depth = 3
    timesteps = 5

    # test code
    X = T.tensor3('X')
    Y = T.tensor3('Y')
    h0 = T.tensor3('h0')

    rnnb = dnn.RNNBlock(theano.config.floatX, hidden_dim, depth, 'gru',
                        direction_mode='bidirectional')
    psize = rnnb.get_param_size([batch_size, input_dim])
    params_cudnn = gpuarray_shared_constructor(
        numpy.random.random((psize,)).astype(theano.config.floatX))

    def funcs(out, params, hy=None):
        # Build a gradient function for a cost that optionally depends on
        # the output sequence (`out`) and/or the final hidden state (`hy`).
        # `out` and `hy` are symbolic Theano variables: truth-testing them
        # (`if out:`) is not a reliable presence check, so compare against
        # None explicitly.
        cost = 0
        if out is not None:
            cost += T.mean((Y - out) ** 2)
        if hy is not None:
            cost += T.mean(hy ** 2)
        grad = T.grad(cost, [X, h0] + params)
        grad_fn = theano.function([X, Y, h0], grad, mode=mode_with_gpu,
                                  on_unused_input='ignore')
        return grad_fn

    y, hy = rnnb.apply(params_cudnn, X, h0)

    cudnn_fn = theano.function([X, h0], y, mode=mode_with_gpu)
    cudnn_grad_fn = funcs(y, [params_cudnn])
    cudnn2_grad_fn = funcs(y, [params_cudnn], hy)
    cudnn3_grad_fn = funcs(None, [params_cudnn], hy)
    cudnn_grad_fns = [cudnn_grad_fn, cudnn2_grad_fn, cudnn3_grad_fn]

    # Bidirectional shapes: the output carries 2 * hidden_dim features
    # (forward + backward concatenated) and the hidden state has
    # 2 * depth layers (one forward and one backward per stacked layer).
    x_val = numpy.random.random(
        (timesteps, batch_size, input_dim)).astype(theano.config.floatX)
    y_val = numpy.random.random(
        (timesteps, batch_size, 2 * hidden_dim)).astype(theano.config.floatX)
    h0_val = numpy.random.random(
        (2 * depth, batch_size, hidden_dim)).astype(theano.config.floatX)

    cudnn_out = cudnn_fn(x_val, h0_val)
    for cudnn_grad_fn in cudnn_grad_fns:
        cudnn_grads = cudnn_grad_fn(x_val, y_val, h0_val)
def test_dnn_rnn_lstm(): def test_dnn_rnn_lstm():
# test params # test params
input_dim = 32 input_dim = 32
......
Markdown 格式
0%
您将把 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
请先注册或登录后再发表评论