提交 09318e0d authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5405 from abergeron/fix_rnn_bidi

Fix shapes for cudnn bidirectional rnn since the documentation is full of lies.
......@@ -2183,7 +2183,7 @@ def _split_rnn_params(w, desc, layer, input_size, dtype, rnn_mode):
class GpuDnnRNNOp(DnnBase):
__props__ = ()
_cop_num_inputs = 5
_cop_num_inputs = 6
_cop_num_outputs = 4
def __init__(self, rnn_mode, direction_mode):
......@@ -2208,7 +2208,7 @@ class GpuDnnRNNOp(DnnBase):
w = as_gpuarray_variable(w, context_name)
x = as_gpuarray_variable(x, context_name)
hx = as_gpuarray_variable(hx, context_name)
inputs = [desc, w, x, hx]
inputs = [desc, as_i32(self.num_dirs), w, x, hx]
assert w.ndim == 1
assert x.ndim == 3 # seqLength, minibatch, inputSize
assert hx.ndim == 3 # numLayers, minibatch, hiddenSize * bidi
......@@ -2232,8 +2232,8 @@ class GpuDnnRNNOp(DnnBase):
return Apply(self, inputs, outputs)
def L_op(self, inputs, outputs, output_grads):
desc, w, x, hx = inputs[:4]
cx = inputs[4] if len(inputs) == 5 else None
desc, numDirs, w, x, hx = inputs[:5]
cx = inputs[5] if len(inputs) == 6 else None
reserve, y, hy = outputs[:3]
_, dy, dhy = output_grads[:3]
dcy = output_grads[3] if len(output_grads) == 4 else None
......@@ -2261,14 +2261,14 @@ class GpuDnnRNNOp(DnnBase):
reserve2, dx, dhx = dinputs[:3]
dw = GpuDnnRNNGradWeights()(
desc, x, hx, y, reserve2, w)
res = [DisconnectedType()(), dw, dx, dhx]
res = [DisconnectedType()(), DisconnectedType()(), dw, dx, dhx]
if cx is not None:
res.append(dinputs[3]) # dcx
return res
def connection_pattern(self, node):
deconn = [[False] * len(node.outputs)]
conn = [[True] * len(node.outputs)] * (len(node.inputs) - 1)
deconn = [[False] * len(node.outputs)] * 2
conn = [[True] * len(node.outputs)] * (len(node.inputs) - 2)
return deconn + conn
......
#section support_code
int dnn_rnn_fwd(cudnnRNNDescriptor_t desc,
int dnn_rnn_fwd(cudnnRNNDescriptor_t desc, uint32_t numDirs,
PyGpuArrayObject *w, PyGpuArrayObject *x,
PyGpuArrayObject *hx, PyGpuArrayObject *cx,
gpudata **reserve, PyGpuArrayObject **y,
......@@ -22,7 +22,7 @@ int dnn_rnn_fwd(cudnnRNNDescriptor_t desc,
size_t seqLength = PyGpuArray_DIM(x, 0);
size_t miniBatch = PyGpuArray_DIM(x, 1);
size_t inputSize = PyGpuArray_DIM(x, 2);
size_t hiddenSizeDir = PyGpuArray_DIM(hx, 2);
size_t hiddenSize = PyGpuArray_DIM(hx, 2);
size_t shape[3];
int strs[3], dims[3];
cudnnStatus_t err;
......@@ -84,7 +84,7 @@ int dnn_rnn_fwd(cudnnRNNDescriptor_t desc,
shape[0] = seqLength;
shape[1] = miniBatch;
shape[2] = hiddenSizeDir;
shape[2] = hiddenSize * numDirs;
if (theano_prep_output(y, 3, shape, x->ga.typecode, GA_C_ORDER, c) != 0)
goto fail;
......
......@@ -1580,6 +1580,55 @@ def test_dnn_rnn_gru():
utt.assert_allclose(ref_grad_layer[j], g)
def test_dnn_rnn_gru_bidi():
    """Forward and gradient smoke test for the bidirectional cudnn GRU.

    Builds a bidirectional GRU ``RNNBlock``, compiles a forward function
    and three gradient functions (cost on the sequence output, on the
    final hidden state, and on both), then runs them on random data.
    Only compilation and execution are checked here; numerical
    correctness against a reference implementation is not verified.
    """
    # test params
    input_dim = 32
    hidden_dim = 16
    batch_size = 2
    depth = 3
    timesteps = 5

    # symbolic inputs: X is the input sequence, Y a target used to build
    # a cost, h0 the initial hidden state
    X = T.tensor3('X')
    Y = T.tensor3('Y')
    h0 = T.tensor3('h0')

    rnnb = dnn.RNNBlock(theano.config.floatX, hidden_dim, depth, 'gru',
                        direction_mode='bidirectional')
    psize = rnnb.get_param_size([batch_size, input_dim])
    params_cudnn = gpuarray_shared_constructor(
        numpy.random.random((psize,)).astype(theano.config.floatX))

    def funcs(out, params, hy=None):
        # Build a gradient function for a cost assembled from the
        # requested symbolic outputs.  Compare against None explicitly:
        # `out` and `hy` are symbolic variables when present, and
        # truth-testing a symbolic variable is not reliable.
        cost = 0
        if out is not None:
            cost += T.mean((Y - out) ** 2)
        if hy is not None:
            cost += T.mean(hy ** 2)
        grad = T.grad(cost, [X, h0] + params)
        grad_fn = theano.function([X, Y, h0], grad, mode=mode_with_gpu,
                                  on_unused_input='ignore')
        return grad_fn

    y, hy = rnnb.apply(params_cudnn, X, h0)
    cudnn_fn = theano.function([X, h0], y, mode=mode_with_gpu)
    cudnn_grad_fn = funcs(y, [params_cudnn])
    cudnn2_grad_fn = funcs(y, [params_cudnn], hy)
    cudnn3_grad_fn = funcs(None, [params_cudnn], hy)
    cudnn_grad_fns = [cudnn_grad_fn, cudnn2_grad_fn, cudnn3_grad_fn]

    # Bidirectional shapes: the output is 2 * hidden_dim wide (forward
    # and backward halves concatenated) and the hidden state stacks
    # 2 * depth layers (one forward and one backward per layer).
    x_val = numpy.random.random(
        (timesteps, batch_size, input_dim)).astype(theano.config.floatX)
    y_val = numpy.random.random(
        (timesteps, batch_size, 2 * hidden_dim)).astype(theano.config.floatX)
    h0_val = numpy.random.random(
        (2 * depth, batch_size, hidden_dim)).astype(theano.config.floatX)

    cudnn_fn(x_val, h0_val)
    for grad_fn in cudnn_grad_fns:
        grad_fn(x_val, y_val, h0_val)
def test_dnn_rnn_lstm():
# test params
input_dim = 32
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论