提交 09318e0d authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5405 from abergeron/fix_rnn_bidi

Fix shapes for cudnn bidirectional rnn since the documentation is full of lies.
......@@ -2183,7 +2183,7 @@ def _split_rnn_params(w, desc, layer, input_size, dtype, rnn_mode):
class GpuDnnRNNOp(DnnBase):
__props__ = ()
_cop_num_inputs = 5
_cop_num_inputs = 6
_cop_num_outputs = 4
def __init__(self, rnn_mode, direction_mode):
......@@ -2208,7 +2208,7 @@ class GpuDnnRNNOp(DnnBase):
w = as_gpuarray_variable(w, context_name)
x = as_gpuarray_variable(x, context_name)
hx = as_gpuarray_variable(hx, context_name)
inputs = [desc, w, x, hx]
inputs = [desc, as_i32(self.num_dirs), w, x, hx]
assert w.ndim == 1
assert x.ndim == 3 # seqLength, minibatch, inputSize
assert hx.ndim == 3 # numLayers, minibatch, hiddenSize * bidi
......@@ -2232,8 +2232,8 @@ class GpuDnnRNNOp(DnnBase):
return Apply(self, inputs, outputs)
def L_op(self, inputs, outputs, output_grads):
desc, w, x, hx = inputs[:4]
cx = inputs[4] if len(inputs) == 5 else None
desc, numDirs, w, x, hx = inputs[:5]
cx = inputs[5] if len(inputs) == 6 else None
reserve, y, hy = outputs[:3]
_, dy, dhy = output_grads[:3]
dcy = output_grads[3] if len(output_grads) == 4 else None
......@@ -2261,14 +2261,14 @@ class GpuDnnRNNOp(DnnBase):
reserve2, dx, dhx = dinputs[:3]
dw = GpuDnnRNNGradWeights()(
desc, x, hx, y, reserve2, w)
res = [DisconnectedType()(), dw, dx, dhx]
res = [DisconnectedType()(), DisconnectedType()(), dw, dx, dhx]
if cx is not None:
res.append(dinputs[3]) # dcx
return res
def connection_pattern(self, node):
deconn = [[False] * len(node.outputs)]
conn = [[True] * len(node.outputs)] * (len(node.inputs) - 1)
deconn = [[False] * len(node.outputs)] * 2
conn = [[True] * len(node.outputs)] * (len(node.inputs) - 2)
return deconn + conn
......
#section support_code
int dnn_rnn_fwd(cudnnRNNDescriptor_t desc,
int dnn_rnn_fwd(cudnnRNNDescriptor_t desc, uint32_t numDirs,
PyGpuArrayObject *w, PyGpuArrayObject *x,
PyGpuArrayObject *hx, PyGpuArrayObject *cx,
gpudata **reserve, PyGpuArrayObject **y,
......@@ -22,7 +22,7 @@ int dnn_rnn_fwd(cudnnRNNDescriptor_t desc,
size_t seqLength = PyGpuArray_DIM(x, 0);
size_t miniBatch = PyGpuArray_DIM(x, 1);
size_t inputSize = PyGpuArray_DIM(x, 2);
size_t hiddenSizeDir = PyGpuArray_DIM(hx, 2);
size_t hiddenSize = PyGpuArray_DIM(hx, 2);
size_t shape[3];
int strs[3], dims[3];
cudnnStatus_t err;
......@@ -84,7 +84,7 @@ int dnn_rnn_fwd(cudnnRNNDescriptor_t desc,
shape[0] = seqLength;
shape[1] = miniBatch;
shape[2] = hiddenSizeDir;
shape[2] = hiddenSize * numDirs;
if (theano_prep_output(y, 3, shape, x->ga.typecode, GA_C_ORDER, c) != 0)
goto fail;
......
......@@ -1580,6 +1580,55 @@ def test_dnn_rnn_gru():
utt.assert_allclose(ref_grad_layer[j], g)
def test_dnn_rnn_gru_bidi():
    """Forward and gradient smoke test for the bidirectional cudnn GRU.

    Builds a bidirectional GRU ``RNNBlock``, compiles a forward function
    and three gradient functions (cost on the sequence output, on the
    final hidden state, and on both), then runs them on random data.
    Only compilation and execution are checked here; numerical
    correctness against a reference implementation is not verified.
    """
    # test params
    input_dim = 32
    hidden_dim = 16
    batch_size = 2
    depth = 3
    timesteps = 5

    # symbolic inputs: X is the input sequence, Y a target used to build
    # a cost, h0 the initial hidden state
    X = T.tensor3('X')
    Y = T.tensor3('Y')
    h0 = T.tensor3('h0')

    rnnb = dnn.RNNBlock(theano.config.floatX, hidden_dim, depth, 'gru',
                        direction_mode='bidirectional')
    psize = rnnb.get_param_size([batch_size, input_dim])
    params_cudnn = gpuarray_shared_constructor(
        numpy.random.random((psize,)).astype(theano.config.floatX))

    def funcs(out, params, hy=None):
        # Build a gradient function for a cost assembled from the
        # requested symbolic outputs.  Compare against None explicitly:
        # `out` and `hy` are symbolic variables when present, and
        # truth-testing a symbolic variable is not reliable.
        cost = 0
        if out is not None:
            cost += T.mean((Y - out) ** 2)
        if hy is not None:
            cost += T.mean(hy ** 2)
        grad = T.grad(cost, [X, h0] + params)
        grad_fn = theano.function([X, Y, h0], grad, mode=mode_with_gpu,
                                  on_unused_input='ignore')
        return grad_fn

    y, hy = rnnb.apply(params_cudnn, X, h0)
    cudnn_fn = theano.function([X, h0], y, mode=mode_with_gpu)
    cudnn_grad_fn = funcs(y, [params_cudnn])
    cudnn2_grad_fn = funcs(y, [params_cudnn], hy)
    cudnn3_grad_fn = funcs(None, [params_cudnn], hy)
    cudnn_grad_fns = [cudnn_grad_fn, cudnn2_grad_fn, cudnn3_grad_fn]

    # Bidirectional shapes: the output is 2 * hidden_dim wide (forward
    # and backward halves concatenated) and the hidden state stacks
    # 2 * depth layers (one forward and one backward per layer).
    x_val = numpy.random.random(
        (timesteps, batch_size, input_dim)).astype(theano.config.floatX)
    y_val = numpy.random.random(
        (timesteps, batch_size, 2 * hidden_dim)).astype(theano.config.floatX)
    h0_val = numpy.random.random(
        (2 * depth, batch_size, hidden_dim)).astype(theano.config.floatX)

    cudnn_fn(x_val, h0_val)
    for grad_fn in cudnn_grad_fns:
        grad_fn(x_val, y_val, h0_val)
def test_dnn_rnn_lstm():
# test params
input_dim = 32
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论