Commit d7bf519a authored by Arnaud Bergeron

Add a test for gru mode of cudnn.

Parent 61554830
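For context before the diff: the cuDNN RNN API this commit tests follows the pattern below. This is a minimal sketch assembled from the calls that appear in the diff itself; it assumes a working cuDNN-enabled gpuarray backend, and the dimensions are the ones the test uses.

    import numpy
    import theano
    import theano.tensor as T
    from theano.gpuarray import dnn
    from theano.gpuarray.type import gpuarray_shared_constructor

    depth, batch_size, input_dim, hidden_dim = 3, 2, 32, 16  # values from the test

    X = T.tensor3('X')    # (timesteps, batch_size, input_dim)
    h0 = T.tensor3('h0')  # (depth, batch_size, hidden_dim)

    # Describe the stacked GRU and allocate its flat cuDNN parameter buffer.
    rnnb = dnn.RNNBlock(theano.config.floatX, hidden_dim, depth, 'gru')
    psize = rnnb.get_param_size([batch_size, input_dim])
    params_cudnn = gpuarray_shared_constructor(
        numpy.zeros((psize,), dtype=theano.config.floatX))

    y = rnnb.apply(params_cudnn, X, h0)[0]  # per-timestep outputs of the top layer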
@@ -29,7 +29,7 @@ from theano.tensor.signal.pool import (
     Pool, MaxPoolGrad, AveragePoolGrad)
 from . import pygpu
 from .type import (get_context, gpu_context_type, list_contexts,
-                   get_prop, set_prop)
+                   get_prop, set_prop, GpuArraySharedVariable)
 from .basic_ops import (as_gpuarray_variable, infer_context_name,
                         gpu_contiguous, gpu_alloc_empty,
                         empty_like, GpuArrayType)
@@ -2233,6 +2233,16 @@ class GpuDnnRNNOp(DnnBase):
         reserve, y, hy = outputs[:3]
         _, dy, dhy = output_grads[:3]
         dcy = output_grads[3] if len(output_grads) == 4 else None
+        # If both dy and dhy are disconnected, then this will error
+        # out, but it is indeed an error.
+        if isinstance(dy.type, DisconnectedType):
+            dy = as_gpuarray_variable(dhy[-1],
+                                      context_name=dhy.type.context_name)
+        if isinstance(dhy.type, DisconnectedType):
+            dhy = as_gpuarray_variable(hy.zeros_like(),
+                                       context_name=hy.type.context_name)
+        if dcy and isinstance(dcy.type, DisconnectedType):
+            dcy = None
         dinputs = GpuDnnRNNGradInputs()(
             desc, x, y, dy, dhy, dcy, w, hx, cx, reserve, return_list=True)
         reserve2, dx, dhx = dinputs[:3]
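The new DisconnectedType handling matters because the op returns several outputs (reserve, y, hy) and a user's cost typically touches only one of them; Theano then hands L_op a disconnected gradient for the unused output. A hedged sketch of how the new path gets exercised, reusing names from the sketch at the top of this page (the exact return layout of apply is an assumption here):

    outs = rnnb.apply(params_cudnn, X, h0)  # assumed (y, hy) for a GRU block
    cost = T.mean(outs[1] ** 2)             # the cost touches hy only...
    grads = theano.grad(cost, [X, h0])      # ...so dy reaches L_op disconnected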
@@ -2326,6 +2336,8 @@ class RNNBlock(object):
         return bytesize // numpy.dtype(self.dtype).itemsize

     def split_params(self, w, layer, input_size):
+        if not isinstance(w, GpuArraySharedVariable):
+            raise TypeError("split_params only works on gpuarray shared variables")
         return _split_rnn_params(w, self.desc, layer, input_size, self.dtype, self.rnn_mode)

     def apply(self, w, x, hx, cx=None):
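The new guard makes split_params fail fast with a clear message instead of producing a confusing error deeper inside _split_rnn_params. A hedged illustration, again reusing names from the sketch above:

    w_cpu = numpy.zeros((psize,), dtype=theano.config.floatX)  # not a shared variable
    try:
        rnnb.split_params(w_cpu, 0, [batch_size, input_dim])
    except TypeError as e:
        print(e)  # "split_params only works on gpuarray shared variables"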
New file rnn_support.py (added by this commit; imported below as `from .rnn_support import Model, GRU, WrapperLayer`):

from __future__ import absolute_import, print_function
import theano
import theano.tensor as T
import numpy


class Model(object):
    def __init__(self, name=""):
        self.name = name
        self.layers = []
        self.params = []
        self.other_updates = {}

    def add_layer(self, layer):
        self.layers.append(layer)
        for p in layer.params:
            self.params.append(p)

        if hasattr(layer, 'other_updates'):
            for y in layer.other_updates:
                self.other_updates[y[0]] = y[1]

    def get_params(self):
        return self.params


def uniform(stdev, size):
    """uniform distribution with the given stdev and size"""
    return numpy.random.uniform(
        low=-stdev * numpy.sqrt(3),
        high=stdev * numpy.sqrt(3),
        size=size
    ).astype(theano.config.floatX)


def linear_transform_weights(input_dim, output_dim,
                             param_list=None, name=""):
    "theano shared variable given input and output dimension"
    weight_initialization = uniform(numpy.sqrt(2.0 / input_dim),
                                    (input_dim, output_dim))
    W = theano.shared(weight_initialization, name=name)

    assert(param_list is not None)
    param_list.append(W)

    return W


def bias_weights(length, param_list=None, name=""):
    "theano shared variable for bias unit, given length"
    bias_initialization = numpy.zeros(length).astype(theano.config.floatX)

    bias = theano.shared(
        bias_initialization,
        name=name
    )

    if param_list is not None:
        param_list.append(bias)

    return bias


class Layer(object):
    '''Generic Layer Template which all layers should inherit'''
    def __init__(self, name=""):
        self.name = name
        self.params = []

    def get_params(self):
        return self.params


class GRU(Layer):
    def __init__(self, input_dim, output_dim, input_layer, s0=None,
                 batch_normalize=False, name=""):
        '''Layers information'''
        self.name = name
        self.input_dim = input_dim
        self.hidden_dim = output_dim
        self.output_dim = output_dim
        self.input_layer = input_layer
        self.X = input_layer.output().dimshuffle(1, 0, 2)
        self.s0 = s0
        self.params = []

        '''Layers weights'''
        '''self.params is passed so that any parameters could be appended to it'''
        self.W_r = linear_transform_weights(input_dim, output_dim, param_list=self.params, name=name + ".W_r")
        self.b_wr = bias_weights((output_dim,), param_list=self.params, name=name + ".b_wr")

        self.W_i = linear_transform_weights(input_dim, output_dim, param_list=self.params, name=name + ".W_i")
        self.b_wi = bias_weights((output_dim,), param_list=self.params, name=name + ".b_wi")

        self.W_h = linear_transform_weights(input_dim, output_dim, param_list=self.params, name=name + ".W_h")
        self.b_wh = bias_weights((output_dim,), param_list=self.params, name=name + ".b_wh")

        self.R_r = linear_transform_weights(output_dim, output_dim, param_list=self.params, name=name + ".R_r")
        self.b_rr = bias_weights((output_dim,), param_list=self.params, name=name + ".b_rr")

        self.R_i = linear_transform_weights(output_dim, output_dim, param_list=self.params, name=name + ".R_i")
        self.b_ru = bias_weights((output_dim,), param_list=self.params, name=name + ".b_ru")

        self.R_h = linear_transform_weights(output_dim, output_dim, param_list=self.params, name=name + ".R_h")
        self.b_rh = bias_weights((output_dim,), param_list=self.params, name=name + ".b_rh")

        '''step through processed input to create output'''
        def step(inp, s_prev):
            i_t = T.nnet.sigmoid(
                T.dot(inp, self.W_i) + T.dot(s_prev, self.R_i) + self.b_wi + self.b_ru)
            r_t = T.nnet.sigmoid(
                T.dot(inp, self.W_r) + T.dot(s_prev, self.R_r) + self.b_wr + self.b_rr)
            h_hat_t = T.tanh(
                T.dot(inp, self.W_h) + (r_t * (T.dot(s_prev, self.R_h) + self.b_rh)) + self.b_wh)
            s_curr = ((1.0 - i_t) * h_hat_t) + (i_t * s_prev)
            return s_curr

        outputs_info = self.s0

        states, updates = theano.scan(
            fn=step,
            sequences=[self.X],
            outputs_info=outputs_info
        )

        self.Y = states.dimshuffle(1, 0, 2)

    def output(self):
        return self.Y


class FC(Layer):
    def __init__(self, input_dim, output_dim, input_layer, name=""):
        self.input_layer = input_layer
        self.name = name
        self.params = []
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.X = self.input_layer.output()

        self.W = linear_transform_weights(input_dim, output_dim, param_list=self.params, name=name + ".W")
        self.b = bias_weights((output_dim,), param_list=self.params, name=name + ".b")

    def output(self):
        return T.dot(self.X, self.W) + self.b


class WrapperLayer(Layer):
    def __init__(self, X, name=""):
        self.params = []
        self.name = name
        self.X = X

    def output(self):
        return self.X
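For reference, the step function above computes the following (transcribing the code into equations; \odot is the elementwise product, and i_t plays the role of the update gate):

    i_t = \sigma(x_t W_i + s_{t-1} R_i + b_{wi} + b_{ru})
    r_t = \sigma(x_t W_r + s_{t-1} R_r + b_{wr} + b_{rr})
    \hat{h}_t = \tanh(x_t W_h + r_t \odot (s_{t-1} R_h + b_{rh}) + b_{wh})
    s_t = (1 - i_t) \odot \hat{h}_t + i_t \odot s_{t-1}

The two bias vectors per gate and the reset gate applied after the recurrent matrix product mirror cuDNN's documented GRU formulation, which is presumably what lets this pure-Theano layer serve as a numerical reference for the cuDNN path in the test below.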
@@ -14,11 +14,13 @@ from theano.tensor.signal.pool import pool_2d, pool_3d
 from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad

 from .. import dnn
+from .. import gpuarray_shared_constructor
 from ..basic_ops import GpuAllocEmpty
 from ..type import gpuarray_shared_constructor
 from .config import mode_with_gpu, mode_without_gpu, test_ctx_name
 from . import test_nnet
+from .rnn_support import Model, GRU, WrapperLayer

 from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_FWD

@@ -1434,3 +1436,78 @@ def test_batchnorm_inference():
     utt.assert_allclose(outputs[4], outputs[4 + 5])  # dbias
     utt.assert_allclose(outputs[5], outputs[5 + 5])  # dmean
     utt.assert_allclose(outputs[6], outputs[6 + 5], rtol=2e-3, atol=4e-5)  # dvar
+
+
+def test_dnn_rnn_gru():
+    # test params
+    input_dim = 32
+    hidden_dim = 16
+    batch_size = 2
+    depth = 3
+    timesteps = 5
+
+    # test code
+    X = T.tensor3('X')
+    X.tag.test_value = numpy.zeros((timesteps, batch_size, input_dim), dtype=theano.config.floatX)
+    Y = T.tensor3('Y')
+    Y.tag.test_value = numpy.zeros((timesteps, batch_size, hidden_dim), dtype=theano.config.floatX)
+    h0 = T.tensor3('h0')
+    h0.tag.test_value = numpy.zeros((depth, batch_size, hidden_dim), dtype=theano.config.floatX)
+
+    rnnb = dnn.RNNBlock(theano.config.floatX, hidden_dim, depth, 'gru')
+    psize = rnnb.get_param_size([batch_size, input_dim])
+    params_cudnn = gpuarray_shared_constructor(
+        numpy.zeros((psize,), dtype=theano.config.floatX))
+
+    model = Model()
+    last_layer = WrapperLayer(X.dimshuffle(1, 0, 2))
+    last_dim = input_dim
+    for i in range(depth):
+        gru = GRU(last_dim, hidden_dim, last_layer, s0=h0[i, :, :])
+        model.add_layer(gru)
+        last_layer = gru
+        last_dim = hidden_dim
+        layer_params = gru.get_params()
+        dnn_params = rnnb.split_params(params_cudnn, i,
+                                       [batch_size, input_dim])
+        for j, p in enumerate(dnn_params):
+            p[:] = layer_params[j].get_value(borrow=True,
+                                             return_internal_type=True)
+
+    def funcs(out, params):
+        fn = theano.function([X, h0], out, mode=mode_with_gpu)
+        cost = T.mean((Y - out)**2)
+        grad = T.grad(cost, [X, h0] + params)
+        grad_fn = theano.function([X, Y, h0], grad, mode=mode_with_gpu)
+        return fn, grad_fn
+
+    ref_fn, ref_grad_fn = funcs(last_layer.output().dimshuffle((1, 0, 2)),
+                                model.get_params())
+    cudnn_fn, cudnn_grad_fn = funcs(rnnb.apply(params_cudnn, X, h0)[0],
+                                    [params_cudnn])
+
+    x_val = numpy.random.random((timesteps, batch_size, input_dim)).astype(theano.config.floatX)
+    y_val = numpy.random.random((timesteps, batch_size, hidden_dim)).astype(theano.config.floatX)
+    h0_val = numpy.random.random((depth, batch_size, hidden_dim)).astype(theano.config.floatX)
+
+    ref_out = ref_fn(x_val, h0_val)
+    cudnn_out = cudnn_fn(x_val, h0_val)
+
+    utt.assert_allclose(ref_out, cudnn_out)
+
+    ref_grads = ref_grad_fn(x_val, y_val, h0_val)
+    cudnn_grads = cudnn_grad_fn(x_val, y_val, h0_val)
+
+    utt.assert_allclose(ref_grads[0], cudnn_grads[0])
+    utt.assert_allclose(ref_grads[1], cudnn_grads[1])
+
+    ref_grads_params = ref_grads[2:]
+    cudnn_grads_params = gpuarray_shared_constructor(cudnn_grads[2])
+    for i in range(depth):
+        cudnn_grads_layer = rnnb.split_params(cudnn_grads_params, i,
+                                              [batch_size, input_dim])
+        ref_grads_layer = ref_grads_params[i * len(cudnn_grads_layer):
+                                           (i + 1) * len(cudnn_grads_layer)]
+        for j, g in enumerate(cudnn_grads_layer):
+            utt.assert_allclose(ref_grads_layer[j], g)
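A note on the two split_params loops in this test: the copy loop relies on split_params returning GPU arrays that alias slices of the flat cuDNN buffer, so the in-place p[:] = ... assignments write the reference weights straight into params_cudnn, and the same slicing later carves the flat gradient back into per-layer pieces for comparison. A hedged sketch of that aliasing, reusing names from the test (the view behaviour is inferred from the copy loop, not from documentation):

    mats = rnnb.split_params(params_cudnn, 0, [batch_size, input_dim])
    mats[0][:] = numpy.ones(mats[0].shape, dtype=theano.config.floatX)
    # The write is visible through the shared variable without any explicit copy.
    assert numpy.asarray(params_cudnn.get_value(borrow=True)).any()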