Refactor CTC GPU wrapper to make use of L_op and allocate costs and gradients in make_node

上级 ba6d2de5
from __future__ import absolute_import, print_function, division
import theano
from theano import Op
from theano import config
from theano import (config, gof)
import theano.tensor as T
from .basic_ops import (gpu_contiguous, as_gpuarray_variable,
infer_context_name, CGpuKernelBase)
from .basic_ops import (gpu_contiguous, as_gpuarray_variable, infer_context_name)
import theano.tensor.nnet.ctc
from .type import GpuArrayType
from .type import (GpuArrayType, gpu_context_type)
from .elemwise import GpuDimShuffle
from theano.gradient import grad_undefined
from theano.gof import local_optimizer
......@@ -20,32 +18,44 @@ import pygpu
ctc_enabled = config.ctc.enabled
class GpuConnectionistTemporalClassification(CGpuKernelBase, Op):
class GpuConnectionistTemporalClassification(gof.COp):
"""
GPU wrapper for Baidu CTC loss function.
Parameters
----------
activations
Three-dimensional tensor, which has a shape of (t, m, p), where
t is the time index, m is the minibatch index, and p is the index
over the probabilities of each symbol in the alphabet. The memory
layout is assumed to be C-order, i.e. dimensions are ordered from
the slowest to the fastest changing one, from left to right. In this
case, p is the fastest changing dimension.
labels
A 1-D tensor of all the labels for the minibatch.
input_lengths
A 1-D tensor with the number of time steps for each sequence in
the minibatch.
Returns
-------
1-D tensor
Cost of each example in the minibatch. Tensor has one entry per
example, i.e. it is indexed by the minibatch index.
"""
__props__ = ('context_name', 'compute_grad',)
__props__ = ('compute_grad',)
func_file = "./ctc_wrapper.c"
func_name = "APPLY_SPECIFIC(ctc_cost_gpu)"
def __init__(self, compute_grad=True, context_name=None):
params_type = gpu_context_type
def __init__(self, compute_grad=True):
if not compute_grad:
self.func_name = "APPLY_SPECIFIC(ctc_cost_gpu_no_grad)"
self.compute_grad = compute_grad
self.context_name = context_name
Op.__init__(self)
CGpuKernelBase.__init__(self, self.func_file, self.func_name)
self.costs = GpuArrayType(dtype='float32',
broadcastable=(False,),
context_name=self.context_name)
if self.compute_grad:
self.gradients = GpuArrayType(dtype='float32',
broadcastable=(False, False, False,),
context_name=self.context_name)
gof.COp.__init__(self, self.func_file, self.func_name)
if config.ctc.root == "":
raise ValueError('ctc.root variable is not set, please set it '
......@@ -75,35 +85,19 @@ class GpuConnectionistTemporalClassification(CGpuKernelBase, Op):
return ['ctc.h', 'numpy_compat.h', 'gpuarray_helper.h', 'gpuarray/types.h',
'gpuarray_api.h', 'gpuarray/array.h', 'gpuarray/util.h']
def make_node(self, activations, labels, input_lengths):
"""
Parameters
----------
activations
Three-dimensional tensor, which has a shape of (t, m, p), where
t is the time index, m is the minibatch index, and p is the index
over the probabilities of each symbol in the alphabet. The memory
layout is assumed to be in C-order, which consists in the slowest
to the fastest changing dimension, from left to right. In this case,
p is the fastest changing dimension.
labels
A 1-D tensor of all the labels for the minibatch.
input_lengths
A 1-D tensor with the number of time steps for each sequence in
the minibatch.
"""
# Return the context attached to the type of the node's first input
# (the activations variable placed first in the Apply inputs by make_node).
# NOTE(review): presumably this is the value matching `params_type`
# (gpu_context_type) that is handed to the C implementation -- confirm.
def get_params(self, node):
return node.inputs[0].type.context
def make_node(self, activations, labels, input_lengths):
if not ctc_enabled:
raise RuntimeError('Baidu CTC is not enabled and '
'GpuConnectionistTemporalClassification Op '
'can not be constructed.')
context = infer_context_name(activations, labels, input_lengths)
assert context == self.context_name
t_activations = as_gpuarray_variable(activations,
context_name=self.context_name)
context_name=context)
# Ensure activations array is C-contiguous
t_activations = gpu_contiguous(t_activations)
......@@ -123,24 +117,31 @@ class GpuConnectionistTemporalClassification(CGpuKernelBase, Op):
# Return only the cost. Gradient will be returned by grad()
self.default_output = 0
out_params = [as_gpuarray_variable(self.costs(), context_name=self.context_name)]
if self.gradients is not None:
out_params.append(as_gpuarray_variable(self.gradients(),
context_name=self.context_name))
costs = GpuArrayType(dtype='float32',
broadcastable=(False,),
context_name=context)()
if self.compute_grad:
gradients = GpuArrayType(dtype='float32',
broadcastable=(False, False, False,),
context_name=context)()
return theano.Apply(self, inputs=[t_activations, t_labels, t_input_lengths],
outputs=out_params)
outputs=[costs, gradients])
def grad(self, inputs, output_grads):
def L_op(self, inputs, outputs, output_grads):
if not ctc_enabled:
raise RuntimeError('Baidu CTC is not enabled and '
'GpuConnectionistTemporalClassification Op '
'can not be constructed.')
z = output_grads[0]
grad_shuffle = GpuDimShuffle(input_broadcastable=(False, False, False),
new_order=(1, 0, 2))(self.gradients())
grad_bdot = T.basic.batched_dot(z, grad_shuffle)
grad_shuffle_reverse = GpuDimShuffle(input_broadcastable=(False, False, False),
# Gradients computed by Op
gradients = outputs[1]
# Gradients of original function, to compose chain rule
grad_op = output_grads[0]
grad_shuffle = GpuDimShuffle(input_broadcastable=(False, False, False,),
new_order=(1, 0, 2))(gradients)
grad_bdot = T.basic.batched_dot(grad_op, grad_shuffle)
grad_shuffle_reverse = GpuDimShuffle(input_broadcastable=(False, False, False,),
new_order=(1, 0, 2))(grad_bdot)
return [grad_shuffle_reverse,
grad_undefined(self, 1, inputs[1]),
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论