提交 c51f5936 · 作者: Brandon T. Willard

Replace theano.tensor alias T with tt in theano.tensor sub-package

Indirect references to theano.tensor imports at module level were also converted to direct references in quite a few cases.
上级 2cb3a154
import theano.tensor.basic as tt
from theano import config from theano import config
from theano.gof.params_type import ParamsType from theano.gof.params_type import ParamsType
from theano.scalar import bool as bool_t from theano.scalar import bool as bool_t
...@@ -6,7 +8,6 @@ from theano.tensor.blas import ldflags, blas_header_text, blas_header_version ...@@ -6,7 +8,6 @@ from theano.tensor.blas import ldflags, blas_header_text, blas_header_version
from theano.tensor.blas import blas_optdb, optdb, local_optimizer from theano.tensor.blas import blas_optdb, optdb, local_optimizer
from theano.tensor.blas import Ger, ger, ger_destructive from theano.tensor.blas import Ger, ger, ger_destructive
from theano.tensor.blas import Gemv, gemv_inplace, gemv_no_inplace from theano.tensor.blas import Gemv, gemv_inplace, gemv_no_inplace
from theano.tensor import basic as T
class BaseBLAS(object): class BaseBLAS(object):
...@@ -706,10 +707,10 @@ def make_c_gemv_destructive(node): ...@@ -706,10 +707,10 @@ def make_c_gemv_destructive(node):
dest = inputs[0] dest = inputs[0]
if ( if (
dest.owner dest.owner
and isinstance(dest.owner.op, T.AllocEmpty) and isinstance(dest.owner.op, tt.AllocEmpty)
and len(dest.clients) > 1 and len(dest.clients) > 1
): ):
inputs[0] = T.AllocEmpty(dest.dtype)(*dest.owner.inputs) inputs[0] = tt.AllocEmpty(dest.dtype)(*dest.owner.inputs)
return [cgemv_inplace(*inputs)] return [cgemv_inplace(*inputs)]
......
import numpy as np import numpy as np
import theano.tensor as tt
from theano import gof from theano import gof
import theano.tensor as T
from theano.gradient import DisconnectedType from theano.gradient import DisconnectedType
...@@ -10,10 +12,10 @@ class RFFTOp(gof.Op): ...@@ -10,10 +12,10 @@ class RFFTOp(gof.Op):
def output_type(self, inp): def output_type(self, inp):
# add extra dim for real/imag # add extra dim for real/imag
return T.TensorType(inp.dtype, broadcastable=[False] * (inp.type.ndim + 1)) return tt.TensorType(inp.dtype, broadcastable=[False] * (inp.type.ndim + 1))
def make_node(self, a, s=None): def make_node(self, a, s=None):
a = T.as_tensor_variable(a) a = tt.as_tensor_variable(a)
if a.ndim < 2: if a.ndim < 2:
raise TypeError( raise TypeError(
"%s: input must have dimension > 2, with first dimension batches" "%s: input must have dimension > 2, with first dimension batches"
...@@ -22,10 +24,10 @@ class RFFTOp(gof.Op): ...@@ -22,10 +24,10 @@ class RFFTOp(gof.Op):
if s is None: if s is None:
s = a.shape[1:] s = a.shape[1:]
s = T.as_tensor_variable(s) s = tt.as_tensor_variable(s)
else: else:
s = T.as_tensor_variable(s) s = tt.as_tensor_variable(s)
if s.dtype not in T.integer_dtypes: if s.dtype not in tt.integer_dtypes:
raise TypeError( raise TypeError(
"%s: length of the transformed axis must be" "%s: length of the transformed axis must be"
" of type integer" % self.__class__.__name__ " of type integer" % self.__class__.__name__
...@@ -54,7 +56,7 @@ class RFFTOp(gof.Op): ...@@ -54,7 +56,7 @@ class RFFTOp(gof.Op):
+ [slice(1, (s[-1] // 2) + (s[-1] % 2))] + [slice(1, (s[-1] // 2) + (s[-1] % 2))]
+ [slice(None)] + [slice(None)]
) )
gout = T.set_subtensor(gout[idx], gout[idx] * 0.5) gout = tt.set_subtensor(gout[idx], gout[idx] * 0.5)
return [irfft_op(gout, s), DisconnectedType()()] return [irfft_op(gout, s), DisconnectedType()()]
def connection_pattern(self, node): def connection_pattern(self, node):
...@@ -71,10 +73,10 @@ class IRFFTOp(gof.Op): ...@@ -71,10 +73,10 @@ class IRFFTOp(gof.Op):
def output_type(self, inp): def output_type(self, inp):
# remove extra dim for real/imag # remove extra dim for real/imag
return T.TensorType(inp.dtype, broadcastable=[False] * (inp.type.ndim - 1)) return tt.TensorType(inp.dtype, broadcastable=[False] * (inp.type.ndim - 1))
def make_node(self, a, s=None): def make_node(self, a, s=None):
a = T.as_tensor_variable(a) a = tt.as_tensor_variable(a)
if a.ndim < 3: if a.ndim < 3:
raise TypeError( raise TypeError(
"%s: input must have dimension >= 3, with " % self.__class__.__name__ "%s: input must have dimension >= 3, with " % self.__class__.__name__
...@@ -83,11 +85,11 @@ class IRFFTOp(gof.Op): ...@@ -83,11 +85,11 @@ class IRFFTOp(gof.Op):
if s is None: if s is None:
s = a.shape[1:-1] s = a.shape[1:-1]
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2) s = tt.set_subtensor(s[-1], (s[-1] - 1) * 2)
s = T.as_tensor_variable(s) s = tt.as_tensor_variable(s)
else: else:
s = T.as_tensor_variable(s) s = tt.as_tensor_variable(s)
if s.dtype not in T.integer_dtypes: if s.dtype not in tt.integer_dtypes:
raise TypeError( raise TypeError(
"%s: length of the transformed axis must be" "%s: length of the transformed axis must be"
" of type integer" % self.__class__.__name__ " of type integer" % self.__class__.__name__
...@@ -117,7 +119,7 @@ class IRFFTOp(gof.Op): ...@@ -117,7 +119,7 @@ class IRFFTOp(gof.Op):
+ [slice(1, (s[-1] // 2) + (s[-1] % 2))] + [slice(1, (s[-1] // 2) + (s[-1] % 2))]
+ [slice(None)] + [slice(None)]
) )
gf = T.set_subtensor(gf[idx], gf[idx] * 2) gf = tt.set_subtensor(gf[idx], gf[idx] * 2)
return [gf, DisconnectedType()()] return [gf, DisconnectedType()()]
def connection_pattern(self, node): def connection_pattern(self, node):
...@@ -157,7 +159,7 @@ def rfft(inp, norm=None): ...@@ -157,7 +159,7 @@ def rfft(inp, norm=None):
cond_norm = _unitary(norm) cond_norm = _unitary(norm)
scaling = 1 scaling = 1
if cond_norm == "ortho": if cond_norm == "ortho":
scaling = T.sqrt(s.prod().astype(inp.dtype)) scaling = tt.sqrt(s.prod().astype(inp.dtype))
return rfft_op(inp, s) / scaling return rfft_op(inp, s) / scaling
...@@ -196,9 +198,9 @@ def irfft(inp, norm=None, is_odd=False): ...@@ -196,9 +198,9 @@ def irfft(inp, norm=None, is_odd=False):
s = inp.shape[1:-1] s = inp.shape[1:-1]
if is_odd: if is_odd:
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2 + 1) s = tt.set_subtensor(s[-1], (s[-1] - 1) * 2 + 1)
else: else:
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2) s = tt.set_subtensor(s[-1], (s[-1] - 1) * 2)
cond_norm = _unitary(norm) cond_norm = _unitary(norm)
scaling = 1 scaling = 1
...@@ -206,7 +208,7 @@ def irfft(inp, norm=None, is_odd=False): ...@@ -206,7 +208,7 @@ def irfft(inp, norm=None, is_odd=False):
if cond_norm is None: if cond_norm is None:
scaling = s.prod().astype(inp.dtype) scaling = s.prod().astype(inp.dtype)
elif cond_norm == "ortho": elif cond_norm == "ortho":
scaling = T.sqrt(s.prod().astype(inp.dtype)) scaling = tt.sqrt(s.prod().astype(inp.dtype))
return irfft_op(inp, s) / scaling return irfft_op(inp, s) / scaling
......
import numpy as np import numpy as np
import theano import theano
import theano.tensor.basic as tt
from theano import Apply, Op from theano import Apply, Op
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.gof.opt import copy_stack_trace from theano.gof.opt import copy_stack_trace
from theano.tensor import as_tensor_variable, TensorType from theano.scalar import Composite, add, as_common_dtype, mul, sub, true_div
from theano.tensor import basic as T from theano.tensor import TensorType, as_tensor_variable
from theano.tensor.elemwise import Elemwise
from theano.tensor.opt import register_specialize_device from theano.tensor.opt import register_specialize_device
from theano.scalar import Composite, as_common_dtype
from theano.scalar import add, sub, true_div, mul
class BNComposite(Composite): class BNComposite(Composite):
...@@ -72,9 +74,7 @@ def batch_normalization(inputs, gamma, beta, mean, std, mode="low_mem"): ...@@ -72,9 +74,7 @@ def batch_normalization(inputs, gamma, beta, mean, std, mode="low_mem"):
between implementation is likely to be less important on the full model fprop/bprop. between implementation is likely to be less important on the full model fprop/bprop.
""" """
if mode == "low_mem": if mode == "low_mem":
elm_bn = theano.tensor.elemwise.Elemwise( elm_bn = Elemwise(scalar_op=BNComposite(dtype=inputs.dtype))
scalar_op=BNComposite(dtype=inputs.dtype)
)
rval = elm_bn(inputs, mean, std, gamma, beta) rval = elm_bn(inputs, mean, std, gamma, beta)
elif mode == "high_mem": elif mode == "high_mem":
rval = (inputs - mean) * (gamma / std) + beta rval = (inputs - mean) * (gamma / std) + beta
...@@ -239,8 +239,8 @@ def batch_normalization_train( ...@@ -239,8 +239,8 @@ def batch_normalization_train(
gamma = gamma.dimshuffle(params_dimshuffle_pattern) gamma = gamma.dimshuffle(params_dimshuffle_pattern)
beta = beta.dimshuffle(params_dimshuffle_pattern) beta = beta.dimshuffle(params_dimshuffle_pattern)
else: else:
gamma = T.addbroadcast(gamma, *axes) gamma = tt.addbroadcast(gamma, *axes)
beta = T.addbroadcast(beta, *axes) beta = tt.addbroadcast(beta, *axes)
batchnorm_op = AbstractBatchNormTrain(axes=axes) batchnorm_op = AbstractBatchNormTrain(axes=axes)
...@@ -251,8 +251,8 @@ def batch_normalization_train( ...@@ -251,8 +251,8 @@ def batch_normalization_train(
running_mean = running_mean.dimshuffle(params_dimshuffle_pattern) running_mean = running_mean.dimshuffle(params_dimshuffle_pattern)
running_var = running_var.dimshuffle(params_dimshuffle_pattern) running_var = running_var.dimshuffle(params_dimshuffle_pattern)
else: else:
running_mean = T.addbroadcast(running_mean, *axes) running_mean = tt.addbroadcast(running_mean, *axes)
running_var = T.addbroadcast(running_var, *axes) running_var = tt.addbroadcast(running_var, *axes)
out, mean, invstd, new_running_mean, new_running_var = batchnorm_op( out, mean, invstd, new_running_mean, new_running_var = batchnorm_op(
inputs, inputs,
gamma, gamma,
...@@ -263,11 +263,11 @@ def batch_normalization_train( ...@@ -263,11 +263,11 @@ def batch_normalization_train(
running_var=running_var, running_var=running_var,
) )
if new_running_mean.broadcastable != running_mean.broadcastable: if new_running_mean.broadcastable != running_mean.broadcastable:
new_running_mean = T.patternbroadcast( new_running_mean = tt.patternbroadcast(
new_running_mean, running_mean.broadcastable new_running_mean, running_mean.broadcastable
) )
if new_running_var.broadcastable != running_var.broadcastable: if new_running_var.broadcastable != running_var.broadcastable:
new_running_var = T.patternbroadcast( new_running_var = tt.patternbroadcast(
new_running_var, running_var.broadcastable new_running_var, running_var.broadcastable
) )
results = (out, mean, invstd, new_running_mean, new_running_var) results = (out, mean, invstd, new_running_mean, new_running_var)
...@@ -376,10 +376,10 @@ def batch_normalization_test( ...@@ -376,10 +376,10 @@ def batch_normalization_test(
mean = mean.dimshuffle(params_dimshuffle_pattern) mean = mean.dimshuffle(params_dimshuffle_pattern)
var = var.dimshuffle(params_dimshuffle_pattern) var = var.dimshuffle(params_dimshuffle_pattern)
else: else:
gamma = T.addbroadcast(gamma, *axes) gamma = tt.addbroadcast(gamma, *axes)
beta = T.addbroadcast(beta, *axes) beta = tt.addbroadcast(beta, *axes)
mean = T.addbroadcast(mean, *axes) mean = tt.addbroadcast(mean, *axes)
var = T.addbroadcast(var, *axes) var = tt.addbroadcast(var, *axes)
batchnorm_op = AbstractBatchNormInference(axes=axes) batchnorm_op = AbstractBatchNormInference(axes=axes)
return batchnorm_op(inputs, gamma, beta, mean, var, epsilon=epsilon) return batchnorm_op(inputs, gamma, beta, mean, var, epsilon=epsilon)
...@@ -610,14 +610,13 @@ class AbstractBatchNormInference(Op): ...@@ -610,14 +610,13 @@ class AbstractBatchNormInference(Op):
) )
scale, bias, est_mean, est_var = ( scale, bias, est_mean, est_var = (
theano.tensor.addbroadcast(t, *axes) tt.addbroadcast(t, *axes) for t in (scale, bias, est_mean, est_var)
for t in (scale, bias, est_mean, est_var)
) )
# define helper expressions # define helper expressions
est_var_eps = est_var + epsilon est_var_eps = est_var + epsilon
est_std = theano.tensor.sqrt(est_var_eps) est_std = tt.sqrt(est_var_eps)
two = theano.tensor.constant(2.0) two = tt.constant(2.0)
# define and return gradients # define and return gradients
dx = dy * (scale / est_std) dx = dy * (scale / est_std)
...@@ -673,7 +672,7 @@ class AbstractBatchNormTrainGrad(Op): ...@@ -673,7 +672,7 @@ class AbstractBatchNormTrainGrad(Op):
ddinputs, ddscale, ddbias = grads ddinputs, ddscale, ddbias = grads
x_diff = x - x_mean x_diff = x - x_mean
mean_dy_x_diff = T.mean(dy * x_diff, axis=self.axes, keepdims=True) mean_dy_x_diff = tt.mean(dy * x_diff, axis=self.axes, keepdims=True)
# compute gradients given each of the output gradients # compute gradients given each of the output gradients
g_wrt_x = 0 g_wrt_x = 0
...@@ -683,10 +682,10 @@ class AbstractBatchNormTrainGrad(Op): ...@@ -683,10 +682,10 @@ class AbstractBatchNormTrainGrad(Op):
g_wrt_x_invstd = 0 g_wrt_x_invstd = 0
if not isinstance(ddinputs.type, theano.gradient.DisconnectedType): if not isinstance(ddinputs.type, theano.gradient.DisconnectedType):
ccc = scale * (ddinputs - T.mean(ddinputs, axis=self.axes, keepdims=True)) ccc = scale * (ddinputs - tt.mean(ddinputs, axis=self.axes, keepdims=True))
ddd = (x_invstd ** 3) * ( ddd = (x_invstd ** 3) * (
ccc * T.mean(dy * x_diff, axis=self.axes, keepdims=True) ccc * tt.mean(dy * x_diff, axis=self.axes, keepdims=True)
+ dy * T.mean(ccc * x_diff, axis=self.axes, keepdims=True) + dy * tt.mean(ccc * x_diff, axis=self.axes, keepdims=True)
) )
g_wrt_x = g_wrt_x - ddd g_wrt_x = g_wrt_x - ddd
...@@ -695,19 +694,19 @@ class AbstractBatchNormTrainGrad(Op): ...@@ -695,19 +694,19 @@ class AbstractBatchNormTrainGrad(Op):
- ( - (
(x_invstd ** 3) (x_invstd ** 3)
* x_diff * x_diff
* T.mean(ccc * x_diff, axis=self.axes, keepdims=True) * tt.mean(ccc * x_diff, axis=self.axes, keepdims=True)
) )
) )
eee = (dy * x_invstd) - ((x_invstd ** 3) * x_diff * mean_dy_x_diff) eee = (dy * x_invstd) - ((x_invstd ** 3) * x_diff * mean_dy_x_diff)
g_wrt_scale = g_wrt_scale + T.sum( g_wrt_scale = g_wrt_scale + tt.sum(
ddinputs * (eee - T.mean(eee, axis=self.axes, keepdims=True)), ddinputs * (eee - tt.mean(eee, axis=self.axes, keepdims=True)),
axis=self.axes, axis=self.axes,
keepdims=True, keepdims=True,
) )
g_wrt_x_mean = g_wrt_x_mean + T.sum(ddd, axis=self.axes, keepdims=True) g_wrt_x_mean = g_wrt_x_mean + tt.sum(ddd, axis=self.axes, keepdims=True)
g_wrt_x_invstd = g_wrt_x_invstd + T.sum( g_wrt_x_invstd = g_wrt_x_invstd + tt.sum(
ccc * (dy - 3 * (x_invstd ** 2) * x_diff * mean_dy_x_diff), ccc * (dy - 3 * (x_invstd ** 2) * x_diff * mean_dy_x_diff),
axis=self.axes, axis=self.axes,
keepdims=True, keepdims=True,
...@@ -717,14 +716,14 @@ class AbstractBatchNormTrainGrad(Op): ...@@ -717,14 +716,14 @@ class AbstractBatchNormTrainGrad(Op):
g_wrt_x = g_wrt_x + (x_invstd * ddscale * dy) g_wrt_x = g_wrt_x + (x_invstd * ddscale * dy)
g_wrt_dy = g_wrt_dy + (x_invstd * ddscale * x_diff) g_wrt_dy = g_wrt_dy + (x_invstd * ddscale * x_diff)
g_wrt_x_mean = g_wrt_x_mean - ( g_wrt_x_mean = g_wrt_x_mean - (
x_invstd * ddscale * T.sum(dy, axis=self.axes, keepdims=True) x_invstd * ddscale * tt.sum(dy, axis=self.axes, keepdims=True)
) )
g_wrt_x_invstd = g_wrt_x_invstd + ( g_wrt_x_invstd = g_wrt_x_invstd + (
ddscale * T.sum(dy * x_diff, axis=self.axes, keepdims=True) ddscale * tt.sum(dy * x_diff, axis=self.axes, keepdims=True)
) )
if not isinstance(ddbias.type, theano.gradient.DisconnectedType): if not isinstance(ddbias.type, theano.gradient.DisconnectedType):
g_wrt_dy = g_wrt_dy + T.fill(dy, ddbias) g_wrt_dy = g_wrt_dy + tt.fill(dy, ddbias)
# depending on which output gradients are given, # depending on which output gradients are given,
# some inputs should be disconnected # some inputs should be disconnected
...@@ -804,7 +803,7 @@ def local_abstract_batch_norm_train(node): ...@@ -804,7 +803,7 @@ def local_abstract_batch_norm_train(node):
# The epsilon should not upcast the dtype. # The epsilon should not upcast the dtype.
if var.dtype == "float32" and epsilon.dtype == "float64": if var.dtype == "float32" and epsilon.dtype == "float64":
epsilon = epsilon.astype("float32") epsilon = epsilon.astype("float32")
invstd = T.inv(T.sqrt(var + epsilon)) invstd = tt.inv(tt.sqrt(var + epsilon))
out = (x - mean) * (scale * invstd) + bias out = (x - mean) * (scale * invstd) + bias
results = [out, mean, invstd] results = [out, mean, invstd]
...@@ -816,7 +815,7 @@ def local_abstract_batch_norm_train(node): ...@@ -816,7 +815,7 @@ def local_abstract_batch_norm_train(node):
) )
results.append(running_mean) results.append(running_mean)
if len(node.inputs) > 6: if len(node.inputs) > 6:
m = T.cast(T.prod(x.shape) / T.prod(scale.shape), theano.config.floatX) m = tt.cast(tt.prod(x.shape) / tt.prod(scale.shape), theano.config.floatX)
running_var = node.inputs[6] running_var = node.inputs[6]
running_var = ( running_var = (
running_var * (1.0 - running_average_factor) running_var * (1.0 - running_average_factor)
...@@ -825,7 +824,7 @@ def local_abstract_batch_norm_train(node): ...@@ -825,7 +824,7 @@ def local_abstract_batch_norm_train(node):
results.append(running_var) results.append(running_var)
results = [ results = [
T.patternbroadcast(r, r_orig.broadcastable) tt.patternbroadcast(r, r_orig.broadcastable)
for (r, r_orig) in zip(results, node.outputs) for (r, r_orig) in zip(results, node.outputs)
] ]
...@@ -855,16 +854,16 @@ def local_abstract_batch_norm_train_grad(node): ...@@ -855,16 +854,16 @@ def local_abstract_batch_norm_train_grad(node):
return None return None
x_diff = x - x_mean x_diff = x - x_mean
mean_dy_x_diff = T.mean(dy * x_diff, axis=axes, keepdims=True) mean_dy_x_diff = tt.mean(dy * x_diff, axis=axes, keepdims=True)
c = (dy * x_invstd) - x_diff * (mean_dy_x_diff * (x_invstd ** 3)) c = (dy * x_invstd) - x_diff * (mean_dy_x_diff * (x_invstd ** 3))
g_wrt_inputs = scale * (c - T.mean(c, axis=axes, keepdims=True)) g_wrt_inputs = scale * (c - tt.mean(c, axis=axes, keepdims=True))
g_wrt_scale = T.sum(dy * x_invstd * x_diff, axis=axes, keepdims=True) g_wrt_scale = tt.sum(dy * x_invstd * x_diff, axis=axes, keepdims=True)
g_wrt_bias = T.sum(dy, axis=axes, keepdims=True) g_wrt_bias = tt.sum(dy, axis=axes, keepdims=True)
results = [g_wrt_inputs, g_wrt_scale, g_wrt_bias] results = [g_wrt_inputs, g_wrt_scale, g_wrt_bias]
results = [ results = [
T.patternbroadcast(r, r_orig.broadcastable) tt.patternbroadcast(r, r_orig.broadcastable)
for (r, r_orig) in zip(results, node.outputs) for (r, r_orig) in zip(results, node.outputs)
] ]
...@@ -896,9 +895,9 @@ def local_abstract_batch_norm_inference(node): ...@@ -896,9 +895,9 @@ def local_abstract_batch_norm_inference(node):
epsilon = epsilon.astype("float32") epsilon = epsilon.astype("float32")
result = (x - estimated_mean) * ( result = (x - estimated_mean) * (
scale / T.sqrt(estimated_variance + epsilon) scale / tt.sqrt(estimated_variance + epsilon)
) + bias ) + bias
result = T.patternbroadcast(result, node.outputs[0].broadcastable) result = tt.patternbroadcast(result, node.outputs[0].broadcastable)
for var in theano.gof.graph.variables(node.inputs, [result]): for var in theano.gof.graph.variables(node.inputs, [result]):
if var not in node.inputs: if var not in node.inputs:
......
import os import os
import sys import sys
import theano.tensor as T
from theano import config import theano.tensor as tt
from theano import gof
from theano import config, gof
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.gof.cmodule import GCC_compiler from theano.gof.cmodule import GCC_compiler
from theano.tensor.opt import register_canonicalize
from theano.tensor.extra_ops import cpu_contiguous
from theano.gradient import grad_undefined from theano.gradient import grad_undefined
from theano.tensor.extra_ops import cpu_contiguous
from theano.tensor.opt import register_canonicalize
def _ctc_find_lib(): def _ctc_find_lib():
...@@ -156,12 +157,12 @@ class ConnectionistTemporalClassification(gof.COp, gof.OpenMPOp): ...@@ -156,12 +157,12 @@ class ConnectionistTemporalClassification(gof.COp, gof.OpenMPOp):
return ["ctc.h"] + gof.OpenMPOp.c_headers(self) return ["ctc.h"] + gof.OpenMPOp.c_headers(self)
def make_node(self, activations, labels, input_lengths): def make_node(self, activations, labels, input_lengths):
t_activations = T.as_tensor_variable(activations) t_activations = tt.as_tensor_variable(activations)
# Ensure activations array is C-contiguous # Ensure activations array is C-contiguous
t_activations = cpu_contiguous(t_activations) t_activations = cpu_contiguous(t_activations)
t_labels = T.as_tensor_variable(labels) t_labels = tt.as_tensor_variable(labels)
t_input_lengths = T.as_tensor_variable(input_lengths) t_input_lengths = tt.as_tensor_variable(input_lengths)
if t_activations.type.dtype != "float32": if t_activations.type.dtype != "float32":
raise TypeError("activations must use the float32 type!") raise TypeError("activations must use the float32 type!")
...@@ -181,10 +182,10 @@ class ConnectionistTemporalClassification(gof.COp, gof.OpenMPOp): ...@@ -181,10 +182,10 @@ class ConnectionistTemporalClassification(gof.COp, gof.OpenMPOp):
if t_input_lengths.ndim != 1: if t_input_lengths.ndim != 1:
raise ValueError("input_lengths must have 1 dimension.") raise ValueError("input_lengths must have 1 dimension.")
costs = T.fvector(name="ctc_cost") costs = tt.fvector(name="ctc_cost")
outputs = [costs] outputs = [costs]
if self.compute_grad: if self.compute_grad:
gradients = T.ftensor3(name="ctc_grad") gradients = tt.ftensor3(name="ctc_grad")
outputs += [gradients] outputs += [gradients]
return gof.Apply( return gof.Apply(
...@@ -197,9 +198,9 @@ class ConnectionistTemporalClassification(gof.COp, gof.OpenMPOp): ...@@ -197,9 +198,9 @@ class ConnectionistTemporalClassification(gof.COp, gof.OpenMPOp):
assert gradients is not None assert gradients is not None
grad_op = output_grads[0] grad_op = output_grads[0]
total_grad = T.basic.batched_dot( total_grad = tt.batched_dot(grad_op, gradients.dimshuffle(1, 0, 2)).dimshuffle(
grad_op, gradients.dimshuffle(1, 0, 2) 1, 0, 2
).dimshuffle(1, 0, 2) )
return [ return [
total_grad, total_grad,
grad_undefined(self, 1, inputs[1]), grad_undefined(self, 1, inputs[1]),
......
...@@ -2,16 +2,14 @@ ...@@ -2,16 +2,14 @@
TODO: implement Images2Neibs.infer_shape() methods TODO: implement Images2Neibs.infer_shape() methods
""" """
import numpy as np import numpy as np
import theano import theano
from theano import Op, Apply import theano.tensor as tt
from theano import Apply, Op
from theano.gof import EnumList from theano.gof import EnumList
import theano.tensor as T from theano.gradient import grad_not_implemented, grad_undefined
from theano.gradient import grad_not_implemented
from theano.gradient import grad_undefined
class Images2Neibs(Op): class Images2Neibs(Op):
...@@ -102,19 +100,19 @@ class Images2Neibs(Op): ...@@ -102,19 +100,19 @@ class Images2Neibs(Op):
pattern. pattern.
""" """
ten4 = T.as_tensor_variable(ten4) ten4 = tt.as_tensor_variable(ten4)
neib_shape = T.as_tensor_variable(neib_shape) neib_shape = tt.as_tensor_variable(neib_shape)
if neib_step is None: if neib_step is None:
neib_step = neib_shape neib_step = neib_shape
else: else:
neib_step = T.as_tensor_variable(neib_step) neib_step = tt.as_tensor_variable(neib_step)
assert ten4.ndim == 4 assert ten4.ndim == 4
assert neib_shape.ndim == 1 assert neib_shape.ndim == 1
assert neib_step.ndim == 1 assert neib_step.ndim == 1
return Apply( return Apply(
self, [ten4, neib_shape, neib_step], [T.matrix(dtype=ten4.type.dtype)] self, [ten4, neib_shape, neib_step], [tt.matrix(dtype=ten4.type.dtype)]
) )
def grad(self, inp, grads): def grad(self, inp, grads):
...@@ -165,14 +163,14 @@ class Images2Neibs(Op): ...@@ -165,14 +163,14 @@ class Images2Neibs(Op):
+ ((rows - nrows) // rstep + 1,) + ((rows - nrows) // rstep + 1,)
+ ((cols - ncols) // cstep + 1,) + ((cols - ncols) // cstep + 1,)
) )
return T.inc_subtensor(result_indices, pgz.reshape(newshape)) return tt.inc_subtensor(result_indices, pgz.reshape(newshape))
indices = T.arange(neib_shape[0] * neib_shape[1]) indices = tt.arange(neib_shape[0] * neib_shape[1])
pgzs = gz.dimshuffle((1, 0)) pgzs = gz.dimshuffle((1, 0))
result, _ = theano.scan( result, _ = theano.scan(
fn=pos2map, fn=pos2map,
sequences=[indices, pgzs], sequences=[indices, pgzs],
outputs_info=T.zeros(x.shape), outputs_info=tt.zeros(x.shape),
non_sequences=[neib_shape, neib_step], non_sequences=[neib_shape, neib_step],
) )
grad_input = result[-1] grad_input = result[-1]
...@@ -354,8 +352,8 @@ class Images2Neibs(Op): ...@@ -354,8 +352,8 @@ class Images2Neibs(Op):
c, d = node.inputs[1] c, d = node.inputs[1]
step_x, step_y = node.inputs[2] step_x, step_y = node.inputs[2]
if self.mode == "wrap_centered": if self.mode == "wrap_centered":
grid_c = T.ceil_intdiv(in_shape[2], step_x) grid_c = tt.ceil_intdiv(in_shape[2], step_x)
grid_d = T.ceil_intdiv(in_shape[3], step_y) grid_d = tt.ceil_intdiv(in_shape[3], step_y)
elif self.mode == "valid": elif self.mode == "valid":
grid_c = 1 + ((in_shape[2] - c) // step_x) grid_c = 1 + ((in_shape[2] - c) // step_x)
grid_d = 1 + ((in_shape[3] - d) // step_y) grid_d = 1 + ((in_shape[3] - d) // step_y)
...@@ -795,11 +793,11 @@ def neibs2images(neibs, neib_shape, original_shape, mode="valid"): ...@@ -795,11 +793,11 @@ def neibs2images(neibs, neib_shape, original_shape, mode="valid"):
.. note:: The code will output the initial image array. .. note:: The code will output the initial image array.
""" """
neibs = T.as_tensor_variable(neibs) neibs = tt.as_tensor_variable(neibs)
neib_shape = T.as_tensor_variable(neib_shape) neib_shape = tt.as_tensor_variable(neib_shape)
original_shape = T.as_tensor_variable(original_shape) original_shape = tt.as_tensor_variable(original_shape)
new_neib_shape = T.stack([original_shape[-1] // neib_shape[1], neib_shape[1]]) new_neib_shape = tt.stack([original_shape[-1] // neib_shape[1], neib_shape[1]])
output_2d = images2neibs( output_2d = images2neibs(
neibs.dimshuffle("x", "x", 0, 1), new_neib_shape, mode=mode neibs.dimshuffle("x", "x", 0, 1), new_neib_shape, mode=mode
) )
...@@ -809,10 +807,10 @@ def neibs2images(neibs, neib_shape, original_shape, mode="valid"): ...@@ -809,10 +807,10 @@ def neibs2images(neibs, neib_shape, original_shape, mode="valid"):
# the shape and still raise error when it don't have the right # the shape and still raise error when it don't have the right
# shape. # shape.
valid_shape = original_shape valid_shape = original_shape
valid_shape = T.set_subtensor( valid_shape = tt.set_subtensor(
valid_shape[2], (valid_shape[2] // neib_shape[0]) * neib_shape[0] valid_shape[2], (valid_shape[2] // neib_shape[0]) * neib_shape[0]
) )
valid_shape = T.set_subtensor( valid_shape = tt.set_subtensor(
valid_shape[3], (valid_shape[3] // neib_shape[1]) * neib_shape[1] valid_shape[3], (valid_shape[3] // neib_shape[1]) * neib_shape[1]
) )
output_4d = output_2d.reshape(valid_shape, ndim=4) output_4d = output_2d.reshape(valid_shape, ndim=4)
...@@ -820,7 +818,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode="valid"): ...@@ -820,7 +818,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode="valid"):
for d in [2, 3]: for d in [2, 3]:
pad_shape = list(output_4d.shape) pad_shape = list(output_4d.shape)
pad_shape[d] = original_shape[d] - valid_shape[d] pad_shape[d] = original_shape[d] - valid_shape[d]
output_4d = T.concatenate([output_4d, T.zeros(pad_shape)], axis=d) output_4d = tt.concatenate([output_4d, tt.zeros(pad_shape)], axis=d)
elif mode == "valid": elif mode == "valid":
# TODO: we do not implement all mode with this code. # TODO: we do not implement all mode with this code.
# Add a check for the good cases. # Add a check for the good cases.
......
差异被折叠。
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论