提交 c51f5936 作者: Brandon T. Willard

Replace theano.tensor alias T with tt in theano.tensor sub-package

Indirect references to theano.tensor imports at module level were also converted to direct references in quite a few cases.
上级 2cb3a154
import theano.tensor.basic as tt
from theano import config
from theano.gof.params_type import ParamsType
from theano.scalar import bool as bool_t
......@@ -6,7 +8,6 @@ from theano.tensor.blas import ldflags, blas_header_text, blas_header_version
from theano.tensor.blas import blas_optdb, optdb, local_optimizer
from theano.tensor.blas import Ger, ger, ger_destructive
from theano.tensor.blas import Gemv, gemv_inplace, gemv_no_inplace
from theano.tensor import basic as T
class BaseBLAS(object):
......@@ -706,10 +707,10 @@ def make_c_gemv_destructive(node):
dest = inputs[0]
if (
dest.owner
and isinstance(dest.owner.op, T.AllocEmpty)
and isinstance(dest.owner.op, tt.AllocEmpty)
and len(dest.clients) > 1
):
inputs[0] = T.AllocEmpty(dest.dtype)(*dest.owner.inputs)
inputs[0] = tt.AllocEmpty(dest.dtype)(*dest.owner.inputs)
return [cgemv_inplace(*inputs)]
......
import numpy as np
import theano.tensor as tt
from theano import gof
import theano.tensor as T
from theano.gradient import DisconnectedType
......@@ -10,10 +12,10 @@ class RFFTOp(gof.Op):
def output_type(self, inp):
# add extra dim for real/imag
return T.TensorType(inp.dtype, broadcastable=[False] * (inp.type.ndim + 1))
return tt.TensorType(inp.dtype, broadcastable=[False] * (inp.type.ndim + 1))
def make_node(self, a, s=None):
a = T.as_tensor_variable(a)
a = tt.as_tensor_variable(a)
if a.ndim < 2:
raise TypeError(
"%s: input must have dimension > 2, with first dimension batches"
......@@ -22,10 +24,10 @@ class RFFTOp(gof.Op):
if s is None:
s = a.shape[1:]
s = T.as_tensor_variable(s)
s = tt.as_tensor_variable(s)
else:
s = T.as_tensor_variable(s)
if s.dtype not in T.integer_dtypes:
s = tt.as_tensor_variable(s)
if s.dtype not in tt.integer_dtypes:
raise TypeError(
"%s: length of the transformed axis must be"
" of type integer" % self.__class__.__name__
......@@ -54,7 +56,7 @@ class RFFTOp(gof.Op):
+ [slice(1, (s[-1] // 2) + (s[-1] % 2))]
+ [slice(None)]
)
gout = T.set_subtensor(gout[idx], gout[idx] * 0.5)
gout = tt.set_subtensor(gout[idx], gout[idx] * 0.5)
return [irfft_op(gout, s), DisconnectedType()()]
def connection_pattern(self, node):
......@@ -71,10 +73,10 @@ class IRFFTOp(gof.Op):
def output_type(self, inp):
# remove extra dim for real/imag
return T.TensorType(inp.dtype, broadcastable=[False] * (inp.type.ndim - 1))
return tt.TensorType(inp.dtype, broadcastable=[False] * (inp.type.ndim - 1))
def make_node(self, a, s=None):
a = T.as_tensor_variable(a)
a = tt.as_tensor_variable(a)
if a.ndim < 3:
raise TypeError(
"%s: input must have dimension >= 3, with " % self.__class__.__name__
......@@ -83,11 +85,11 @@ class IRFFTOp(gof.Op):
if s is None:
s = a.shape[1:-1]
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2)
s = T.as_tensor_variable(s)
s = tt.set_subtensor(s[-1], (s[-1] - 1) * 2)
s = tt.as_tensor_variable(s)
else:
s = T.as_tensor_variable(s)
if s.dtype not in T.integer_dtypes:
s = tt.as_tensor_variable(s)
if s.dtype not in tt.integer_dtypes:
raise TypeError(
"%s: length of the transformed axis must be"
" of type integer" % self.__class__.__name__
......@@ -117,7 +119,7 @@ class IRFFTOp(gof.Op):
+ [slice(1, (s[-1] // 2) + (s[-1] % 2))]
+ [slice(None)]
)
gf = T.set_subtensor(gf[idx], gf[idx] * 2)
gf = tt.set_subtensor(gf[idx], gf[idx] * 2)
return [gf, DisconnectedType()()]
def connection_pattern(self, node):
......@@ -157,7 +159,7 @@ def rfft(inp, norm=None):
cond_norm = _unitary(norm)
scaling = 1
if cond_norm == "ortho":
scaling = T.sqrt(s.prod().astype(inp.dtype))
scaling = tt.sqrt(s.prod().astype(inp.dtype))
return rfft_op(inp, s) / scaling
......@@ -196,9 +198,9 @@ def irfft(inp, norm=None, is_odd=False):
s = inp.shape[1:-1]
if is_odd:
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2 + 1)
s = tt.set_subtensor(s[-1], (s[-1] - 1) * 2 + 1)
else:
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2)
s = tt.set_subtensor(s[-1], (s[-1] - 1) * 2)
cond_norm = _unitary(norm)
scaling = 1
......@@ -206,7 +208,7 @@ def irfft(inp, norm=None, is_odd=False):
if cond_norm is None:
scaling = s.prod().astype(inp.dtype)
elif cond_norm == "ortho":
scaling = T.sqrt(s.prod().astype(inp.dtype))
scaling = tt.sqrt(s.prod().astype(inp.dtype))
return irfft_op(inp, s) / scaling
......
import numpy as np
import theano
import theano.tensor.basic as tt
from theano import Apply, Op
from theano.gof import local_optimizer
from theano.gof.opt import copy_stack_trace
from theano.tensor import as_tensor_variable, TensorType
from theano.tensor import basic as T
from theano.scalar import Composite, add, as_common_dtype, mul, sub, true_div
from theano.tensor import TensorType, as_tensor_variable
from theano.tensor.elemwise import Elemwise
from theano.tensor.opt import register_specialize_device
from theano.scalar import Composite, as_common_dtype
from theano.scalar import add, sub, true_div, mul
class BNComposite(Composite):
......@@ -72,9 +74,7 @@ def batch_normalization(inputs, gamma, beta, mean, std, mode="low_mem"):
between implementation is likely to be less important on the full model fprop/bprop.
"""
if mode == "low_mem":
elm_bn = theano.tensor.elemwise.Elemwise(
scalar_op=BNComposite(dtype=inputs.dtype)
)
elm_bn = Elemwise(scalar_op=BNComposite(dtype=inputs.dtype))
rval = elm_bn(inputs, mean, std, gamma, beta)
elif mode == "high_mem":
rval = (inputs - mean) * (gamma / std) + beta
......@@ -239,8 +239,8 @@ def batch_normalization_train(
gamma = gamma.dimshuffle(params_dimshuffle_pattern)
beta = beta.dimshuffle(params_dimshuffle_pattern)
else:
gamma = T.addbroadcast(gamma, *axes)
beta = T.addbroadcast(beta, *axes)
gamma = tt.addbroadcast(gamma, *axes)
beta = tt.addbroadcast(beta, *axes)
batchnorm_op = AbstractBatchNormTrain(axes=axes)
......@@ -251,8 +251,8 @@ def batch_normalization_train(
running_mean = running_mean.dimshuffle(params_dimshuffle_pattern)
running_var = running_var.dimshuffle(params_dimshuffle_pattern)
else:
running_mean = T.addbroadcast(running_mean, *axes)
running_var = T.addbroadcast(running_var, *axes)
running_mean = tt.addbroadcast(running_mean, *axes)
running_var = tt.addbroadcast(running_var, *axes)
out, mean, invstd, new_running_mean, new_running_var = batchnorm_op(
inputs,
gamma,
......@@ -263,11 +263,11 @@ def batch_normalization_train(
running_var=running_var,
)
if new_running_mean.broadcastable != running_mean.broadcastable:
new_running_mean = T.patternbroadcast(
new_running_mean = tt.patternbroadcast(
new_running_mean, running_mean.broadcastable
)
if new_running_var.broadcastable != running_var.broadcastable:
new_running_var = T.patternbroadcast(
new_running_var = tt.patternbroadcast(
new_running_var, running_var.broadcastable
)
results = (out, mean, invstd, new_running_mean, new_running_var)
......@@ -376,10 +376,10 @@ def batch_normalization_test(
mean = mean.dimshuffle(params_dimshuffle_pattern)
var = var.dimshuffle(params_dimshuffle_pattern)
else:
gamma = T.addbroadcast(gamma, *axes)
beta = T.addbroadcast(beta, *axes)
mean = T.addbroadcast(mean, *axes)
var = T.addbroadcast(var, *axes)
gamma = tt.addbroadcast(gamma, *axes)
beta = tt.addbroadcast(beta, *axes)
mean = tt.addbroadcast(mean, *axes)
var = tt.addbroadcast(var, *axes)
batchnorm_op = AbstractBatchNormInference(axes=axes)
return batchnorm_op(inputs, gamma, beta, mean, var, epsilon=epsilon)
......@@ -610,14 +610,13 @@ class AbstractBatchNormInference(Op):
)
scale, bias, est_mean, est_var = (
theano.tensor.addbroadcast(t, *axes)
for t in (scale, bias, est_mean, est_var)
tt.addbroadcast(t, *axes) for t in (scale, bias, est_mean, est_var)
)
# define helper expressions
est_var_eps = est_var + epsilon
est_std = theano.tensor.sqrt(est_var_eps)
two = theano.tensor.constant(2.0)
est_std = tt.sqrt(est_var_eps)
two = tt.constant(2.0)
# define and return gradients
dx = dy * (scale / est_std)
......@@ -673,7 +672,7 @@ class AbstractBatchNormTrainGrad(Op):
ddinputs, ddscale, ddbias = grads
x_diff = x - x_mean
mean_dy_x_diff = T.mean(dy * x_diff, axis=self.axes, keepdims=True)
mean_dy_x_diff = tt.mean(dy * x_diff, axis=self.axes, keepdims=True)
# compute gradients given each of the output gradients
g_wrt_x = 0
......@@ -683,10 +682,10 @@ class AbstractBatchNormTrainGrad(Op):
g_wrt_x_invstd = 0
if not isinstance(ddinputs.type, theano.gradient.DisconnectedType):
ccc = scale * (ddinputs - T.mean(ddinputs, axis=self.axes, keepdims=True))
ccc = scale * (ddinputs - tt.mean(ddinputs, axis=self.axes, keepdims=True))
ddd = (x_invstd ** 3) * (
ccc * T.mean(dy * x_diff, axis=self.axes, keepdims=True)
+ dy * T.mean(ccc * x_diff, axis=self.axes, keepdims=True)
ccc * tt.mean(dy * x_diff, axis=self.axes, keepdims=True)
+ dy * tt.mean(ccc * x_diff, axis=self.axes, keepdims=True)
)
g_wrt_x = g_wrt_x - ddd
......@@ -695,19 +694,19 @@ class AbstractBatchNormTrainGrad(Op):
- (
(x_invstd ** 3)
* x_diff
* T.mean(ccc * x_diff, axis=self.axes, keepdims=True)
* tt.mean(ccc * x_diff, axis=self.axes, keepdims=True)
)
)
eee = (dy * x_invstd) - ((x_invstd ** 3) * x_diff * mean_dy_x_diff)
g_wrt_scale = g_wrt_scale + T.sum(
ddinputs * (eee - T.mean(eee, axis=self.axes, keepdims=True)),
g_wrt_scale = g_wrt_scale + tt.sum(
ddinputs * (eee - tt.mean(eee, axis=self.axes, keepdims=True)),
axis=self.axes,
keepdims=True,
)
g_wrt_x_mean = g_wrt_x_mean + T.sum(ddd, axis=self.axes, keepdims=True)
g_wrt_x_invstd = g_wrt_x_invstd + T.sum(
g_wrt_x_mean = g_wrt_x_mean + tt.sum(ddd, axis=self.axes, keepdims=True)
g_wrt_x_invstd = g_wrt_x_invstd + tt.sum(
ccc * (dy - 3 * (x_invstd ** 2) * x_diff * mean_dy_x_diff),
axis=self.axes,
keepdims=True,
......@@ -717,14 +716,14 @@ class AbstractBatchNormTrainGrad(Op):
g_wrt_x = g_wrt_x + (x_invstd * ddscale * dy)
g_wrt_dy = g_wrt_dy + (x_invstd * ddscale * x_diff)
g_wrt_x_mean = g_wrt_x_mean - (
x_invstd * ddscale * T.sum(dy, axis=self.axes, keepdims=True)
x_invstd * ddscale * tt.sum(dy, axis=self.axes, keepdims=True)
)
g_wrt_x_invstd = g_wrt_x_invstd + (
ddscale * T.sum(dy * x_diff, axis=self.axes, keepdims=True)
ddscale * tt.sum(dy * x_diff, axis=self.axes, keepdims=True)
)
if not isinstance(ddbias.type, theano.gradient.DisconnectedType):
g_wrt_dy = g_wrt_dy + T.fill(dy, ddbias)
g_wrt_dy = g_wrt_dy + tt.fill(dy, ddbias)
# depending on which output gradients are given,
# some inputs should be disconnected
......@@ -804,7 +803,7 @@ def local_abstract_batch_norm_train(node):
# The epsilon should not upcast the dtype.
if var.dtype == "float32" and epsilon.dtype == "float64":
epsilon = epsilon.astype("float32")
invstd = T.inv(T.sqrt(var + epsilon))
invstd = tt.inv(tt.sqrt(var + epsilon))
out = (x - mean) * (scale * invstd) + bias
results = [out, mean, invstd]
......@@ -816,7 +815,7 @@ def local_abstract_batch_norm_train(node):
)
results.append(running_mean)
if len(node.inputs) > 6:
m = T.cast(T.prod(x.shape) / T.prod(scale.shape), theano.config.floatX)
m = tt.cast(tt.prod(x.shape) / tt.prod(scale.shape), theano.config.floatX)
running_var = node.inputs[6]
running_var = (
running_var * (1.0 - running_average_factor)
......@@ -825,7 +824,7 @@ def local_abstract_batch_norm_train(node):
results.append(running_var)
results = [
T.patternbroadcast(r, r_orig.broadcastable)
tt.patternbroadcast(r, r_orig.broadcastable)
for (r, r_orig) in zip(results, node.outputs)
]
......@@ -855,16 +854,16 @@ def local_abstract_batch_norm_train_grad(node):
return None
x_diff = x - x_mean
mean_dy_x_diff = T.mean(dy * x_diff, axis=axes, keepdims=True)
mean_dy_x_diff = tt.mean(dy * x_diff, axis=axes, keepdims=True)
c = (dy * x_invstd) - x_diff * (mean_dy_x_diff * (x_invstd ** 3))
g_wrt_inputs = scale * (c - T.mean(c, axis=axes, keepdims=True))
g_wrt_scale = T.sum(dy * x_invstd * x_diff, axis=axes, keepdims=True)
g_wrt_bias = T.sum(dy, axis=axes, keepdims=True)
g_wrt_inputs = scale * (c - tt.mean(c, axis=axes, keepdims=True))
g_wrt_scale = tt.sum(dy * x_invstd * x_diff, axis=axes, keepdims=True)
g_wrt_bias = tt.sum(dy, axis=axes, keepdims=True)
results = [g_wrt_inputs, g_wrt_scale, g_wrt_bias]
results = [
T.patternbroadcast(r, r_orig.broadcastable)
tt.patternbroadcast(r, r_orig.broadcastable)
for (r, r_orig) in zip(results, node.outputs)
]
......@@ -896,9 +895,9 @@ def local_abstract_batch_norm_inference(node):
epsilon = epsilon.astype("float32")
result = (x - estimated_mean) * (
scale / T.sqrt(estimated_variance + epsilon)
scale / tt.sqrt(estimated_variance + epsilon)
) + bias
result = T.patternbroadcast(result, node.outputs[0].broadcastable)
result = tt.patternbroadcast(result, node.outputs[0].broadcastable)
for var in theano.gof.graph.variables(node.inputs, [result]):
if var not in node.inputs:
......
import os
import sys
import theano.tensor as T
from theano import config
from theano import gof
import theano.tensor as tt
from theano import config, gof
from theano.gof import local_optimizer
from theano.gof.cmodule import GCC_compiler
from theano.tensor.opt import register_canonicalize
from theano.tensor.extra_ops import cpu_contiguous
from theano.gradient import grad_undefined
from theano.tensor.extra_ops import cpu_contiguous
from theano.tensor.opt import register_canonicalize
def _ctc_find_lib():
......@@ -156,12 +157,12 @@ class ConnectionistTemporalClassification(gof.COp, gof.OpenMPOp):
return ["ctc.h"] + gof.OpenMPOp.c_headers(self)
def make_node(self, activations, labels, input_lengths):
t_activations = T.as_tensor_variable(activations)
t_activations = tt.as_tensor_variable(activations)
# Ensure activations array is C-contiguous
t_activations = cpu_contiguous(t_activations)
t_labels = T.as_tensor_variable(labels)
t_input_lengths = T.as_tensor_variable(input_lengths)
t_labels = tt.as_tensor_variable(labels)
t_input_lengths = tt.as_tensor_variable(input_lengths)
if t_activations.type.dtype != "float32":
raise TypeError("activations must use the float32 type!")
......@@ -181,10 +182,10 @@ class ConnectionistTemporalClassification(gof.COp, gof.OpenMPOp):
if t_input_lengths.ndim != 1:
raise ValueError("input_lengths must have 1 dimension.")
costs = T.fvector(name="ctc_cost")
costs = tt.fvector(name="ctc_cost")
outputs = [costs]
if self.compute_grad:
gradients = T.ftensor3(name="ctc_grad")
gradients = tt.ftensor3(name="ctc_grad")
outputs += [gradients]
return gof.Apply(
......@@ -197,9 +198,9 @@ class ConnectionistTemporalClassification(gof.COp, gof.OpenMPOp):
assert gradients is not None
grad_op = output_grads[0]
total_grad = T.basic.batched_dot(
grad_op, gradients.dimshuffle(1, 0, 2)
).dimshuffle(1, 0, 2)
total_grad = tt.batched_dot(grad_op, gradients.dimshuffle(1, 0, 2)).dimshuffle(
1, 0, 2
)
return [
total_grad,
grad_undefined(self, 1, inputs[1]),
......
......@@ -2,16 +2,14 @@
TODO: implement Images2Neibs.infer_shape() methods
"""
import numpy as np
import theano
from theano import Op, Apply
import theano.tensor as tt
from theano import Apply, Op
from theano.gof import EnumList
import theano.tensor as T
from theano.gradient import grad_not_implemented
from theano.gradient import grad_undefined
from theano.gradient import grad_not_implemented, grad_undefined
class Images2Neibs(Op):
......@@ -102,19 +100,19 @@ class Images2Neibs(Op):
pattern.
"""
ten4 = T.as_tensor_variable(ten4)
neib_shape = T.as_tensor_variable(neib_shape)
ten4 = tt.as_tensor_variable(ten4)
neib_shape = tt.as_tensor_variable(neib_shape)
if neib_step is None:
neib_step = neib_shape
else:
neib_step = T.as_tensor_variable(neib_step)
neib_step = tt.as_tensor_variable(neib_step)
assert ten4.ndim == 4
assert neib_shape.ndim == 1
assert neib_step.ndim == 1
return Apply(
self, [ten4, neib_shape, neib_step], [T.matrix(dtype=ten4.type.dtype)]
self, [ten4, neib_shape, neib_step], [tt.matrix(dtype=ten4.type.dtype)]
)
def grad(self, inp, grads):
......@@ -165,14 +163,14 @@ class Images2Neibs(Op):
+ ((rows - nrows) // rstep + 1,)
+ ((cols - ncols) // cstep + 1,)
)
return T.inc_subtensor(result_indices, pgz.reshape(newshape))
return tt.inc_subtensor(result_indices, pgz.reshape(newshape))
indices = T.arange(neib_shape[0] * neib_shape[1])
indices = tt.arange(neib_shape[0] * neib_shape[1])
pgzs = gz.dimshuffle((1, 0))
result, _ = theano.scan(
fn=pos2map,
sequences=[indices, pgzs],
outputs_info=T.zeros(x.shape),
outputs_info=tt.zeros(x.shape),
non_sequences=[neib_shape, neib_step],
)
grad_input = result[-1]
......@@ -354,8 +352,8 @@ class Images2Neibs(Op):
c, d = node.inputs[1]
step_x, step_y = node.inputs[2]
if self.mode == "wrap_centered":
grid_c = T.ceil_intdiv(in_shape[2], step_x)
grid_d = T.ceil_intdiv(in_shape[3], step_y)
grid_c = tt.ceil_intdiv(in_shape[2], step_x)
grid_d = tt.ceil_intdiv(in_shape[3], step_y)
elif self.mode == "valid":
grid_c = 1 + ((in_shape[2] - c) // step_x)
grid_d = 1 + ((in_shape[3] - d) // step_y)
......@@ -795,11 +793,11 @@ def neibs2images(neibs, neib_shape, original_shape, mode="valid"):
.. note:: The code will output the initial image array.
"""
neibs = T.as_tensor_variable(neibs)
neib_shape = T.as_tensor_variable(neib_shape)
original_shape = T.as_tensor_variable(original_shape)
neibs = tt.as_tensor_variable(neibs)
neib_shape = tt.as_tensor_variable(neib_shape)
original_shape = tt.as_tensor_variable(original_shape)
new_neib_shape = T.stack([original_shape[-1] // neib_shape[1], neib_shape[1]])
new_neib_shape = tt.stack([original_shape[-1] // neib_shape[1], neib_shape[1]])
output_2d = images2neibs(
neibs.dimshuffle("x", "x", 0, 1), new_neib_shape, mode=mode
)
......@@ -809,10 +807,10 @@ def neibs2images(neibs, neib_shape, original_shape, mode="valid"):
# the shape and still raise error when it don't have the right
# shape.
valid_shape = original_shape
valid_shape = T.set_subtensor(
valid_shape = tt.set_subtensor(
valid_shape[2], (valid_shape[2] // neib_shape[0]) * neib_shape[0]
)
valid_shape = T.set_subtensor(
valid_shape = tt.set_subtensor(
valid_shape[3], (valid_shape[3] // neib_shape[1]) * neib_shape[1]
)
output_4d = output_2d.reshape(valid_shape, ndim=4)
......@@ -820,7 +818,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode="valid"):
for d in [2, 3]:
pad_shape = list(output_4d.shape)
pad_shape[d] = original_shape[d] - valid_shape[d]
output_4d = T.concatenate([output_4d, T.zeros(pad_shape)], axis=d)
output_4d = tt.concatenate([output_4d, tt.zeros(pad_shape)], axis=d)
elif mode == "valid":
# TODO: we do not implement all mode with this code.
# Add a check for the good cases.
......
差异被折叠。
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论