Commit c51f5936, authored by Brandon T. Willard. Parent: 2cb3a154.

Replace theano.tensor alias T with tt in theano.tensor sub-package

Indirect references to theano.tensor imports at module level were also converted to direct references in quite a few cases.
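The change is mechanical; a minimal before/after sketch of the aliasing convention (the variables here are illustrative, not from the commit):

# Old convention inside the sub-package: theano.tensor was aliased to T
import theano.tensor as T
y_old = T.sqrt(T.as_tensor_variable([1.0, 4.0]))

# New convention: the same alias is spelled tt; some modules instead bind
# theano.tensor.basic as tt, as the hunks below show
import theano.tensor as tt
y_new = tt.sqrt(tt.as_tensor_variable([1.0, 4.0]))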
theano/tensor/blas_c.py
import theano.tensor.basic as tt
from theano import config
from theano.gof.params_type import ParamsType
from theano.scalar import bool as bool_t
@@ -6,7 +8,6 @@ from theano.tensor.blas import ldflags, blas_header_text, blas_header_version
from theano.tensor.blas import blas_optdb, optdb, local_optimizer
from theano.tensor.blas import Ger, ger, ger_destructive
from theano.tensor.blas import Gemv, gemv_inplace, gemv_no_inplace
from theano.tensor import basic as T
class BaseBLAS(object):
@@ -706,10 +707,10 @@ def make_c_gemv_destructive(node):
dest = inputs[0]
if (
dest.owner
and isinstance(dest.owner.op, T.AllocEmpty)
and isinstance(dest.owner.op, tt.AllocEmpty)
and len(dest.clients) > 1
):
inputs[0] = T.AllocEmpty(dest.dtype)(*dest.owner.inputs)
inputs[0] = tt.AllocEmpty(dest.dtype)(*dest.owner.inputs)
return [cgemv_inplace(*inputs)]
theano/tensor/fft.py
import numpy as np
import theano.tensor as tt
from theano import gof
import theano.tensor as T
from theano.gradient import DisconnectedType
@@ -10,10 +12,10 @@ class RFFTOp(gof.Op):
def output_type(self, inp):
# add extra dim for real/imag
return T.TensorType(inp.dtype, broadcastable=[False] * (inp.type.ndim + 1))
return tt.TensorType(inp.dtype, broadcastable=[False] * (inp.type.ndim + 1))
def make_node(self, a, s=None):
a = T.as_tensor_variable(a)
a = tt.as_tensor_variable(a)
if a.ndim < 2:
raise TypeError(
"%s: input must have dimension > 2, with first dimension batches"
@@ -22,10 +24,10 @@ class RFFTOp(gof.Op):
if s is None:
s = a.shape[1:]
s = T.as_tensor_variable(s)
s = tt.as_tensor_variable(s)
else:
s = T.as_tensor_variable(s)
if s.dtype not in T.integer_dtypes:
s = tt.as_tensor_variable(s)
if s.dtype not in tt.integer_dtypes:
raise TypeError(
"%s: length of the transformed axis must be"
" of type integer" % self.__class__.__name__
@@ -54,7 +56,7 @@ class RFFTOp(gof.Op):
+ [slice(1, (s[-1] // 2) + (s[-1] % 2))]
+ [slice(None)]
)
gout = T.set_subtensor(gout[idx], gout[idx] * 0.5)
gout = tt.set_subtensor(gout[idx], gout[idx] * 0.5)
return [irfft_op(gout, s), DisconnectedType()()]
def connection_pattern(self, node):
@@ -71,10 +73,10 @@ class IRFFTOp(gof.Op):
def output_type(self, inp):
# remove extra dim for real/imag
return T.TensorType(inp.dtype, broadcastable=[False] * (inp.type.ndim - 1))
return tt.TensorType(inp.dtype, broadcastable=[False] * (inp.type.ndim - 1))
def make_node(self, a, s=None):
a = T.as_tensor_variable(a)
a = tt.as_tensor_variable(a)
if a.ndim < 3:
raise TypeError(
"%s: input must have dimension >= 3, with " % self.__class__.__name__
@@ -83,11 +85,11 @@ class IRFFTOp(gof.Op):
if s is None:
s = a.shape[1:-1]
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2)
s = T.as_tensor_variable(s)
s = tt.set_subtensor(s[-1], (s[-1] - 1) * 2)
s = tt.as_tensor_variable(s)
else:
s = T.as_tensor_variable(s)
if s.dtype not in T.integer_dtypes:
s = tt.as_tensor_variable(s)
if s.dtype not in tt.integer_dtypes:
raise TypeError(
"%s: length of the transformed axis must be"
" of type integer" % self.__class__.__name__
@@ -117,7 +119,7 @@ class IRFFTOp(gof.Op):
+ [slice(1, (s[-1] // 2) + (s[-1] % 2))]
+ [slice(None)]
)
gf = T.set_subtensor(gf[idx], gf[idx] * 2)
gf = tt.set_subtensor(gf[idx], gf[idx] * 2)
return [gf, DisconnectedType()()]
def connection_pattern(self, node):
@@ -157,7 +159,7 @@ def rfft(inp, norm=None):
cond_norm = _unitary(norm)
scaling = 1
if cond_norm == "ortho":
scaling = T.sqrt(s.prod().astype(inp.dtype))
scaling = tt.sqrt(s.prod().astype(inp.dtype))
return rfft_op(inp, s) / scaling
@@ -196,9 +198,9 @@ def irfft(inp, norm=None, is_odd=False):
s = inp.shape[1:-1]
if is_odd:
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2 + 1)
s = tt.set_subtensor(s[-1], (s[-1] - 1) * 2 + 1)
else:
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2)
s = tt.set_subtensor(s[-1], (s[-1] - 1) * 2)
cond_norm = _unitary(norm)
scaling = 1
@@ -206,7 +208,7 @@ def irfft(inp, norm=None, is_odd=False):
if cond_norm is None:
scaling = s.prod().astype(inp.dtype)
elif cond_norm == "ortho":
scaling = T.sqrt(s.prod().astype(inp.dtype))
scaling = tt.sqrt(s.prod().astype(inp.dtype))
return irfft_op(inp, s) / scaling
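For orientation, a round-trip usage sketch of the pair defined above (shapes and names are illustrative; "ortho" triggers the sqrt(prod(s)) scaling visible in the code):

# rfft expects >= 2 dimensions with the first dimension as batch; the output
# gains a trailing length-2 axis holding the real and imaginary parts.
import numpy as np
import theano
import theano.tensor as tt
from theano.tensor.fft import rfft, irfft

x = tt.fmatrix("x")
xf = rfft(x, norm="ortho")
xr = irfft(xf, norm="ortho")
f = theano.function([x], xr)

data = np.random.rand(2, 8).astype("float32")
assert np.allclose(f(data), data, atol=1e-4)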
theano/tensor/nnet/bn.py
import numpy as np
import theano
import theano.tensor.basic as tt
from theano import Apply, Op
from theano.gof import local_optimizer
from theano.gof.opt import copy_stack_trace
from theano.tensor import as_tensor_variable, TensorType
from theano.tensor import basic as T
from theano.scalar import Composite, add, as_common_dtype, mul, sub, true_div
from theano.tensor import TensorType, as_tensor_variable
from theano.tensor.elemwise import Elemwise
from theano.tensor.opt import register_specialize_device
from theano.scalar import Composite, as_common_dtype
from theano.scalar import add, sub, true_div, mul
class BNComposite(Composite):
@@ -72,9 +74,7 @@ def batch_normalization(inputs, gamma, beta, mean, std, mode="low_mem"):
between implementation is likely to be less important on the full model fprop/bprop.
"""
if mode == "low_mem":
elm_bn = theano.tensor.elemwise.Elemwise(
scalar_op=BNComposite(dtype=inputs.dtype)
)
elm_bn = Elemwise(scalar_op=BNComposite(dtype=inputs.dtype))
rval = elm_bn(inputs, mean, std, gamma, beta)
elif mode == "high_mem":
rval = (inputs - mean) * (gamma / std) + beta
@@ -239,8 +239,8 @@ def batch_normalization_train(
gamma = gamma.dimshuffle(params_dimshuffle_pattern)
beta = beta.dimshuffle(params_dimshuffle_pattern)
else:
gamma = T.addbroadcast(gamma, *axes)
beta = T.addbroadcast(beta, *axes)
gamma = tt.addbroadcast(gamma, *axes)
beta = tt.addbroadcast(beta, *axes)
batchnorm_op = AbstractBatchNormTrain(axes=axes)
@@ -251,8 +251,8 @@ def batch_normalization_train(
running_mean = running_mean.dimshuffle(params_dimshuffle_pattern)
running_var = running_var.dimshuffle(params_dimshuffle_pattern)
else:
running_mean = T.addbroadcast(running_mean, *axes)
running_var = T.addbroadcast(running_var, *axes)
running_mean = tt.addbroadcast(running_mean, *axes)
running_var = tt.addbroadcast(running_var, *axes)
out, mean, invstd, new_running_mean, new_running_var = batchnorm_op(
inputs,
gamma,
@@ -263,11 +263,11 @@ def batch_normalization_train(
running_var=running_var,
)
if new_running_mean.broadcastable != running_mean.broadcastable:
new_running_mean = T.patternbroadcast(
new_running_mean = tt.patternbroadcast(
new_running_mean, running_mean.broadcastable
)
if new_running_var.broadcastable != running_var.broadcastable:
new_running_var = T.patternbroadcast(
new_running_var = tt.patternbroadcast(
new_running_var, running_var.broadcastable
)
results = (out, mean, invstd, new_running_mean, new_running_var)
@@ -376,10 +376,10 @@ def batch_normalization_test(
mean = mean.dimshuffle(params_dimshuffle_pattern)
var = var.dimshuffle(params_dimshuffle_pattern)
else:
gamma = T.addbroadcast(gamma, *axes)
beta = T.addbroadcast(beta, *axes)
mean = T.addbroadcast(mean, *axes)
var = T.addbroadcast(var, *axes)
gamma = tt.addbroadcast(gamma, *axes)
beta = tt.addbroadcast(beta, *axes)
mean = tt.addbroadcast(mean, *axes)
var = tt.addbroadcast(var, *axes)
batchnorm_op = AbstractBatchNormInference(axes=axes)
return batchnorm_op(inputs, gamma, beta, mean, var, epsilon=epsilon)
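For reference, a sketch of the two public entry points whose internals are touched here, applied per-activation to a (batch, features) matrix (names and shapes are assumptions; the signatures follow the definitions above):

import theano
import theano.tensor as tt
from theano.tensor.nnet.bn import batch_normalization_test, batch_normalization_train

x = tt.matrix("x")
gamma, beta = tt.vector("gamma"), tt.vector("beta")

# Training mode also returns the minibatch statistics.
out, mean, invstd = batch_normalization_train(x, gamma, beta, "per-activation")
f_train = theano.function([x, gamma, beta], [out, mean, invstd])

# Inference mode consumes externally tracked statistics.
mu, var = tt.vector("mu"), tt.vector("var")
y = batch_normalization_test(x, gamma, beta, mu, var, "per-activation")
f_test = theano.function([x, gamma, beta, mu, var], y)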
@@ -610,14 +610,13 @@ class AbstractBatchNormInference(Op):
)
scale, bias, est_mean, est_var = (
theano.tensor.addbroadcast(t, *axes)
for t in (scale, bias, est_mean, est_var)
tt.addbroadcast(t, *axes) for t in (scale, bias, est_mean, est_var)
)
# define helper expressions
est_var_eps = est_var + epsilon
est_std = theano.tensor.sqrt(est_var_eps)
two = theano.tensor.constant(2.0)
est_std = tt.sqrt(est_var_eps)
two = tt.constant(2.0)
# define and return gradients
dx = dy * (scale / est_std)
@@ -673,7 +672,7 @@ class AbstractBatchNormTrainGrad(Op):
ddinputs, ddscale, ddbias = grads
x_diff = x - x_mean
mean_dy_x_diff = T.mean(dy * x_diff, axis=self.axes, keepdims=True)
mean_dy_x_diff = tt.mean(dy * x_diff, axis=self.axes, keepdims=True)
# compute gradients given each of the output gradients
g_wrt_x = 0
@@ -683,10 +682,10 @@ class AbstractBatchNormTrainGrad(Op):
g_wrt_x_invstd = 0
if not isinstance(ddinputs.type, theano.gradient.DisconnectedType):
ccc = scale * (ddinputs - T.mean(ddinputs, axis=self.axes, keepdims=True))
ccc = scale * (ddinputs - tt.mean(ddinputs, axis=self.axes, keepdims=True))
ddd = (x_invstd ** 3) * (
ccc * T.mean(dy * x_diff, axis=self.axes, keepdims=True)
+ dy * T.mean(ccc * x_diff, axis=self.axes, keepdims=True)
ccc * tt.mean(dy * x_diff, axis=self.axes, keepdims=True)
+ dy * tt.mean(ccc * x_diff, axis=self.axes, keepdims=True)
)
g_wrt_x = g_wrt_x - ddd
@@ -695,19 +694,19 @@ class AbstractBatchNormTrainGrad(Op):
- (
(x_invstd ** 3)
* x_diff
* T.mean(ccc * x_diff, axis=self.axes, keepdims=True)
* tt.mean(ccc * x_diff, axis=self.axes, keepdims=True)
)
)
eee = (dy * x_invstd) - ((x_invstd ** 3) * x_diff * mean_dy_x_diff)
g_wrt_scale = g_wrt_scale + T.sum(
ddinputs * (eee - T.mean(eee, axis=self.axes, keepdims=True)),
g_wrt_scale = g_wrt_scale + tt.sum(
ddinputs * (eee - tt.mean(eee, axis=self.axes, keepdims=True)),
axis=self.axes,
keepdims=True,
)
g_wrt_x_mean = g_wrt_x_mean + T.sum(ddd, axis=self.axes, keepdims=True)
g_wrt_x_invstd = g_wrt_x_invstd + T.sum(
g_wrt_x_mean = g_wrt_x_mean + tt.sum(ddd, axis=self.axes, keepdims=True)
g_wrt_x_invstd = g_wrt_x_invstd + tt.sum(
ccc * (dy - 3 * (x_invstd ** 2) * x_diff * mean_dy_x_diff),
axis=self.axes,
keepdims=True,
@@ -717,14 +716,14 @@ class AbstractBatchNormTrainGrad(Op):
g_wrt_x = g_wrt_x + (x_invstd * ddscale * dy)
g_wrt_dy = g_wrt_dy + (x_invstd * ddscale * x_diff)
g_wrt_x_mean = g_wrt_x_mean - (
x_invstd * ddscale * T.sum(dy, axis=self.axes, keepdims=True)
x_invstd * ddscale * tt.sum(dy, axis=self.axes, keepdims=True)
)
g_wrt_x_invstd = g_wrt_x_invstd + (
ddscale * T.sum(dy * x_diff, axis=self.axes, keepdims=True)
ddscale * tt.sum(dy * x_diff, axis=self.axes, keepdims=True)
)
if not isinstance(ddbias.type, theano.gradient.DisconnectedType):
g_wrt_dy = g_wrt_dy + T.fill(dy, ddbias)
g_wrt_dy = g_wrt_dy + tt.fill(dy, ddbias)
# depending on which output gradients are given,
# some inputs should be disconnected
@@ -804,7 +803,7 @@ def local_abstract_batch_norm_train(node):
# The epsilon should not upcast the dtype.
if var.dtype == "float32" and epsilon.dtype == "float64":
epsilon = epsilon.astype("float32")
invstd = T.inv(T.sqrt(var + epsilon))
invstd = tt.inv(tt.sqrt(var + epsilon))
out = (x - mean) * (scale * invstd) + bias
results = [out, mean, invstd]
@@ -816,7 +815,7 @@ def local_abstract_batch_norm_train(node):
)
results.append(running_mean)
if len(node.inputs) > 6:
m = T.cast(T.prod(x.shape) / T.prod(scale.shape), theano.config.floatX)
m = tt.cast(tt.prod(x.shape) / tt.prod(scale.shape), theano.config.floatX)
running_var = node.inputs[6]
running_var = (
running_var * (1.0 - running_average_factor)
@@ -825,7 +824,7 @@ def local_abstract_batch_norm_train(node):
results.append(running_var)
results = [
T.patternbroadcast(r, r_orig.broadcastable)
tt.patternbroadcast(r, r_orig.broadcastable)
for (r, r_orig) in zip(results, node.outputs)
]
@@ -855,16 +854,16 @@ def local_abstract_batch_norm_train_grad(node):
return None
x_diff = x - x_mean
mean_dy_x_diff = T.mean(dy * x_diff, axis=axes, keepdims=True)
mean_dy_x_diff = tt.mean(dy * x_diff, axis=axes, keepdims=True)
c = (dy * x_invstd) - x_diff * (mean_dy_x_diff * (x_invstd ** 3))
g_wrt_inputs = scale * (c - T.mean(c, axis=axes, keepdims=True))
g_wrt_scale = T.sum(dy * x_invstd * x_diff, axis=axes, keepdims=True)
g_wrt_bias = T.sum(dy, axis=axes, keepdims=True)
g_wrt_inputs = scale * (c - tt.mean(c, axis=axes, keepdims=True))
g_wrt_scale = tt.sum(dy * x_invstd * x_diff, axis=axes, keepdims=True)
g_wrt_bias = tt.sum(dy, axis=axes, keepdims=True)
results = [g_wrt_inputs, g_wrt_scale, g_wrt_bias]
results = [
T.patternbroadcast(r, r_orig.broadcastable)
tt.patternbroadcast(r, r_orig.broadcastable)
for (r, r_orig) in zip(results, node.outputs)
]
@@ -896,9 +895,9 @@ def local_abstract_batch_norm_inference(node):
epsilon = epsilon.astype("float32")
result = (x - estimated_mean) * (
scale / T.sqrt(estimated_variance + epsilon)
scale / tt.sqrt(estimated_variance + epsilon)
) + bias
result = T.patternbroadcast(result, node.outputs[0].broadcastable)
result = tt.patternbroadcast(result, node.outputs[0].broadcastable)
for var in theano.gof.graph.variables(node.inputs, [result]):
if var not in node.inputs:
theano/tensor/nnet/ctc.py
import os
import sys
import theano.tensor as T
from theano import config
from theano import gof
import theano.tensor as tt
from theano import config, gof
from theano.gof import local_optimizer
from theano.gof.cmodule import GCC_compiler
from theano.tensor.opt import register_canonicalize
from theano.tensor.extra_ops import cpu_contiguous
from theano.gradient import grad_undefined
from theano.tensor.extra_ops import cpu_contiguous
from theano.tensor.opt import register_canonicalize
def _ctc_find_lib():
@@ -156,12 +157,12 @@ class ConnectionistTemporalClassification(gof.COp, gof.OpenMPOp):
return ["ctc.h"] + gof.OpenMPOp.c_headers(self)
def make_node(self, activations, labels, input_lengths):
t_activations = T.as_tensor_variable(activations)
t_activations = tt.as_tensor_variable(activations)
# Ensure activations array is C-contiguous
t_activations = cpu_contiguous(t_activations)
t_labels = T.as_tensor_variable(labels)
t_input_lengths = T.as_tensor_variable(input_lengths)
t_labels = tt.as_tensor_variable(labels)
t_input_lengths = tt.as_tensor_variable(input_lengths)
if t_activations.type.dtype != "float32":
raise TypeError("activations must use the float32 type!")
@@ -181,10 +182,10 @@ class ConnectionistTemporalClassification(gof.COp, gof.OpenMPOp):
if t_input_lengths.ndim != 1:
raise ValueError("input_lengths must have 1 dimension.")
costs = T.fvector(name="ctc_cost")
costs = tt.fvector(name="ctc_cost")
outputs = [costs]
if self.compute_grad:
gradients = T.ftensor3(name="ctc_grad")
gradients = tt.ftensor3(name="ctc_grad")
outputs += [gradients]
return gof.Apply(
@@ -197,9 +198,9 @@ class ConnectionistTemporalClassification(gof.COp, gof.OpenMPOp):
assert gradients is not None
grad_op = output_grads[0]
total_grad = T.basic.batched_dot(
grad_op, gradients.dimshuffle(1, 0, 2)
).dimshuffle(1, 0, 2)
total_grad = tt.batched_dot(grad_op, gradients.dimshuffle(1, 0, 2)).dimshuffle(
1, 0, 2
)
return [
total_grad,
grad_undefined(self, 1, inputs[1]),
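A graph-construction sketch for this op (hedged: the module wraps Baidu's warp-ctc, so compiling anything below requires that library to be installed; shapes follow make_node above):

import theano
import theano.tensor as tt
from theano.tensor.nnet.ctc import ctc, ctc_available

# activations: float32 (time, batch, alphabet); labels: int32 (batch, max_len);
# input_lengths: int32 (batch,)
activations = tt.ftensor3("activations")
labels = tt.imatrix("labels")
input_lengths = tt.ivector("input_lengths")

if ctc_available():
    costs = ctc(activations, labels, input_lengths)  # one cost per batch element
    grad = theano.grad(costs.sum(), activations)     # uses the grad defined above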
theano/tensor/nnet/neighbours.py
@@ -2,16 +2,14 @@
TODO: implement Images2Neibs.infer_shape() methods
"""
import numpy as np
import theano
from theano import Op, Apply
import theano.tensor as tt
from theano import Apply, Op
from theano.gof import EnumList
import theano.tensor as T
from theano.gradient import grad_not_implemented
from theano.gradient import grad_undefined
from theano.gradient import grad_not_implemented, grad_undefined
class Images2Neibs(Op):
@@ -102,19 +100,19 @@ class Images2Neibs(Op):
pattern.
"""
ten4 = T.as_tensor_variable(ten4)
neib_shape = T.as_tensor_variable(neib_shape)
ten4 = tt.as_tensor_variable(ten4)
neib_shape = tt.as_tensor_variable(neib_shape)
if neib_step is None:
neib_step = neib_shape
else:
neib_step = T.as_tensor_variable(neib_step)
neib_step = tt.as_tensor_variable(neib_step)
assert ten4.ndim == 4
assert neib_shape.ndim == 1
assert neib_step.ndim == 1
return Apply(
self, [ten4, neib_shape, neib_step], [T.matrix(dtype=ten4.type.dtype)]
self, [ten4, neib_shape, neib_step], [tt.matrix(dtype=ten4.type.dtype)]
)
def grad(self, inp, grads):
@@ -165,14 +163,14 @@ class Images2Neibs(Op):
+ ((rows - nrows) // rstep + 1,)
+ ((cols - ncols) // cstep + 1,)
)
return T.inc_subtensor(result_indices, pgz.reshape(newshape))
return tt.inc_subtensor(result_indices, pgz.reshape(newshape))
indices = T.arange(neib_shape[0] * neib_shape[1])
indices = tt.arange(neib_shape[0] * neib_shape[1])
pgzs = gz.dimshuffle((1, 0))
result, _ = theano.scan(
fn=pos2map,
sequences=[indices, pgzs],
outputs_info=T.zeros(x.shape),
outputs_info=tt.zeros(x.shape),
non_sequences=[neib_shape, neib_step],
)
grad_input = result[-1]
@@ -354,8 +352,8 @@ class Images2Neibs(Op):
c, d = node.inputs[1]
step_x, step_y = node.inputs[2]
if self.mode == "wrap_centered":
grid_c = T.ceil_intdiv(in_shape[2], step_x)
grid_d = T.ceil_intdiv(in_shape[3], step_y)
grid_c = tt.ceil_intdiv(in_shape[2], step_x)
grid_d = tt.ceil_intdiv(in_shape[3], step_y)
elif self.mode == "valid":
grid_c = 1 + ((in_shape[2] - c) // step_x)
grid_d = 1 + ((in_shape[3] - d) // step_y)
@@ -795,11 +793,11 @@ def neibs2images(neibs, neib_shape, original_shape, mode="valid"):
.. note:: The code will output the initial image array.
"""
neibs = T.as_tensor_variable(neibs)
neib_shape = T.as_tensor_variable(neib_shape)
original_shape = T.as_tensor_variable(original_shape)
neibs = tt.as_tensor_variable(neibs)
neib_shape = tt.as_tensor_variable(neib_shape)
original_shape = tt.as_tensor_variable(original_shape)
new_neib_shape = T.stack([original_shape[-1] // neib_shape[1], neib_shape[1]])
new_neib_shape = tt.stack([original_shape[-1] // neib_shape[1], neib_shape[1]])
output_2d = images2neibs(
neibs.dimshuffle("x", "x", 0, 1), new_neib_shape, mode=mode
)
@@ -809,10 +807,10 @@ def neibs2images(neibs, neib_shape, original_shape, mode="valid"):
# the shape and still raise error when it don't have the right
# shape.
valid_shape = original_shape
valid_shape = T.set_subtensor(
valid_shape = tt.set_subtensor(
valid_shape[2], (valid_shape[2] // neib_shape[0]) * neib_shape[0]
)
valid_shape = T.set_subtensor(
valid_shape = tt.set_subtensor(
valid_shape[3], (valid_shape[3] // neib_shape[1]) * neib_shape[1]
)
output_4d = output_2d.reshape(valid_shape, ndim=4)
@@ -820,7 +818,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode="valid"):
for d in [2, 3]:
pad_shape = list(output_4d.shape)
pad_shape[d] = original_shape[d] - valid_shape[d]
output_4d = T.concatenate([output_4d, T.zeros(pad_shape)], axis=d)
output_4d = tt.concatenate([output_4d, tt.zeros(pad_shape)], axis=d)
elif mode == "valid":
# TODO: we do not implement all mode with this code.
# Add a check for the good cases.
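The canonical round trip for this pair, as a runnable sketch (one 4x4 single-channel image cut into non-overlapping 2x2 patches; float64 is pinned so the test data matches):

import numpy as np
import theano
import theano.tensor as tt
from theano.tensor.nnet.neighbours import images2neibs, neibs2images

images = tt.dtensor4("images")
patches = images2neibs(images, neib_shape=(2, 2))      # one 2x2 patch per row
rebuilt = neibs2images(patches, (2, 2), images.shape)  # inverse in "valid" mode
f = theano.function([images], rebuilt)

data = np.arange(16.0).reshape(1, 1, 4, 4)
assert np.allclose(f(data), data)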
theano/tensor/nnet/nnet.py
@@ -15,31 +15,37 @@ revisited later when all the intermediate part are on the GPU.
import logging
import warnings
import numpy as np
import numpy as np
import theano
from theano import gof
import theano.tensor.basic as tt
from theano import scalar
from theano.tensor import extra_ops, as_tensor_variable
from theano.gof.opt import copy_stack_trace
from theano.tensor import basic as tensor, subtensor, opt, elemwise
from theano.tensor.type import values_eq_approx_remove_inf, values_eq_approx_remove_nan
from theano.compile import optdb
from theano.gof import Apply
from theano.tensor.nnet.sigm import sigmoid, softplus
from theano.gradient import DisconnectedType
from theano.gradient import grad_not_implemented
from theano.gof.graph import Apply
from theano.gof.op import Op
from theano.tensor.opt import (
register_specialize,
register_stabilize,
register_canonicalize,
)
from theano.gof.opt import (
optimizer,
copy_stack_trace,
local_optimizer,
)
from theano.gradient import DisconnectedType, grad_not_implemented
from theano.scalar import UnaryScalarOp
from theano.tensor import as_tensor_variable, extra_ops, opt, subtensor
from theano.tensor.elemwise import Elemwise
from theano.tensor.subtensor import AdvancedSubtensor
from theano.tensor.basic import log, MaxAndArgmax
from theano.tensor.nnet.blocksparse import sparse_block_dot
############
#
# TENSOR OPS
#
from theano.tensor.nnet.sigm import sigmoid, softplus
from theano.tensor.type import values_eq_approx_remove_inf, values_eq_approx_remove_nan
class SoftmaxWithBias(gof.Op):
class SoftmaxWithBias(Op):
"""
An L{Op} for the output of neural-net multiclass classifiers.
@@ -58,11 +64,11 @@ class SoftmaxWithBias(gof.Op):
__props__ = ()
def make_node(self, x, b):
x = tensor.as_tensor_variable(x)
b = tensor.as_tensor_variable(b)
if x.type.ndim != 2 or x.type.dtype not in tensor.float_dtypes:
x = tt.as_tensor_variable(x)
b = tt.as_tensor_variable(b)
if x.type.ndim != 2 or x.type.dtype not in tt.float_dtypes:
raise ValueError("x must be 2-d tensor of floats")
if b.type.ndim != 1 or b.type.dtype not in tensor.float_dtypes:
if b.type.ndim != 1 or b.type.dtype not in tt.float_dtypes:
raise ValueError("b must be 1-d tensor of floats")
sm = x.type()
@@ -105,7 +111,7 @@ class SoftmaxWithBias(gof.Op):
return [DisconnectedType()(), DisconnectedType()()]
dx = softmax_grad(g_sm, outputs[0])
db = tensor.sum(dx, axis=0)
db = tt.sum(dx, axis=0)
return dx, db
def infer_shape(self, node, shape):
@@ -295,7 +301,7 @@ class SoftmaxWithBias(gof.Op):
softmax_with_bias = SoftmaxWithBias()
class SoftmaxGrad(gof.Op):
class SoftmaxGrad(Op):
"""
Gradient wrt x of the Softmax Op.
@@ -306,14 +312,14 @@ class SoftmaxGrad(gof.Op):
__props__ = ()
def make_node(self, dy, sm):
dy = tensor.as_tensor_variable(dy)
sm = tensor.as_tensor_variable(sm)
if dy.type.ndim not in (1, 2) or dy.type.dtype not in tensor.float_dtypes:
dy = tt.as_tensor_variable(dy)
sm = tt.as_tensor_variable(sm)
if dy.type.ndim not in (1, 2) or dy.type.dtype not in tt.float_dtypes:
raise ValueError("dy must be 1-d or 2-d tensor of floats. Got ", dy.type)
if dy.ndim == 1:
dy = tensor.shape_padleft(dy, n_ones=1)
dy = tt.shape_padleft(dy, n_ones=1)
if sm.ndim == 1:
sm = tensor.shape_padleft(sm, n_ones=1)
sm = tt.shape_padleft(sm, n_ones=1)
return Apply(self, [dy, sm], [sm.type()])
def perform(self, node, input_storage, output_storage):
@@ -329,10 +335,10 @@ class SoftmaxGrad(gof.Op):
dy, sm = inp
(g,) = grads
tmp = g + tensor.neg(tensor.sum(g * sm, axis=1).dimshuffle((0, "x")))
tmp = g + tt.neg(tt.sum(g * sm, axis=1).dimshuffle((0, "x")))
g_dy = tmp * sm
tmp2 = tensor.sum(dy * sm, axis=1).dimshuffle((0, "x"))
tmp2 = tt.sum(dy * sm, axis=1).dimshuffle((0, "x"))
g_sm = tmp * dy - g * tmp2
return g_dy, g_sm
@@ -416,7 +422,7 @@ class SoftmaxGrad(gof.Op):
softmax_grad = SoftmaxGrad()
class Softmax(gof.Op):
class Softmax(Op):
r"""
Softmax activation function
:math:`\\varphi(\\mathbf{x})_j =
@@ -431,8 +437,8 @@ class Softmax(gof.Op):
__props__ = ()
def make_node(self, x):
x = tensor.as_tensor_variable(x)
if x.type.ndim not in (1, 2) or x.type.dtype not in tensor.float_dtypes:
x = tt.as_tensor_variable(x)
if x.type.ndim not in (1, 2) or x.type.dtype not in tt.float_dtypes:
raise ValueError("x must be 1-d or 2-d tensor of floats. Got %s" % x.type)
if x.ndim == 1:
warnings.warn(
@@ -441,7 +447,7 @@ class Softmax(gof.Op):
"vector case is gonna be supported soon and the output will be a vector.",
stacklevel=4,
)
x = tensor.shape_padleft(x, n_ones=1)
x = tt.shape_padleft(x, n_ones=1)
return Apply(self, [x], [x.type()])
@@ -616,7 +622,7 @@ class Softmax(gof.Op):
softmax_op = Softmax()
class LogSoftmax(gof.Op):
class LogSoftmax(Op):
r"""
LogSoftmax activation function
:math:`\\varphi(\\mathbf{x})_j =
@@ -629,8 +635,8 @@ class LogSoftmax(gof.Op):
__props__ = ()
def make_node(self, x):
x = tensor.as_tensor_variable(x)
if x.type.ndim not in (1, 2) or x.type.dtype not in tensor.float_dtypes:
x = tt.as_tensor_variable(x)
if x.type.ndim not in (1, 2) or x.type.dtype not in tt.float_dtypes:
raise ValueError("x must be 1-d or 2-d tensor of floats. Got %s" % x.type)
if x.ndim == 1:
warnings.warn(
@@ -639,7 +645,7 @@ class LogSoftmax(gof.Op):
"vector case is gonna be supported soon and the output will be a vector.",
stacklevel=4,
)
x = tensor.shape_padleft(x, n_ones=1)
x = tt.shape_padleft(x, n_ones=1)
return Apply(self, [x], [x.type()])
@@ -652,7 +658,7 @@ class LogSoftmax(gof.Op):
def grad(self, inp, grads):
(x,) = inp
sm = softmax_op(x)
return [grads[0] - tensor.sum(grads[0], axis=1, keepdims=True) * sm]
return [grads[0] - tt.sum(grads[0], axis=1, keepdims=True) * sm]
def R_op(self, inputs, eval_points):
# I think the Jacobian is symmetric so the R_op
@@ -765,8 +771,8 @@ logsoftmax_op = LogSoftmax()
# This is not registered in stabilize, as it cause some crossentropy
# optimization to not be inserted.
@opt.register_specialize("stabilize", "fast_compile")
@gof.local_optimizer([tensor.Elemwise])
@register_specialize("stabilize", "fast_compile")
@local_optimizer([Elemwise])
def local_logsoftmax(node):
"""
Detect Log(Softmax(x)) and replace it with LogSoftmax(x)
@@ -774,7 +780,7 @@ def local_logsoftmax(node):
Note: only forward pass is affected
"""
if (
isinstance(node.op, tensor.Elemwise)
isinstance(node.op, Elemwise)
and isinstance(node.op.scalar_op, scalar.basic.Log)
and len(node.inputs) == 1
and node.inputs[0].owner is not None
@@ -790,8 +796,8 @@ def local_logsoftmax(node):
# This is not registered in stabilize, as it cause some crossentropy
# optimization to not be inserted.
@opt.register_specialize("stabilize", "fast_compile")
@gof.local_optimizer([SoftmaxGrad])
@register_specialize("stabilize", "fast_compile")
@local_optimizer([SoftmaxGrad])
def local_logsoftmax_grad(node):
"""
Detect Log(Softmax(x))'s grad and replace it with LogSoftmax(x)'s grad
@@ -802,7 +808,7 @@ def local_logsoftmax_grad(node):
isinstance(node.op, SoftmaxGrad)
and len(node.inputs) == 2
and node.inputs[0].owner is not None
and node.inputs[0].owner.op == tensor.true_div
and node.inputs[0].owner.op == tt.true_div
and len(node.inputs[0].owner.inputs) >= 2
and node.inputs[0].owner.inputs[1].owner is not None
and node.inputs[0].owner.inputs[1].owner.op == softmax_op
@@ -810,7 +816,7 @@ def local_logsoftmax_grad(node):
and not (
# skip if it will be optimized by
# local_advanced_indexing_crossentropy_onehot_grad
node.inputs[0].owner.op == tensor.true_div
node.inputs[0].owner.op == tt.true_div
and node.inputs[0].owner.inputs[0].owner is not None
and isinstance(
node.inputs[0].owner.inputs[0].owner.op, subtensor.AdvancedIncSubtensor
@@ -822,15 +828,15 @@ def local_logsoftmax_grad(node):
# sm_input = node.inputs[1].owner.inputs[0]
grads = node.inputs[0].owner.inputs[0]
if grads.broadcastable[1] and not sm.broadcastable[1]:
grads = tensor.alloc(grads, grads.shape[0], sm.shape[1])
ret = grads - tensor.sum(grads, axis=1, keepdims=True) * sm
grads = tt.alloc(grads, grads.shape[0], sm.shape[1])
ret = grads - tt.sum(grads, axis=1, keepdims=True) * sm
ret.tag.values_eq_approx = values_eq_approx_remove_nan
copy_stack_trace(node.outputs[0], ret)
return [ret]
def softmax_graph(c):
return tensor.exp(c) / tensor.exp(c).sum(axis=-1, keepdims=True)
return tt.exp(c) / tt.exp(c).sum(axis=-1, keepdims=True)
def softmax(c):
@@ -846,8 +852,8 @@ def logsoftmax(c):
return logsoftmax_op(c)
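A sketch of what the two optimizations below target: a graph written as tt.log(softmax(x)) should be rewritten into the dedicated, numerically stabler LogSoftmax op at compilation (the example itself is illustrative):

import theano
import theano.tensor as tt
from theano.tensor.nnet.nnet import logsoftmax, softmax

x = tt.matrix("x")
f = theano.function([x], tt.log(softmax(x)))  # candidate for local_logsoftmax
g = theano.function([x], logsoftmax(x))       # the stable op, used directly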
@opt.register_specialize("fast_compile_gpu")
@gof.local_optimizer([softmax_op])
@register_specialize("fast_compile_gpu")
@local_optimizer([softmax_op])
def local_softmax_with_bias(node):
"""
Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias).
@@ -855,25 +861,25 @@ def local_softmax_with_bias(node):
"""
if node.op == softmax_op:
(x,) = node.inputs
if x.owner and x.owner.op == tensor.add:
if x.owner and x.owner.op == tt.add:
vectors = []
non_vectors = []
for x_in in x.owner.inputs:
if list(x_in.type.broadcastable) == [True, False]:
# print isinstance(x_in.owner.op,
# tensor.DimShuffle) since specialization comes
# tt.DimShuffle) since specialization comes
# relatively late in optimization, we don't want to
# put in extra DimShuffles un-necessarily.
if (
x_in.owner
and isinstance(x_in.owner.op, tensor.DimShuffle)
and isinstance(x_in.owner.op, tt.DimShuffle)
and list(x_in.owner.inputs[0].type.broadcastable) == [False]
):
# cut out the DimShuffle that was broadcasting a vector
vectors.append(x_in.owner.inputs[0])
else:
# insert an extra DimShuffle to correct the old one
vectors.append(tensor.DimShuffle((True, False), (1,))(x_in))
vectors.append(tt.DimShuffle((True, False), (1,))(x_in))
else:
non_vectors.append(x_in)
@@ -882,19 +888,19 @@ def local_softmax_with_bias(node):
if len(non_vectors) == 0:
assert len(vectors) > 0 # we should have at least 1 input...
promoted_vector = vectors.pop()
non_vectors.append(tensor.shape_padleft(promoted_vector))
non_vectors.append(tt.shape_padleft(promoted_vector))
assert non_vectors # not empty
if vectors:
# we're in business...
if len(vectors) > 1:
vector_sum = tensor.add(*vectors)
vector_sum = tt.add(*vectors)
copy_stack_trace(x_in, vector_sum)
else:
vector_sum = vectors[0]
if len(non_vectors) > 1:
non_vector_sum = tensor.add(*non_vectors)
non_vector_sum = tt.add(*non_vectors)
copy_stack_trace(x_in, non_vector_sum)
else:
non_vector_sum = non_vectors[0]
@@ -921,7 +927,7 @@ def softmax_simplifier(numerators, denominators):
if numerator.ndim != 2:
continue
if numerator.owner and numerator.owner.op == tensor.exp:
if numerator.owner and numerator.owner.op == tt.exp:
x = numerator.owner.inputs[0]
else:
continue
@@ -929,13 +935,11 @@ def softmax_simplifier(numerators, denominators):
matching_denom = None
for denominator in denominators:
if denominator.owner and isinstance(
denominator.owner.op, tensor.DimShuffle
):
if denominator.owner and isinstance(denominator.owner.op, tt.DimShuffle):
if denominator.owner.op.new_order == (0, "x"):
z = denominator.owner.inputs[0]
# thing getting dimshuffled
if z.owner and isinstance(z.owner.op, tensor.Sum):
if z.owner and isinstance(z.owner.op, tt.Sum):
# print 'ASDF', denominator.owner.op.new_order
# print z.owner.op.axis
if z.owner.op.axis == (1,):
@@ -956,7 +960,7 @@ def softmax_simplifier(numerators, denominators):
opt.local_mul_canonizer.add_simplifier(softmax_simplifier, "softmax_simplifier")
class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
class CrossentropySoftmaxArgmax1HotWithBias(Op):
"""
A special compound L{Op} for the output of neural-net classifiers.
@@ -994,21 +998,21 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
__props__ = ()
def __init__(self, **kwargs):
gof.Op.__init__(self, **kwargs)
Op.__init__(self, **kwargs)
def make_node(self, x, b, y_idx):
x = tensor.as_tensor_variable(x)
b = tensor.as_tensor_variable(b)
y_idx = tensor.as_tensor_variable(y_idx)
if x.type.ndim != 2 or x.type.dtype not in tensor.float_dtypes:
x = tt.as_tensor_variable(x)
b = tt.as_tensor_variable(b)
y_idx = tt.as_tensor_variable(y_idx)
if x.type.ndim != 2 or x.type.dtype not in tt.float_dtypes:
raise ValueError("x must be 2-d tensor of floats", x.type)
if b.type.ndim != 1 or x.type.dtype not in tensor.float_dtypes:
if b.type.ndim != 1 or x.type.dtype not in tt.float_dtypes:
raise ValueError("b must be 1-d tensor of floats", b.type)
if y_idx.type.ndim != 1 or y_idx.type.dtype not in tensor.discrete_dtypes:
if y_idx.type.ndim != 1 or y_idx.type.dtype not in tt.discrete_dtypes:
raise ValueError("y_idx must be 1-d tensor of [u]ints", y_idx.type)
# TODO: Is this correct? It used to be y, not y_idx
nll = tensor.TensorType(x.type.dtype, y_idx.type.broadcastable).make_variable()
nll = tt.TensorType(x.type.dtype, y_idx.type.broadcastable).make_variable()
# nll = TensorType(x.dtype, y.broadcastable)
sm = x.type()
am = y_idx.type()
@@ -1092,7 +1096,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
if not isinstance(g_nll.type, DisconnectedType):
nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
dx = crossentropy_softmax_1hot_with_bias_dx(g_nll, sm, y_idx)
db = tensor.sum(dx, axis=[0])
db = tt.sum(dx, axis=[0])
dx_terms.append(dx)
db_terms.append(db)
@@ -1215,7 +1219,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
return code_template % dict(locals(), **sub)
class CrossentropySoftmax1HotWithBiasDx(gof.Op):
class CrossentropySoftmax1HotWithBiasDx(Op):
"""
Gradient wrt x of the CrossentropySoftmaxArgmax1HotWithBias Op.
@@ -1226,14 +1230,14 @@ class CrossentropySoftmax1HotWithBiasDx(gof.Op):
__props__ = ()
def make_node(self, dy, sm, y_idx, **kwargs):
dy = tensor.as_tensor_variable(dy)
sm = tensor.as_tensor_variable(sm)
y_idx = tensor.as_tensor_variable(y_idx)
if dy.type.ndim > 1 or dy.type.dtype not in tensor.float_dtypes:
dy = tt.as_tensor_variable(dy)
sm = tt.as_tensor_variable(sm)
y_idx = tt.as_tensor_variable(y_idx)
if dy.type.ndim > 1 or dy.type.dtype not in tt.float_dtypes:
raise ValueError("dy must be {0,1}-d tensor of floats", dy.type)
if sm.type.ndim != 2 or sm.type.dtype not in tensor.float_dtypes:
if sm.type.ndim != 2 or sm.type.dtype not in tt.float_dtypes:
raise ValueError("sm must be 2-d tensor of floats", sm.type)
if y_idx.type.ndim != 1 or y_idx.type.dtype not in tensor.discrete_dtypes:
if y_idx.type.ndim != 1 or y_idx.type.dtype not in tt.discrete_dtypes:
raise ValueError("y_idx must be 1-d tensor of [u]ints", y_idx.type)
return Apply(self, [dy, sm, y_idx], [sm.type()])
@@ -1261,12 +1265,10 @@ class CrossentropySoftmax1HotWithBiasDx(gof.Op):
# advanced indexing is not working yet. When it works, do it to avoid
# potentially misleading behavior in gradient computations! (although
# typically we should not need the gradient w.r.t. dy).
y_idx_range = tensor.arange(y_idx.shape[0])
g_dy = tensor.sum(
y_idx_range = tt.arange(y_idx.shape[0])
g_dy = tt.sum(
g_dx
* subtensor.AdvancedIncSubtensor()(
sm, tensor.fill(dy, -1), y_idx_range, y_idx
),
* subtensor.AdvancedIncSubtensor()(sm, tt.fill(dy, -1), y_idx_range, y_idx),
axis=1,
)
g_sm = dy.dimshuffle(0, "x") * g_dx
@@ -1394,7 +1396,7 @@ def crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs):
def crossentropy_softmax_1hot(x, y_idx, **kwargs):
b = tensor.zeros_like(x[0, :])
b = tt.zeros_like(x[0, :])
return crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs)
@@ -1415,16 +1417,16 @@ def crossentropy_softmax_max_and_argmax_1hot_with_bias(x, b, y_idx, **kwargs):
"""
(xent, softmax) = crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs)
(max_pr, argmax) = tensor.max_and_argmax(softmax, axis=-1)
(max_pr, argmax) = tt.max_and_argmax(softmax, axis=-1)
return (xent, softmax, max_pr, argmax)
def crossentropy_softmax_max_and_argmax_1hot(x, y_idx, **kwargs):
b = tensor.zeros_like(x[0, :])
b = tt.zeros_like(x[0, :])
return crossentropy_softmax_max_and_argmax_1hot_with_bias(x, b, y_idx, **kwargs)
class CrossentropyCategorical1HotGrad(gof.Op):
class CrossentropyCategorical1HotGrad(Op):
__props__ = ()
@@ -1446,7 +1448,7 @@ class CrossentropyCategorical1HotGrad(gof.Op):
crossentropy_categorical_1hot_grad = CrossentropyCategorical1HotGrad()
class CrossentropyCategorical1Hot(gof.Op):
class CrossentropyCategorical1Hot(Op):
r"""
Compute the cross entropy between a coding distribution and
a true distribution of the form [0, 0, ... 0, 1, 0, ..., 0].
@@ -1477,20 +1479,20 @@ class CrossentropyCategorical1Hot(gof.Op):
dvector
"""
_coding_dist = tensor.as_tensor_variable(coding_dist)
_true_one_of_n = tensor.as_tensor_variable(true_one_of_n)
_coding_dist = tt.as_tensor_variable(coding_dist)
_true_one_of_n = tt.as_tensor_variable(true_one_of_n)
if _coding_dist.type.ndim != 2:
raise TypeError("matrix required for argument: coding_dist")
if _true_one_of_n.type not in (tensor.lvector, tensor.ivector):
if _true_one_of_n.type not in (tt.lvector, tt.ivector):
raise TypeError(
"integer vector required for argument: true_one_of_n"
"(got type: %s instead of: %s)" % (_true_one_of_n.type, tensor.lvector)
"(got type: %s instead of: %s)" % (_true_one_of_n.type, tt.lvector)
)
return Apply(
self,
[_coding_dist, _true_one_of_n],
[tensor.Tensor(dtype=_coding_dist.dtype, broadcastable=[False])()],
[tt.Tensor(dtype=_coding_dist.dtype, broadcastable=[False])()],
)
def perform(self, node, inp, out):
@@ -1516,9 +1518,9 @@ class CrossentropyCategorical1Hot(gof.Op):
crossentropy_categorical_1hot = CrossentropyCategorical1Hot()
@opt.register_stabilize("fast_compile_gpu")
@opt.register_specialize("fast_compile_gpu")
@gof.optimizer
@register_stabilize("fast_compile_gpu")
@register_specialize("fast_compile_gpu")
@optimizer
def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
"""
This is a stabilization optimization.
@@ -1555,7 +1557,7 @@ def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
return
@gof.optimizer
@optimizer
def crossentropy_to_crossentropy_with_softmax(fgraph):
"""
This is a stabilization optimization that is more general than
@@ -1585,7 +1587,7 @@ def crossentropy_to_crossentropy_with_softmax(fgraph):
new_sm,
new_am,
) = crossentropy_softmax_argmax_1hot_with_bias(
x, tensor.zeros_like(x[0]), one_of_n
x, tt.zeros_like(x[0]), one_of_n
)
fgraph.replace_all_validate(
[(nll, new_nll), (sm, new_sm)],
@@ -1622,10 +1624,10 @@ optdb.register(
)
@opt.register_specialize(
@register_specialize(
"fast_compile_gpu", "local_crossentropy_to_crossentropy_with_softmax_grad"
) # old name
@gof.local_optimizer([softmax_grad])
@local_optimizer([softmax_grad])
def local_softmax_grad_to_crossentropy_with_softmax_grad(node):
if node.op == softmax_grad:
g_coding_dist, coding_dist = node.inputs
@@ -1641,20 +1643,20 @@ def local_softmax_grad_to_crossentropy_with_softmax_grad(node):
return [dx]
@opt.register_specialize("fast_compile_gpu")
@gof.local_optimizer([tensor.MaxAndArgmax])
@register_specialize("fast_compile_gpu")
@local_optimizer([MaxAndArgmax])
def local_argmax_pushdown(node):
if (
isinstance(node.op, tensor.MaxAndArgmax)
isinstance(node.op, MaxAndArgmax)
and node.inputs[0].owner
and len(node.outputs[0].clients) > 0
and node.inputs[0].owner.op
in (
softmax_op,
softplus,
tensor.exp,
tensor.log,
tensor.tanh,
tt.exp,
log,
tt.tanh,
sigmoid,
softmax_with_bias,
)
@@ -1671,7 +1673,7 @@ def local_argmax_pushdown(node):
)
if (
isinstance(node.op, tensor.MaxAndArgmax)
isinstance(node.op, MaxAndArgmax)
and node.inputs[0].owner
and len(node.outputs[0].clients) == 0
):
@@ -1682,19 +1684,19 @@ def local_argmax_pushdown(node):
if x.owner and x.owner.op in (
softmax_op,
softplus,
tensor.exp,
tensor.log,
tensor.tanh,
tt.exp,
log,
tt.tanh,
sigmoid,
):
(pre_x,) = x.owner.inputs
ret = tensor.max_and_argmax(pre_x, axis)
ret = tt.max_and_argmax(pre_x, axis)
copy_stack_trace(x_max, ret)
return ret
if x.owner and x.owner.op == softmax_with_bias:
pre_x, pre_bias = x.owner.inputs
ret = tensor.max_and_argmax(
pre_x + tensor.DimShuffle(pre_bias.broadcastable, ("x", 0))(pre_bias),
ret = tt.max_and_argmax(
pre_x + tt.DimShuffle(pre_bias.broadcastable, ("x", 0))(pre_bias),
axis,
)
# copy both stack traces
@@ -1706,11 +1708,11 @@ def local_argmax_pushdown(node):
def _check_rows_is_arange_len_labels(rows, labels):
"""
Check that 'rows' is the same node as T.arange(labels.shape[0]).
"""Check that `rows` is the same node as `tt.arange(labels.shape[0])`.
Also considers the case where labels.shape[0] is constant and equal
to 1, and T.arange(labels.shape[0]) has been constant-folded into 0.
Also considers the case where `labels.shape[0]` is constant and equal to 1,
and `tt.arange(labels.shape[0])` has been constant-folded into
0.
"""
@@ -1724,7 +1726,7 @@ def _check_rows_is_arange_len_labels(rows, labels):
if len(shape_of[labels]) == 1 and _is_const(shape_of[labels][0], 1):
return _is_const(rows, 0)
if rows.owner and isinstance(rows.owner.op, tensor.ARange):
if rows.owner and isinstance(rows.owner.op, tt.ARange):
start, stop, step = rows.owner.inputs
if getattr(start, "data", None) != 0: # constants will have data
return False
@@ -1741,7 +1743,7 @@ def _check_rows_is_arange_len_labels(rows, labels):
shape_subtensor.inputs, allow_partial=True
) == [0]:
shape_var = shape_subtensor.inputs[0]
if shape_var.owner and shape_var.owner.op == tensor.shape:
if shape_var.owner and shape_var.owner.op == tt.shape:
return shape_var.owner.inputs[0] is labels
else:
shape_of = stop.owner.fgraph.shape_feature.shape_of
@@ -1751,7 +1753,7 @@ def _check_rows_is_arange_len_labels(rows, labels):
def _is_const(z, val, approx=False):
try:
maybe = opt.get_scalar_constant_value(z)
except tensor.NotScalarConstantError:
except tt.NotScalarConstantError:
return False
if approx:
return np.allclose(maybe, val)
@@ -1759,24 +1761,24 @@ def _is_const(z, val, approx=False):
return np.all(maybe == val)
@opt.register_specialize("fast_compile_gpu")
@gof.local_optimizer([subtensor.AdvancedSubtensor, tensor.log])
@register_specialize("fast_compile_gpu")
@local_optimizer([AdvancedSubtensor, log])
def local_advanced_indexing_crossentropy_onehot(node):
log = None
log_op = None
sm = None
# First case: log(softmax(x))[rows, labels]
if isinstance(node.op, subtensor.AdvancedSubtensor):
if isinstance(node.op, AdvancedSubtensor):
try:
log, rows, labels = node.inputs
log_op, rows, labels = node.inputs
except Exception:
pass
if log and log.owner and log.owner.op == tensor.log:
sm = log.owner.inputs[0]
if log_op and log_op.owner and log_op.owner.op == log:
sm = log_op.owner.inputs[0]
# Second case: log(softmax(x)[rows, labels])
elif node.op == tensor.log:
elif node.op == log:
pre_log = node.inputs[0].owner
if pre_log and isinstance(pre_log.op, subtensor.AdvancedSubtensor):
if pre_log and isinstance(pre_log.op, AdvancedSubtensor):
try:
sm, rows, labels = pre_log.inputs
except Exception:
@@ -1789,7 +1791,7 @@ def local_advanced_indexing_crossentropy_onehot(node):
x_var, b_var = sm_w_bias[0].owner.inputs
else:
x_var = sm.owner.inputs[0]
b_var = tensor.zeros_like(x_var[0])
b_var = tt.zeros_like(x_var[0])
# Check that rows == arange(labels.shape[0])
if _check_rows_is_arange_len_labels(rows, labels):
@@ -1802,8 +1804,8 @@ def local_advanced_indexing_crossentropy_onehot(node):
return [ret]
@opt.register_specialize("fast_compile_gpu")
@gof.local_optimizer([softmax_grad])
@register_specialize("fast_compile_gpu")
@local_optimizer([softmax_grad])
def local_advanced_indexing_crossentropy_onehot_grad(node):
if not (node.op == softmax_grad):
return
@@ -1880,11 +1882,11 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
# If there's a 'minus' sign before the whole expression, put it in
# out_grad and iterate
if incr.owner and incr.owner.op == tensor.neg:
if incr.owner and incr.owner.op == tt.neg:
out_grad = -out_grad
incr = incr.owner.inputs[0]
if incr.owner and incr.owner.op == tensor.true_div:
if incr.owner and incr.owner.op == tt.true_div:
num, denom = incr.owner.inputs
# set out_grad according to the numerator, it may be divided later
@@ -1897,24 +1899,22 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
if not denom.owner:
return
if isinstance(denom.owner.op, subtensor.AdvancedSubtensor):
if isinstance(denom.owner.op, AdvancedSubtensor):
# Base case
adv_subtensor = denom
# out_grad /= 1.
elif denom.owner.op == tensor.mul:
elif denom.owner.op == tt.mul:
# Try to find the AdvancedSubtensor node mentionned above,
# and the output gradient
for i, input in enumerate(denom.owner.inputs):
if input.owner and isinstance(
input.owner.op, subtensor.AdvancedSubtensor
):
if input.owner and isinstance(input.owner.op, AdvancedSubtensor):
other_inputs = [
in_ for (j, in_) in enumerate(denom.owner.inputs) if j != i
]
if len(other_inputs) == 1:
rest = other_inputs[0]
else:
rest = tensor.mul(*[other_inputs])
rest = tt.mul(*[other_inputs])
# Check that rest is a vector or a scalar
if rest.ndim == 1 or np.all(rest.broadcastable):
@@ -1925,7 +1925,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
return
# The output gradient needs to be a vector
out_grad = tensor.fill(x_var[:, 0], out_grad)
out_grad = tt.fill(x_var[:, 0], out_grad)
if adv_subtensor is not None:
try:
@@ -1950,7 +1950,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
# it was really case 1.
# Second case
elif d_sm.owner and d_sm.owner.op == tensor.true_div:
elif d_sm.owner and d_sm.owner.op == tt.true_div:
# we're looking for
# AdvIncSubtensor(zeros, grad_nll, arange(len(y)), y) / softmax
try:
@@ -1979,7 +1979,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
# if the graph is valid, they have the same shape, so we
# also know that z has the right shape.
if incr.ndim != 1 or incr.dtype not in tensor.float_dtypes:
if incr.ndim != 1 or incr.dtype not in tt.float_dtypes:
return
# here we know that we are incrementing some part of
@@ -2018,8 +2018,8 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
return
@opt.register_specialize("fast_compile_gpu")
@gof.local_optimizer([softmax_with_bias])
@register_specialize("fast_compile_gpu")
@local_optimizer([softmax_with_bias])
def graph_merge_softmax_with_crossentropy_softmax(node):
if node.op == softmax_with_bias:
x, b = node.inputs
@@ -2033,10 +2033,10 @@ def graph_merge_softmax_with_crossentropy_softmax(node):
return [mergeable_client[1]]
@opt.register_specialize
@opt.register_stabilize
@opt.register_canonicalize
@gof.local_optimizer([CrossentropySoftmax1HotWithBiasDx])
@register_specialize
@register_stabilize
@register_canonicalize
@local_optimizer([CrossentropySoftmax1HotWithBiasDx])
def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(node):
"""
Replace a CrossentropySoftmax1HotWithBiasDx op, whose incoming gradient is
@@ -2057,7 +2057,7 @@ def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(node):
assert dy.ndim == 1
if dy.owner is not None and isinstance(dy.owner.op, tensor.Alloc):
if dy.owner is not None and isinstance(dy.owner.op, tt.Alloc):
# dz is the input of the Alloc op, i.e. T.alloc(dz, <shape>)
dz = dy.owner.inputs[0]
@@ -2087,9 +2087,7 @@ def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(node):
# If `dz` is broadcastable, we need to check whether the shapes
# of `dy` and `sm` are the same or whether the shape of `dy` is
# equal to 1.
cond = tensor.or_(
tensor.eq(dy.shape[0], 1), tensor.eq(dy.shape[0], sm.shape[0])
)
cond = tt.or_(tt.eq(dy.shape[0], 1), tt.eq(dy.shape[0], sm.shape[0]))
msg = "`sm` and `dy` do not have the same shape."
dz = opt.Assert(msg)(dz, cond)
@@ -2115,7 +2113,7 @@ def binary_crossentropy(output, target):
TODO : Rewrite as a scalar, and then broadcast to tensor.
"""
return -(target * tensor.log(output) + (1.0 - target) * tensor.log(1.0 - output))
return -(target * log(output) + (1.0 - target) * log(1.0 - output))
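A usage sketch (names illustrative): the loss is elementwise, so it is typically applied to sigmoid outputs and reduced afterwards:

import theano
import theano.tensor as tt
from theano.tensor.nnet import binary_crossentropy, sigmoid

z = tt.matrix("z")  # pre-activations
t = tt.matrix("t")  # targets in [0, 1]
loss = binary_crossentropy(sigmoid(z), t).mean()
f = theano.function([z, t], [loss, theano.grad(loss, z)])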
def sigmoid_binary_crossentropy(output, target):
@@ -2193,16 +2191,14 @@ def categorical_crossentropy(coding_dist, true_dist):
"""
if true_dist.ndim == coding_dist.ndim:
return -tensor.sum(
true_dist * tensor.log(coding_dist), axis=coding_dist.ndim - 1
)
return -tt.sum(true_dist * log(coding_dist), axis=coding_dist.ndim - 1)
elif true_dist.ndim == coding_dist.ndim - 1:
return crossentropy_categorical_1hot(coding_dist, true_dist)
else:
raise TypeError("rank mismatch between coding and true distributions")
class Prepend_scalar_constant_to_each_row(gof.Op):
class Prepend_scalar_constant_to_each_row(Op):
__props__ = ()
@@ -2216,10 +2212,10 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
def make_node(self, mat):
# check type of input
x = tensor.as_tensor_variable(mat)
x = tt.as_tensor_variable(mat)
if not mat.type.broadcastable == (False, False):
raise TypeError("Expected a matrix as input")
y = tensor.as_tensor_variable(self.val)
y = tt.as_tensor_variable(self.val)
assert y.ndim == 0
if x.type.dtype != y.type.dtype:
TypeError("the value to prepend don't have the same type as the matrix")
@@ -2255,18 +2251,18 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
return goutput[:, 1:]
class Prepend_scalar_to_each_row(gof.Op):
class Prepend_scalar_to_each_row(Op):
__props__ = ()
def make_node(self, val, mat):
# check type of input
x = tensor.as_tensor_variable(mat)
x = tt.as_tensor_variable(mat)
if isinstance(val, float):
val = scalar.constant(val)
if not mat.type.broadcastable == (False, False):
raise TypeError("Expected a matrix as input")
y = tensor.as_tensor_variable(val)
y = tt.as_tensor_variable(val)
assert y.ndim == 0
if x.type.dtype != y.type.dtype:
TypeError("the value to prepend don't have the same type as the matrix")
@@ -2345,7 +2341,7 @@ def relu(x, alpha=0):
# We can't use 0.5 and 1 for one and half. as if alpha is a
# numpy dtype, they will be considered as float64, so would
# cause upcast to float64.
alpha = tensor.as_tensor_variable(alpha)
alpha = tt.as_tensor_variable(alpha)
f1 = 0.5 * (1 + alpha)
f2 = 0.5 * (1 - alpha)
return f1 * x + f2 * abs(x)
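The two coefficients implement max(x, alpha * x) as a single blend; a quick NumPy check of that identity (values are illustrative):

import numpy as np

x = np.array([-2.0, -0.5, 0.0, 3.0])
alpha = 0.1
blend = 0.5 * (1 + alpha) * x + 0.5 * (1 - alpha) * np.abs(x)
assert np.allclose(blend, np.maximum(x, alpha * x))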
@@ -2446,7 +2442,7 @@ def h_softmax(
>>> import numpy as np
>>> import theano
>>> from theano import tensor
>>> import theano.tensor as tt
>>> from theano.tensor.nnet import h_softmax
>>>
>>> # Parameters
@@ -2472,15 +2468,15 @@ def h_softmax(
>>> # We can now build the graph to compute a loss function, typically the
>>> # negative log-likelihood:
>>>
>>> x = tensor.imatrix('x')
>>> target = tensor.imatrix('target')
>>> x = tt.imatrix('x')
>>> target = tt.imatrix('target')
>>>
>>> # This only computes the output corresponding to the target.
>>> # The complexity is O(n_classes + n_outputs_per_class).
>>> y_hat_tg = h_softmax(x, batch_size, output_size, n_classes,
... n_outputs_per_class, W1, b1, W2, b2, target)
>>>
>>> negll = -tensor.mean(tensor.log(y_hat_tg))
>>> negll = -tt.mean(tt.log(y_hat_tg))
>>>
>>> # We may need to compute all the outputs (at test time usually):
>>>
@@ -2497,15 +2493,13 @@ def h_softmax(
"""
# First softmax that computes the probabilities of belonging to each class
class_probs = theano.tensor.nnet.softmax(tensor.dot(x, W1) + b1)
class_probs = softmax(tt.dot(x, W1) + b1)
if target is None: # Computes the probabilites of all the outputs
# Second softmax that computes the output probabilities
activations = tensor.tensordot(x, W2, (1, 1)) + b2
output_probs = theano.tensor.nnet.softmax(
activations.reshape((-1, n_outputs_per_class))
)
activations = tt.tensordot(x, W2, (1, 1)) + b2
output_probs = softmax(activations.reshape((-1, n_outputs_per_class)))
output_probs = output_probs.reshape((batch_size, n_classes, -1))
output_probs = class_probs.dimshuffle(0, 1, "x") * output_probs
output_probs = output_probs.reshape((batch_size, -1))
@@ -2528,14 +2522,14 @@ def h_softmax(
activations = sparse_block_dot(
W2.dimshuffle("x", 0, 1, 2),
x.dimshuffle(0, "x", 1),
tensor.zeros((batch_size, 1), dtype="int32"),
tt.zeros((batch_size, 1), dtype="int32"),
b2,
target_classes.dimshuffle(0, "x"),
)
output_probs = theano.tensor.nnet.softmax(activations.dimshuffle(0, 2))
target_class_probs = class_probs[tensor.arange(batch_size), target_classes]
output_probs = output_probs[tensor.arange(batch_size), target_outputs_in_class]
output_probs = softmax(activations.dimshuffle(0, 2))
target_class_probs = class_probs[tt.arange(batch_size), target_classes]
output_probs = output_probs[tt.arange(batch_size), target_outputs_in_class]
output_probs = target_class_probs * output_probs
return output_probs
@@ -2565,7 +2559,7 @@ def elu(x, alpha=1):
"Fast and Accurate Deep Network Learning by
Exponential Linear Units (ELUs)" <http://arxiv.org/abs/1511.07289>`.
"""
return tensor.switch(x > 0, x, alpha * tensor.expm1(x))
return tt.switch(x > 0, x, alpha * tt.expm1(x))
def selu(x):
@@ -2593,7 +2587,7 @@ def selu(x):
return scale * elu(x, alpha)
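A compile sketch for the two activations (selu just fixes alpha and scale to its self-normalizing constants):

import theano
import theano.tensor as tt
from theano.tensor.nnet.nnet import elu, selu

x = tt.matrix("x")
f = theano.function([x], [elu(x), selu(x)])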
class ScalarSoftsign(theano.scalar.UnaryScalarOp):
class ScalarSoftsign(UnaryScalarOp):
"""
Softsign activation function
:math:`\\varphi(\\mathbf{x}) = \\frac{1}{1+|x|}`
@@ -2625,7 +2619,7 @@ class ScalarSoftsign(theano.scalar.UnaryScalarOp):
scalar_softsign = ScalarSoftsign(theano.scalar.upgrade_to_float, name="scalar_softsign")
softsign = elemwise.Elemwise(scalar_softsign, name="softsign")
softsign = Elemwise(scalar_softsign, name="softsign")
def confusion_matrix(actual, pred):
@@ -2652,10 +2646,11 @@ def confusion_matrix(actual, pred):
Examples
--------
>>> import theano
>>> import theano.tensor as tt
>>> from theano.tensor.nnet import confusion_matrix
>>> x = theano.tensor.vector()
>>> y = theano.tensor.vector()
>>> x = tt.vector()
>>> y = tt.vector()
>>> f = theano.function([x, y], confusion_matrix(x, y))
>>> y_true = [2, 0, 2, 2, 0, 1]
>>> y_pred = [0, 0, 2, 2, 0, 2]
@@ -2669,13 +2664,13 @@ def confusion_matrix(actual, pred):
if pred.ndim != 1:
raise ValueError("pred must be 1-d tensor variable")
order = extra_ops.Unique(False, False, False)(tensor.concatenate([actual, pred]))
order = extra_ops.Unique(False, False, False)(tt.concatenate([actual, pred]))
colA = actual.dimshuffle(0, "x")
colP = pred.dimshuffle(0, "x")
oneHotA = tensor.eq(colA, order).astype("int64")
oneHotP = tensor.eq(colP, order).astype("int64")
oneHotA = tt.eq(colA, order).astype("int64")
oneHotP = tt.eq(colP, order).astype("int64")
conf_mat = tensor.dot(oneHotA.T, oneHotP)
conf_mat = tt.dot(oneHotA.T, oneHotP)
return [conf_mat, order]
One further file's diff is not shown here because it was too large to display.