提交 03e77233 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #3095 from harlouci/flake8_v4

flake8 for tensor/nnet/nnet.py
from __future__ import print_function
import numpy as N
from six.moves import xrange
import theano
from theano.tensor import basic as T
import numpy as N
#from util import strutil
# from util import strutil
from theano.tensor.blas_headers import blas_header_text, blas_header_version
from theano.tensor.blas import ldflags
from theano.misc import strutil
......@@ -72,25 +74,27 @@ class Conv3D(theano.Op):
def grad(self, inputs, output_gradients):
V, W, b, d = inputs
dCdH , = output_gradients
dCdH, = output_gradients
# make all of these ops support broadcasting of scalar b to vector b and replace the zeros_like in all their grads
# print dCdH.broadcastable
# print "dCdH.broadcastable"
# quit(-1)
#dCdH = printing.Print("dCdH = ",["shape"])
# dCdH = printing.Print("dCdH = ",["shape"])
# Make sure the broadcasting pattern of the gradient is the same
# as the initial variable
dCdV = ConvTransp3D.convTransp3D(W, T.zeros_like(V[0, 0, 0, 0, :]), d, dCdH, V.shape[1:4])
dCdV = theano.tensor.nnet.convTransp3D(
W, T.zeros_like(V[0, 0, 0, 0, :]), d, dCdH, V.shape[1:4])
dCdV = T.patternbroadcast(dCdV, V.broadcastable)
WShape = W.shape
dCdW = ConvGrad3D.convGrad3D(V, d, WShape, dCdH)
dCdW = theano.tensor.nnet.convGrad3D(V, d, WShape, dCdH)
dCdW = T.patternbroadcast(dCdW, W.broadcastable)
dCdb = T.sum(dCdH, axis=(0, 1, 2, 3))
dCdb = T.patternbroadcast(dCdb, b.broadcastable)
dCdd = grad_undefined(self, 3, inputs[3],
"The gradient of Conv3D with respect to the convolution" +\
" stride is undefined because Conv3D is only defined for" +\
dCdd = grad_undefined(
self, 3, inputs[3],
"The gradient of Conv3D with respect to the convolution"
" stride is undefined because Conv3D is only defined for"
" integer strides.")
if 'name' in dir(dCdH) and dCdH.name is not None:
......@@ -113,11 +117,13 @@ class Conv3D(theano.Op):
else:
b_name = 'anon_b'
dCdV.name = 'Conv3D_dCdV(dCdH='+dCdH_name+',V='+V_name+')'
dCdW.name = 'Conv3D_dCdW(dCdH='+dCdH_name+',V='+V_name+',W='+W_name+')'
dCdb.name = 'Conv3D_dCdb(dCdH='+dCdH_name+',V='+V_name+',W='+W_name+',b='+b_name+')'
dCdV.name = 'Conv3D_dCdV(dCdH=' + dCdH_name + ',V=' + V_name + ')'
dCdW.name = ('Conv3D_dCdW(dCdH=' + dCdH_name + ',V=' + V_name +
',W=' + W_name + ')')
dCdb.name = ('Conv3D_dCdb(dCdH=' + dCdH_name + ',V=' + V_name +
',W=' + W_name + ',b=' + b_name + ')')
return [ dCdV, dCdW, dCdb, dCdd ]
return [dCdV, dCdW, dCdb, dCdd]
def perform(self, node, inputs, output_storage):
V, W, b, d = inputs
......@@ -144,7 +150,7 @@ class Conv3D(theano.Op):
output_width = T.floor((vidWidth - filterWidth) // dc) + 1
output_dur = T.floor((vidDur - filterDur) // dt) + 1
rval = (batch_size, output_height, output_width, output_dur, output_channels )
rval = (batch_size, output_height, output_width, output_dur, output_channels)
return [rval]
......@@ -326,7 +332,7 @@ class Conv3D(theano.Op):
elif VV.dtype == 'float32':
gemv = 'sgemv_'
else:
raise Exception('Unrecognized dtype for convolution '+V.value.dtype)
raise Exception('Unrecognized dtype for convolution ' + V.value.dtype)
codeSource += """
if (inputChannels > 20 && outputChannels > 20 && ws4 == sizeof(ELEM_AT(%(W)s,0)))
......@@ -571,7 +577,7 @@ def computeH(V, W, b, d):
outputChannels = W.shape[0]
inputChannels = V.shape[4]
if W.shape[4] != inputChannels:
raise Exception("W.shape[4] = "+str(W.shape[4])+" but inputChannels = "+str(inputChannels))
raise Exception("W.shape[4] = " + str(W.shape[4]) + " but inputChannels = " + str(inputChannels))
filterHeight = W.shape[1]
filterWidth = W.shape[2]
filterDur = W.shape[3]
......@@ -586,12 +592,12 @@ def computeH(V, W, b, d):
assert dy > 0
assert dt > 0
outputHeight = int( (vidHeight - filterHeight) / dx )+1
outputWidth = int( (vidWidth - filterWidth) / dy )+1
outputDur = int( (vidDur - filterDur) / dt ) + 1
outputHeight = int((vidHeight - filterHeight) / dx) + 1
outputWidth = int((vidWidth - filterWidth) / dy) + 1
outputDur = int((vidDur - filterDur) / dt) + 1
H = N.zeros( (batchSize, outputHeight,
outputWidth, outputDur, outputChannels ), dtype=V.dtype )
H = N.zeros((batchSize, outputHeight,
outputWidth, outputDur, outputChannels), dtype=V.dtype)
# H[i,j,x,y,t] = b_j + sum_k sum_l sum_m sum_z W[j,z,k,l,m] V[i,z, dx*x+k,dy*y+l,dt*t+m]
for i in xrange(0, H.shape[0]):
......@@ -610,12 +616,8 @@ def computeH(V, W, b, d):
# if (i,j,x,y,t) == (0,0,0,0,0):
# print (( W[j,z,k,l,m] , V[i,z,d[0]*x+k,d[1]*y+l,d[2]*t+m] ), (k,l,m) )
w = W[j, k, l, m, z]
v = V[i, d[0]*x+k, d[1]*y+l, d[2]*t+m, z]
v = V[i, d[0] * x + k, d[1] * y + l, d[2] * t + m, z]
# if i == 0 and x == 0 and y == 0 and t == 0 and j == 0:
# print 'setting H[0] += '+str(w*v)+' W['+str((j,z,k,l,m))+']='+str(w)+' V['+str((i,d[0]*x+k,d[1]*y+l,d[2]*t+m,z))+']='+str(v)
H[i, x, y, t, j] += w * v
return H
from . import ConvGrad3D
from . import ConvTransp3D
from six.moves import xrange
import numpy as N
import theano
from theano.tensor import basic as T
from theano.misc import strutil
import numpy as N
from six.moves import xrange
from theano.gradient import grad_undefined
from theano.gradient import DisconnectedType
......@@ -23,11 +25,15 @@ class ConvGrad3D(theano.Op):
WShape_ = T.as_tensor_variable(WShape)
dCdH_ = T.as_tensor_variable(dCdH)
return theano.Apply(self, inputs=[V_, d_, WShape_, dCdH_], outputs=[ T.TensorType(V_.dtype, (False, False, False, False, False))() ] )
return theano.Apply(self,
inputs=[V_, d_, WShape_, dCdH_],
outputs=[T.TensorType(
V_.dtype,
(False, False, False, False, False))()])
def infer_shape(self, node, input_shapes):
V, d, W_shape, dCdH = node.inputs
return [ ( W_shape[0], W_shape[1], W_shape[2], W_shape[3], W_shape[4] ) ]
return [(W_shape[0], W_shape[1], W_shape[2], W_shape[3], W_shape[4])]
def connection_pattern(self, node):
......@@ -38,12 +44,12 @@ class ConvGrad3D(theano.Op):
dLdA, = output_gradients
z = T.zeros_like(C[0, 0, 0, 0, :])
dLdC = convTransp3D(dLdA, z, d, B, C.shape[1:4])
dLdC = theano.tensor.nnet.convTransp3D(dLdA, z, d, B, C.shape[1:4])
# d actually does affect the outputs, so it's not disconnected
dLdd = grad_undefined(self, 1, d)
# The shape of the weights doesn't affect the output elements
dLdWShape = DisconnectedType()()
dLdB = conv3D(C, dLdA, T.zeros_like(B[0, 0, 0, 0, :]), d)
dLdB = theano.tensor.nnet.conv3D(C, dLdA, T.zeros_like(B[0, 0, 0, 0, :]), d)
return [dLdC, dLdd, dLdWShape, dLdB]
......@@ -54,15 +60,10 @@ class ConvGrad3D(theano.Op):
# partial C / partial W[j,z,k,l,m] = sum_i sum_p sum_q sum_r (partial C /partial H[i,j,p,q,r] ) * V[i,z,dr*p+k,dc*q+l,dt*r+m]
batchSize = dCdH.shape[0]
outputFilters = dCdH.shape[4]
outputHeight = dCdH.shape[1]
outputWidth = dCdH.shape[2]
outputDur = dCdH.shape[3]
assert V.shape[0] == batchSize
inputFilters = V.shape[4]
inputHeight = V.shape[1]
inputWidth = V.shape[2]
inputDur = V.shape[3]
dr, dc, dt = d
dCdW = N.zeros(WShape, dtype=V.dtype)
......@@ -78,7 +79,10 @@ class ConvGrad3D(theano.Op):
for r in xrange(0, outputDur):
for j in xrange(0, WShape[0]):
for z in xrange(0, WShape[4]):
dCdW[j, k, l, m, z] += dCdH[i, p, q, r, j] * V[i, dr*p+k, dc*q+l, dt*r+m, z]
dCdW[j, k, l, m, z] += (
dCdH[i, p, q, r, j] *
V[i, dr * p + k, dc * q + l,
dt * r + m, z])
output_storage[0][0] = dCdW
......@@ -272,6 +276,3 @@ class ConvGrad3D(theano.Op):
convGrad3D = ConvGrad3D()
from theano.tensor.nnet.Conv3D import conv3D
from theano.tensor.nnet.ConvTransp3D import convTransp3D
from __future__ import print_function
import numpy as N
from six.moves import xrange
import theano
from theano.tensor import basic as T
from theano.misc import strutil
import theano
from theano.gradient import grad_undefined
from theano.gradient import DisconnectedType
......@@ -31,7 +33,10 @@ class ConvTransp3D(theano.Op):
else:
RShape_ = T.as_tensor_variable([-1, -1, -1])
return theano.Apply(self, inputs=[W_, b_, d_, H_, RShape_], outputs=[ T.TensorType(H_.dtype, (False, False, False, False, False))() ] )
return theano.Apply(self,
inputs=[W_, b_, d_, H_, RShape_],
outputs=[T.TensorType(H_.dtype,
(False, False, False, False, False))()])
def infer_shape(self, node, input_shapes):
W, b, d, H, RShape = node.inputs
......@@ -44,9 +49,9 @@ class ConvTransp3D(theano.Op):
def grad(self, inputs, output_gradients):
W, b, d, H, RShape = inputs
dCdR, = output_gradients
dCdH = conv3D(dCdR, W, T.zeros_like(H[0, 0, 0, 0, :]), d)
dCdH = theano.tensor.nnet.conv3D(dCdR, W, T.zeros_like(H[0, 0, 0, 0, :]), d)
WShape = W.shape
dCdW = convGrad3D(dCdR, d, WShape, H)
dCdW = theano.tensor.nnet.convGrad3D(dCdR, d, WShape, H)
dCdb = T.sum(dCdR, axis=(0, 1, 2, 3))
# not differentiable, since d affects the output elements
dCdd = grad_undefined(self, 2, d)
......@@ -73,8 +78,10 @@ class ConvTransp3D(theano.Op):
else:
b_name = 'anon_b'
dCdW.name = 'ConvTransp3D_dCdW.H='+H_name+',dCdR='+dCdR_name+',W='+W_name
dCdb.name = 'ConvTransp3D_dCdb.H='+H_name+',dCdR='+dCdR_name+',W='+W_name+',b='+b_name
dCdW.name = ('ConvTransp3D_dCdW.H=' + H_name + ',dCdR=' + dCdR_name +
',W=' + W_name)
dCdb.name = ('ConvTransp3D_dCdb.H=' + H_name + ',dCdR=' + dCdR_name +
',W=' + W_name + ',b=' + b_name)
dCdH.name = 'ConvTransp3D_dCdH.H=' + H_name + ',dCdR=' + dCdR_name
return [dCdW, dCdb, dCdd, dCdH, dCdRShape]
......@@ -404,8 +411,8 @@ def computeR(W, b, d, H, Rshape=None):
if tk < 0:
break
R[
i, r, c, t, j] += N.dot(W[:, rk, ck, tk, j], H[i, rc, cc, tc, :] )
R[i, r, c, t, j] += N.dot(
W[:, rk, ck, tk, j], H[i, rc, cc, tc, :])
tc += 1
"" # close loop over tc
......@@ -421,7 +428,3 @@ def computeR(W, b, d, H, Rshape=None):
"" # close loop over i
return R
from theano.tensor.nnet.Conv3D import conv3D
from theano.tensor.nnet.ConvGrad3D import convGrad3D
from __future__ import print_function
"""
Contains an Op for convolving input images with a set of filters. This was
developed especially for Convolutional Neural Networks.
......@@ -9,7 +8,7 @@ tensor.signal and tensor.signal.downsample.
See especially conv2d().
"""
__docformat__ = "restructuredtext en"
from __future__ import print_function
import logging
......@@ -17,12 +16,11 @@ import numpy
from six.moves import xrange
import theano
from theano import OpenMPOp
from theano.tensor import (as_tensor_variable, blas, get_scalar_constant_value,
patternbroadcast, NotScalarConstantError)
from theano import OpenMPOp, config
from theano.gof import Apply
imported_scipy_signal = False
try:
# TODO: move these back out to global scope when they no longer
# cause an atexit error
......@@ -30,8 +28,9 @@ try:
from scipy.signal.sigtools import _convolve2d
imported_scipy_signal = True
except ImportError:
pass
imported_scipy_signal = False
__docformat__ = "restructuredtext en"
_logger = logging.getLogger("theano.tensor.nnet.conv")
......@@ -103,7 +102,7 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
try:
image_shape[i] = get_scalar_constant_value(
as_tensor_variable(image_shape[i]))
except NotScalarConstantError as e:
except NotScalarConstantError:
raise NotScalarConstantError(
"The convolution need that the shape"
" information are constant values. We got"
......@@ -118,7 +117,7 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
try:
filter_shape[i] = get_scalar_constant_value(
as_tensor_variable(filter_shape[i]))
except NotScalarConstantError as e:
except NotScalarConstantError:
raise NotScalarConstantError(
"The convolution need that the shape"
" information are constant values. We got"
......@@ -509,7 +508,7 @@ class ConvOp(OpenMPOp):
self.out_mode = output_mode
if not self.out_mode in ["valid", "full"]:
if self.out_mode not in ["valid", "full"]:
raise Exception("Mode %s not implemented" % self.out_mode)
if any((shp is not None) and (shp <= 0) for shp in self.outshp):
......@@ -522,7 +521,6 @@ class ConvOp(OpenMPOp):
if (self.unroll_kern is None and
self.unroll_batch is None and
self.unroll_patch is None):
# no version specified. Find the faster we have
if self.bsize is None and self.nkern is None:
self.unroll_patch = True
......@@ -613,7 +611,6 @@ class ConvOp(OpenMPOp):
inputs - 4 dim: batches x stacksize x rows x cols
kerns - 4 dim: nkern x stackidx x rows x cols
"""
outdim = kerns.ndim
_inputs = as_tensor_variable(inputs)
_kerns = as_tensor_variable(kerns)
# TODO: lift this restriction by upcasting either inputs or kerns
......@@ -778,7 +775,7 @@ class ConvOp(OpenMPOp):
img2d2[:, :, kshp[0] - 1:kshp[0] - 1 + imshp[1],
kshp[1] - 1:kshp[1] - 1 + imshp[2]] = img2d
img2d = img2d2
#N_image_shape = image_data.shape
# N_image_shape = image_data.shape
for b in xrange(bsize):
for n in xrange(nkern):
......@@ -786,7 +783,9 @@ class ConvOp(OpenMPOp):
for im0 in xrange(stacklen):
for row in xrange(0, zz.shape[2], self.dx):
for col in xrange(0, zz.shape[3], self.dy):
zz[b, n, row, col] += (img2d[b, im0, row:row + kshp[0], col:col + kshp[1]] *
zz[b, n, row, col] += (
img2d[b, im0, row:row + kshp[0],
col:col + kshp[1]] *
filtersflipped[n, im0, ::-1, ::-1]).sum()
# We copy it to remove the Stride mismatch warning from DEBUG_MODE.
......@@ -843,8 +842,8 @@ class ConvOp(OpenMPOp):
# mimic what happens inside theano.grad: get the input gradient
# of the final cost wrt all variables involved.
return theano.gradient.grad(cost=None,
known_grads={node: gz}, wrt=[inputs, kerns])
return theano.gradient.grad(cost=None, known_grads={node: gz},
wrt=[inputs, kerns])
if self.dx not in (1, 2) or self.dy not in (1, 2):
raise NotImplementedError(
......@@ -858,7 +857,7 @@ class ConvOp(OpenMPOp):
raise Exception("ConvOp.grad when dx!=1 or dy!=1 we must have all "
"the optional shape information")
####### Determine gradient on kernels ########
# Determine gradient on kernels ########
assert inputs.ndim == 4 and kerns.ndim == 4
newin = inputs.dimshuffle((1, 0, 2, 3))
......@@ -943,7 +942,7 @@ class ConvOp(OpenMPOp):
dw = dw.dimshuffle((1, 0, 2, 3))
dw = dw[:, :, ::-1, ::-1]
####### Determine gradient on inputs ########
# Determine gradient on inputs ########
mode = 'valid'
if not self.out_mode == 'full':
mode = 'full'
......@@ -1015,7 +1014,6 @@ using namespace std;
self.unroll_patch or
self.unroll_batch > 0 or
self.unroll_kern > 0):
return False
return True
return False
......@@ -1030,7 +1028,6 @@ using namespace std;
# compilation with -O3. This don't happen at -O2
if (theano.gof.cmodule.gcc_version() in ['4.3.0'] and
self.kshp == (1, 1)):
return ['-O3']
else:
return []
......
......@@ -246,7 +246,7 @@ def conv3d(signals, filters,
# now sum out along the Tf to get the output
# but we have to sum on a diagonal through the Tf and Ts submatrix.
if border_mode[0] == 'valid':
if _filters_shape_5d[1]!=1:
if _filters_shape_5d[1] != 1:
out_5d = diagonal_subtensor(out_tmp, 1, 3).sum(axis=3)
else: # for Tf==1, no sum along Tf, the Ts-axis of the output is unchanged!
out_5d = out_tmp.reshape((
......
......@@ -2,15 +2,15 @@
TODO: implement Images2Neibs.infer_shape() methods
"""
from six.moves import xrange
import numpy
import theano
from theano import Op, Apply
import theano.tensor as T
from theano.gradient import grad_not_implemented
from theano.gradient import grad_undefined
import numpy
class Images2Neibs(Op):
......@@ -543,7 +543,6 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>` for
gradient computation.
Example, which uses a tensor gained in example for
:func:`images2neibs <theano.sandbox.neigbours.neibs2images>`:
......
......@@ -15,6 +15,7 @@ from six.moves import xrange
import theano
from theano import gof
from theano import scalar
from theano.tensor import basic as tensor
from theano.tensor import subtensor
from theano.tensor import elemwise
......@@ -27,12 +28,12 @@ from theano.gradient import DisconnectedType
from theano.gradient import grad_not_implemented
from theano.tensor.type import values_eq_approx_remove_nan
############
#
# TENSOR OPS
#
class SoftmaxWithBias(gof.Op):
"""
An L{Op} for the output of neural-net multiclass classifiers.
......@@ -300,11 +301,11 @@ class SoftmaxGrad(gof.Op):
dy, sm = inp
g, = grads
tmp = g + tensor.neg(tensor.sum(g*sm, axis=1).dimshuffle((0, 'x')))
tmp = g + tensor.neg(tensor.sum(g * sm, axis=1).dimshuffle((0, 'x')))
g_dy = tmp * sm
tmp2 = tensor.sum(dy*sm, axis=1).dimshuffle((0, 'x'))
g_sm = tmp*dy - g *tmp2
tmp2 = tensor.sum(dy * sm, axis=1).dimshuffle((0, 'x'))
g_sm = tmp * dy - g * tmp2
return g_dy, g_sm
......@@ -571,12 +572,15 @@ class Softmax(gof.Op):
softmax_op = Softmax()
def softmax_graph(c):
return tensor.exp(c) / tensor.exp(c).sum(axis=-1, keepdims=True)
def softmax(c):
return softmax_op(c)
@opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax_op])
def local_softmax_with_bias(node):
......@@ -593,9 +597,9 @@ def local_softmax_with_bias(node):
# tensor.DimShuffle) since specialization comes
# relatively late in optimization, we don't want to
# put in extra DimShuffles un-necessarily.
if (x_in.owner and isinstance(x_in.owner.op,
tensor.DimShuffle)
and list(x_in.owner.inputs[0].type.broadcastable) == [False]):
if (x_in.owner and
isinstance(x_in.owner.op, tensor.DimShuffle) and
list(x_in.owner.inputs[0].type.broadcastable) == [False]):
# cut out the DimShuffle that was broadcasting a vector
vectors.append(x_in.owner.inputs[0])
else:
......@@ -673,8 +677,7 @@ def softmax_simplifier(numerators, denominators):
numerators.append(softmax_op(x))
return numerators, denominators
opt.local_mul_canonizer.add_simplifier(softmax_simplifier,
'softmax_simplifier')
opt.local_mul_canonizer.add_simplifier(softmax_simplifier, 'softmax_simplifier')
if 0:
@opt.register_specialize
......@@ -836,7 +839,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
# TODO: Is this correct? It used to be y, not y_idx
nll = tensor.TensorType(x.type.dtype,
y_idx.type.broadcastable)()
y_idx.type.broadcastable).make_variable()
# nll = TensorType(x.dtype, y.broadcastable)
sm = x.type()
am = y_idx.type()
......@@ -866,15 +869,14 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
if any(y_idx < 0):
raise ValueError("y_i value out of bounds")
sm = numpy.zeros_like(x) # softmax
nll = numpy.zeros(x.shape[0], dtype=node.outputs[0].type.
dtype) # nll(y | softmax(x))
nll = numpy.zeros(x.shape[0], dtype=node.outputs[0].type.dtype) # nll(y | softmax(x))
am = numpy.zeros_like(y_idx)
for i in xrange(sm.shape[0]):
# add the bias vector to the i'th row of x
row = x[i] + b
# get the maximum value of i'th row for numerically safe
#softmax / nll
# softmax / nll
am[i] = numpy.argmax(row)
m = row[am[i]]
......@@ -1083,8 +1085,7 @@ class CrossentropySoftmax1HotWithBiasDx(gof.Op):
y_idx_range = tensor.arange(y_idx.shape[0])
g_dy = tensor.sum(
g_dx * subtensor.AdvancedIncSubtensor()(
sm, tensor.fill(dy, -1), y_idx_range, y_idx),
axis=1)
sm, tensor.fill(dy, -1), y_idx_range, y_idx), axis=1)
g_sm = dy.dimshuffle(0, 'x') * g_dx
g_y_idx = grad_not_implemented(self, 2, y_idx)
return [g_dy, g_sm, g_y_idx]
......@@ -1226,8 +1227,7 @@ def crossentropy_softmax_max_and_argmax_1hot_with_bias(x, b, y_idx, **kwargs):
unnecessary? e.g. CrossentropySoftmaxArgmax1HotWithBias should return
the appropriate information (i.e. the max probability)?
"""
(xent, softmax) = crossentropy_softmax_1hot_with_bias(x, b, y_idx,
**kwargs)
(xent, softmax) = crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs)
(max_pr, argmax) = tensor.max_and_argmax(softmax, axis=-1)
return (xent, softmax, max_pr, argmax)
......@@ -1251,8 +1251,8 @@ class CrossentropyCategorical1HotGrad(gof.Op):
g_coding_strg, = out
g_coding = numpy.zeros_like(coding_dist)
for i in xrange(len(g_y)):
g_coding[i, true_one_of_n[i]] = -g_y[i] / coding_dist[i,
true_one_of_n[i]]
g_coding[i, true_one_of_n[i]] = (-g_y[i] /
coding_dist[i, true_one_of_n[i]])
g_coding_strg[0] = g_coding
def infer_shape(self, node, in_shapes):
......@@ -1346,9 +1346,10 @@ def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
sm, one_of_n = node.inputs
if sm.owner and sm.owner.op == softmax_with_bias:
x, b = sm.owner.inputs
new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(x, b,
one_of_n)
fgraph.replace_all_validate([(nll, new_nll), (sm, new_sm)],
new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(
x, b, one_of_n)
fgraph.replace_all_validate(
[(nll, new_nll), (sm, new_sm)],
reason="crossentropy_to_crossentropy_with_softmax_with_bias")
return True
......@@ -1381,16 +1382,18 @@ def crossentropy_to_crossentropy_with_softmax(fgraph):
sm, one_of_n = node.inputs
if sm.owner and sm.owner.op == softmax_op:
x, = sm.owner.inputs
new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(x,
tensor.zeros_like(x[0]), one_of_n)
fgraph.replace_all_validate([(nll, new_nll), (sm, new_sm)],
new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(
x, tensor.zeros_like(x[0]), one_of_n)
fgraph.replace_all_validate(
[(nll, new_nll), (sm, new_sm)],
reason="crossentropy_to_crossentropy_with_softmax")
return True
if sm.owner and sm.owner.op == softmax_with_bias:
x, b = sm.owner.inputs
new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(x, b,
one_of_n)
fgraph.replace_all_validate([(nll, new_nll), (sm, new_sm)],
fgraph.replace_all_validate(
[(nll, new_nll), (sm, new_sm)],
reason="crossentropy_to_crossentropy_with_softmax")
return True
......@@ -1415,8 +1418,8 @@ def local_softmax_grad_to_crossentropy_with_softmax_grad(node):
if (g_coding_dist.owner and
g_coding_dist.owner.op == crossentropy_categorical_1hot_grad):
g_nll, coding_dist, true_one_of_n = g_coding_dist.owner.inputs
dx = crossentropy_softmax_1hot_with_bias_dx(g_nll,
coding_dist, true_one_of_n)
dx = crossentropy_softmax_1hot_with_bias_dx(g_nll, coding_dist,
true_one_of_n)
return [dx]
......@@ -1428,7 +1431,8 @@ def local_argmax_pushdown(node):
(softmax_op, softplus, tensor.exp, tensor.log, tensor.tanh, sigmoid,
softmax_with_bias):
if theano.config.warn.argmax_pushdown_bug:
logging.getLogger('theano.tensor.nnet.nnet').warn("WARNING: there "
logging.getLogger('theano.tensor.nnet.nnet').warn(
"WARNING: there "
"was a bug in Theano fixed on May 27th, 2010 in this case."
" I.E. when we take the max of a softplus, softmax, exp, "
"log, tanh, sigmoid, softmax_with_bias op, we were doing "
......@@ -1657,15 +1661,15 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
if isinstance(denom.owner.op, subtensor.AdvancedSubtensor):
# Base case
adv_subtensor = denom
#out_grad /= 1.
# out_grad /= 1.
elif denom.owner.op == tensor.mul:
# Try to find the AdvancedSubtensor node mentionned above,
# and the output gradient
for i, input in enumerate(denom.owner.inputs):
if input.owner and isinstance(input.owner.op,
subtensor.AdvancedSubtensor):
other_inputs = [in_ for (j,
in_) in enumerate(denom.owner.inputs) if j != i]
other_inputs = [in_ for (j, in_) in
enumerate(denom.owner.inputs) if j != i]
if len(other_inputs) == 1:
rest = other_inputs[0]
else:
......@@ -1894,16 +1898,14 @@ def categorical_crossentropy(coding_dist, true_dist):
"""
if true_dist.ndim == coding_dist.ndim:
return -tensor.sum(true_dist * tensor.log(coding_dist), axis=coding_dist.ndim-1)
return -tensor.sum(true_dist * tensor.log(coding_dist),
axis=coding_dist.ndim - 1)
elif true_dist.ndim == coding_dist.ndim - 1:
return crossentropy_categorical_1hot(coding_dist, true_dist)
else:
raise TypeError('rank mismatch between coding and true distributions')
from theano import scalar
class Prepend_scalar_constant_to_each_row(gof.Op):
__props__ = ()
......@@ -2026,7 +2028,7 @@ local_log_softmax = gof.PatternSub(in_pattern=(tensor.log, (softmax_op, 'x')),
# don't do register_stabilize, this is to make local_log_softmax run
# only after another more specific optimization that stabilizes cross entropy
#opt.register_stabilize(local_log_softmax, name = 'local_log_softmax')
# opt.register_stabilize(local_log_softmax, name = 'local_log_softmax')
opt.register_specialize(local_log_softmax, 'fast_compile_gpu', name='local_log_softmax')
......
......@@ -7,7 +7,6 @@ from __future__ import print_function
import warnings
import numpy
from six.moves import xrange
import theano
from theano import config, gof, printing, scalar
......@@ -129,9 +128,8 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
"""
This method was used to generate the graph: sigmoid_prec.png in the doc
"""
import matplotlib
data = numpy.arange(-15, 15, .1)
val = 1/(1+numpy.exp(-data))
val = 1 / (1 + numpy.exp(-data))
def hard_sigmoid(x):
return theano.tensor.nnet.hard_sigmoid(x)
......@@ -167,7 +165,7 @@ sigmoid_inplace = elemwise.Elemwise(
ScalarSigmoid(scalar.transfer_type(0)),
inplace_pattern={0: 0},
name='sigmoid_inplace',
)
)
pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid'))
......@@ -240,7 +238,7 @@ pprint.assign(ultra_fast_sigmoid,
printing.FunctionPrinter('ultra_fast_sigmoid'))
#@opt.register_uncanonicalize
# @opt.register_uncanonicalize
@gof.local_optimizer([sigmoid])
def local_ultra_fast_sigmoid(node):
"""
......@@ -290,7 +288,7 @@ def hard_sigmoid(x):
return x
#@opt.register_uncanonicalize
# @opt.register_uncanonicalize
@gof.local_optimizer([sigmoid])
def local_hard_sigmoid(node):
if (isinstance(node.op, tensor.Elemwise) and
......@@ -439,7 +437,8 @@ def is_1pexp(t):
return None
AddConfigVar('warn.identify_1pexp_bug',
AddConfigVar(
'warn.identify_1pexp_bug',
'Warn if Theano versions prior to 7987b51 (2011-12-18) could have '
'yielded a wrong result due to a bug in the is_1pexp function',
BoolParam(theano.configdefaults.warn_default('0.4.1')),
......@@ -892,7 +891,7 @@ def local_1msigmoid(node):
if sub_r.owner and sub_r.owner.op == sigmoid:
try:
val_l = opt.get_scalar_constant_value(sub_l)
except Exception as e:
except Exception:
return
if numpy.allclose(numpy.sum(val_l), 1):
return [sigmoid(-sub_r.owner.inputs[0])]
......@@ -921,7 +920,6 @@ if 0:
print(sigm_canonicalize(node))
def sigm_canonicalize(node):
add = tensor.add
mul = tensor.mul
div = tensor.true_div
......
......@@ -88,15 +88,7 @@ whitelist_flake8 = [
"tensor/signal/conv.py",
"tensor/signal/tests/test_conv.py",
"tensor/signal/tests/test_downsample.py",
"tensor/nnet/nnet.py",
"tensor/nnet/Conv3D.py",
"tensor/nnet/__init__.py",
"tensor/nnet/ConvTransp3D.py",
"tensor/nnet/sigm.py",
"tensor/nnet/ConvGrad3D.py",
"tensor/nnet/conv3d2d.py",
"tensor/nnet/conv.py",
"tensor/nnet/neighbours.py",
"tensor/nnet/tests/test_conv.py",
"tensor/nnet/tests/test_neighbours.py",
"tensor/nnet/tests/test_nnet.py",
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论