提交 1569a7a9 作者: David Warde-Farley

Bring theano.tensor into PEP 3113 compliance.

上级 ead4f23e
......@@ -1379,9 +1379,13 @@ class TensorFromScalar(Op):
[s],
[tensor(dtype = s.type.dtype,
broadcastable = ())])
def perform(self, node, (s, ), (out, )):
def perform(self, node, inp, out_):
    """Store the scalar input as a NumPy array in the output cell.

    ``inp`` must hold exactly one value and ``out_`` exactly one storage
    cell; slot 0 of that cell is overwritten with ``numpy.asarray(s)``.
    ``node`` is not used.
    """
    s, = inp
    out, = out_
    out[0] = numpy.asarray(s)
def grad(self, (s,), (dt,)):
def grad(self, inp, grads):
s, = inp
dt, = grads
return [scalar_from_tensor(dt)]
tensor_from_scalar = TensorFromScalar()
......@@ -1392,9 +1396,13 @@ class ScalarFromTensor(Op):
return Apply(self,
[t],
[scal.Scalar(dtype = t.type.dtype).make_variable()])
def perform(self, node, (s, ), (out, )):
def perform(self, node, inp, out_):
    """Write the first element of the flattened input to the output cell.

    ``inp`` must hold exactly one value ``s`` exposing ``flatten()``;
    ``out_`` must hold exactly one storage cell, whose slot 0 is
    overwritten with ``s.flatten()[0]``.  ``node`` is not used.
    """
    s, = inp
    out, = out_
    out[0] = s.flatten()[0]
def grad(self, (s,), (dt,)):
def grad(self, inp, grads):
s, = inp
dt, = grads
return [tensor_from_scalar(dt)]
def __str__(self):
return self.__class__.__name__
......@@ -1503,9 +1511,11 @@ class Shape(Op):
#the type to TensorVariable to have the optimization working
#correctly.
return Apply(self, [x], [lvector()])
def perform(self, node, (x, ), (out, )):
def perform(self, node, inp, out_):
x, = inp
out, = out_
out[0] = theano._asarray(x.shape, dtype = 'int64')
def grad(self, (x,), (gz,)):
def grad(self, inp, grads):
    """Return ``[None]``: no gradient is reported for the single input.

    ``inp`` and ``grads`` are accepted for interface compatibility but are
    not inspected.
    """
    return [None]
@constructor
def old_shape(a):
......@@ -1553,12 +1563,15 @@ class SpecifyShape(Op):
shape = as_tensor_variable(shape)
return Apply(self, [x, shape], [x.type()])
def perform(self, node, (x,shape ), (out, )):
def perform(self, node, inp, out_):
    """Check that ``x`` has the requested shape, then pass it through.

    ``inp`` holds ``(x, shape)`` and ``out_`` holds one storage cell.
    Raises ``AssertionError`` (carrying the actual and expected shapes)
    when ``x.shape`` and ``shape`` do not compare equal element-wise.
    On success the very same ``x`` object is stored in slot 0 of the
    cell; no copy is made.
    """
    x, shape = inp
    out, = out_
    assert numpy.all(x.shape == shape), ("got shape", x.shape,
                                         "expected", shape)
    out[0] = x
def infer_shape(self, node, (xshape, sshape)):
def infer_shape(self, node, shapes):
xshape, sshape = shapes
new_shape=[]
for dim in range(node.inputs[0].ndim):
try:
......@@ -1571,7 +1584,9 @@ class SpecifyShape(Op):
assert len(new_shape)==len(xshape)
return [new_shape]
def grad(self, (x, s), (gz,)):
def grad(self, inp, grads):
x, s = inp
gz, = grads
# Should I set an SpecifyShape on gz? I think so
# But I don't do it now as we need to make an optimization
# to remove that op from the graph to don't block other optimization
......@@ -1643,18 +1658,21 @@ class MaxAndArgmax(Op):
outputs = [tensor(x.type.dtype, broadcastable,name='max'),
tensor('int32', broadcastable,name='argmax')]
return Apply(self, inputs, outputs)
def perform(self, node, (x, axis), (max, max_idx)):
def perform(self, node, inp, outs):
x, axis = inp
max, max_idx = outs
max[0] = numpy.asarray(numpy.max(x, axis))
max_idx[0] = theano._asarray(numpy.argmax(x, axis), dtype='int32')
def infer_shape(self, node, (ishape,axis_shape)):
def infer_shape(self, node, shapes):
ishape, axis_shape = shapes
axis=node.inputs[1]
if axis is None:
return [(),()]
rval = tuple([ishape[i] for (i,b) in enumerate(node.inputs[0].type.broadcastable) if i !=axis.data])
return [rval,rval]
def grad(self, (x, axis), (g_max, g_max_idx)):
def grad(self, inp, grads):
# @warning: This only works if axis is 0, else the max is
# broadcasted wrong in the call to eq.
# @note: This function should work correctly for L{vector}s.
......@@ -1663,7 +1681,8 @@ class MaxAndArgmax(Op):
# gMax * dMax/dx + gArgMax * dArgMax/dx, gMax * dMax/daxis + gArgMax * dArgMax/daxis
# g_max has one less dimension than x, so you need to complete g_max to x's shape
# when axis=0 the broadcasting mechanism does it automatically
x, axis = inp
g_max, g_max_idx = grads
if not ( axis.data == 0 or axis.data == x.ndim-1):
raise NotImplementedError('MaxAndArgmax gradient with axis corresponding to internal dimension')
if axis.data==0:
......@@ -2089,10 +2108,12 @@ class Eye(gof.Op):
k = as_tensor_variable(k)
return gof.Apply(self, [n,m,k], [TensorType(dtype = self.dtype, broadcastable = (False,False))()])
def perform(self, node, (n,m,k), (out,)):
def perform(self, node, inp, out_):
    """Build the matrix ``numpy.eye(n, m, k)`` with dtype ``self.dtype``.

    ``inp`` holds ``(n, m, k)``, forwarded positionally to ``numpy.eye``
    (row count, column count, diagonal index).  ``out_`` holds one
    storage cell whose slot 0 receives the new array.
    """
    n, m, k = inp
    out, = out_
    out[0] = numpy.eye(n, m, k, dtype=self.dtype)
def grad(self, (n,m,k),(gout,)):
def grad(self, inp, grads):
    """Return ``[None, None, None]``: one ``None`` per input (n, m, k).

    Neither ``inp`` nor ``grads`` is inspected.
    """
    return [None, None, None]
def __eq__(self,other):
......@@ -2127,7 +2148,9 @@ if 0:
dims = as_tensor_variable(dims)
return gof.Apply(self, [dims], [self.type()])
def perform(self, node, (dims,), (out,)):
def perform(self, node, inp, out_):
dims, = inp
out, = out_
if out[0] is not None:
out[0].resize(dims, refcheck = 0)
out[0].fill(self.value)
......@@ -2139,7 +2162,7 @@ if 0:
else:
out[0] = numpy.ones(dims, dtype = self.dtype) * self.value
def grad(self, (dims,), (gout,)):
def grad(self, inp, grads):
return None,
def __eq__(self, other):
......@@ -2212,7 +2235,8 @@ class Alloc(gof.Op):
otype = TensorType(dtype=v.dtype, broadcastable=bcast)
return gof.Apply(self, [v]+sh, [otype()])
def perform(self, node, inputs, (out,)):
def perform(self, node, inputs, out_):
out, = out_
v = inputs[0]
sh = tuple([int(i) for i in inputs[1:]])
if out[0] is None or out[0].shape != sh:
......@@ -2228,7 +2252,7 @@ class Alloc(gof.Op):
def infer_shape(self, node, input_shapes):
return [node.inputs[1:]]
def grad(self, inputs, (gout,)):
def grad(self, inputs, grads):
    """Return a list holding one ``None`` per element of ``inputs``.

    ``grads`` is accepted for interface compatibility but is not used.
    """
    return [None for _ in inputs]
def __call__(self, val, *shapes):
......@@ -2286,7 +2310,9 @@ class Mean(elemwise.CAReduce):
# we want to protect against overflow
return 'float64'
def perform(self, node, (input, ), (output, )):
def perform(self, node, inp, out):
    """Store ``numpy.mean`` of the input, reduced over ``self.axis``.

    ``inp`` holds exactly one array and ``out`` exactly one storage cell;
    slot 0 of the cell receives the result.
    """
    # Local names avoid shadowing the ``input`` builtin.
    x, = inp
    storage, = out
    storage[0] = numpy.mean(x, axis=self.axis)
def c_code(self, node, name, inames, onames, sub):
......@@ -2407,10 +2433,14 @@ if 0:
# broadcastable = [False if i==axis else x for i, x in enumerate(input.broadcastable)])
return gof.Apply(self, [inputs, repeats, axis], [type()])
def perform(self, node, (input, repeats, axis), (out, )):
def perform(self, node, inp, out_):
input, repeats, axis = inp
out, = out_
out[0] = numpy.repeat(input, repeats, axis)
def grad(self, (input, repeats, axis), (gout, )):
def grad(self, inp, grads):
input, repeats, axis = inp
gout, = grads
return add.grad((input, gout), (gout,))[:1]
repeat = Repeat()
......@@ -2428,7 +2458,9 @@ class Default(gof.Op):
if x.type != default.type:
raise TypeError('Both default() arguments must have same type', x, default)
return gof.Apply(self, [x, default], [default.type()])
def perform(self, node, (x, default), (out, )):
def perform(self, node, inp, out_):
x, default = inp
out, = out_
if x is None:
# why copy? Theano can't yet understand out[0] being a view of either x or y,
# so we can be a view of x, but only a copy of y.
......@@ -2655,7 +2687,8 @@ class Subtensor(Op):
[tensor(dtype = x.type.dtype,
broadcastable = broadcastable)])
def perform(self, node, inputs, (out, )):
def perform(self, node, inputs, out_):
out, = out_
x = inputs[0]
# The subtensor (or idx_list) does not depend on the inputs.
......@@ -2717,7 +2750,8 @@ class Subtensor(Op):
assert len(outshp) == node.outputs[0].ndim
return [outshp]
def grad(self, inputs, (gz,)):
def grad(self, inputs, grads):
gz, = grads
x = inputs[0]
rest = inputs[1:]
return [IncSubtensor(self.idx_list)(zeros_like(x), gz, *rest)] + [None] * len(rest)
......@@ -2935,7 +2969,8 @@ class IncSubtensor(Op):
(x, y) + inputs,
[x.type()])
def perform(self, node, inputs, (out, )):
def perform(self, node, inputs, out_):
out, = out_
x, y = inputs[:2]
indices = list(reversed(inputs[2:]))
......@@ -2973,7 +3008,8 @@ class IncSubtensor(Op):
def infer_shape(self, node, shapes):
return [shapes[0]]
def grad(self, inputs, (g_output,)):
def grad(self, inputs, grads):
g_output, = grads
x, y = inputs[:2]
idx_list = inputs[2:]
......@@ -3052,8 +3088,9 @@ class Split(Op):
return Apply(self, inputs, outputs)
def perform(self, node, (x, axis, splits), outputs):
def perform(self, node, inputs, outputs):
"""WRITEME"""
x, axis, splits = inputs
#in python 2.4, x.shape[numpy.asarray(1)] don't work.
if sys.version_info[0:2]==(2, 4) and axis.size==1:
axis=int(axis)
......@@ -3084,8 +3121,9 @@ class Split(Op):
outputs[i][0] = x.__getitem__(general_key).copy()
lower_idx = upper_idx
def grad(self, (x, axis, splits), g_outputs):
def grad(self, inputs, g_outputs):
"""Join the gradients along the axis that was used to split x."""
_, axis, _ = inputs
return [join(axis, *g_outputs), None, None]
......@@ -3124,12 +3162,16 @@ class Rebroadcast(Op):
broadcastable = [self.axis.get(i, b)
for i, b in enumerate(x.type.broadcastable)])
return Apply(self, [x], [t()])
def perform(self, node, (x, ), (out, )):
def perform(self, node, inp, out_):
    """Validate the flagged dimensions of ``x`` and pass ``x`` through.

    ``self.axis`` maps an axis index to a flag.  For every axis whose flag
    is true, ``x.shape[axis]`` must equal 1; otherwise ``ValueError`` is
    raised.  On success the same ``x`` object is stored in slot 0 of the
    single storage cell in ``out_``.
    """
    x, = inp
    out, = out_
    # ``items()`` behaves the same as ``iteritems()`` here and also
    # exists on Python 3 dictionaries.
    for axis, value in self.axis.items():
        if value and x.shape[axis] != 1:
            raise ValueError('Dimension %s in Rebroadcast\'s input was supposed to be 1 (got %s instead)' % (axis, x.shape[axis]))
    out[0] = x
def grad(self, (x, ), (gz,)):
def grad(self, inp, grads):
x, = inp
gz, = grads
# restore the broadcasting pattern of the input
return Rebroadcast(*[(axis, x.type.broadcastable[axis]) for axis, value in self.axis.iteritems()])(gz),
......@@ -3272,15 +3314,17 @@ class Join(Op):
node.tag.shape_zero = len(orig)
return node
def perform(self, node, axis_and_tensors, (out, )):
def perform(self, node, axis_and_tensors, out_):
out, = out_
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
out[0] = theano._asarray(numpy.concatenate(tensors, axis = axis),
dtype=node.outputs[0].type.dtype)
def grad(self, axis_and_tensors, (gz,)):
def grad(self, axis_and_tensors, grads):
""" The gradient wrt a join op is a `Split`, used to partition the gradient along the
`axis` which was used for joining.
"""
gz, = grads
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
if 'float' in tensors[0].dtype or 'complex' in tensors[0].dtype:
# assume that this is differentiable
......@@ -3291,8 +3335,9 @@ class Join(Op):
# assume that this isn't differentiable
return [None] * (1 + len(tensors))
def _native_grad(self, axis_and_tensors, (gz,)):
def _native_grad(self, axis_and_tensors, grads):
"""WRITEME"""
gz, = grads
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
sizes_along_axis = [shape(x)[axis] for x in tensors]
n_dims = len(shape(tensors[0]))
......@@ -3484,12 +3529,14 @@ if 0: #vertical and horizontal stacking are deprecated. Better to use stack() a
outputs = [tensor(dtype = x.type.dtype,
broadcastable = bcastable)]
return Apply(self, inputs, outputs)
def perform(self, node, (x, y), (out, )):
def perform(self, node, inp, out_):
x, y = inp
out, = out_
assert x.ndim == y.ndim
# Make sure every dimension (save the first) is the same
for i in range(x.ndim): assert i == 0 or x.shape[i] == y.shape[i]
out[0] = numpy.vstack([x, y])
def grad(self, (x, y), (gz,)):
def grad(self, inp, grads):
"""
@todo: Make VSplit (or this grad implementation) its own L{Op},
that way we can do more sanity-checking::
......@@ -3498,6 +3545,8 @@ if 0: #vertical and horizontal stacking are deprecated. Better to use stack() a
for i in range(x.data.ndim): assert i == 0 or x.data.shape[i] == y.shape[i]
etc...
"""
x, y = inp
gz, = grads
xs = shape(x)
ys = shape(y)
return gz[:xs[0]], gz[xs[0]:]
......@@ -3548,7 +3597,9 @@ class Reshape(Op):
except TypeError:
pass
return gof.Apply(self, [x, shp], [tensor(x.type.dtype, bcasts)])
def perform(self, node, (x, shp), (out,)):
def perform(self, node, inp, out_):
x, shp = inp
out, = out_
if (len(shp) != self.ndim):
raise ValueError('shape argument to Reshape.perform has incorrect length %i'
', should be %i' % (len(shp), self.ndim), shp)
......@@ -3556,7 +3607,9 @@ class Reshape(Op):
out[0] = numpy.reshape(x, shp)
except:
raise ValueError('Cannot reshape input of shape %s to shape %s' % (x.shape,shp))
def grad(self, (x, shp), (g_out,)):
def grad(self, inp, grads):
x, shp = inp
g_out, = grads
return [reshape(g_out, shape(x), ndim=x.ndim), None]
def infer_shape(self, node, ishapes):
#we can't just put node.inputs[1] as not all op support interation
......@@ -3589,7 +3642,9 @@ class Flatten(Op):
if self.outdim < 1 or (x.ndim and self.outdim > x.ndim):
raise ValueError('invalid output ndimensions(%i) for tensor of rank %i' %(self.outdim, t_x.ndim))
return gof.Apply(self, [t_x], [tensor(x.type.dtype, (False,)*self.outdim)])
def perform(self, node, (x,), (out,)):
def perform(self, node, inp, out_):
x, = inp
out, = out_
outdim = self.outdim
if outdim == 1:
try:
......@@ -3602,7 +3657,9 @@ class Flatten(Op):
newshape = x.shape[:outdim-1] + (numpy.prod(x.shape[outdim-1:]),)
#print 'newshape', newshape, x.shape, x.shape
out[0] = x.reshape(newshape)
def grad(self, (x,), (g_out,)):
def grad(self, inp, grads):
x, = inp
g_out, = grads
return [reshape(g_out, shape(x), x.ndim)]
def flatten(x, outdim=1):
......@@ -3613,7 +3670,9 @@ class TileGrad(Op):
#this is so weird, I can't think of how to make this a general thing.
def make_node(self, x, reps, g_out):
return gof.Apply(self, [x, reps, g_out], [x.type()])
def perform(self, node, (x, reps, g_out), (gx,)):
def perform(self, node, inp, out):
x, reps, g_out = inp
gx, = out
xsh = x.shape
if len(reps)==2 and reps[1] == 1 and len(x.shape) == 1:
gx[0] = numpy.sum(g_out, axis=0)
......@@ -3645,11 +3704,15 @@ class Tile(Op):
x = as_tensor_variable(x)
reps = as_tensor_variable(reps)
return gof.Apply(self, [x, reps], [tensor(x.type.dtype, [False,] * self.ndim)])
def perform(self, node, (x, reps), (out,)):
def perform(self, node, inp, out_):
    """Store ``numpy.tile(x, reps)`` and check its number of dimensions.

    ``inp`` holds ``(x, reps)`` and ``out_`` holds one storage cell.  The
    tiled array is written to slot 0 first; ``ValueError`` is then raised
    if its number of dimensions differs from ``self.ndim``.
    """
    x, reps = inp
    out, = out_
    out[0] = numpy.tile(x, reps)
    if len(out[0].shape) != self.ndim:
        raise ValueError('Tile.perform produced incorrect shape')
def grad(self, (x, reps), (g_out,)):
def grad(self, inp, grads):
x, reps = inp
g_out, = grads
return [tilegrad(x, reps, g_out), None]
def tile(x, reps, ndim=None):
......@@ -3712,13 +3775,16 @@ class ARange(Op):
return [(maximum(cast(ceil(cast((stop-start),'float64')
/step),'int64'),0),)]
def perform(self, node, (start, stop, step), (out,)):
def perform(self, node, inp, out_):
    """Store ``numpy.arange(start, stop, step)`` with dtype ``self.dtype``.

    ``inp`` holds ``(start, stop, step)``; each is converted to a plain
    Python scalar with ``.item()`` before being handed to
    ``numpy.arange``.  ``out_`` holds one storage cell whose slot 0
    receives the resulting array.
    """
    start, stop, step = inp
    out, = out_
    start = start.item()
    stop = stop.item()
    step = step.item()
    out[0] = numpy.arange(start, stop, step, dtype=self.dtype)
def grad(self, inputs, (gz,)):
def grad(self, inputs, grads):
    """Return a list holding one ``None`` per element of ``inputs``.

    ``grads`` is accepted for interface compatibility but is not used.
    """
    return [None] * len(inputs)
_arange = {}
......@@ -3831,7 +3897,9 @@ class PermuteRowElements(Op):
else:
raise ValueError('Dimension mismatch: %s, %s' % (xs0, ys0))
def perform(self, node, (x, y, inverse), (outs,)):
def perform(self, node, inp, out):
x, y, inverse = inp
outs, = out
x_s = x.shape
y_s = y.shape
assert len(x_s) == len(y_s)
......@@ -3854,7 +3922,9 @@ class PermuteRowElements(Op):
self._rec_perform(node, x, y, inverse, outs[0], curdim=0)
def grad(self, (x, y, inverse), (gz,)):
def grad(self, inp, grads):
x, y, inverse = inp
gz, = grads
# First, compute the gradient wrt the broadcasted x.
# If 'inverse' is False (0), apply the inverse of y on gz.
# Else, apply y on gz.
......@@ -3930,10 +4000,13 @@ class AdvancedSubtensor1(Op):
return Apply(self, [x_, ilist_], [x_.type()])
def perform(self, node, (x,i), (out,)):
def perform(self, node, inp, out_):
    """Store ``x[i]`` in the output cell.

    ``inp`` holds ``(x, i)`` and ``out_`` holds one storage cell whose
    slot 0 receives the indexing result.
    """
    x, i = inp
    out, = out_
    out[0] = x[i]
def grad(self, inputs, (gz,)):
def grad(self, inputs, grads):
gz, = grads
class NotImplementedOp(Op):
# This op should be pruned from the graph.
# This Op can be created in a graph,
......@@ -3998,14 +4071,16 @@ class AdvancedSubtensor(Op):
# Default case, we don't know
return node.env.shape_feature.default_infer_shape(node, ishapes)
def perform(self, node, inputs, (out,)):
def perform(self, node, inputs, out_):
out, = out_
# TODO: in general, we need to re-pack the inputs into a valid index, just like
# subtensor
out[0] = inputs[0].__getitem__(inputs[1:])
#return
#raise NotImplementedError()
def grad(self, inputs, (gz,)):
def grad(self, inputs, grads):
gz, = grads
x = inputs[0]
rest = inputs[1:]
return [AdvancedIncSubtensor(self.args)(zeros_like(x), gz, *rest)] + [None]*len(rest)
......@@ -4034,7 +4109,8 @@ class AdvancedIncSubtensor(Op):
raise NotImplementedError('Advanced indexing increment of x (of dim %i) by y (of dim %i) with arguments (%s) not supported yet'\
% (x.ndim, y.ndim, ','.join(str(input) for input in inputs)))
def perform(self, node, inputs, (out,)):
def perform(self, node, inputs, out_):
out, = out_
# TODO: same thing as in AdvancedSubtensor's perform TODO
out[0] = inputs[0].copy()
out[0][inputs[2:]] += inputs[1]
......@@ -4116,7 +4192,9 @@ class Dot(Op):
outputs = [tensor(scal.upcast(*i_dtypes), bz)]
return Apply(self, inputs, outputs)
def perform(self, node, (x, y), (z, )):
def perform(self, node, inp, out):
x, y = inp
z, = out
try:
# the asarray is here because dot between two vectors gives a numpy float object
# but we need to return a 0d ndarray
......@@ -4126,7 +4204,9 @@ class Dot(Op):
e.args = e.args + (x.shape, y.shape)
raise
def grad(self, (x, y), (gz,)):
def grad(self, inp, grads):
x, y = inp
gz, = grads
if gz.type.ndim == 0:
rval = gz * y, gz * x
elif x.type.ndim == 1 and y.type.ndim > 1:
......@@ -4137,7 +4217,8 @@ class Dot(Op):
rval = dot(gz, y.T), dot(x.T, gz)
return cast(rval[0], x.dtype), cast(rval[1], y.dtype)
def infer_shape(self, node, (xshp,yshp)):
def infer_shape(self, node, shapes):
xshp, yshp = shapes
x, y = node.inputs
if x.ndim == 2 and y.ndim == 2:
return [(xshp[0], yshp[1])]
......@@ -4181,7 +4262,9 @@ class TensorDotGrad(Op):
op = TensorDotGrad(axes)
return Apply(op, [x,y,gz], [gx, gy])
def perform(self, node, (x, y, gz), (gx,gy)):
def perform(self, node, inp, out):
x, y, gz = inp
gx, gy = out
sum_over_y = range(y.ndim)
[sum_over_y.remove(q) for q in self.axes[1]]
......@@ -4260,7 +4343,9 @@ class TensorDot(Op):
broadcastable=[False]*outdim);
return Apply(op, inputs=[x,y], outputs=[output,])
def perform(self, node, (x, y), (z,)):
def perform(self, node, inp, out):
x, y = inp
z, = out
try:
z[0] = numpy.asarray(numpy.tensordot(x, y, self.axes))
except ValueError, e:
......@@ -4268,7 +4353,9 @@ class TensorDot(Op):
e.args = e.args + (x.shape, y.shape, self.axes)
raise
def grad(self, (x, y), (gz,)):
def grad(self, inp, grads):
x, y = inp
gz, = grads
gx, gy = tensordot_grad(self.axes)(x, y, gz)
return [gx, gy]
......@@ -4335,9 +4422,13 @@ class Outer(Op):
outputs = [tensor(scal.upcast(*i_dtypes), bz)]
return Apply(self, inputs, outputs)
def perform(self, node, (x, y), (z, )):
def perform(self, node, inp, out):
    """Store ``numpy.outer(x, y)`` in the output cell.

    ``inp`` holds ``(x, y)`` and ``out`` holds one storage cell whose
    slot 0 receives the outer product.
    """
    x, y = inp
    z, = out
    z[0] = numpy.outer(x, y)
def grad(self, (x, y), (gz,)):
def grad(self, inp, grads):
x, y = inp
gz, = grads
return dot(gz, y), dot(x, gz) #no transposing necessary
def __str__(self):
return "outer"
......
......@@ -477,7 +477,9 @@ class Gemm(GemmRelated):
if len(bb): raise ValueError(Gemm.E_scalar, bb)
output = z.type()
return Apply(self, inputs, [output])
def perform(self, node, (z, a, x, y, b), (zout, )):
def perform(self, node, inp, out):
z, a, x, y, b = inp
zout, = out
assert a.shape == ()
assert b.shape == ()
if not self.inplace:
......@@ -596,7 +598,9 @@ class Gemm(GemmRelated):
#undef REAL
"""
def c_code(self, node, name, (_z, _a, _x, _y, _b), (_zout, ), sub): #DEBUG
def c_code(self, node, name, inp, out, sub): #DEBUG
_z, _a, _x, _y, _b = inp
_zout, = out
if node.inputs[0].type.dtype.startswith('complex'):
raise utils.MethodNotDefined('%s.c_code' \
% self.__class__.__name__)
......@@ -949,7 +953,9 @@ class Dot22(GemmRelated):
outputs = [T.tensor(x.type.dtype, bz)]
return Apply(self, [x,y], outputs)
def perform(self, node, (x, y), (z, )):
def perform(self, node, inp, out):
x, y = inp
z, = out
try:
z[0] = numpy.asarray(numpy.dot(x, y))
except ValueError, e:
......@@ -988,7 +994,9 @@ class Dot22(GemmRelated):
double a = 1.0;
double b = 0.0;
"""
def c_code(self, node, name, (_x, _y), (_zout, ), sub): #DEBUG
def c_code(self, node, name, inp, out, sub): #DEBUG
_x, _y = inp
_zout, = out
if node.inputs[0].type.dtype.startswith('complex'):
raise utils.MethodNotDefined('%s.c_code' \
% self.__class__.__name__)
......@@ -1083,7 +1091,9 @@ class Dot22Scalar(GemmRelated):
outputs = [T.tensor(x.type.dtype, bz)]
return Apply(self, [x,y,scalar], outputs)
def perform(self, node, (x, y, scalar), (z, )):
def perform(self, node, inp, out):
x, y, scalar = inp
z, = out
try:
z[0] = scalar * numpy.asarray(numpy.dot(x, y))
except ValueError, e:
......@@ -1117,7 +1127,9 @@ class Dot22Scalar(GemmRelated):
#undef REAL
double b = 0.0;
"""
def c_code(self, node, name, (_x, _y, _a), (_zout, ), sub): #DEBUG
def c_code(self, node, name, inp, out, sub): #DEBUG
_x, _y, _a = inp
_zout, = out
if len(self.c_libraries())<=0:
return super(Dot22Scalar, self).c_code(node, name, (_x, _y), (_zout, ), sub)
full_code = self.build_gemm_call() % dict(locals(), **sub)
......
......@@ -179,7 +179,9 @@ class DimShuffle(Op):
else:
return "DimShuffle{%s}" % ",".join(str(x) for x in self.new_order)
def perform(self, node, (input, ), (storage, )):
def perform(self, node, inp, out):
input, = inp
storage, = out
# drop
res = input
if type(res) != numpy.ndarray:
......@@ -204,7 +206,8 @@ class DimShuffle(Op):
storage[0] = numpy.asarray(res) #asarray puts scalars back into array
def infer_shape(self, node, (ishp,)):
def infer_shape(self, node, shapes):
ishp, = shapes
ishp = list(ishp)
for drop in reversed(self.drop):
del ishp[drop]
......@@ -216,7 +219,9 @@ class DimShuffle(Op):
rval.insert(augm, 1)
return [rval]
def c_code(self, node, name, (input,), (res,), sub):
def c_code(self, node, name, inp, out, sub):
input, = inp
res, = out
basename = input + '__view_or_copy'
def statements(lst):
......@@ -317,7 +322,9 @@ class DimShuffle(Op):
def c_code_cache_version(self):
return (1,)
def grad(self, (x, ), (gz, )):
def grad(self, inp, grads):
x, = inp
gz, = grads
gz = as_tensor_variable(gz)
grad_order = ['x'] * len(x.type.broadcastable)
for i, v in enumerate(self.new_order):
......@@ -934,7 +941,9 @@ class CAReduce(Op):
else:
return "Reduce{%s}" % self.scalar_op
def perform(self, node, (input, ), (output, )):
def perform(self, node, inp, out):
input, = inp
output, = out
axis = self.axis
if axis is None:
axis = range(input.ndim)
......@@ -959,7 +968,8 @@ class CAReduce(Op):
else:
output[0] = numpy.copy(variable)
def infer_shape(self, node, (ishape,)):
def infer_shape(self, node, shapes):
ishape, = shapes
axis = self.axis
if axis is None:
return (),
......@@ -1115,7 +1125,9 @@ class Sum(CAReduce):
uint32='uint64',
).get(idtype, idtype)
def grad(self, (x, ), (gz, )):
def grad(self, inp, grads):
x, = inp
gz, = grads
gz = as_tensor_variable(gz)
axis = self.axis
if axis is None:
......@@ -1176,7 +1188,7 @@ class Prod(CAReduce):
uint32='uint64',
).get(idtype, idtype)
def grad(self, (prod_in, ), (gz, )):
def grad(self, inp, grads):
'''
The grad of this Op could be very easy, it is was not for the case
where zeros are present in a given "group" (ie. elements reduced
......@@ -1221,6 +1233,8 @@ class Prod(CAReduce):
the "T.eq()" bits), then taking this or that behavior (see T.switch)
based on the result of this count.
'''
prod_in, = inp
gz, = grads
if prod_in.dtype[0:3] in ('int','uin'):
return [None]
......@@ -1314,7 +1328,9 @@ class MulWithoutZeros(scalar.BinaryScalarOp):
return x
return x*y
def c_code(self, node, name, (x,y), (z, ), sub):
def c_code(self, node, name, inp, out, sub):
    """Return the C statement implementing the zero-skipping product.

    ``inp`` holds the C names of the two operands and ``out`` the C name
    of the result.  The generated statement assigns the other operand
    when one operand equals 0, and the product of both otherwise.
    ``node``, ``name`` and ``sub`` are not used.
    """
    x, y = inp
    z, = out
    template = ("%(z)s = ((%(x)s == 0) ? (%(y)s) : "
                "((%(y)s == 0) ? (%(x)s) : ((%(y)s)*(%(x)s))) );")
    # An explicit mapping keeps the substitution independent of whatever
    # other local names happen to exist in this function.
    return template % dict(x=x, y=y, z=z)
......
......@@ -161,7 +161,8 @@ class Conv3D(theano.Op):
def c_header_dirs(self):
return ldflags(libs=False, include_dir=True)
def c_code(self, node, nodename, (V,W,b,d), outputs, sub):
def c_code(self, node, nodename, inputs, outputs, sub):
V, W, b, d = inputs
fail = sub['fail']
H = outputs[0]
......
......@@ -83,7 +83,8 @@ class ConvGrad3D(theano.Op):
flags = ['-Werror']
return flags
def c_code(self, node, nodename, (V,d,WShape,dCdH), outputs, sub):
def c_code(self, node, nodename, inputs, outputs, sub):
V, d, WShape, dCdH = inputs
fail = sub['fail']
dCdW = outputs[0]
......
......@@ -86,7 +86,8 @@ class ConvTransp3D(theano.Op):
print "\t\t\t\tConvTransp3D python code"
output_storage[0][0] = computeR(W,b,d,H,RShape)
def c_code(self, node, nodename, (W, b, d, H, RShape), outputs, sub):
def c_code(self, node, nodename, inputs, outputs, sub):
W, b, d, H, RShape = inputs
fail = sub['fail']
R = outputs[0]
......
......@@ -221,7 +221,7 @@ class ConvOp(Op):
else: return []
@staticmethod
def getOutputShape(inshp, kshp, (dx,dy)=(1,1), mode='valid'):
def getOutputShape(inshp, kshp, stride=(1,1), mode='valid'):
"""
Computes the output dimensions of convolving an image of shape "inshp"
with kernels of shape "kshp".
......@@ -231,6 +231,7 @@ class ConvOp(Op):
:param mode: 'valid' or 'full' (see 'border_mode' in conv2d's doc)
:return: (rows,cols) of output image
"""
dx, dy = stride
if mode=='valid': s = -1
else: s = 1
inshp, kshp = numpy.array(inshp), numpy.array(kshp)
......@@ -583,10 +584,12 @@ class ConvOp(Op):
# we simply let the default function do its work.
raise NotImplementedError()
def perform(self,node, (img2d, filtersflipped), (z,)):
def perform(self,node, inp, out):
"""
By default if len(img2d.shape)==3, we
"""
img2d, filtersflipped = inp
z, = out
if not imported_scipy_signal:
raise theano.gof.utils.MethodNotDefined(
"c_headers", type(self), self.__class__.__name__,
......@@ -696,7 +699,9 @@ class ConvOp(Op):
z[0]=zz
def grad(self, (inputs, kerns), (gz,)):
def grad(self, inp, grads):
inputs, kerns = inp
gz, = grads
if self.imshp != self.imshp_logical or self.kshp != self.kshp_logical:
raise NotImplementedError('todo')
......@@ -897,7 +902,9 @@ using namespace std;
return blas.ldflags(libs=False, include_dir=True)
return []
def c_code(self, node, name, (img2d, filtersflipped), (z, ), sub):
def c_code(self, node, name, inp, out, sub):
img2d, filtersflipped = inp
z, = out
if node.inputs[0].type.dtype != node.inputs[1].type.dtype:
raise NotImplementedError()
assert node.inputs[0].type.dtype == node.inputs[1].type.dtype
......
......@@ -69,7 +69,9 @@ class SoftmaxWithBias(gof.Op):
sm[i] *= 1.0 / numpy.sum(sm[i])
output_storage[0][0] = sm
def grad(self, (x, b), (g_sm,)):
def grad(self, inp, grads):
x, b = inp
g_sm, = grads
sm = softmax_with_bias(x, b)
dx = softmax_grad(g_sm, sm)
db = tensor.sum(dx, axis = 0)
......@@ -190,7 +192,9 @@ class SoftmaxWithBias(gof.Op):
return (init_decl, begin_row_loop, inside_row_loop, end_row_loop)
def c_code(self, node, name, (x, b), (sm,), sub):
def c_code(self, node, name, inp, out, sub):
x, b = inp
sm, = out
code_template = ''.join(self.c_code_template())
return code_template % dict(locals(), **sub)
......@@ -241,7 +245,9 @@ class SoftmaxGrad(gof.Op):
def c_code_cache_version(self):
return (3,)
def c_code(self, node, name, (dy, sm), (dx,), sub):
def c_code(self, node, name, inp, out, sub):
dy, sm = inp
dx, = out
return '''
if ((%(dy)s->descr->type_num != PyArray_DOUBLE) && (%(dy)s->descr->type_num != PyArray_FLOAT))
{
......@@ -335,7 +341,9 @@ class Softmax(gof.Op):
sm[i] /= numpy.sum(sm[i])
output_storage[0][0] = sm
def grad(self, (x,), (g_sm,)):
def grad(self, inp, grads):
x, = inp
g_sm, = grads
sm = softmax(x)
return [softmax_grad(g_sm, sm)]
......@@ -637,13 +645,16 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
output_storage[1][0] = sm
output_storage[2][0] = am
def infer_shape(self, node, (x_shp, b_shp, idx_shp)):
def infer_shape(self, node, shapes):
    """Return the shapes of the three outputs from the input shapes.

    ``shapes`` holds ``(x_shp, b_shp, idx_shp)``.  The result is
    ``[nll_shp, sm_shp, am_shp]`` where ``nll_shp`` is the 1-tuple
    ``(x_shp[0],)``, ``sm_shp`` is ``x_shp`` and ``am_shp`` is
    ``idx_shp``.  ``b_shp`` is unpacked only to enforce that exactly
    three shapes were supplied.
    """
    x_shp, b_shp, idx_shp = shapes
    return [(x_shp[0],), x_shp, idx_shp]
def grad(self, (x, b, y_idx), (g_nll, g_sm, g_am)):
def grad(self, inp, grads):
x, b, y_idx = inp
g_nll, g_sm, g_am = grads
if g_am is not None:
raise NotImplementedError()
elif g_sm is not None:
......@@ -745,7 +756,9 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
def c_code_cache_version(self):
return (5,) + SoftmaxWithBias.c_code_cache_version()
def c_code(self, node, name, (x, b, y_idx), (nll, sm, am), sub):
def c_code(self, node, name, inp, out, sub):
x, b, y_idx = inp
nll, sm, am = out
y_idx_type = node.inputs[2].type.dtype_specs()[1]
am_type = y_idx_type
code_template = ''.join(self.c_code_template())
......@@ -775,7 +788,9 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
dx[i] = dy[i] * sm[i] #vector scale
dx[i, y_idx[i]] -= dy[i] #scalar decrement
output_storage[0][0] = dx
def grad(self, (dy, sm, y_idx), (g_dx, )):
def grad(self, inp, grads):
dy, sm, y_idx = inp
g_dx, = grads
# TODO: currently we do not compute the gradient w.r.t. dy, because
# advanced indexing is not working yet. When it works, do it to avoid
# potentially misleading behavior in gradient computations! (although
......@@ -790,7 +805,9 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
return [g_dy, g_sm, g_y_idx]
def c_code_cache_version(self):
return (2,)
def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub):
def c_code(self, node, name, inp, out, sub):
dnll, sm, y_idx = inp
dx, = out
y_idx_type = node.inputs[2].type.dtype_specs()[1]
return """
......@@ -906,7 +923,9 @@ class CrossentropyCategorical1HotGrad(gof.Op):
return self.__class__.__name__
def make_node(self, g_y, coding_dist, true_one_of_n):
return Apply(self, [g_y, coding_dist, true_one_of_n], [coding_dist.type()])
def perform(self, node, (g_y, coding_dist, true_one_of_n), (g_coding_strg,)):
def perform(self, node, inp, out):
g_y, coding_dist, true_one_of_n = inp
g_coding_strg, = out
g_coding = numpy.zeros_like(coding_dist)
for i in xrange(len(g_y)):
g_coding[i, true_one_of_n[i]] = -g_y[i]/coding_dist[i, true_one_of_n[i]]
......@@ -956,13 +975,17 @@ class CrossentropyCategorical1Hot(gof.Op):
return Apply(self, [_coding_dist, _true_one_of_n],
[tensor.Tensor(dtype=_coding_dist.dtype, broadcastable=[False])()])
def perform(self, node, (coding, one_of_n), (y_out,)):
def perform(self, node, inp, out):
    """Store ``-log(coding[i, one_of_n[i]])`` for every row ``i``.

    ``inp`` holds ``(coding, one_of_n)``; ``coding`` is indexed with two
    subscripts and ``one_of_n`` with one.  The result buffer is created
    with ``numpy.zeros_like(coding[:, 0])``, so it has one entry per row
    and the dtype of ``coding``.  ``out`` holds one storage cell whose
    slot 0 receives the buffer.
    """
    coding, one_of_n = inp
    y_out, = out
    y = numpy.zeros_like(coding[:, 0])
    # ``range`` works on both Python 2 and 3 (``xrange`` is 2-only).
    for i in range(len(y)):
        y[i] = -numpy.log(coding[i, one_of_n[i]])
    y_out[0] = y
def grad(self, (coding, one_of_n), (g_y,)):
def grad(self, inp, grads):
coding, one_of_n = inp
g_y, = grads
return [crossentropy_categorical_1hot_grad(g_y, coding, one_of_n), None]
crossentropy_categorical_1hot = CrossentropyCategorical1Hot()
......@@ -1465,7 +1488,9 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
node = Apply(op=self, inputs=[mat], outputs=[tensor.matrix()])
return node
def perform(self, node, (mat, ), (output, )):
def perform(self, node, inp, out):
mat, = inp
output, = out
new_shape=(mat.shape[0],mat.shape[1]+1)
if output[0] == None:
output[0]=numpy.empty(new_shape,dtype=mat.dtype)
......@@ -1481,7 +1506,9 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
out[:,0].fill(self.val.data)
out[:,1:]=mat
def grad(self, (mat,), (goutput,)):
def grad(self, inp, grads):
mat, = inp
goutput, = grads
return goutput[:,1:]
class Prepend_scalar_to_each_row(gof.Op):
......@@ -1506,7 +1533,9 @@ class Prepend_scalar_to_each_row(gof.Op):
node = Apply(op=self, inputs=[val,mat], outputs=[tensor.matrix()])
return node
def perform(self, node, (val,mat), (output, )):
def perform(self, node, inp, out):
val, mat = inp
output, = out
new_shape=(mat.shape[0],mat.shape[1]+1)
if output[0] == None:
output[0]=numpy.empty(new_shape,dtype=mat.dtype)
......@@ -1521,7 +1550,9 @@ class Prepend_scalar_to_each_row(gof.Op):
out[:,0].fill(val)
out[:,1:]=mat
def grad(self, (val, mat), (goutput,)):
def grad(self, inp, grads):
    """Split the output gradient into its first column and the rest.

    Returns the 2-tuple ``(goutput[:, 0], goutput[:, 1:])``.  ``inp``
    must hold exactly two items and ``grads`` exactly one; the two
    inputs themselves are not otherwise used.
    """
    val, mat = inp
    goutput, = grads
    return goutput[:, 0], goutput[:, 1:]
prepend_scalar_to_each_row = Prepend_scalar_to_each_row()
......
......@@ -29,10 +29,14 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
return 1.0 / (1.0 + numpy.exp(-x))
def impl(self, x):
return ScalarSigmoid.st_impl(x)
def grad(self, (x,), (gz,)):
def grad(self, inp, grads):
x, = inp
gz, = grads
y = scalar_sigmoid(x)
return [gz * y * (1.0 - y)]
def c_code(self, node, name, (x,), (z,), sub):
def c_code(self, node, name, inp, out, sub):
x, = inp
z, = out
if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
......@@ -71,9 +75,13 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
return numpy.log1p(numpy.exp(x))
def impl(self, x):
return ScalarSoftplus.static_impl(x)
def grad(self, (x,), (gz,)):
def grad(self, inp, grads):
x, = inp
gz, = grads
return [gz * scalar_sigmoid(x)]
def c_code(self, node, name, (x,), (z,), sub):
def c_code(self, node, name, inp, out, sub):
x, = inp
z, = out
if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
......
......@@ -349,7 +349,8 @@ class MakeVector(T.Op):
return T.Apply(self, inputs, [otype()])
def __str__(self):
return self.__class__.__name__
def perform(self, node, inputs, (out,)):
def perform(self, node, inputs, out_):
out, = out_
# not calling theano._asarray as optimization
if out[0] is None:
out[0] = theano._asarray(inputs, dtype=node.outputs[0].dtype)
......@@ -395,14 +396,18 @@ class Shape_i(T.Op):
if x.ndim <= self.i:
raise TypeError('x has too few dimensions for Shape_i', (x, self.i))
return T.Apply(self, [x], [T.lscalar()])
def perform(self, node, (x, ), (out, )):
def perform(self, node, inp, out_):
x, = inp
out, = out_
if out[0] is None:
out[0] = theano._asarray(x.shape[self.i], dtype='int64')
else:
out[0][...] = x.shape[self.i]
def c_code_cache_version(self):
return (0,1)
def c_code(self, node, name, (x, ), (out, ), sub):
def c_code(self, node, name, inp, out_, sub):
x, = inp
out, = out_
i = self.i
if isinstance(node.inputs[0].type,T.TensorType):
return """
......@@ -423,7 +428,7 @@ class Shape_i(T.Op):
# various types of variables.
# Do not continue this madness.
return super(Shape_i, self).c_code(node, name, (x,), (out,), sub)
def grad(self, (x,), (gz,)):
def grad(self, inp, grads):
return [None]
class ShapeFeature(object):
......@@ -824,7 +829,8 @@ class Assert(T.Op):
def __str__(self):
return self.__class__.__name__
def perform(self, node, inputs, (out,)):
def perform(self, node, inputs, out_):
out, = out_
v = inputs[0]
out[0]=v
assert numpy.all(inputs[1:])
......
......@@ -181,7 +181,8 @@ class RandomFunction(gof.Op):
return [None, [sample_shp[i] for i in xrange(node.outputs[1].ndim)]]
def perform(self, node, inputs, (rout, out)):
def perform(self, node, inputs, out_):
rout, out = out_
# Use self.fn to draw shape worth of random numbers.
# Numbers are drawn from r if self.inplace is True, and from a copy of r if
# self.inplace is False
......
......@@ -119,9 +119,11 @@ class DownsampleFactorMax(Op):
# TODO: consider restrucing the dtype?
return gof.Apply(self, [x], [x.type()])
def perform(self, node, (x,), (z,)):
def perform(self, node, inp, out):
"""
"""
x, = inp
z, = out
if len(x.shape)!=4:
raise NotImplementedError('DownsampleFactorMax requires 4D input for now')
if z[0] is None:
......@@ -143,11 +145,15 @@ class DownsampleFactorMax(Op):
zj = j / ds1
zz[n,k,zi,zj] = __builtin__.max(zz[n,k,zi,zj], x[n,k,i,j])
def grad(self,(x,), (gz,)):
def grad(self, inp, grads):
x, = inp
gz, = grads
maxout = self(x)
return [DownsampleFactorMaxGrad(self.ds, ignore_border=self.ignore_border)(x, maxout, gz)]
def c_code(self, node, name, (x,), (z, ), sub):
def c_code(self, node, name, inp, out, sub):
x, = inp
z, = out
fail=sub['fail']
ignore_border = int(self.ignore_border)
ds0, ds1 = self.ds
......@@ -244,7 +250,9 @@ class DownsampleFactorMaxGrad(Op):
return Apply(self, [x, maxout, gz], [x.type()])
def perform(self, node, (x, maxout, gz), (gx_stg,)):
def perform(self, node, inp, out):
x, maxout, gz = inp
gx_stg, = out
gx = numpy.zeros_like(x)
ds0, ds1 = self.ds
......@@ -263,7 +271,9 @@ class DownsampleFactorMaxGrad(Op):
else: gx[n,k,i,j] = 0
gx_stg[0] = gx
def c_code(self, node, name, (x, z, gz), (gx,), sub):
def c_code(self, node, name, inp, out, sub):
x, z, gz = inp
gx, = out
fail = sub['fail']
ignore_border = int(self.ignore_border)
ds0, ds1 = self.ds
......
......@@ -16,9 +16,13 @@ class XlogX(scalar.UnaryScalarOp):
return x * numpy.log(x)
def impl(self, x):
return XlogX.st_impl(x)
def grad(self, (x,), (gz,)):
def grad(self, inputs, grads):
x, = inputs
gz, = grads
return [gz * (1 + scalar.log(x))]
def c_code(self, node, name, (x,), (z,), sub):
def c_code(self, node, name, inputs, outputs, sub):
x, = inputs
z, = outputs
if node.inputs[0].type in [scalar.float32, scalar.float64]:
return """%(z)s =
%(x)s == 0.0
......@@ -40,9 +44,13 @@ class XlogY0(scalar.BinaryScalarOp):
return x * numpy.log(y)
def impl(self, x, y):
return XlogY0.st_impl(x, y)
def grad(self, (x, y), (gz,)):
def grad(self, inputs, grads):
x, y = inputs
gz, = grads
return [gz * scalar.log(y), gz * x / y]
def c_code(self, node, name, (x, y), (z,), sub):
def c_code(self, node, name, inputs, outputs, sub):
x, y = inputs
z, = outputs
if node.inputs[0].type in [scalar.float32, scalar.float64]:
return """%(z)s =
%(x)s == 0.0
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论