提交 1569a7a9 authored 作者: David Warde-Farley's avatar David Warde-Farley

Bring theano.tensor into PEP 3113 compliance.

上级 ead4f23e
...@@ -1379,9 +1379,13 @@ class TensorFromScalar(Op): ...@@ -1379,9 +1379,13 @@ class TensorFromScalar(Op):
[s], [s],
[tensor(dtype = s.type.dtype, [tensor(dtype = s.type.dtype,
broadcastable = ())]) broadcastable = ())])
def perform(self, node, (s, ), (out, )): def perform(self, node, inp, out_):
s, = inp
out, = out_
out[0] = numpy.asarray(s) out[0] = numpy.asarray(s)
def grad(self, (s,), (dt,)): def grad(self, inp, grads):
s, = inp
dt, = grads
return [scalar_from_tensor(dt)] return [scalar_from_tensor(dt)]
tensor_from_scalar = TensorFromScalar() tensor_from_scalar = TensorFromScalar()
...@@ -1392,9 +1396,13 @@ class ScalarFromTensor(Op): ...@@ -1392,9 +1396,13 @@ class ScalarFromTensor(Op):
return Apply(self, return Apply(self,
[t], [t],
[scal.Scalar(dtype = t.type.dtype).make_variable()]) [scal.Scalar(dtype = t.type.dtype).make_variable()])
def perform(self, node, (s, ), (out, )): def perform(self, node, inp, out_):
s, = inp
out, = out_
out[0] = s.flatten()[0] out[0] = s.flatten()[0]
def grad(self, (s,), (dt,)): def grad(self, inp, grads):
s, = inp
dt, = grads
return [tensor_from_scalar(dt)] return [tensor_from_scalar(dt)]
def __str__(self): def __str__(self):
return self.__class__.__name__ return self.__class__.__name__
...@@ -1503,9 +1511,11 @@ class Shape(Op): ...@@ -1503,9 +1511,11 @@ class Shape(Op):
#the type to TensorVariable to have the optimization working #the type to TensorVariable to have the optimization working
#correctly. #correctly.
return Apply(self, [x], [lvector()]) return Apply(self, [x], [lvector()])
def perform(self, node, (x, ), (out, )): def perform(self, node, inp, out_):
x, = inp
out, = out_
out[0] = theano._asarray(x.shape, dtype = 'int64') out[0] = theano._asarray(x.shape, dtype = 'int64')
def grad(self, (x,), (gz,)): def grad(self, inp, grads):
return [None] return [None]
@constructor @constructor
def old_shape(a): def old_shape(a):
...@@ -1553,12 +1563,15 @@ class SpecifyShape(Op): ...@@ -1553,12 +1563,15 @@ class SpecifyShape(Op):
shape = as_tensor_variable(shape) shape = as_tensor_variable(shape)
return Apply(self, [x, shape], [x.type()]) return Apply(self, [x, shape], [x.type()])
def perform(self, node, (x,shape ), (out, )): def perform(self, node, inp, out_):
x, shape = inp
out, = out_
assert numpy.all(x.shape==shape), ("got shape", x.shape, assert numpy.all(x.shape==shape), ("got shape", x.shape,
"expected", shape) "expected", shape)
out[0] = x out[0] = x
def infer_shape(self, node, (xshape, sshape)): def infer_shape(self, node, shapes):
xshape, sshape = shapes
new_shape=[] new_shape=[]
for dim in range(node.inputs[0].ndim): for dim in range(node.inputs[0].ndim):
try: try:
...@@ -1571,7 +1584,9 @@ class SpecifyShape(Op): ...@@ -1571,7 +1584,9 @@ class SpecifyShape(Op):
assert len(new_shape)==len(xshape) assert len(new_shape)==len(xshape)
return [new_shape] return [new_shape]
def grad(self, (x, s), (gz,)): def grad(self, inp, grads):
x, s = inp
gz, = grads
# Should I set an SpecifyShape on gz? I think so # Should I set an SpecifyShape on gz? I think so
# But I don't do it now as we need to make an optimization # But I don't do it now as we need to make an optimization
# to remove that op from the graph to don't block other optimization # to remove that op from the graph to don't block other optimization
...@@ -1643,18 +1658,21 @@ class MaxAndArgmax(Op): ...@@ -1643,18 +1658,21 @@ class MaxAndArgmax(Op):
outputs = [tensor(x.type.dtype, broadcastable,name='max'), outputs = [tensor(x.type.dtype, broadcastable,name='max'),
tensor('int32', broadcastable,name='argmax')] tensor('int32', broadcastable,name='argmax')]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def perform(self, node, (x, axis), (max, max_idx)): def perform(self, node, inp, outs):
x, axis = inp
max, max_idx = outs
max[0] = numpy.asarray(numpy.max(x, axis)) max[0] = numpy.asarray(numpy.max(x, axis))
max_idx[0] = theano._asarray(numpy.argmax(x, axis), dtype='int32') max_idx[0] = theano._asarray(numpy.argmax(x, axis), dtype='int32')
def infer_shape(self, node, (ishape,axis_shape)): def infer_shape(self, node, shapes):
ishape, axis_shape = shapes
axis=node.inputs[1] axis=node.inputs[1]
if axis is None: if axis is None:
return [(),()] return [(),()]
rval = tuple([ishape[i] for (i,b) in enumerate(node.inputs[0].type.broadcastable) if i !=axis.data]) rval = tuple([ishape[i] for (i,b) in enumerate(node.inputs[0].type.broadcastable) if i !=axis.data])
return [rval,rval] return [rval,rval]
def grad(self, (x, axis), (g_max, g_max_idx)): def grad(self, inp, grads):
# @warning: This only works if axis is 0, else the max is # @warning: This only works if axis is 0, else the max is
# broadcasted wrong in the call to eq. # broadcasted wrong in the call to eq.
# @note: This function should work correctly for L{vector}s. # @note: This function should work correctly for L{vector}s.
...@@ -1663,7 +1681,8 @@ class MaxAndArgmax(Op): ...@@ -1663,7 +1681,8 @@ class MaxAndArgmax(Op):
# gMax * dMax/dx + gArgMax * dArgMax/dx, gMax * dMax/daxis + gArgMax * dArgMax/daxis # gMax * dMax/dx + gArgMax * dArgMax/dx, gMax * dMax/daxis + gArgMax * dArgMax/daxis
# g_max has one less dimension than x, so you need to complete g_max to x's shape # g_max has one less dimension than x, so you need to complete g_max to x's shape
# when axis=0 the broadcasting mechanism does it automatically # when axis=0 the broadcasting mechanism does it automatically
x, axis = inp
g_max, g_max_idx = grads
if not ( axis.data == 0 or axis.data == x.ndim-1): if not ( axis.data == 0 or axis.data == x.ndim-1):
raise NotImplementedError('MaxAndArgmax gradient with axis corresponding to internal dimension') raise NotImplementedError('MaxAndArgmax gradient with axis corresponding to internal dimension')
if axis.data==0: if axis.data==0:
...@@ -2089,10 +2108,12 @@ class Eye(gof.Op): ...@@ -2089,10 +2108,12 @@ class Eye(gof.Op):
k = as_tensor_variable(k) k = as_tensor_variable(k)
return gof.Apply(self, [n,m,k], [TensorType(dtype = self.dtype, broadcastable = (False,False))()]) return gof.Apply(self, [n,m,k], [TensorType(dtype = self.dtype, broadcastable = (False,False))()])
def perform(self, node, (n,m,k), (out,)): def perform(self, node, inp, out_):
n, m, k = inp
out, = out_
out[0] = numpy.eye(n,m,k,dtype=self.dtype) out[0] = numpy.eye(n,m,k,dtype=self.dtype)
def grad(self, (n,m,k),(gout,)): def grad(self, inp, grads):
return [None, None, None] return [None, None, None]
def __eq__(self,other): def __eq__(self,other):
...@@ -2127,7 +2148,9 @@ if 0: ...@@ -2127,7 +2148,9 @@ if 0:
dims = as_tensor_variable(dims) dims = as_tensor_variable(dims)
return gof.Apply(self, [dims], [self.type()]) return gof.Apply(self, [dims], [self.type()])
def perform(self, node, (dims,), (out,)): def perform(self, node, inp, out_):
dims, = inp
out, = out_
if out[0] is not None: if out[0] is not None:
out[0].resize(dims, refcheck = 0) out[0].resize(dims, refcheck = 0)
out[0].fill(self.value) out[0].fill(self.value)
...@@ -2139,7 +2162,7 @@ if 0: ...@@ -2139,7 +2162,7 @@ if 0:
else: else:
out[0] = numpy.ones(dims, dtype = self.dtype) * self.value out[0] = numpy.ones(dims, dtype = self.dtype) * self.value
def grad(self, (dims,), (gout,)): def grad(self, inp, grads):
return None, return None,
def __eq__(self, other): def __eq__(self, other):
...@@ -2212,7 +2235,8 @@ class Alloc(gof.Op): ...@@ -2212,7 +2235,8 @@ class Alloc(gof.Op):
otype = TensorType(dtype=v.dtype, broadcastable=bcast) otype = TensorType(dtype=v.dtype, broadcastable=bcast)
return gof.Apply(self, [v]+sh, [otype()]) return gof.Apply(self, [v]+sh, [otype()])
def perform(self, node, inputs, (out,)): def perform(self, node, inputs, out_):
out, = out_
v = inputs[0] v = inputs[0]
sh = tuple([int(i) for i in inputs[1:]]) sh = tuple([int(i) for i in inputs[1:]])
if out[0] is None or out[0].shape != sh: if out[0] is None or out[0].shape != sh:
...@@ -2228,7 +2252,7 @@ class Alloc(gof.Op): ...@@ -2228,7 +2252,7 @@ class Alloc(gof.Op):
def infer_shape(self, node, input_shapes): def infer_shape(self, node, input_shapes):
return [node.inputs[1:]] return [node.inputs[1:]]
def grad(self, inputs, (gout,)): def grad(self, inputs, grads):
return [None for i in inputs] return [None for i in inputs]
def __call__(self, val, *shapes): def __call__(self, val, *shapes):
...@@ -2286,7 +2310,9 @@ class Mean(elemwise.CAReduce): ...@@ -2286,7 +2310,9 @@ class Mean(elemwise.CAReduce):
# we want to protect against overflow # we want to protect against overflow
return 'float64' return 'float64'
def perform(self, node, (input, ), (output, )): def perform(self, node, inp, out):
input, = inp
output, = out
output[0]=numpy.mean(input,axis=self.axis) output[0]=numpy.mean(input,axis=self.axis)
def c_code(self, node, name, inames, onames, sub): def c_code(self, node, name, inames, onames, sub):
...@@ -2407,10 +2433,14 @@ if 0: ...@@ -2407,10 +2433,14 @@ if 0:
# broadcastable = [False if i==axis else x for i, x in enumerate(input.broadcastable)]) # broadcastable = [False if i==axis else x for i, x in enumerate(input.broadcastable)])
return gof.Apply(self, [inputs, repeats, axis], [type()]) return gof.Apply(self, [inputs, repeats, axis], [type()])
def perform(self, node, (input, repeats, axis), (out, )): def perform(self, node, inp, out_):
input, repeats, axis = inp
out, = out_
out[0] = numpy.repeat(input, repeats, axis) out[0] = numpy.repeat(input, repeats, axis)
def grad(self, (input, repeats, axis), (gout, )): def grad(self, inp, grads):
input, repeats, axis = inp
gout, = grads
return add.grad((input, gout), (gout,))[:1] return add.grad((input, gout), (gout,))[:1]
repeat = Repeat() repeat = Repeat()
...@@ -2428,7 +2458,9 @@ class Default(gof.Op): ...@@ -2428,7 +2458,9 @@ class Default(gof.Op):
if x.type != default.type: if x.type != default.type:
raise TypeError('Both default() arguments must have same type', x, default) raise TypeError('Both default() arguments must have same type', x, default)
return gof.Apply(self, [x, default], [default.type()]) return gof.Apply(self, [x, default], [default.type()])
def perform(self, node, (x, default), (out, )): def perform(self, node, inp, out_):
x, default = inp
out, = out_
if x is None: if x is None:
# why copy? Theano can't yet understand out[0] being a view of either x or y, # why copy? Theano can't yet understand out[0] being a view of either x or y,
# so we can be a view of x, but only a copy of y. # so we can be a view of x, but only a copy of y.
...@@ -2655,7 +2687,8 @@ class Subtensor(Op): ...@@ -2655,7 +2687,8 @@ class Subtensor(Op):
[tensor(dtype = x.type.dtype, [tensor(dtype = x.type.dtype,
broadcastable = broadcastable)]) broadcastable = broadcastable)])
def perform(self, node, inputs, (out, )): def perform(self, node, inputs, out_):
out, = out_
x = inputs[0] x = inputs[0]
# The subtensor (or idx_list) does not depend on the inputs. # The subtensor (or idx_list) does not depend on the inputs.
...@@ -2717,7 +2750,8 @@ class Subtensor(Op): ...@@ -2717,7 +2750,8 @@ class Subtensor(Op):
assert len(outshp) == node.outputs[0].ndim assert len(outshp) == node.outputs[0].ndim
return [outshp] return [outshp]
def grad(self, inputs, (gz,)): def grad(self, inputs, grads):
gz, = grads
x = inputs[0] x = inputs[0]
rest = inputs[1:] rest = inputs[1:]
return [IncSubtensor(self.idx_list)(zeros_like(x), gz, *rest)] + [None] * len(rest) return [IncSubtensor(self.idx_list)(zeros_like(x), gz, *rest)] + [None] * len(rest)
...@@ -2935,7 +2969,8 @@ class IncSubtensor(Op): ...@@ -2935,7 +2969,8 @@ class IncSubtensor(Op):
(x, y) + inputs, (x, y) + inputs,
[x.type()]) [x.type()])
def perform(self, node, inputs, (out, )): def perform(self, node, inputs, out_):
out, = out_
x, y = inputs[:2] x, y = inputs[:2]
indices = list(reversed(inputs[2:])) indices = list(reversed(inputs[2:]))
...@@ -2973,7 +3008,8 @@ class IncSubtensor(Op): ...@@ -2973,7 +3008,8 @@ class IncSubtensor(Op):
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
return [shapes[0]] return [shapes[0]]
def grad(self, inputs, (g_output,)): def grad(self, inputs, grads):
g_output, = grads
x, y = inputs[:2] x, y = inputs[:2]
idx_list = inputs[2:] idx_list = inputs[2:]
...@@ -3052,8 +3088,9 @@ class Split(Op): ...@@ -3052,8 +3088,9 @@ class Split(Op):
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def perform(self, node, (x, axis, splits), outputs): def perform(self, node, inputs, outputs):
"""WRITEME""" """WRITEME"""
x, axis, splits = inputs
#in python 2.4, x.shape[numpy.asarray(1)] don't work. #in python 2.4, x.shape[numpy.asarray(1)] don't work.
if sys.version_info[0:2]==(2, 4) and axis.size==1: if sys.version_info[0:2]==(2, 4) and axis.size==1:
axis=int(axis) axis=int(axis)
...@@ -3084,8 +3121,9 @@ class Split(Op): ...@@ -3084,8 +3121,9 @@ class Split(Op):
outputs[i][0] = x.__getitem__(general_key).copy() outputs[i][0] = x.__getitem__(general_key).copy()
lower_idx = upper_idx lower_idx = upper_idx
def grad(self, (x, axis, splits), g_outputs): def grad(self, inputs, g_outputs):
"""Join the gradients along the axis that was used to split x.""" """Join the gradients along the axis that was used to split x."""
_, axis, _ = inputs
return [join(axis, *g_outputs), None, None] return [join(axis, *g_outputs), None, None]
...@@ -3124,12 +3162,16 @@ class Rebroadcast(Op): ...@@ -3124,12 +3162,16 @@ class Rebroadcast(Op):
broadcastable = [self.axis.get(i, b) broadcastable = [self.axis.get(i, b)
for i, b in enumerate(x.type.broadcastable)]) for i, b in enumerate(x.type.broadcastable)])
return Apply(self, [x], [t()]) return Apply(self, [x], [t()])
def perform(self, node, (x, ), (out, )): def perform(self, node, inp, out_):
x, = inp
out, = out_
for axis, value in self.axis.iteritems(): for axis, value in self.axis.iteritems():
if value and x.shape[axis] != 1: if value and x.shape[axis] != 1:
raise ValueError('Dimension %s in Rebroadcast\'s input was supposed to be 1 (got %s instead)' % (axis, x.shape[axis])) raise ValueError('Dimension %s in Rebroadcast\'s input was supposed to be 1 (got %s instead)' % (axis, x.shape[axis]))
out[0] = x out[0] = x
def grad(self, (x, ), (gz,)): def grad(self, inp, grads):
x, = inp
gz, = grads
# restore the broadcasting pattern of the input # restore the broadcasting pattern of the input
return Rebroadcast(*[(axis, x.type.broadcastable[axis]) for axis, value in self.axis.iteritems()])(gz), return Rebroadcast(*[(axis, x.type.broadcastable[axis]) for axis, value in self.axis.iteritems()])(gz),
...@@ -3272,15 +3314,17 @@ class Join(Op): ...@@ -3272,15 +3314,17 @@ class Join(Op):
node.tag.shape_zero = len(orig) node.tag.shape_zero = len(orig)
return node return node
def perform(self, node, axis_and_tensors, (out, )): def perform(self, node, axis_and_tensors, out_):
out, = out_
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:] axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
out[0] = theano._asarray(numpy.concatenate(tensors, axis = axis), out[0] = theano._asarray(numpy.concatenate(tensors, axis = axis),
dtype=node.outputs[0].type.dtype) dtype=node.outputs[0].type.dtype)
def grad(self, axis_and_tensors, (gz,)): def grad(self, axis_and_tensors, grads):
""" The gradient wrt a join op is a `Split`, used to partition the gradient along the """ The gradient wrt a join op is a `Split`, used to partition the gradient along the
`axis` which was used for joining. `axis` which was used for joining.
""" """
gz, = grads
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:] axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
if 'float' in tensors[0].dtype or 'complex' in tensors[0].dtype: if 'float' in tensors[0].dtype or 'complex' in tensors[0].dtype:
# assume that this is differentiable # assume that this is differentiable
...@@ -3291,8 +3335,9 @@ class Join(Op): ...@@ -3291,8 +3335,9 @@ class Join(Op):
# assume that this isn't differentiable # assume that this isn't differentiable
return [None] * (1 + len(tensors)) return [None] * (1 + len(tensors))
def _native_grad(self, axis_and_tensors, (gz,)): def _native_grad(self, axis_and_tensors, grads):
"""WRITEME""" """WRITEME"""
gz, = grads
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:] axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
sizes_along_axis = [shape(x)[axis] for x in tensors] sizes_along_axis = [shape(x)[axis] for x in tensors]
n_dims = len(shape(tensors[0])) n_dims = len(shape(tensors[0]))
...@@ -3484,12 +3529,14 @@ if 0: #vertical and horizontal stacking are deprecated. Better to use stack() a ...@@ -3484,12 +3529,14 @@ if 0: #vertical and horizontal stacking are deprecated. Better to use stack() a
outputs = [tensor(dtype = x.type.dtype, outputs = [tensor(dtype = x.type.dtype,
broadcastable = bcastable)] broadcastable = bcastable)]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def perform(self, node, (x, y), (out, )): def perform(self, node, inp, out_):
x, y = inp
out, = out_
assert x.ndim == y.ndim assert x.ndim == y.ndim
# Make sure every dimension (save the first) is the same # Make sure every dimension (save the first) is the same
for i in range(x.ndim): assert i == 0 or x.shape[i] == y.shape[i] for i in range(x.ndim): assert i == 0 or x.shape[i] == y.shape[i]
out[0] = numpy.vstack([x, y]) out[0] = numpy.vstack([x, y])
def grad(self, (x, y), (gz,)): def grad(self, inp, grads):
""" """
@todo: Make VSplit (or this grad implementation) its own L{Op}, @todo: Make VSplit (or this grad implementation) its own L{Op},
that way we can do more sanity-checking:: that way we can do more sanity-checking::
...@@ -3498,6 +3545,8 @@ if 0: #vertical and horizontal stacking are deprecated. Better to use stack() a ...@@ -3498,6 +3545,8 @@ if 0: #vertical and horizontal stacking are deprecated. Better to use stack() a
for i in range(x.data.ndim): assert i == 0 or x.data.shape[i] == y.shape[i] for i in range(x.data.ndim): assert i == 0 or x.data.shape[i] == y.shape[i]
etc... etc...
""" """
x, y = inp
gz, = grads
xs = shape(x) xs = shape(x)
ys = shape(y) ys = shape(y)
return gz[:xs[0]], gz[xs[0]:] return gz[:xs[0]], gz[xs[0]:]
...@@ -3548,7 +3597,9 @@ class Reshape(Op): ...@@ -3548,7 +3597,9 @@ class Reshape(Op):
except TypeError: except TypeError:
pass pass
return gof.Apply(self, [x, shp], [tensor(x.type.dtype, bcasts)]) return gof.Apply(self, [x, shp], [tensor(x.type.dtype, bcasts)])
def perform(self, node, (x, shp), (out,)): def perform(self, node, inp, out_):
x, shp = inp
out, = out_
if (len(shp) != self.ndim): if (len(shp) != self.ndim):
raise ValueError('shape argument to Reshape.perform has incorrect length %i' raise ValueError('shape argument to Reshape.perform has incorrect length %i'
', should be %i' % (len(shp), self.ndim), shp) ', should be %i' % (len(shp), self.ndim), shp)
...@@ -3556,7 +3607,9 @@ class Reshape(Op): ...@@ -3556,7 +3607,9 @@ class Reshape(Op):
out[0] = numpy.reshape(x, shp) out[0] = numpy.reshape(x, shp)
except: except:
raise ValueError('Cannot reshape input of shape %s to shape %s' % (x.shape,shp)) raise ValueError('Cannot reshape input of shape %s to shape %s' % (x.shape,shp))
def grad(self, (x, shp), (g_out,)): def grad(self, inp, grads):
x, shp = inp
g_out, = grads
return [reshape(g_out, shape(x), ndim=x.ndim), None] return [reshape(g_out, shape(x), ndim=x.ndim), None]
def infer_shape(self, node, ishapes): def infer_shape(self, node, ishapes):
#we can't just put node.inputs[1] as not all op support interation #we can't just put node.inputs[1] as not all op support interation
...@@ -3589,7 +3642,9 @@ class Flatten(Op): ...@@ -3589,7 +3642,9 @@ class Flatten(Op):
if self.outdim < 1 or (x.ndim and self.outdim > x.ndim): if self.outdim < 1 or (x.ndim and self.outdim > x.ndim):
raise ValueError('invalid output ndimensions(%i) for tensor of rank %i' %(self.outdim, t_x.ndim)) raise ValueError('invalid output ndimensions(%i) for tensor of rank %i' %(self.outdim, t_x.ndim))
return gof.Apply(self, [t_x], [tensor(x.type.dtype, (False,)*self.outdim)]) return gof.Apply(self, [t_x], [tensor(x.type.dtype, (False,)*self.outdim)])
def perform(self, node, (x,), (out,)): def perform(self, node, inp, out_):
x, = inp
out, = out_
outdim = self.outdim outdim = self.outdim
if outdim == 1: if outdim == 1:
try: try:
...@@ -3602,7 +3657,9 @@ class Flatten(Op): ...@@ -3602,7 +3657,9 @@ class Flatten(Op):
newshape = x.shape[:outdim-1] + (numpy.prod(x.shape[outdim-1:]),) newshape = x.shape[:outdim-1] + (numpy.prod(x.shape[outdim-1:]),)
#print 'newshape', newshape, x.shape, x.shape #print 'newshape', newshape, x.shape, x.shape
out[0] = x.reshape(newshape) out[0] = x.reshape(newshape)
def grad(self, (x,), (g_out,)): def grad(self, inp, grads):
x, = inp
g_out, = grads
return [reshape(g_out, shape(x), x.ndim)] return [reshape(g_out, shape(x), x.ndim)]
def flatten(x, outdim=1): def flatten(x, outdim=1):
...@@ -3613,7 +3670,9 @@ class TileGrad(Op): ...@@ -3613,7 +3670,9 @@ class TileGrad(Op):
#this is so weird, I can't think of how to make this a general thing. #this is so weird, I can't think of how to make this a general thing.
def make_node(self, x, reps, g_out): def make_node(self, x, reps, g_out):
return gof.Apply(self, [x, reps, g_out], [x.type()]) return gof.Apply(self, [x, reps, g_out], [x.type()])
def perform(self, node, (x, reps, g_out), (gx,)): def perform(self, node, inp, out):
x, reps, g_out = inp
gx, = out
xsh = x.shape xsh = x.shape
if len(reps)==2 and reps[1] == 1 and len(x.shape) == 1: if len(reps)==2 and reps[1] == 1 and len(x.shape) == 1:
gx[0] = numpy.sum(g_out, axis=0) gx[0] = numpy.sum(g_out, axis=0)
...@@ -3645,11 +3704,15 @@ class Tile(Op): ...@@ -3645,11 +3704,15 @@ class Tile(Op):
x = as_tensor_variable(x) x = as_tensor_variable(x)
reps = as_tensor_variable(reps) reps = as_tensor_variable(reps)
return gof.Apply(self, [x, reps], [tensor(x.type.dtype, [False,] * self.ndim)]) return gof.Apply(self, [x, reps], [tensor(x.type.dtype, [False,] * self.ndim)])
def perform(self, node, (x, reps), (out,)): def perform(self, node, inp, out_):
x, reps = inp
out, = out_
out[0] = numpy.tile(x, reps) out[0] = numpy.tile(x, reps)
if len(out[0].shape) != self.ndim: if len(out[0].shape) != self.ndim:
raise ValueError('Tile.perform produced incorrect shape') raise ValueError('Tile.perform produced incorrect shape')
def grad(self, (x, reps), (g_out,)): def grad(self, inp, grads):
x, reps = inp
g_out, = grads
return [tilegrad(x, reps, g_out), None] return [tilegrad(x, reps, g_out), None]
def tile(x, reps, ndim=None): def tile(x, reps, ndim=None):
...@@ -3712,13 +3775,16 @@ class ARange(Op): ...@@ -3712,13 +3775,16 @@ class ARange(Op):
return [(maximum(cast(ceil(cast((stop-start),'float64') return [(maximum(cast(ceil(cast((stop-start),'float64')
/step),'int64'),0),)] /step),'int64'),0),)]
def perform(self, node, (start, stop, step), (out,)): def perform(self, node, inp, out_):
start, stop, step = inp
out, = out_
start = start.item() start = start.item()
stop = stop.item() stop = stop.item()
step = step.item() step = step.item()
out[0] = numpy.arange(start, stop, step, dtype=self.dtype) out[0] = numpy.arange(start, stop, step, dtype=self.dtype)
def grad(self, inputs, (gz,)): def grad(self, inputs, grads):
gz, = grads
return [None] * len(inputs) return [None] * len(inputs)
_arange = {} _arange = {}
...@@ -3831,7 +3897,9 @@ class PermuteRowElements(Op): ...@@ -3831,7 +3897,9 @@ class PermuteRowElements(Op):
else: else:
raise ValueError('Dimension mismatch: %s, %s' % (xs0, ys0)) raise ValueError('Dimension mismatch: %s, %s' % (xs0, ys0))
def perform(self, node, (x, y, inverse), (outs,)): def perform(self, node, inp, out):
x, y, inverse = inp
outs, = out
x_s = x.shape x_s = x.shape
y_s = y.shape y_s = y.shape
assert len(x_s) == len(y_s) assert len(x_s) == len(y_s)
...@@ -3854,7 +3922,9 @@ class PermuteRowElements(Op): ...@@ -3854,7 +3922,9 @@ class PermuteRowElements(Op):
self._rec_perform(node, x, y, inverse, outs[0], curdim=0) self._rec_perform(node, x, y, inverse, outs[0], curdim=0)
def grad(self, (x, y, inverse), (gz,)): def grad(self, inp, grads):
x, y, inverse = inp
gz, = grads
# First, compute the gradient wrt the broadcasted x. # First, compute the gradient wrt the broadcasted x.
# If 'inverse' is False (0), apply the inverse of y on gz. # If 'inverse' is False (0), apply the inverse of y on gz.
# Else, apply y on gz. # Else, apply y on gz.
...@@ -3930,10 +4000,13 @@ class AdvancedSubtensor1(Op): ...@@ -3930,10 +4000,13 @@ class AdvancedSubtensor1(Op):
return Apply(self, [x_, ilist_], [x_.type()]) return Apply(self, [x_, ilist_], [x_.type()])
def perform(self, node, (x,i), (out,)): def perform(self, node, inp, out_):
x, i = inp
out, = out_
out[0] = x[i] out[0] = x[i]
def grad(self, inputs, (gz,)): def grad(self, inputs, grads):
gz, = grads
class NotImplementedOp(Op): class NotImplementedOp(Op):
# This op should be pruned from the graph. # This op should be pruned from the graph.
# This Op can be created in a graph, # This Op can be created in a graph,
...@@ -3998,14 +4071,16 @@ class AdvancedSubtensor(Op): ...@@ -3998,14 +4071,16 @@ class AdvancedSubtensor(Op):
# Default case, we don't know # Default case, we don't know
return node.env.shape_feature.default_infer_shape(node, ishapes) return node.env.shape_feature.default_infer_shape(node, ishapes)
def perform(self, node, inputs, (out,)): def perform(self, node, inputs, out_):
out, = out_
# TODO: in general, we need to re-pack the inputs into a valid index, just like # TODO: in general, we need to re-pack the inputs into a valid index, just like
# subtensor # subtensor
out[0] = inputs[0].__getitem__(inputs[1:]) out[0] = inputs[0].__getitem__(inputs[1:])
#return #return
#raise NotImplementedError() #raise NotImplementedError()
def grad(self, inputs, (gz,)): def grad(self, inputs, grads):
gz, = grads
x = inputs[0] x = inputs[0]
rest = inputs[1:] rest = inputs[1:]
return [AdvancedIncSubtensor(self.args)(zeros_like(x), gz, *rest)] + [None]*len(rest) return [AdvancedIncSubtensor(self.args)(zeros_like(x), gz, *rest)] + [None]*len(rest)
...@@ -4034,7 +4109,8 @@ class AdvancedIncSubtensor(Op): ...@@ -4034,7 +4109,8 @@ class AdvancedIncSubtensor(Op):
raise NotImplementedError('Advanced indexing increment of x (of dim %i) by y (of dim %i) with arguments (%s) not supported yet'\ raise NotImplementedError('Advanced indexing increment of x (of dim %i) by y (of dim %i) with arguments (%s) not supported yet'\
% (x.ndim, y.ndim, ','.join(str(input) for input in inputs))) % (x.ndim, y.ndim, ','.join(str(input) for input in inputs)))
def perform(self, node, inputs, (out,)): def perform(self, node, inputs, out_):
out, = out_
# TODO: same thing as in AdvancedSubtensor's perform TODO # TODO: same thing as in AdvancedSubtensor's perform TODO
out[0] = inputs[0].copy() out[0] = inputs[0].copy()
out[0][inputs[2:]] += inputs[1] out[0][inputs[2:]] += inputs[1]
...@@ -4116,7 +4192,9 @@ class Dot(Op): ...@@ -4116,7 +4192,9 @@ class Dot(Op):
outputs = [tensor(scal.upcast(*i_dtypes), bz)] outputs = [tensor(scal.upcast(*i_dtypes), bz)]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def perform(self, node, (x, y), (z, )): def perform(self, node, inp, out):
x, y = inp
z, = out
try: try:
# the asarray is here because dot between two vectors gives a numpy float object # the asarray is here because dot between two vectors gives a numpy float object
# but we need to return a 0d ndarray # but we need to return a 0d ndarray
...@@ -4126,7 +4204,9 @@ class Dot(Op): ...@@ -4126,7 +4204,9 @@ class Dot(Op):
e.args = e.args + (x.shape, y.shape) e.args = e.args + (x.shape, y.shape)
raise raise
def grad(self, (x, y), (gz,)): def grad(self, inp, grads):
x, y = inp
gz, = grads
if gz.type.ndim == 0: if gz.type.ndim == 0:
rval = gz * y, gz * x rval = gz * y, gz * x
elif x.type.ndim == 1 and y.type.ndim > 1: elif x.type.ndim == 1 and y.type.ndim > 1:
...@@ -4137,7 +4217,8 @@ class Dot(Op): ...@@ -4137,7 +4217,8 @@ class Dot(Op):
rval = dot(gz, y.T), dot(x.T, gz) rval = dot(gz, y.T), dot(x.T, gz)
return cast(rval[0], x.dtype), cast(rval[1], y.dtype) return cast(rval[0], x.dtype), cast(rval[1], y.dtype)
def infer_shape(self, node, (xshp,yshp)): def infer_shape(self, node, shapes):
xshp, yshp = shapes
x, y = node.inputs x, y = node.inputs
if x.ndim == 2 and y.ndim == 2: if x.ndim == 2 and y.ndim == 2:
return [(xshp[0], yshp[1])] return [(xshp[0], yshp[1])]
...@@ -4181,7 +4262,9 @@ class TensorDotGrad(Op): ...@@ -4181,7 +4262,9 @@ class TensorDotGrad(Op):
op = TensorDotGrad(axes) op = TensorDotGrad(axes)
return Apply(op, [x,y,gz], [gx, gy]) return Apply(op, [x,y,gz], [gx, gy])
def perform(self, node, (x, y, gz), (gx,gy)): def perform(self, node, inp, out):
x, y, gz = inp
gx, gy = out
sum_over_y = range(y.ndim) sum_over_y = range(y.ndim)
[sum_over_y.remove(q) for q in self.axes[1]] [sum_over_y.remove(q) for q in self.axes[1]]
...@@ -4260,7 +4343,9 @@ class TensorDot(Op): ...@@ -4260,7 +4343,9 @@ class TensorDot(Op):
broadcastable=[False]*outdim); broadcastable=[False]*outdim);
return Apply(op, inputs=[x,y], outputs=[output,]) return Apply(op, inputs=[x,y], outputs=[output,])
def perform(self, node, (x, y), (z,)): def perform(self, node, inp, out):
x, y = inp
z, = out
try: try:
z[0] = numpy.asarray(numpy.tensordot(x, y, self.axes)) z[0] = numpy.asarray(numpy.tensordot(x, y, self.axes))
except ValueError, e: except ValueError, e:
...@@ -4268,7 +4353,9 @@ class TensorDot(Op): ...@@ -4268,7 +4353,9 @@ class TensorDot(Op):
e.args = e.args + (x.shape, y.shape, self.axes) e.args = e.args + (x.shape, y.shape, self.axes)
raise raise
def grad(self, (x, y), (gz,)): def grad(self, inp, grads):
x, y = inp
gz, = grads
gx, gy = tensordot_grad(self.axes)(x, y, gz) gx, gy = tensordot_grad(self.axes)(x, y, gz)
return [gx, gy] return [gx, gy]
...@@ -4335,9 +4422,13 @@ class Outer(Op): ...@@ -4335,9 +4422,13 @@ class Outer(Op):
outputs = [tensor(scal.upcast(*i_dtypes), bz)] outputs = [tensor(scal.upcast(*i_dtypes), bz)]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def perform(self, node, (x, y), (z, )): def perform(self, node, inp, out):
x, y = inp
z, = out
z[0] = numpy.outer(x, y) z[0] = numpy.outer(x, y)
def grad(self, (x, y), (gz,)): def grad(self, inp, grads):
x, y = inp
gz, = grads
return dot(gz, y), dot(x, gz) #no transposing necessary return dot(gz, y), dot(x, gz) #no transposing necessary
def __str__(self): def __str__(self):
return "outer" return "outer"
......
...@@ -477,7 +477,9 @@ class Gemm(GemmRelated): ...@@ -477,7 +477,9 @@ class Gemm(GemmRelated):
if len(bb): raise ValueError(Gemm.E_scalar, bb) if len(bb): raise ValueError(Gemm.E_scalar, bb)
output = z.type() output = z.type()
return Apply(self, inputs, [output]) return Apply(self, inputs, [output])
def perform(self, node, (z, a, x, y, b), (zout, )): def perform(self, node, inp, out):
z, a, x, y, b = inp
zout, = out
assert a.shape == () assert a.shape == ()
assert b.shape == () assert b.shape == ()
if not self.inplace: if not self.inplace:
...@@ -596,7 +598,9 @@ class Gemm(GemmRelated): ...@@ -596,7 +598,9 @@ class Gemm(GemmRelated):
#undef REAL #undef REAL
""" """
def c_code(self, node, name, (_z, _a, _x, _y, _b), (_zout, ), sub): #DEBUG def c_code(self, node, name, inp, out, sub): #DEBUG
_z, _a, _x, _y, _b = inp
_zout, = out
if node.inputs[0].type.dtype.startswith('complex'): if node.inputs[0].type.dtype.startswith('complex'):
raise utils.MethodNotDefined('%s.c_code' \ raise utils.MethodNotDefined('%s.c_code' \
% self.__class__.__name__) % self.__class__.__name__)
...@@ -949,7 +953,9 @@ class Dot22(GemmRelated): ...@@ -949,7 +953,9 @@ class Dot22(GemmRelated):
outputs = [T.tensor(x.type.dtype, bz)] outputs = [T.tensor(x.type.dtype, bz)]
return Apply(self, [x,y], outputs) return Apply(self, [x,y], outputs)
def perform(self, node, (x, y), (z, )): def perform(self, node, inp, out):
x, y = inp
z, = out
try: try:
z[0] = numpy.asarray(numpy.dot(x, y)) z[0] = numpy.asarray(numpy.dot(x, y))
except ValueError, e: except ValueError, e:
...@@ -988,7 +994,9 @@ class Dot22(GemmRelated): ...@@ -988,7 +994,9 @@ class Dot22(GemmRelated):
double a = 1.0; double a = 1.0;
double b = 0.0; double b = 0.0;
""" """
def c_code(self, node, name, (_x, _y), (_zout, ), sub): #DEBUG def c_code(self, node, name, inp, out, sub): #DEBUG
_x, _y = inp
_zout, = out
if node.inputs[0].type.dtype.startswith('complex'): if node.inputs[0].type.dtype.startswith('complex'):
raise utils.MethodNotDefined('%s.c_code' \ raise utils.MethodNotDefined('%s.c_code' \
% self.__class__.__name__) % self.__class__.__name__)
...@@ -1083,7 +1091,9 @@ class Dot22Scalar(GemmRelated): ...@@ -1083,7 +1091,9 @@ class Dot22Scalar(GemmRelated):
outputs = [T.tensor(x.type.dtype, bz)] outputs = [T.tensor(x.type.dtype, bz)]
return Apply(self, [x,y,scalar], outputs) return Apply(self, [x,y,scalar], outputs)
def perform(self, node, (x, y, scalar), (z, )): def perform(self, node, inp, out):
x, y, scalar = inp
z, = out
try: try:
z[0] = scalar * numpy.asarray(numpy.dot(x, y)) z[0] = scalar * numpy.asarray(numpy.dot(x, y))
except ValueError, e: except ValueError, e:
...@@ -1117,7 +1127,9 @@ class Dot22Scalar(GemmRelated): ...@@ -1117,7 +1127,9 @@ class Dot22Scalar(GemmRelated):
#undef REAL #undef REAL
double b = 0.0; double b = 0.0;
""" """
def c_code(self, node, name, (_x, _y, _a), (_zout, ), sub): #DEBUG def c_code(self, node, name, inp, out, sub): #DEBUG
_x, _y, _a = inp
_zout, = out
if len(self.c_libraries())<=0: if len(self.c_libraries())<=0:
return super(Dot22Scalar, self).c_code(node, name, (_x, _y), (_zout, ), sub) return super(Dot22Scalar, self).c_code(node, name, (_x, _y), (_zout, ), sub)
full_code = self.build_gemm_call() % dict(locals(), **sub) full_code = self.build_gemm_call() % dict(locals(), **sub)
......
...@@ -179,7 +179,9 @@ class DimShuffle(Op): ...@@ -179,7 +179,9 @@ class DimShuffle(Op):
else: else:
return "DimShuffle{%s}" % ",".join(str(x) for x in self.new_order) return "DimShuffle{%s}" % ",".join(str(x) for x in self.new_order)
def perform(self, node, (input, ), (storage, )): def perform(self, node, inp, out):
input, = inp
storage, = out
# drop # drop
res = input res = input
if type(res) != numpy.ndarray: if type(res) != numpy.ndarray:
...@@ -204,7 +206,8 @@ class DimShuffle(Op): ...@@ -204,7 +206,8 @@ class DimShuffle(Op):
storage[0] = numpy.asarray(res) #asarray puts scalars back into array storage[0] = numpy.asarray(res) #asarray puts scalars back into array
def infer_shape(self, node, (ishp,)): def infer_shape(self, node, shapes):
ishp, = shapes
ishp = list(ishp) ishp = list(ishp)
for drop in reversed(self.drop): for drop in reversed(self.drop):
del ishp[drop] del ishp[drop]
...@@ -216,7 +219,9 @@ class DimShuffle(Op): ...@@ -216,7 +219,9 @@ class DimShuffle(Op):
rval.insert(augm, 1) rval.insert(augm, 1)
return [rval] return [rval]
def c_code(self, node, name, (input,), (res,), sub): def c_code(self, node, name, inp, out, sub):
input, = inp
res, = out
basename = input + '__view_or_copy' basename = input + '__view_or_copy'
def statements(lst): def statements(lst):
...@@ -317,7 +322,9 @@ class DimShuffle(Op): ...@@ -317,7 +322,9 @@ class DimShuffle(Op):
def c_code_cache_version(self):
    # bump when the generated C code changes incompatibly
    return (1,)
def grad(self, (x, ), (gz, )): def grad(self, inp, grads):
x, = inp
gz, = grads
gz = as_tensor_variable(gz) gz = as_tensor_variable(gz)
grad_order = ['x'] * len(x.type.broadcastable) grad_order = ['x'] * len(x.type.broadcastable)
for i, v in enumerate(self.new_order): for i, v in enumerate(self.new_order):
...@@ -934,7 +941,9 @@ class CAReduce(Op): ...@@ -934,7 +941,9 @@ class CAReduce(Op):
else: else:
return "Reduce{%s}" % self.scalar_op return "Reduce{%s}" % self.scalar_op
def perform(self, node, (input, ), (output, )): def perform(self, node, inp, out):
input, = inp
output, = out
axis = self.axis axis = self.axis
if axis is None: if axis is None:
axis = range(input.ndim) axis = range(input.ndim)
...@@ -959,7 +968,8 @@ class CAReduce(Op): ...@@ -959,7 +968,8 @@ class CAReduce(Op):
else: else:
output[0] = numpy.copy(variable) output[0] = numpy.copy(variable)
def infer_shape(self, node, (ishape,)): def infer_shape(self, node, shapes):
ishape, = shapes
axis = self.axis axis = self.axis
if axis is None: if axis is None:
return (), return (),
...@@ -1115,7 +1125,9 @@ class Sum(CAReduce): ...@@ -1115,7 +1125,9 @@ class Sum(CAReduce):
uint32='uint64', uint32='uint64',
).get(idtype, idtype) ).get(idtype, idtype)
def grad(self, (x, ), (gz, )): def grad(self, inp, grads):
x, = inp
gz, = grads
gz = as_tensor_variable(gz) gz = as_tensor_variable(gz)
axis = self.axis axis = self.axis
if axis is None: if axis is None:
...@@ -1176,7 +1188,7 @@ class Prod(CAReduce): ...@@ -1176,7 +1188,7 @@ class Prod(CAReduce):
uint32='uint64', uint32='uint64',
).get(idtype, idtype) ).get(idtype, idtype)
def grad(self, (prod_in, ), (gz, )): def grad(self, inp, grads):
''' '''
The grad of this Op could be very easy, it is was not for the case The grad of this Op could be very easy, it is was not for the case
where zeros are present in a given "group" (ie. elements reduced where zeros are present in a given "group" (ie. elements reduced
...@@ -1221,6 +1233,8 @@ class Prod(CAReduce): ...@@ -1221,6 +1233,8 @@ class Prod(CAReduce):
the "T.eq()" bits), then taking this or that behavior (see T.switch) the "T.eq()" bits), then taking this or that behavior (see T.switch)
based on the result of this count. based on the result of this count.
''' '''
prod_in, = inp
gz, = grads
if prod_in.dtype[0:3] in ('int','uin'): if prod_in.dtype[0:3] in ('int','uin'):
return [None] return [None]
...@@ -1314,7 +1328,9 @@ class MulWithoutZeros(scalar.BinaryScalarOp): ...@@ -1314,7 +1328,9 @@ class MulWithoutZeros(scalar.BinaryScalarOp):
return x return x
return x*y return x*y
def c_code(self, node, name, inp, out, sub):
    """Emit C code for a multiply where a zero operand acts as identity:
    z = y if x == 0, x if y == 0, else x*y."""
    x, y = inp
    z, = out
    template = ("%(z)s = ((%(x)s == 0) ? (%(y)s) : "
                "((%(y)s == 0) ? (%(x)s) : ((%(y)s)*(%(x)s))) );")
    return template % {'x': x, 'y': y, 'z': z}
......
...@@ -161,7 +161,8 @@ class Conv3D(theano.Op): ...@@ -161,7 +161,8 @@ class Conv3D(theano.Op):
def c_header_dirs(self): def c_header_dirs(self):
return ldflags(libs=False, include_dir=True) return ldflags(libs=False, include_dir=True)
def c_code(self, node, nodename, (V,W,b,d), outputs, sub): def c_code(self, node, nodename, inputs, outputs, sub):
V, W, b, d = inputs
fail = sub['fail'] fail = sub['fail']
H = outputs[0] H = outputs[0]
......
...@@ -83,7 +83,8 @@ class ConvGrad3D(theano.Op): ...@@ -83,7 +83,8 @@ class ConvGrad3D(theano.Op):
flags = ['-Werror'] flags = ['-Werror']
return flags return flags
def c_code(self, node, nodename, (V,d,WShape,dCdH), outputs, sub): def c_code(self, node, nodename, inputs, outputs, sub):
V, d, WShape, dCdH = inputs
fail = sub['fail'] fail = sub['fail']
dCdW = outputs[0] dCdW = outputs[0]
......
...@@ -86,7 +86,8 @@ class ConvTransp3D(theano.Op): ...@@ -86,7 +86,8 @@ class ConvTransp3D(theano.Op):
print "\t\t\t\tConvTransp3D python code" print "\t\t\t\tConvTransp3D python code"
output_storage[0][0] = computeR(W,b,d,H,RShape) output_storage[0][0] = computeR(W,b,d,H,RShape)
def c_code(self, node, nodename, (W, b, d, H, RShape), outputs, sub): def c_code(self, node, nodename, inputs, outputs, sub):
W, b, d, H, RShape = inputs
fail = sub['fail'] fail = sub['fail']
R = outputs[0] R = outputs[0]
......
...@@ -221,7 +221,7 @@ class ConvOp(Op): ...@@ -221,7 +221,7 @@ class ConvOp(Op):
else: return [] else: return []
@staticmethod @staticmethod
def getOutputShape(inshp, kshp, (dx,dy)=(1,1), mode='valid'): def getOutputShape(inshp, kshp, stride=(1,1), mode='valid'):
""" """
Computes the output dimensions of convolving an image of shape "inshp" Computes the output dimensions of convolving an image of shape "inshp"
with kernels of shape "kshp". with kernels of shape "kshp".
...@@ -231,6 +231,7 @@ class ConvOp(Op): ...@@ -231,6 +231,7 @@ class ConvOp(Op):
:param mode: 'valid' or 'full' (see 'border_mode' in conv2d's doc) :param mode: 'valid' or 'full' (see 'border_mode' in conv2d's doc)
:return: (rows,cols) of output image :return: (rows,cols) of output image
""" """
dx, dy = stride
if mode=='valid': s = -1 if mode=='valid': s = -1
else: s = 1 else: s = 1
inshp, kshp = numpy.array(inshp), numpy.array(kshp) inshp, kshp = numpy.array(inshp), numpy.array(kshp)
...@@ -583,10 +584,12 @@ class ConvOp(Op): ...@@ -583,10 +584,12 @@ class ConvOp(Op):
# we simply let the default function do its work. # we simply let the default function do its work.
raise NotImplementedError() raise NotImplementedError()
def perform(self,node, (img2d, filtersflipped), (z,)): def perform(self,node, inp, out):
""" """
By default if len(img2d.shape)==3, we By default if len(img2d.shape)==3, we
""" """
img2d, filtersflipped = inp
z, = out
if not imported_scipy_signal: if not imported_scipy_signal:
raise theano.gof.utils.MethodNotDefined( raise theano.gof.utils.MethodNotDefined(
"c_headers", type(self), self.__class__.__name__, "c_headers", type(self), self.__class__.__name__,
...@@ -696,7 +699,9 @@ class ConvOp(Op): ...@@ -696,7 +699,9 @@ class ConvOp(Op):
z[0]=zz z[0]=zz
def grad(self, (inputs, kerns), (gz,)): def grad(self, inp, grads):
inputs, kerns = inp
gz, = grads
if self.imshp != self.imshp_logical or self.kshp != self.kshp_logical: if self.imshp != self.imshp_logical or self.kshp != self.kshp_logical:
raise NotImplementedError('todo') raise NotImplementedError('todo')
...@@ -897,7 +902,9 @@ using namespace std; ...@@ -897,7 +902,9 @@ using namespace std;
return blas.ldflags(libs=False, include_dir=True) return blas.ldflags(libs=False, include_dir=True)
return [] return []
def c_code(self, node, name, (img2d, filtersflipped), (z, ), sub): def c_code(self, node, name, inp, out, sub):
img2d, filtersflipped = inp
z, = out
if node.inputs[0].type.dtype != node.inputs[1].type.dtype: if node.inputs[0].type.dtype != node.inputs[1].type.dtype:
raise NotImplementedError() raise NotImplementedError()
assert node.inputs[0].type.dtype == node.inputs[1].type.dtype assert node.inputs[0].type.dtype == node.inputs[1].type.dtype
......
...@@ -69,7 +69,9 @@ class SoftmaxWithBias(gof.Op): ...@@ -69,7 +69,9 @@ class SoftmaxWithBias(gof.Op):
sm[i] *= 1.0 / numpy.sum(sm[i]) sm[i] *= 1.0 / numpy.sum(sm[i])
output_storage[0][0] = sm output_storage[0][0] = sm
def grad(self, (x, b), (g_sm,)): def grad(self, inp, grads):
x, b = inp
g_sm, = grads
sm = softmax_with_bias(x, b) sm = softmax_with_bias(x, b)
dx = softmax_grad(g_sm, sm) dx = softmax_grad(g_sm, sm)
db = tensor.sum(dx, axis = 0) db = tensor.sum(dx, axis = 0)
...@@ -190,7 +192,9 @@ class SoftmaxWithBias(gof.Op): ...@@ -190,7 +192,9 @@ class SoftmaxWithBias(gof.Op):
return (init_decl, begin_row_loop, inside_row_loop, end_row_loop) return (init_decl, begin_row_loop, inside_row_loop, end_row_loop)
def c_code(self, node, name, inp, out, sub):
    # Fill the generated C template with the input/output variable
    # names and the compiler substitutions (e.g. 'fail').
    # NOTE: the template keys are the local names x, b, sm captured
    # via locals(), so these bindings must keep these exact names.
    x, b = inp
    sm, = out
    code_template = ''.join(self.c_code_template())
    return code_template % dict(locals(), **sub)
...@@ -241,7 +245,9 @@ class SoftmaxGrad(gof.Op): ...@@ -241,7 +245,9 @@ class SoftmaxGrad(gof.Op):
def c_code_cache_version(self):
    # bump when the generated C code changes incompatibly
    return (3,)
def c_code(self, node, name, (dy, sm), (dx,), sub): def c_code(self, node, name, inp, out, sub):
dy, sm = inp
dx, = out
return ''' return '''
if ((%(dy)s->descr->type_num != PyArray_DOUBLE) && (%(dy)s->descr->type_num != PyArray_FLOAT)) if ((%(dy)s->descr->type_num != PyArray_DOUBLE) && (%(dy)s->descr->type_num != PyArray_FLOAT))
{ {
...@@ -335,7 +341,9 @@ class Softmax(gof.Op): ...@@ -335,7 +341,9 @@ class Softmax(gof.Op):
sm[i] /= numpy.sum(sm[i]) sm[i] /= numpy.sum(sm[i])
output_storage[0][0] = sm output_storage[0][0] = sm
def grad(self, inp, grads):
    """Backprop through softmax by delegating to the SoftmaxGrad op,
    which needs the forward output sm."""
    x, = inp
    g_sm, = grads
    sm = softmax(x)
    return [softmax_grad(g_sm, sm)]
...@@ -637,13 +645,16 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -637,13 +645,16 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
output_storage[1][0] = sm output_storage[1][0] = sm
output_storage[2][0] = am output_storage[2][0] = am
def infer_shape(self, node, shapes):
    """Output shapes: the nll is one scalar per row of x, the softmax
    matches x, and the argmax matches the target-index vector."""
    x_shp, b_shp, idx_shp = shapes
    return [(x_shp[0],), x_shp, idx_shp]
def grad(self, (x, b, y_idx), (g_nll, g_sm, g_am)): def grad(self, inp, grads):
x, b, y_idx = inp
g_nll, g_sm, g_am = grads
if g_am is not None: if g_am is not None:
raise NotImplementedError() raise NotImplementedError()
elif g_sm is not None: elif g_sm is not None:
...@@ -745,7 +756,9 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -745,7 +756,9 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
def c_code_cache_version(self):
    # version is chained with SoftmaxWithBias because this op's C code
    # embeds that op's generated code
    return (5,) + SoftmaxWithBias.c_code_cache_version()
def c_code(self, node, name, (x, b, y_idx), (nll, sm, am), sub): def c_code(self, node, name, inp, out, sub):
x, b, y_idx = inp
nll, sm, am = out
y_idx_type = node.inputs[2].type.dtype_specs()[1] y_idx_type = node.inputs[2].type.dtype_specs()[1]
am_type = y_idx_type am_type = y_idx_type
code_template = ''.join(self.c_code_template()) code_template = ''.join(self.c_code_template())
...@@ -775,7 +788,9 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -775,7 +788,9 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
dx[i] = dy[i] * sm[i] #vector scale dx[i] = dy[i] * sm[i] #vector scale
dx[i, y_idx[i]] -= dy[i] #scalar decrement dx[i, y_idx[i]] -= dy[i] #scalar decrement
output_storage[0][0] = dx output_storage[0][0] = dx
def grad(self, (dy, sm, y_idx), (g_dx, )): def grad(self, inp, grads):
dy, sm, y_idx = inp
g_dx, = grads
# TODO: currently we do not compute the gradient w.r.t. dy, because # TODO: currently we do not compute the gradient w.r.t. dy, because
# advanced indexing is not working yet. When it works, do it to avoid # advanced indexing is not working yet. When it works, do it to avoid
# potentially misleading behavior in gradient computations! (although # potentially misleading behavior in gradient computations! (although
...@@ -790,7 +805,9 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -790,7 +805,9 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
return [g_dy, g_sm, g_y_idx] return [g_dy, g_sm, g_y_idx]
def c_code_cache_version(self):
    # bump when the generated C code changes incompatibly
    return (2,)
def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub): def c_code(self, node, name, inp, out, sub):
dnll, sm, y_idx = inp
dx, = out
y_idx_type = node.inputs[2].type.dtype_specs()[1] y_idx_type = node.inputs[2].type.dtype_specs()[1]
return """ return """
...@@ -906,7 +923,9 @@ class CrossentropyCategorical1HotGrad(gof.Op): ...@@ -906,7 +923,9 @@ class CrossentropyCategorical1HotGrad(gof.Op):
return self.__class__.__name__ return self.__class__.__name__
def make_node(self, g_y, coding_dist, true_one_of_n):
    # the gradient output has the same type as the coding distribution
    return Apply(self, [g_y, coding_dist, true_one_of_n], [coding_dist.type()])
def perform(self, node, (g_y, coding_dist, true_one_of_n), (g_coding_strg,)): def perform(self, node, inp, out):
g_y, coding_dist, true_one_of_n = inp
g_coding_strg, = out
g_coding = numpy.zeros_like(coding_dist) g_coding = numpy.zeros_like(coding_dist)
for i in xrange(len(g_y)): for i in xrange(len(g_y)):
g_coding[i, true_one_of_n[i]] = -g_y[i]/coding_dist[i, true_one_of_n[i]] g_coding[i, true_one_of_n[i]] = -g_y[i]/coding_dist[i, true_one_of_n[i]]
...@@ -956,13 +975,17 @@ class CrossentropyCategorical1Hot(gof.Op): ...@@ -956,13 +975,17 @@ class CrossentropyCategorical1Hot(gof.Op):
return Apply(self, [_coding_dist, _true_one_of_n], return Apply(self, [_coding_dist, _true_one_of_n],
[tensor.Tensor(dtype=_coding_dist.dtype, broadcastable=[False])()]) [tensor.Tensor(dtype=_coding_dist.dtype, broadcastable=[False])()])
def perform(self, node, inp, out):
    """Rowwise categorical cross-entropy:
    out[i] = -log(coding[i, one_of_n[i]])."""
    coding, one_of_n = inp
    y_out, = out
    result = numpy.zeros_like(coding[:, 0])
    for i in range(len(result)):
        result[i] = -numpy.log(coding[i, one_of_n[i]])
    y_out[0] = result
def grad(self, inp, grads):
    # gradient flows only to the coding distribution; the integer
    # target indices are not differentiable
    coding, one_of_n = inp
    g_y, = grads
    return [crossentropy_categorical_1hot_grad(g_y, coding, one_of_n), None]
crossentropy_categorical_1hot = CrossentropyCategorical1Hot() crossentropy_categorical_1hot = CrossentropyCategorical1Hot()
...@@ -1465,7 +1488,9 @@ class Prepend_scalar_constant_to_each_row(gof.Op): ...@@ -1465,7 +1488,9 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
node = Apply(op=self, inputs=[mat], outputs=[tensor.matrix()]) node = Apply(op=self, inputs=[mat], outputs=[tensor.matrix()])
return node return node
def perform(self, node, (mat, ), (output, )): def perform(self, node, inp, out):
mat, = inp
output, = out
new_shape=(mat.shape[0],mat.shape[1]+1) new_shape=(mat.shape[0],mat.shape[1]+1)
if output[0] == None: if output[0] == None:
output[0]=numpy.empty(new_shape,dtype=mat.dtype) output[0]=numpy.empty(new_shape,dtype=mat.dtype)
...@@ -1481,7 +1506,9 @@ class Prepend_scalar_constant_to_each_row(gof.Op): ...@@ -1481,7 +1506,9 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
out[:,0].fill(self.val.data) out[:,0].fill(self.val.data)
out[:,1:]=mat out[:,1:]=mat
def grad(self, inp, grads):
    # drop the gradient of the prepended constant column; the rest
    # maps one-to-one onto the input matrix
    # NOTE(review): returns a bare variable rather than a one-element
    # list — confirm callers accept this for a single-input op
    mat, = inp
    goutput, = grads
    return goutput[:,1:]
class Prepend_scalar_to_each_row(gof.Op): class Prepend_scalar_to_each_row(gof.Op):
...@@ -1506,7 +1533,9 @@ class Prepend_scalar_to_each_row(gof.Op): ...@@ -1506,7 +1533,9 @@ class Prepend_scalar_to_each_row(gof.Op):
node = Apply(op=self, inputs=[val,mat], outputs=[tensor.matrix()]) node = Apply(op=self, inputs=[val,mat], outputs=[tensor.matrix()])
return node return node
def perform(self, node, (val,mat), (output, )): def perform(self, node, inp, out):
val, mat = inp
output, = out
new_shape=(mat.shape[0],mat.shape[1]+1) new_shape=(mat.shape[0],mat.shape[1]+1)
if output[0] == None: if output[0] == None:
output[0]=numpy.empty(new_shape,dtype=mat.dtype) output[0]=numpy.empty(new_shape,dtype=mat.dtype)
...@@ -1521,7 +1550,9 @@ class Prepend_scalar_to_each_row(gof.Op): ...@@ -1521,7 +1550,9 @@ class Prepend_scalar_to_each_row(gof.Op):
out[:,0].fill(val) out[:,0].fill(val)
out[:,1:]=mat out[:,1:]=mat
def grad(self, inp, grads):
    # column 0 of the output gradient belongs to the prepended scalar;
    # the remaining columns belong to the input matrix
    val, mat = inp
    goutput, = grads
    return goutput[:,0], goutput[:,1:]
prepend_scalar_to_each_row = Prepend_scalar_to_each_row() prepend_scalar_to_each_row = Prepend_scalar_to_each_row()
......
...@@ -29,10 +29,14 @@ class ScalarSigmoid(scalar.UnaryScalarOp): ...@@ -29,10 +29,14 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
return 1.0 / (1.0 + numpy.exp(-x)) return 1.0 / (1.0 + numpy.exp(-x))
def impl(self, x):
    # delegate to the shared static implementation
    return ScalarSigmoid.st_impl(x)
def grad(self, inp, grads):
    """sigmoid'(x) = y * (1 - y) with y = sigmoid(x)."""
    x, = inp
    gz, = grads
    y = scalar_sigmoid(x)
    return [gz * y * (1.0 - y)]
def c_code(self, node, name, (x,), (z,), sub): def c_code(self, node, name, inp, out, sub):
x, = inp
z, = out
if node.inputs[0].type == scalar.float32: if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like: # These constants were obtained by looking at the output of python commands like:
# for i in xrange(750): # for i in xrange(750):
...@@ -71,9 +75,13 @@ class ScalarSoftplus(scalar.UnaryScalarOp): ...@@ -71,9 +75,13 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
return numpy.log1p(numpy.exp(x)) return numpy.log1p(numpy.exp(x))
def impl(self, x):
    # delegate to the shared static implementation
    return ScalarSoftplus.static_impl(x)
def grad(self, inp, grads):
    """softplus'(x) = d/dx log(1 + exp(x)) = sigmoid(x)."""
    x, = inp
    gz, = grads
    return [gz * scalar_sigmoid(x)]
def c_code(self, node, name, (x,), (z,), sub): def c_code(self, node, name, inp, out, sub):
x, = inp
z, = out
if node.inputs[0].type == scalar.float32: if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like: # These constants were obtained by looking at the output of python commands like:
# for i in xrange(750): # for i in xrange(750):
......
...@@ -349,7 +349,8 @@ class MakeVector(T.Op): ...@@ -349,7 +349,8 @@ class MakeVector(T.Op):
return T.Apply(self, inputs, [otype()]) return T.Apply(self, inputs, [otype()])
def __str__(self):
    # printable op name used in graph printouts
    return self.__class__.__name__
def perform(self, node, inputs, (out,)): def perform(self, node, inputs, out_):
out, = out_
# not calling theano._asarray as optimization # not calling theano._asarray as optimization
if out[0] is None: if out[0] is None:
out[0] = theano._asarray(inputs, dtype=node.outputs[0].dtype) out[0] = theano._asarray(inputs, dtype=node.outputs[0].dtype)
...@@ -395,14 +396,18 @@ class Shape_i(T.Op): ...@@ -395,14 +396,18 @@ class Shape_i(T.Op):
if x.ndim <= self.i: if x.ndim <= self.i:
raise TypeError('x has too few dimensions for Shape_i', (x, self.i)) raise TypeError('x has too few dimensions for Shape_i', (x, self.i))
return T.Apply(self, [x], [T.lscalar()]) return T.Apply(self, [x], [T.lscalar()])
def perform(self, node, inp, out_):
    """Return x.shape[self.i] as an int64 scalar, reusing the output
    buffer in place when one is already allocated."""
    x, = inp
    out, = out_
    if out[0] is None:
        out[0] = theano._asarray(x.shape[self.i], dtype='int64')
    else:
        out[0][...] = x.shape[self.i]
def c_code_cache_version(self):
    # bump when the generated C code changes incompatibly
    return (0,1)
def c_code(self, node, name, (x, ), (out, ), sub): def c_code(self, node, name, inp, out_, sub):
x, = inp
out, = out_
i = self.i i = self.i
if isinstance(node.inputs[0].type,T.TensorType): if isinstance(node.inputs[0].type,T.TensorType):
return """ return """
...@@ -423,7 +428,7 @@ class Shape_i(T.Op): ...@@ -423,7 +428,7 @@ class Shape_i(T.Op):
# various types of variables. # various types of variables.
# Do not continue this madness. # Do not continue this madness.
return super(Shape_i, self).c_code(node, name, (x,), (out,), sub) return super(Shape_i, self).c_code(node, name, (x,), (out,), sub)
def grad(self, inp, grads):
    # a shape is integer-valued, so there is no gradient w.r.t. x
    return [None]
class ShapeFeature(object): class ShapeFeature(object):
...@@ -824,7 +829,8 @@ class Assert(T.Op): ...@@ -824,7 +829,8 @@ class Assert(T.Op):
def __str__(self):
    # printable op name used in graph printouts
    return self.__class__.__name__
def perform(self, node, inputs, out_):
    """Pass inputs[0] through unchanged; abort via assert if any of
    the remaining inputs (the assertion conditions) is falsy."""
    out_storage, = out_
    out_storage[0] = inputs[0]
    # every trailing input is a condition that must hold
    assert numpy.all(inputs[1:])
......
...@@ -181,7 +181,8 @@ class RandomFunction(gof.Op): ...@@ -181,7 +181,8 @@ class RandomFunction(gof.Op):
return [None, [sample_shp[i] for i in xrange(node.outputs[1].ndim)]] return [None, [sample_shp[i] for i in xrange(node.outputs[1].ndim)]]
def perform(self, node, inputs, (rout, out)): def perform(self, node, inputs, out_):
out, rout = out_
# Use self.fn to draw shape worth of random numbers. # Use self.fn to draw shape worth of random numbers.
# Numbers are drawn from r if self.inplace is True, and from a copy of r if # Numbers are drawn from r if self.inplace is True, and from a copy of r if
# self.inplace is False # self.inplace is False
......
...@@ -119,9 +119,11 @@ class DownsampleFactorMax(Op): ...@@ -119,9 +119,11 @@ class DownsampleFactorMax(Op):
# TODO: consider restrucing the dtype? # TODO: consider restrucing the dtype?
return gof.Apply(self, [x], [x.type()]) return gof.Apply(self, [x], [x.type()])
def perform(self, node, (x,), (z,)): def perform(self, node, inp, out):
""" """
""" """
x, = inp
z, = out
if len(x.shape)!=4: if len(x.shape)!=4:
raise NotImplementedError('DownsampleFactorMax requires 4D input for now') raise NotImplementedError('DownsampleFactorMax requires 4D input for now')
if z[0] is None: if z[0] is None:
...@@ -143,11 +145,15 @@ class DownsampleFactorMax(Op): ...@@ -143,11 +145,15 @@ class DownsampleFactorMax(Op):
zj = j / ds1 zj = j / ds1
zz[n,k,zi,zj] = __builtin__.max(zz[n,k,zi,zj], x[n,k,i,j]) zz[n,k,zi,zj] = __builtin__.max(zz[n,k,zi,zj], x[n,k,i,j])
def grad(self, inp, grads):
    # delegate to the dedicated grad op, which needs the forward
    # output (maxout) to locate which pooled element was the max
    x, = inp
    gz, = grads
    maxout = self(x)
    return [DownsampleFactorMaxGrad(self.ds, ignore_border=self.ignore_border)(x, maxout, gz)]
def c_code(self, node, name, (x,), (z, ), sub): def c_code(self, node, name, inp, out, sub):
x, = inp
z, = out
fail=sub['fail'] fail=sub['fail']
ignore_border = int(self.ignore_border) ignore_border = int(self.ignore_border)
ds0, ds1 = self.ds ds0, ds1 = self.ds
...@@ -244,7 +250,9 @@ class DownsampleFactorMaxGrad(Op): ...@@ -244,7 +250,9 @@ class DownsampleFactorMaxGrad(Op):
return Apply(self, [x, maxout, gz], [x.type()]) return Apply(self, [x, maxout, gz], [x.type()])
def perform(self, node, (x, maxout, gz), (gx_stg,)): def perform(self, node, inp, out):
x, maxout, gz = inp
gx_stg, = out
gx = numpy.zeros_like(x) gx = numpy.zeros_like(x)
ds0, ds1 = self.ds ds0, ds1 = self.ds
...@@ -263,7 +271,9 @@ class DownsampleFactorMaxGrad(Op): ...@@ -263,7 +271,9 @@ class DownsampleFactorMaxGrad(Op):
else: gx[n,k,i,j] = 0 else: gx[n,k,i,j] = 0
gx_stg[0] = gx gx_stg[0] = gx
def c_code(self, node, name, (x, z, gz), (gx,), sub): def c_code(self, node, name, inp, out, sub):
x, z, gz = inp
gx, = out
fail = sub['fail'] fail = sub['fail']
ignore_border = int(self.ignore_border) ignore_border = int(self.ignore_border)
ds0, ds1 = self.ds ds0, ds1 = self.ds
......
...@@ -16,9 +16,13 @@ class XlogX(scalar.UnaryScalarOp): ...@@ -16,9 +16,13 @@ class XlogX(scalar.UnaryScalarOp):
return x * numpy.log(x) return x * numpy.log(x)
def impl(self, x):
    # delegate to the shared static implementation
    return XlogX.st_impl(x)
def grad(self, inputs, grads):
    """d(x*log(x))/dx = 1 + log(x)."""
    x, = inputs
    gz, = grads
    return [gz * (1 + scalar.log(x))]
def c_code(self, node, name, (x,), (z,), sub): def c_code(self, node, name, inputs, outputs, sub):
x, = inputs
z, = outputs
if node.inputs[0].type in [scalar.float32, scalar.float64]: if node.inputs[0].type in [scalar.float32, scalar.float64]:
return """%(z)s = return """%(z)s =
%(x)s == 0.0 %(x)s == 0.0
...@@ -40,9 +44,13 @@ class XlogY0(scalar.BinaryScalarOp): ...@@ -40,9 +44,13 @@ class XlogY0(scalar.BinaryScalarOp):
return x * numpy.log(y) return x * numpy.log(y)
def impl(self, x, y):
    # delegate to the shared static implementation
    return XlogY0.st_impl(x, y)
def grad(self, inputs, grads):
    """d(x*log(y))/dx = log(y); d(x*log(y))/dy = x / y."""
    x, y = inputs
    gz, = grads
    return [gz * scalar.log(y), gz * x / y]
def c_code(self, node, name, (x, y), (z,), sub): def c_code(self, node, name, inputs, outputs, sub):
x, y = inputs
z, = outputs
if node.inputs[0].type in [scalar.float32, scalar.float64]: if node.inputs[0].type in [scalar.float32, scalar.float64]:
return """%(z)s = return """%(z)s =
%(x)s == 0.0 %(x)s == 0.0
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论