提交 1569a7a9 authored 作者: David Warde-Farley's avatar David Warde-Farley

Bring theano.tensor into PEP 3113 compliance.

上级 ead4f23e
差异被折叠。
......@@ -477,7 +477,9 @@ class Gemm(GemmRelated):
if len(bb): raise ValueError(Gemm.E_scalar, bb)
output = z.type()
return Apply(self, inputs, [output])
def perform(self, node, (z, a, x, y, b), (zout, )):
def perform(self, node, inp, out):
z, a, x, y, b = inp
zout, = out
assert a.shape == ()
assert b.shape == ()
if not self.inplace:
......@@ -596,7 +598,9 @@ class Gemm(GemmRelated):
#undef REAL
"""
def c_code(self, node, name, (_z, _a, _x, _y, _b), (_zout, ), sub): #DEBUG
def c_code(self, node, name, inp, out, sub): #DEBUG
_z, _a, _x, _y, _b = inp
_zout, = out
if node.inputs[0].type.dtype.startswith('complex'):
raise utils.MethodNotDefined('%s.c_code' \
% self.__class__.__name__)
......@@ -949,7 +953,9 @@ class Dot22(GemmRelated):
outputs = [T.tensor(x.type.dtype, bz)]
return Apply(self, [x,y], outputs)
def perform(self, node, (x, y), (z, )):
def perform(self, node, inp, out):
x, y = inp
z, = out
try:
z[0] = numpy.asarray(numpy.dot(x, y))
except ValueError, e:
......@@ -988,7 +994,9 @@ class Dot22(GemmRelated):
double a = 1.0;
double b = 0.0;
"""
def c_code(self, node, name, (_x, _y), (_zout, ), sub): #DEBUG
def c_code(self, node, name, inp, out, sub): #DEBUG
_x, _y = inp
_zout, = out
if node.inputs[0].type.dtype.startswith('complex'):
raise utils.MethodNotDefined('%s.c_code' \
% self.__class__.__name__)
......@@ -1083,7 +1091,9 @@ class Dot22Scalar(GemmRelated):
outputs = [T.tensor(x.type.dtype, bz)]
return Apply(self, [x,y,scalar], outputs)
def perform(self, node, (x, y, scalar), (z, )):
def perform(self, node, inp, out):
x, y, scalar = inp
z, = out
try:
z[0] = scalar * numpy.asarray(numpy.dot(x, y))
except ValueError, e:
......@@ -1117,7 +1127,9 @@ class Dot22Scalar(GemmRelated):
#undef REAL
double b = 0.0;
"""
def c_code(self, node, name, (_x, _y, _a), (_zout, ), sub): #DEBUG
def c_code(self, node, name, inp, out, sub): #DEBUG
_x, _y, _a = inp
_zout, = out
if len(self.c_libraries())<=0:
return super(Dot22Scalar, self).c_code(node, name, (_x, _y), (_zout, ), sub)
full_code = self.build_gemm_call() % dict(locals(), **sub)
......
......@@ -179,7 +179,9 @@ class DimShuffle(Op):
else:
return "DimShuffle{%s}" % ",".join(str(x) for x in self.new_order)
def perform(self, node, (input, ), (storage, )):
def perform(self, node, inp, out):
input, = inp
storage, = out
# drop
res = input
if type(res) != numpy.ndarray:
......@@ -204,7 +206,8 @@ class DimShuffle(Op):
storage[0] = numpy.asarray(res) #asarray puts scalars back into array
def infer_shape(self, node, (ishp,)):
def infer_shape(self, node, shapes):
ishp, = shapes
ishp = list(ishp)
for drop in reversed(self.drop):
del ishp[drop]
......@@ -216,7 +219,9 @@ class DimShuffle(Op):
rval.insert(augm, 1)
return [rval]
def c_code(self, node, name, (input,), (res,), sub):
def c_code(self, node, name, inp, out, sub):
input, = inp
res, = out
basename = input + '__view_or_copy'
def statements(lst):
......@@ -317,7 +322,9 @@ class DimShuffle(Op):
def c_code_cache_version(self):
    """Version tag used to invalidate previously cached compiled C code."""
    version = (1,)
    return version
def grad(self, (x, ), (gz, )):
def grad(self, inp, grads):
x, = inp
gz, = grads
gz = as_tensor_variable(gz)
grad_order = ['x'] * len(x.type.broadcastable)
for i, v in enumerate(self.new_order):
......@@ -934,7 +941,9 @@ class CAReduce(Op):
else:
return "Reduce{%s}" % self.scalar_op
def perform(self, node, (input, ), (output, )):
def perform(self, node, inp, out):
input, = inp
output, = out
axis = self.axis
if axis is None:
axis = range(input.ndim)
......@@ -959,7 +968,8 @@ class CAReduce(Op):
else:
output[0] = numpy.copy(variable)
def infer_shape(self, node, (ishape,)):
def infer_shape(self, node, shapes):
ishape, = shapes
axis = self.axis
if axis is None:
return (),
......@@ -1115,7 +1125,9 @@ class Sum(CAReduce):
uint32='uint64',
).get(idtype, idtype)
def grad(self, (x, ), (gz, )):
def grad(self, inp, grads):
x, = inp
gz, = grads
gz = as_tensor_variable(gz)
axis = self.axis
if axis is None:
......@@ -1176,7 +1188,7 @@ class Prod(CAReduce):
uint32='uint64',
).get(idtype, idtype)
def grad(self, (prod_in, ), (gz, )):
def grad(self, inp, grads):
'''
The grad of this Op could be very easy, it is was not for the case
where zeros are present in a given "group" (ie. elements reduced
......@@ -1221,6 +1233,8 @@ class Prod(CAReduce):
the "T.eq()" bits), then taking this or that behavior (see T.switch)
based on the result of this count.
'''
prod_in, = inp
gz, = grads
if prod_in.dtype[0:3] in ('int','uin'):
return [None]
......@@ -1314,7 +1328,9 @@ class MulWithoutZeros(scalar.BinaryScalarOp):
return x
return x*y
def c_code(self, node, name, (x,y), (z, ), sub):
def c_code(self, node, name, inp, out, sub):
    """Emit C code for multiplication that treats 0 as the identity.

    Produces ``z = y`` when x is 0, ``z = x`` when y is 0, and
    ``z = y * x`` otherwise, using the C variable names in `inp`/`out`.
    """
    x, y = inp
    z, = out
    template = ("%(z)s = ((%(x)s == 0) ? (%(y)s) : "
                "((%(y)s == 0) ? (%(x)s) : ((%(y)s)*(%(x)s))) );")
    return template % locals()
......
......@@ -161,7 +161,8 @@ class Conv3D(theano.Op):
def c_header_dirs(self):
return ldflags(libs=False, include_dir=True)
def c_code(self, node, nodename, (V,W,b,d), outputs, sub):
def c_code(self, node, nodename, inputs, outputs, sub):
V, W, b, d = inputs
fail = sub['fail']
H = outputs[0]
......
......@@ -83,7 +83,8 @@ class ConvGrad3D(theano.Op):
flags = ['-Werror']
return flags
def c_code(self, node, nodename, (V,d,WShape,dCdH), outputs, sub):
def c_code(self, node, nodename, inputs, outputs, sub):
V, d, WShape, dCdH = inputs
fail = sub['fail']
dCdW = outputs[0]
......
......@@ -86,7 +86,8 @@ class ConvTransp3D(theano.Op):
print "\t\t\t\tConvTransp3D python code"
output_storage[0][0] = computeR(W,b,d,H,RShape)
def c_code(self, node, nodename, (W, b, d, H, RShape), outputs, sub):
def c_code(self, node, nodename, inputs, outputs, sub):
W, b, d, H, RShape = inputs
fail = sub['fail']
R = outputs[0]
......
......@@ -221,7 +221,7 @@ class ConvOp(Op):
else: return []
@staticmethod
def getOutputShape(inshp, kshp, (dx,dy)=(1,1), mode='valid'):
def getOutputShape(inshp, kshp, stride=(1,1), mode='valid'):
"""
Computes the output dimensions of convolving an image of shape "inshp"
with kernels of shape "kshp".
......@@ -231,6 +231,7 @@ class ConvOp(Op):
:param mode: 'valid' or 'full' (see 'border_mode' in conv2d's doc)
:return: (rows,cols) of output image
"""
dx, dy = stride
if mode=='valid': s = -1
else: s = 1
inshp, kshp = numpy.array(inshp), numpy.array(kshp)
......@@ -583,10 +584,12 @@ class ConvOp(Op):
# we simply let the default function do its work.
raise NotImplementedError()
def perform(self,node, (img2d, filtersflipped), (z,)):
def perform(self,node, inp, out):
"""
By default if len(img2d.shape)==3, we
"""
img2d, filtersflipped = inp
z, = out
if not imported_scipy_signal:
raise theano.gof.utils.MethodNotDefined(
"c_headers", type(self), self.__class__.__name__,
......@@ -696,7 +699,9 @@ class ConvOp(Op):
z[0]=zz
def grad(self, (inputs, kerns), (gz,)):
def grad(self, inp, grads):
inputs, kerns = inp
gz, = grads
if self.imshp != self.imshp_logical or self.kshp != self.kshp_logical:
raise NotImplementedError('todo')
......@@ -897,7 +902,9 @@ using namespace std;
return blas.ldflags(libs=False, include_dir=True)
return []
def c_code(self, node, name, (img2d, filtersflipped), (z, ), sub):
def c_code(self, node, name, inp, out, sub):
img2d, filtersflipped = inp
z, = out
if node.inputs[0].type.dtype != node.inputs[1].type.dtype:
raise NotImplementedError()
assert node.inputs[0].type.dtype == node.inputs[1].type.dtype
......
......@@ -69,7 +69,9 @@ class SoftmaxWithBias(gof.Op):
sm[i] *= 1.0 / numpy.sum(sm[i])
output_storage[0][0] = sm
def grad(self, (x, b), (g_sm,)):
def grad(self, inp, grads):
x, b = inp
g_sm, = grads
sm = softmax_with_bias(x, b)
dx = softmax_grad(g_sm, sm)
db = tensor.sum(dx, axis = 0)
......@@ -190,7 +192,9 @@ class SoftmaxWithBias(gof.Op):
return (init_decl, begin_row_loop, inside_row_loop, end_row_loop)
def c_code(self, node, name, (x, b), (sm,), sub):
def c_code(self, node, name, inp, out, sub):
    """Render this op's C implementation from its code template.

    Joins the template pieces from ``self.c_code_template()`` and
    interpolates both the local C variable names (x, b, sm) and the
    entries of `sub` (e.g. the fail snippet).
    """
    x, b = inp
    sm, = out
    template = ''.join(self.c_code_template())
    mapping = dict(locals(), **sub)
    return template % mapping
......@@ -241,7 +245,9 @@ class SoftmaxGrad(gof.Op):
def c_code_cache_version(self):
    """Version tag used to invalidate previously cached compiled C code."""
    version = (3,)
    return version
def c_code(self, node, name, (dy, sm), (dx,), sub):
def c_code(self, node, name, inp, out, sub):
dy, sm = inp
dx, = out
return '''
if ((%(dy)s->descr->type_num != PyArray_DOUBLE) && (%(dy)s->descr->type_num != PyArray_FLOAT))
{
......@@ -335,7 +341,9 @@ class Softmax(gof.Op):
sm[i] /= numpy.sum(sm[i])
output_storage[0][0] = sm
def grad(self, (x,), (g_sm,)):
def grad(self, inp, grads):
    """Gradient of the Softmax op, delegated to the SoftmaxGrad op.

    inp -- (x,): the pre-softmax activations.
    grads -- (g_sm,): gradient of the cost w.r.t. the softmax output.
    Returns a one-element list, one gradient per input.
    """
    x, = inp
    g_sm, = grads
    # Recompute the forward output; the grad op consumes both the output
    # gradient and the softmax value itself.
    sm = softmax(x)
    return [softmax_grad(g_sm, sm)]
......@@ -637,13 +645,16 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
output_storage[1][0] = sm
output_storage[2][0] = am
def infer_shape(self, node, (x_shp, b_shp, idx_shp)):
def infer_shape(self, node, shapes):
    """Shapes of the (nll, softmax, argmax) outputs from (x, b, y_idx) shapes.

    The nll output has one entry per row of x, the softmax output has
    the same shape as x, and the argmax output matches the index input.
    """
    x_shp, b_shp, idx_shp = shapes
    return [(x_shp[0],), x_shp, idx_shp]
def grad(self, (x, b, y_idx), (g_nll, g_sm, g_am)):
def grad(self, inp, grads):
x, b, y_idx = inp
g_nll, g_sm, g_am = grads
if g_am is not None:
raise NotImplementedError()
elif g_sm is not None:
......@@ -745,7 +756,9 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
def c_code_cache_version(self):
    """Version tag for the cached C code.

    Includes SoftmaxWithBias's version because that op's C template is
    reused here: a change there must invalidate this cache too.
    """
    base = SoftmaxWithBias.c_code_cache_version()
    return (5,) + base
def c_code(self, node, name, (x, b, y_idx), (nll, sm, am), sub):
def c_code(self, node, name, inp, out, sub):
x, b, y_idx = inp
nll, sm, am = out
y_idx_type = node.inputs[2].type.dtype_specs()[1]
am_type = y_idx_type
code_template = ''.join(self.c_code_template())
......@@ -775,7 +788,9 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
dx[i] = dy[i] * sm[i] #vector scale
dx[i, y_idx[i]] -= dy[i] #scalar decrement
output_storage[0][0] = dx
def grad(self, (dy, sm, y_idx), (g_dx, )):
def grad(self, inp, grads):
dy, sm, y_idx = inp
g_dx, = grads
# TODO: currently we do not compute the gradient w.r.t. dy, because
# advanced indexing is not working yet. When it works, do it to avoid
# potentially misleading behavior in gradient computations! (although
......@@ -790,7 +805,9 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
return [g_dy, g_sm, g_y_idx]
def c_code_cache_version(self):
    """Version tag used to invalidate previously cached compiled C code."""
    version = (2,)
    return version
def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub):
def c_code(self, node, name, inp, out, sub):
dnll, sm, y_idx = inp
dx, = out
y_idx_type = node.inputs[2].type.dtype_specs()[1]
return """
......@@ -906,7 +923,9 @@ class CrossentropyCategorical1HotGrad(gof.Op):
return self.__class__.__name__
def make_node(self, g_y, coding_dist, true_one_of_n):
return Apply(self, [g_y, coding_dist, true_one_of_n], [coding_dist.type()])
def perform(self, node, (g_y, coding_dist, true_one_of_n), (g_coding_strg,)):
def perform(self, node, inp, out):
g_y, coding_dist, true_one_of_n = inp
g_coding_strg, = out
g_coding = numpy.zeros_like(coding_dist)
for i in xrange(len(g_y)):
g_coding[i, true_one_of_n[i]] = -g_y[i]/coding_dist[i, true_one_of_n[i]]
......@@ -956,13 +975,17 @@ class CrossentropyCategorical1Hot(gof.Op):
return Apply(self, [_coding_dist, _true_one_of_n],
[tensor.Tensor(dtype=_coding_dist.dtype, broadcastable=[False])()])
def perform(self, node, (coding, one_of_n), (y_out,)):
def perform(self, node, inp, out):
    """Compute the categorical cross-entropy -log(coding[i, one_of_n[i]]).

    inp -- (coding, one_of_n): coding is a 2-D array of per-class
        values indexed [row, class]; one_of_n holds the true class
        index for each row.
    out -- (y_out,): single-element output storage; receives a 1-D
        vector with one entry per row of coding.
    """
    coding, one_of_n = inp
    y_out, = out
    # zeros_like on one column yields a 1-D vector with coding's dtype.
    y = numpy.zeros_like(coding[:,0])
    for i in xrange(len(y)):
        y[i] = -numpy.log(coding[i, one_of_n[i]])
    y_out[0] = y
def grad(self, (coding, one_of_n), (g_y,)):
def grad(self, inp, grads):
    """Gradient of the categorical cross-entropy.

    Returns one entry per input: the gradient w.r.t. `coding`
    (delegated to the companion grad op) and None for the integer
    index input `one_of_n`, which carries no gradient.
    """
    coding, one_of_n = inp
    g_y, = grads
    return [crossentropy_categorical_1hot_grad(g_y, coding, one_of_n), None]
crossentropy_categorical_1hot = CrossentropyCategorical1Hot()
......@@ -1465,7 +1488,9 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
node = Apply(op=self, inputs=[mat], outputs=[tensor.matrix()])
return node
def perform(self, node, (mat, ), (output, )):
def perform(self, node, inp, out):
mat, = inp
output, = out
new_shape=(mat.shape[0],mat.shape[1]+1)
if output[0] == None:
output[0]=numpy.empty(new_shape,dtype=mat.dtype)
......@@ -1481,7 +1506,9 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
out[:,0].fill(self.val.data)
out[:,1:]=mat
def grad(self, (mat,), (goutput,)):
def grad(self, inp, grads):
    """Gradient w.r.t. the op's single input `mat`.

    The op prepends a constant column to `mat`, so the gradient w.r.t.
    `mat` is the output gradient with that first column dropped.

    Fix: return a one-element list rather than the bare tensor — the
    grad contract is one gradient per input (cf. the sibling
    Prepend_scalar_to_each_row.grad, which returns one entry per input).
    """
    mat, = inp
    goutput, = grads
    return [goutput[:, 1:]]
class Prepend_scalar_to_each_row(gof.Op):
......@@ -1506,7 +1533,9 @@ class Prepend_scalar_to_each_row(gof.Op):
node = Apply(op=self, inputs=[val,mat], outputs=[tensor.matrix()])
return node
def perform(self, node, (val,mat), (output, )):
def perform(self, node, inp, out):
val, mat = inp
output, = out
new_shape=(mat.shape[0],mat.shape[1]+1)
if output[0] == None:
output[0]=numpy.empty(new_shape,dtype=mat.dtype)
......@@ -1521,7 +1550,9 @@ class Prepend_scalar_to_each_row(gof.Op):
out[:,0].fill(val)
out[:,1:]=mat
def grad(self, (val, mat), (goutput,)):
def grad(self, inp, grads):
    """Gradients w.r.t. (val, mat).

    The output's first column was filled with `val`, so its gradient
    column goes to `val`; the remaining columns go to `mat`.
    """
    val, mat = inp
    goutput, = grads
    d_val = goutput[:, 0]
    d_mat = goutput[:, 1:]
    return d_val, d_mat
prepend_scalar_to_each_row = Prepend_scalar_to_each_row()
......
......@@ -29,10 +29,14 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
return 1.0 / (1.0 + numpy.exp(-x))
def impl(self, x):
return ScalarSigmoid.st_impl(x)
def grad(self, (x,), (gz,)):
def grad(self, inp, grads):
    """Symbolic gradient of sigmoid: d/dx = y * (1 - y) with y = sigmoid(x)."""
    x, = inp
    gz, = grads
    y = scalar_sigmoid(x)
    return [gz * y * (1.0 - y)]
def c_code(self, node, name, (x,), (z,), sub):
def c_code(self, node, name, inp, out, sub):
x, = inp
z, = out
if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
......@@ -71,9 +75,13 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
return numpy.log1p(numpy.exp(x))
def impl(self, x):
return ScalarSoftplus.static_impl(x)
def grad(self, (x,), (gz,)):
def grad(self, inp, grads):
    """Symbolic gradient of softplus(x) = log1p(exp(x)): d/dx = sigmoid(x)."""
    x, = inp
    gz, = grads
    return [gz * scalar_sigmoid(x)]
def c_code(self, node, name, (x,), (z,), sub):
def c_code(self, node, name, inp, out, sub):
x, = inp
z, = out
if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
......
......@@ -349,7 +349,8 @@ class MakeVector(T.Op):
return T.Apply(self, inputs, [otype()])
def __str__(self):
return self.__class__.__name__
def perform(self, node, inputs, (out,)):
def perform(self, node, inputs, out_):
out, = out_
# not calling theano._asarray as optimization
if out[0] is None:
out[0] = theano._asarray(inputs, dtype=node.outputs[0].dtype)
......@@ -395,14 +396,18 @@ class Shape_i(T.Op):
if x.ndim <= self.i:
raise TypeError('x has too few dimensions for Shape_i', (x, self.i))
return T.Apply(self, [x], [T.lscalar()])
def perform(self, node, (x, ), (out, )):
def perform(self, node, inp, out_):
    """Write x.shape[self.i] into the output storage as an int64 scalar.

    Reuses the previously allocated output array when one exists,
    overwriting only its contents instead of reallocating.
    """
    x, = inp
    out, = out_
    if out[0] is None:
        out[0] = theano._asarray(x.shape[self.i], dtype='int64')
    else:
        out[0][...] = x.shape[self.i]
def c_code_cache_version(self):
    """Version tag used to invalidate previously cached compiled C code."""
    version = (0, 1)
    return version
def c_code(self, node, name, (x, ), (out, ), sub):
def c_code(self, node, name, inp, out_, sub):
x, = inp
out, = out_
i = self.i
if isinstance(node.inputs[0].type,T.TensorType):
return """
......@@ -423,7 +428,7 @@ class Shape_i(T.Op):
# various types of variables.
# Do not continue this madness.
return super(Shape_i, self).c_code(node, name, (x,), (out,), sub)
def grad(self, (x,), (gz,)):
def grad(self, inp, grads):
    """The shape of a tensor is not differentiable: propagate no gradient."""
    return [None]
class ShapeFeature(object):
......@@ -824,7 +829,8 @@ class Assert(T.Op):
def __str__(self):
return self.__class__.__name__
def perform(self, node, inputs, (out,)):
def perform(self, node, inputs, out_):
    """Pass the first input through unchanged; assert the rest are all true."""
    storage, = out_
    value = inputs[0]
    storage[0] = value
    # NOTE(review): `assert` is stripped when Python runs with -O, so the
    # check silently disappears in optimized runs — confirm this is intended.
    assert numpy.all(inputs[1:])
......
......@@ -181,7 +181,8 @@ class RandomFunction(gof.Op):
return [None, [sample_shp[i] for i in xrange(node.outputs[1].ndim)]]
def perform(self, node, inputs, (rout, out)):
def perform(self, node, inputs, out_):
out, rout = out_
# Use self.fn to draw shape worth of random numbers.
# Numbers are drawn from r if self.inplace is True, and from a copy of r if
# self.inplace is False
......
......@@ -119,9 +119,11 @@ class DownsampleFactorMax(Op):
# TODO: consider restrucing the dtype?
return gof.Apply(self, [x], [x.type()])
def perform(self, node, (x,), (z,)):
def perform(self, node, inp, out):
"""
"""
x, = inp
z, = out
if len(x.shape)!=4:
raise NotImplementedError('DownsampleFactorMax requires 4D input for now')
if z[0] is None:
......@@ -143,11 +145,15 @@ class DownsampleFactorMax(Op):
zj = j / ds1
zz[n,k,zi,zj] = __builtin__.max(zz[n,k,zi,zj], x[n,k,i,j])
def grad(self,(x,), (gz,)):
def grad(self, inp, grads):
    """Gradient of max-pooling, delegated to DownsampleFactorMaxGrad.

    Recomputes the forward output (maxout) so the grad op can identify
    which input positions attained each pooled maximum.
    """
    x, = inp
    gz, = grads
    maxout = self(x)
    return [DownsampleFactorMaxGrad(self.ds, ignore_border=self.ignore_border)(x, maxout, gz)]
def c_code(self, node, name, (x,), (z, ), sub):
def c_code(self, node, name, inp, out, sub):
x, = inp
z, = out
fail=sub['fail']
ignore_border = int(self.ignore_border)
ds0, ds1 = self.ds
......@@ -244,7 +250,9 @@ class DownsampleFactorMaxGrad(Op):
return Apply(self, [x, maxout, gz], [x.type()])
def perform(self, node, (x, maxout, gz), (gx_stg,)):
def perform(self, node, inp, out):
x, maxout, gz = inp
gx_stg, = out
gx = numpy.zeros_like(x)
ds0, ds1 = self.ds
......@@ -263,7 +271,9 @@ class DownsampleFactorMaxGrad(Op):
else: gx[n,k,i,j] = 0
gx_stg[0] = gx
def c_code(self, node, name, (x, z, gz), (gx,), sub):
def c_code(self, node, name, inp, out, sub):
x, z, gz = inp
gx, = out
fail = sub['fail']
ignore_border = int(self.ignore_border)
ds0, ds1 = self.ds
......
......@@ -16,9 +16,13 @@ class XlogX(scalar.UnaryScalarOp):
return x * numpy.log(x)
def impl(self, x):
return XlogX.st_impl(x)
def grad(self, (x,), (gz,)):
def grad(self, inputs, grads):
    """Gradient of x*log(x): d/dx = 1 + log(x)."""
    x, = inputs
    gz, = grads
    return [gz * (1 + scalar.log(x))]
def c_code(self, node, name, (x,), (z,), sub):
def c_code(self, node, name, inputs, outputs, sub):
x, = inputs
z, = outputs
if node.inputs[0].type in [scalar.float32, scalar.float64]:
return """%(z)s =
%(x)s == 0.0
......@@ -40,9 +44,13 @@ class XlogY0(scalar.BinaryScalarOp):
return x * numpy.log(y)
def impl(self, x, y):
return XlogY0.st_impl(x, y)
def grad(self, (x, y), (gz,)):
def grad(self, inputs, grads):
    """Gradient of x*log(y): d/dx = log(y), d/dy = x/y."""
    x, y = inputs
    gz, = grads
    return [gz * scalar.log(y), gz * x / y]
def c_code(self, node, name, (x, y), (z,), sub):
def c_code(self, node, name, inputs, outputs, sub):
x, y = inputs
z, = outputs
if node.inputs[0].type in [scalar.float32, scalar.float64]:
return """%(z)s =
%(x)s == 0.0
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论