Commit 8bb23735 authored by nouiz

Merge pull request #663 from larseeri/new_ops

Add the Theano op Bartlett, FillDiagonal and Fourier.
...@@ -556,12 +556,13 @@ class numeric_grad(object): ...@@ -556,12 +556,13 @@ class numeric_grad(object):
numpy.dtype('float64'): 1e-7, numpy.dtype('float64'): 1e-7,
numpy.dtype('float32'): 3e-4} numpy.dtype('float32'): 3e-4}
def __init__(self, f, pt, eps=None): def __init__(self, f, pt, eps=None, out_type=None):
"""Return the gradient of f at pt. """Return the gradient of f at pt.
:param f: a differentiable function such that f(*pt) is a scalar :param f: a differentiable function such that f(*pt) is a scalar
:param pt: an ndarray, a list of ndarrays or tuple of ndarrays :param pt: an ndarray, a list of ndarrays or tuple of ndarrays
:param out_type: dtype of output, if complex (i.e. 'complex32' or
'complex64')
This function computes the gradient by a one-sided finite This function computes the gradient by a one-sided finite
differences of a fixed step size (eps). differences of a fixed step size (eps).
...@@ -595,18 +596,21 @@ class numeric_grad(object): ...@@ -595,18 +596,21 @@ class numeric_grad(object):
# raise TypeError('All function arguments must have same dtype') # raise TypeError('All function arguments must have same dtype')
total_size = __builtin__.sum(prod(sh) for sh in shapes) total_size = __builtin__.sum(prod(sh) for sh in shapes)
working_dtype = __builtin__.min((self.type_eps[dt], dt) working_dtype = __builtin__.min((self.type_eps[dt], dt)
for dt in dtypes)[1] for dt in dtypes)[1]
#create un-initialized memory # create un-initialized memory
x = numpy.ndarray((total_size,), dtype=working_dtype) x = numpy.ndarray((total_size,), dtype=working_dtype)
gx = numpy.ndarray((total_size,), dtype=working_dtype) if (not out_type is None) and (out_type.startswith('complex')):
gx = numpy.ndarray((total_size,), dtype=out_type)
else:
gx = numpy.ndarray((total_size,), dtype=working_dtype)
if eps is None: if eps is None:
eps = __builtin__.max(self.type_eps[dt] for dt in dtypes) eps = __builtin__.max(self.type_eps[dt] for dt in dtypes)
#set up aliases so that apt[i] is backed by memory in x # set up aliases so that apt[i] is backed by memory in x
# and self.gf is backed by memory in gx # and self.gf is backed by memory in gx
cur_pos = 0 cur_pos = 0
self.gf = [] self.gf = []
...@@ -629,8 +633,13 @@ class numeric_grad(object): ...@@ -629,8 +633,13 @@ class numeric_grad(object):
x[i] += eps x[i] += eps
f_eps = f(*apt) f_eps = f(*apt)
gx[i] = numpy.asarray((f_eps - f_x) / eps) # TODO: remove this when it is clear that the next
# replacemement does not pose problems of its own. It was replaced
# for its inability to handle complex variables.
# gx[i] = numpy.asarray((f_eps - f_x) / eps)
gx[i] = ((f_eps - f_x) / eps)
if packed_pt: if packed_pt:
self.gf = self.gf[0] self.gf = self.gf[0]
...@@ -712,7 +721,7 @@ class numeric_grad(object): ...@@ -712,7 +721,7 @@ class numeric_grad(object):
return (max_arg, pos[max_arg], abs_errs[max_arg], rel_errs[max_arg]) return (max_arg, pos[max_arg], abs_errs[max_arg], rel_errs[max_arg])
def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, out_type=None, abs_tol=None,
rel_tol=None, mode=None, cast_to_output_type=False): rel_tol=None, mode=None, cast_to_output_type=False):
""" Test a gradient by Finite Difference Method. Raise error on failure. """ Test a gradient by Finite Difference Method. Raise error on failure.
...@@ -736,6 +745,8 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, ...@@ -736,6 +745,8 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
of sum(u * fun) at pt of sum(u * fun) at pt
:param eps: stepsize used in the Finite Difference Method (Default :param eps: stepsize used in the Finite Difference Method (Default
None is type-dependent) None is type-dependent)
:param out_type: dtype of output, if complex (i.e. 'complex32' or
'complex64')
:param abs_tol: absolute tolerance used as threshold for gradient :param abs_tol: absolute tolerance used as threshold for gradient
comparison comparison
:param rel_tol: relative tolerance used as threshold for gradient :param rel_tol: relative tolerance used as threshold for gradient
...@@ -761,7 +772,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, ...@@ -761,7 +772,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
raise TypeError(('verify_grad can work only with floating point ' raise TypeError(('verify_grad can work only with floating point '
'inputs, but input %i has dtype "%s".') % (i, p.dtype)) 'inputs, but input %i has dtype "%s".') % (i, p.dtype))
_type_tol = dict( # relativ error tolerances for different types _type_tol = dict( # relative error tolerances for different types
float32=1e-2, float32=1e-2,
float64=1e-4) float64=1e-4)
...@@ -839,7 +850,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, ...@@ -839,7 +850,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
grad_fn = function(tensor_pt, symbolic_grad) grad_fn = function(tensor_pt, symbolic_grad)
for test_num in xrange(n_tests): for test_num in xrange(n_tests):
num_grad = numeric_grad(cost_fn, [p.copy() for p in pt], eps) num_grad = numeric_grad(cost_fn, [p.copy() for p in pt], eps, out_type)
analytic_grad = grad_fn(*[p.copy() for p in pt]) analytic_grad = grad_fn(*[p.copy() for p in pt])
......
...@@ -1170,27 +1170,21 @@ class Mul(ScalarOp): ...@@ -1170,27 +1170,21 @@ class Mul(ScalarOp):
def grad(self, inputs, (gz, )): def grad(self, inputs, (gz, )):
retval = [] retval = []
for input in inputs: input_type = theano.tensor.as_tensor_variable(inputs).type
if input.type in continuous_types: if input_type in discrete_types:
if gz.type in complex_types: retval = None
# zr+zi = (xr + xi)(yr + yi) elif input_type in complex_types or gz.type in complex_types:
# zr+zi = (xr*yr - xi*yi) + (xr yi + xi yr ) for input in inputs:
otherprod = mul(*(utils.difference(inputs, [input]))) retval += [mul(*([gz] +
yr = real(otherprod) utils.difference(inputs, [input])))]
yi = imag(otherprod) else:
if input.type in complex_types: for input in inputs:
retval += [complex(yr * real(gz) + yi * imag(gz), retval += [cast(mul(*([gz] +
yr * imag(gz) - yi * real(gz))] utils.difference(inputs, [input]))),
else: input_type.dtype)]
retval += [cast(yr * real(gz) + yi * imag(gz),
input.type.dtype)]
else:
retval += [cast(mul(*([gz] + utils.difference(inputs,
[input]))),
input.type.dtype)]
else:
retval += [None]
return retval return retval
mul = Mul(upcast_out, name='mul') mul = Mul(upcast_out, name='mul')
......
import theano
import numpy as np import numpy as np
import numpy
import theano
import basic import basic
from theano import gof, tensor, function, scalar
from theano.sandbox.linalg.ops import diag
class DiffOp(theano.Op): class DiffOp(theano.Op):
...@@ -348,3 +352,143 @@ def repeat(x, repeats, axis=None): ...@@ -348,3 +352,143 @@ def repeat(x, repeats, axis=None):
""" """
return RepeatOp(axis=axis)(x, repeats) return RepeatOp(axis=axis)(x, repeats)
class Bartlett(gof.Op):
    """Theano Op wrapping ``numpy.bartlett``.

    Produces the Bartlett spectral window in the time domain: a
    triangular-shaped window whose end points are exactly zero.  It is
    commonly used in signal processing to taper a signal without
    generating much ripple in the frequency domain.

    input : integer scalar -- number of points in the output window; if
        zero or less, an empty vector is returned.
    output : vector of doubles -- the triangular window, normalized so
        its maximum is one (the value one appears only when the number
        of samples is odd), with the first and last samples equal to
        zero.
    """

    def __eq__(self, other):
        # The op carries no parameters, so equality is by type only.
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def __str__(self):
        return self.__class__.__name__

    def make_node(self, M):
        M = tensor.as_tensor_variable(M)
        if M.ndim != 0:
            raise TypeError('%s only works on scalar input'
                            % self.__class__.__name__)
        # dtype is a theano attribute here
        is_integer = (M.dtype.startswith('int') or
                      M.dtype.startswith('uint'))
        if not is_integer:
            raise TypeError('%s only works on integer input'
                            % self.__class__.__name__)
        return gof.Apply(self, [M], [tensor.dvector()])

    def perform(self, node, inputs, out_):
        (M,) = inputs
        (out,) = out_
        out[0] = numpy.bartlett(M)

    def infer_shape(self, node, in_shapes):
        m = node.inputs[0]
        # A negative requested length yields an empty vector: clamp the
        # symbolic length at zero.
        length = tensor.switch(tensor.lt(m, 0),
                               tensor.cast(0, m.dtype), m)
        return [[length]]

    def grad(self, inputs, output_grads):
        # The input is an integer count, so no gradient is defined.
        return [None for i in inputs]


bartlett = Bartlett()
class FillDiagonal(gof.Op):
    """
    An instance of this class returns a copy of an array with all elements of
    the main diagonal set to a specified scalar value.

    inputs:

    a : Rectangular array of at least two dimensions.
    val : Scalar value to fill the diagonal whose type must be compatible with
    that of array 'a' (i.e. 'val' cannot be viewed as an upcast of 'a').

    output:

    An array identical to 'a' except that its main diagonal is filled with
    scalar 'val'. (For an array 'a' with a.ndim >= 2, the main diagonal is the
    list of locations a[i, i, ..., i] (i.e. with indices all identical).)

    Support rectangular matrix and tensor with more than 2 dimensions
    if the latter have all dimensions equal.
    """

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        # BUG FIX: this method was misspelled ``__hash_``, so the class
        # silently kept the default identity-based hash while comparing
        # equal via the custom ``__eq__`` above -- equal op instances
        # could then land in different buckets of hashed containers.
        return hash(type(self))

    def __str__(self):
        return self.__class__.__name__

    def infer_shape(self, node, in_shapes):
        # The output always has exactly the shape of the input array.
        return [in_shapes[0]]

    def make_node(self, a, val):
        a = tensor.as_tensor_variable(a)
        val = tensor.as_tensor_variable(val)
        if a.ndim < 2:
            raise TypeError('%s: first parameter must have at least'
                            ' two dimensions' % self.__class__.__name__)
        elif val.ndim != 0:
            raise TypeError('%s: second parameter must be a scalar'
                            % self.__class__.__name__)
        val = tensor.cast(val, dtype=scalar.upcast(a.dtype, val.dtype))
        if val.dtype != a.dtype:
            # If upcasting val with a's dtype changed val's dtype, then
            # val cannot be represented in a's dtype without loss.
            raise TypeError('%s: type of second parameter must be compatible'
                            ' with first\'s' % self.__class__.__name__)
        return gof.Apply(self, [a, val], [a.type()])

    def perform(self, node, inputs, output_storage):
        a = inputs[0].copy()
        val = inputs[1]
        if a.ndim == 2:
            # numpy.fill_diagonal up to date(including 1.6.2) have a
            # bug for tall matrix.
            # For 2-d arrays, we accept rectangular ones.
            step = a.shape[1] + 1
            end = a.shape[1] * a.shape[1]
            # Write the value out into the diagonal.
            a.flat[:end:step] = val
        else:
            numpy.fill_diagonal(a, val)
        output_storage[0][0] = a

    def grad(self, inp, cost_grad):
        """
        Note: The gradient is currently implemented for matrices
        only.
        """
        a, val = inp
        grad = cost_grad[0]
        if (a.dtype.startswith('complex')):
            # Gradient through complex arrays is not supported: opt out.
            return [None, None]
        elif a.ndim > 2:
            raise NotImplementedError('%s: gradient is currently implemented'
                                      ' for matrices only'
                                      % self.__class__.__name__)
        wr_a = fill_diagonal(grad, 0)  # valid for any number of dimensions
        wr_val = diag(grad).sum()  # diag is only valid for matrices
        return [wr_a, wr_val]


fill_diagonal = FillDiagonal()
import theano
import numpy
import math
from theano import gof, tensor, function, scalar
from theano.sandbox.linalg.ops import diag
class Fourier(gof.Op):
    """
    An instance of this class returns a finite fourier transform calculated
    along one dimension of an input array.

    inputs:

    a : Array of at least one dimension. Can be complex.
    n : Integer, optional. Length of the transformed axis of the output. If n
    is smaller than the length of the input, the input is cropped. If it is
    larger, the input is padded with zeros. If n is not given, the length of
    the input (along the axis specified by axis) is used.
    axis : Integer, optional. Axis over which to compute the FFT. If not
    supplied, the last axis is used.

    output:

    Complex array. The input array, transformed along the axis
    indicated by 'axis' or along the last axis if 'axis' is not specified. It
    is truncated or zero-padded as required if 'n' is specified.

    (From numpy.fft.fft's documentation:)
    The values in the output follow so-called standard order. If A = fft(a, n),
    then A[0] contains the zero-frequency term (the mean of the signal), which
    is always purely real for real inputs. Then A[1:n/2] contains the
    positive-frequency terms, and A[n/2+1:] contains the negative-frequency
    terms, in order of decreasingly negative frequency. For an even number of
    input points, A[n/2] represents both positive and negative Nyquist
    frequency, and is also purely real for real input. For an odd number of
    input points, A[(n-1)/2] contains the largest positive frequency, while
    A[(n+1)/2] contains the largest negative frequency.
    """

    def __eq__(self, other):
        # The op carries no parameters: any two instances are equivalent.
        return type(self) == type(other)

    def __hash__(self):
        return hash(self.__class__)

    def __str__(self):
        return self.__class__.__name__

    def make_node(self, a, n, axis):
        # Validate inputs and build the Apply node.  `n` and `axis` may
        # be Python None (use the default), Python ints, or symbolic
        # integer scalars; the output is always complex128 with the
        # broadcastable pattern of `a`.
        a = tensor.as_tensor_variable(a)
        if a.ndim < 1:
            raise TypeError('%s: input must be an array, not a scalar' %
                            self.__class__.__name__)
        if axis is None:
            # Default: transform along the last axis.
            axis = a.ndim - 1
            axis = tensor.as_tensor_variable(axis)
        else:
            axis = tensor.as_tensor_variable(axis)
            if (not axis.dtype.startswith('int')) and \
               (not axis.dtype.startswith('uint')):
                raise TypeError('%s: index of the transformed axis must be'
                                ' of type integer' % self.__class__.__name__)
            # The bounds check can only be done at graph-construction
            # time when axis is a compile-time constant.
            elif axis.ndim != 0 or (isinstance(axis, tensor.TensorConstant) and
                                    (axis.data < 0 or axis.data > a.ndim - 1)):
                raise TypeError('%s: index of the transformed axis must be'
                                ' a scalar not smaller than 0 and smaller than'
                                ' dimension of array' % self.__class__.__name__)
        if n is None:
            # Default: use the full (symbolic) length of the chosen axis.
            n = a.shape[axis]
            n = tensor.as_tensor_variable(n)
        else:
            n = tensor.as_tensor_variable(n)
            if (not n.dtype.startswith('int')) and \
               (not n.dtype.startswith('uint')):
                raise TypeError('%s: length of the transformed axis must be'
                                ' of type integer' % self.__class__.__name__)
            elif n.ndim != 0 or (isinstance(n, tensor.TensorConstant) and
                                 n.data < 1):
                raise TypeError('%s: length of the transformed axis must be a'
                                ' strictly positive scalar'
                                % self.__class__.__name__)
        return gof.Apply(self, [a, n, axis], [tensor.TensorType('complex128',
                         a.type.broadcastable)()])

    def infer_shape(self, node, in_shapes):
        shape_a = in_shapes[0]
        n = node.inputs[1]
        axis = node.inputs[2]
        if len(shape_a) == 1:
            # 1-d input: the output length is just n (padded/truncated).
            return [(n,)]
        elif isinstance(axis, tensor.TensorConstant):
            # Constant axis: splice n into the shape at that position.
            out_shape = list(shape_a[0: axis.data]) + [n] +\
                list(shape_a[axis.data + 1:])
        else:
            # Symbolic axis: build the output shape symbolically by
            # stacking the input shape, splicing n in with concatenate,
            # then splitting back into individual scalar shape entries.
            l = len(shape_a)
            shape_a = tensor.stack(*shape_a)
            out_shape = tensor.concatenate((shape_a[0: axis], [n],
                                            shape_a[axis + 1:]))
            n_splits = [1] * l
            out_shape = tensor.split(out_shape, n_splits, l)
            out_shape = [a[0] for a in out_shape]
        return [out_shape]

    def perform(self, node, inputs, output_storage):
        a = inputs[0]
        n = inputs[1]
        axis = inputs[2]
        # Delegate entirely to numpy; n arrives as a 0-d array, hence
        # the int() conversion.
        output_storage[0][0] = numpy.fft.fft(a, n=int(n), axis=axis)

    def grad(self, inputs, cost_grad):
        """
        In defining the gradient, the Finite Fourier Transform is viewed as
        a complex-differentiable function of a complex variable
        """
        a = inputs[0]
        n = inputs[1]
        axis = inputs[2]
        grad = cost_grad[0]
        if not isinstance(axis, tensor.TensorConstant):
            raise NotImplementedError('%s: gradient is currently implemented'
                                      ' only for axis being a Theano constant'
                                      % self.__class__.__name__)
        axis = int(axis.data)
        # notice that the number of actual elements in wrto is independent of
        # possible padding or truncation:
        elem = tensor.arange(0, tensor.shape(a)[axis], 1)
        # accounts for padding:
        freq = tensor.arange(0, n, 1)
        # Matrix of DFT phase factors exp(-2*pi*i*freq*elem/n);
        # contracting the output gradient with it along `axis` maps the
        # gradient back to the input's index space.
        outer = tensor.outer(freq, elem)
        pow_outer = tensor.exp(((-2 * math.pi * 1j) * outer) / (1. * n))
        res = tensor.tensordot(grad, pow_outer, (axis, 0))
        # This would be simpler but not implemented by theano:
        # res = tensor.switch(tensor.lt(n, tensor.shape(a)[axis]),
        # tensor.set_subtensor(res[...,n::], 0, False, False), res)
        # Instead we resort to that to account for truncation:
        # reverse the axis order so the transformed axis comes first,
        # zero the entries past n, then reverse back.
        flip_shape = list(numpy.arange(0, a.ndim)[::-1])
        res = res.dimshuffle(flip_shape)
        res = tensor.switch(tensor.lt(n, tensor.shape(a)[axis]),
                            tensor.set_subtensor(res[n::, ], 0, False, False),
                            res)
        res = res.dimshuffle(flip_shape)
        # insures that gradient shape conforms to input shape:
        out_shape = list(numpy.arange(0, axis)) + [a.ndim - 1] +\
            list(numpy.arange(axis, a.ndim - 1))
        res = res.dimshuffle(*out_shape)
        # n and axis are integer parameters: no gradient for them.
        return [res, None, None]


fft = Fourier()
import theano
import numpy as np import numpy as np
from theano import tensor as T import numpy
from theano.tests import unittest_tools as utt
import theano
from theano.tests import unittest_tools as utt
from theano.tensor.extra_ops import * from theano.tensor.extra_ops import *
from theano import tensor as T
from theano import tensor, function, scalar
class TestBinCountOp(utt.InferShapeTester): class TestBinCountOp(utt.InferShapeTester):
...@@ -185,3 +187,99 @@ class TestRepeatOp(utt.InferShapeTester): ...@@ -185,3 +187,99 @@ class TestRepeatOp(utt.InferShapeTester):
def repeat_(a): def repeat_(a):
return RepeatOp()(a, 3) return RepeatOp()(a, 3)
utt.verify_grad(repeat_, [a]) utt.verify_grad(repeat_, [a])
class TestBartlett(utt.InferShapeTester):
    """Checks the Bartlett op: forward evaluation and shape inference."""

    def setUp(self):
        super(TestBartlett, self).setUp()
        self.op_class = Bartlett
        self.op = bartlett

    def test_perform(self):
        # Compile the op on an int64 scalar and compare against numpy's
        # reference implementation, including empty-window edge cases.
        length = tensor.lscalar()
        fn = function([length], self.op(length))
        n_points = numpy.random.random_integers(3, 50, size=())
        for value in [n_points, 0, -1]:
            assert numpy.allclose(fn(value), numpy.bartlett(value))
        # Unsigned integer input must be accepted too.
        small = numpy.array([17], dtype='uint8')
        assert numpy.allclose(fn(small[0]), numpy.bartlett(small[0]))

    def test_infer_shape(self):
        length = tensor.lscalar()
        # A random window size plus the two smallest boundary cases.
        for value in [numpy.random.random_integers(3, 50, size=()), 0, 1]:
            self._compile_and_check([length], [self.op(length)], [value],
                                    self.op_class)


if __name__ == "__main__":
    runner = TestBartlett('setUp')
    runner.setUp()
    runner.test_perform()
    runner.test_infer_shape()
class TestFillDiagonal(utt.InferShapeTester):
    """Checks FillDiagonal: forward pass, gradient, and shape inference."""

    # Fixed-seed RNG so gradient checks are reproducible.
    rng = numpy.random.RandomState(43)

    def setUp(self):
        super(TestFillDiagonal, self).setUp()
        self.op_class = FillDiagonal
        self.op = fill_diagonal

    def test_perform(self):
        # Square, wide, and tall matrices -- the tall case exercises the
        # op's workaround for numpy's fill_diagonal bug.
        x = tensor.dmatrix()
        y = tensor.dscalar()
        f = function([x, y], fill_diagonal(x, y))
        for shp in [(8, 8), (5, 8), (8, 5)]:
            a = numpy.random.rand(*shp)
            val = numpy.random.rand()
            out = f(a, val)
            # We can't use numpy.fill_diagonal as it is bugged.
            assert numpy.allclose(numpy.diag(out), val)
            assert (out == val).sum() == min(a.shape)

        # test for 3d tensor
        a = numpy.random.rand(3, 3, 3)
        x = tensor.dtensor3()
        y = tensor.dscalar()
        f = function([x, y], fill_diagonal(x, y))
        # Shift val by 10 so it cannot collide with entries of a, which
        # lie in [0, 1) -- the count below relies on that.
        val = numpy.random.rand() + 10
        out = f(a, val)
        # We can't use numpy.fill_diagonal as it is bugged.
        assert out[0, 0, 0] == val
        assert out[1, 1, 1] == val
        assert out[2, 2, 2] == val
        assert (out == val).sum() == min(a.shape)

    def test_gradient(self):
        # Finite-difference gradient check on both tall and wide shapes.
        utt.verify_grad(fill_diagonal, [numpy.random.rand(5, 8),
                                        numpy.random.rand()],
                        n_tests=1, rng=TestFillDiagonal.rng)
        utt.verify_grad(fill_diagonal, [numpy.random.rand(8, 5),
                                        numpy.random.rand()],
                        n_tests=1, rng=TestFillDiagonal.rng)

    def test_infer_shape(self):
        z = tensor.dtensor3()
        x = tensor.dmatrix()
        y = tensor.dscalar()
        self._compile_and_check([x, y], [self.op(x, y)],
                                [numpy.random.rand(8, 5),
                                 numpy.random.rand()],
                                self.op_class)
        self._compile_and_check([z, y], [self.op(z, y)],
                                [numpy.random.rand(8, 8, 8),
                                 numpy.random.rand()],
                                self.op_class)


if __name__ == "__main__":
    t = TestFillDiagonal('setUp')
    t.setUp()
    t.test_perform()
    t.test_gradient()
    t.test_infer_shape()
import numpy
import theano
from theano import tensor
from theano.tests import unittest_tools as utt
from theano.tensor.fourier import Fourier, fft
class TestFourier(utt.InferShapeTester):
    """Checks the Fourier op: forward pass, shape inference, gradient."""

    # Fixed-seed RNG so gradient checks are reproducible.
    rng = numpy.random.RandomState(43)

    def setUp(self):
        super(TestFourier, self).setUp()
        self.op_class = Fourier
        self.op = fft

    def test_perform(self):
        # Compare against numpy's reference FFT with explicit padding
        # (n=10 exceeds the 8 rows of the input) along axis 0.
        a = tensor.dmatrix()
        f = theano.function([a], self.op(a, n=10, axis=0))
        a = numpy.random.rand(8, 6)
        assert numpy.allclose(f(a), numpy.fft.fft(a, 10, 0))

    def test_infer_shape(self):
        a = tensor.dvector()
        self._compile_and_check([a], [self.op(a, 16, 0)],
                                [numpy.random.rand(12)],
                                self.op_class)
        a = tensor.dmatrix()
        # Cover constant and default (None) values for both n and axis.
        for var in [self.op(a, 16, 1), self.op(a, None, 1),
                    self.op(a, 16, None), self.op(a, None, None)]:
            self._compile_and_check([a], [var],
                                    [numpy.random.rand(12, 4)],
                                    self.op_class)
        # axis supplied as a symbolic scalar rather than a constant.
        b = tensor.iscalar()
        for var in [self.op(a, 16, b), self.op(a, None, b)]:
            self._compile_and_check([a, b], [var],
                                    [numpy.random.rand(12, 4), 0],
                                    self.op_class)

    def test_gradient(self):
        # Each wrapper fixes (n, axis) so verify_grad sees a function of
        # the input array alone.
        def fft_test1(a):
            return self.op(a, None, None)

        def fft_test2(a):
            return self.op(a, None, 0)

        def fft_test3(a):
            return self.op(a, 4, None)

        def fft_test4(a):
            return self.op(a, 4, 0)

        pts = [numpy.random.rand(5, 2, 4, 3),
               numpy.random.rand(2, 3, 4),
               numpy.random.rand(2, 5),
               numpy.random.rand(5)]
        for fft_test in [fft_test1, fft_test2, fft_test3, fft_test4]:
            for pt in pts:
                # out_type tells the numeric-gradient helper that the
                # op's output is complex-valued.
                theano.gradient.verify_grad(fft_test, [pt],
                                            n_tests=1, rng=TestFourier.rng,
                                            out_type='complex64')


if __name__ == "__main__":
    t = TestFourier('setUp')
    t.setUp()
    t.test_perform()
    t.test_infer_shape()
    t.test_gradient()
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment