提交 c5773332 authored 作者: Pascal Lamblin

Merge pull request #2212 from nouiz/fft_conv

[BUG] Fix bug introduced yesterday, fix FFT conv optimization and small pydotprint fix.
......@@ -152,7 +152,7 @@ def inplace_optimizer(f):
class SeqOptimizer(Optimizer, list):
#inherit from Optimizer first to get Optimizer.__hash__
# inherit from Optimizer first to get Optimizer.__hash__
"""WRITEME
Takes a list of L{Optimizer} instances and applies them
sequentially.
......@@ -823,6 +823,68 @@ class LocalOptimizer(object):
(' ' * level), self.__class__.__name__, id(self))
class LocalSeqOptimizer(LocalOptimizer, list):
    """Try a sequence of local optimizers on a node.

    Each contained L{LocalOptimizer} is tried in order; as soon as one
    of them performs a replacement we return its result without trying
    the following ones.
    """
    # Inherit from LocalOptimizer first to get LocalOptimizer.__hash__
    # (instead of list's).

    def __init__(self, *opts, **kw):
        """WRITEME

        :param opts: the local optimizers to try, in order.  A single
            list/tuple argument is also accepted.
        :param failure_callback: optional keyword argument stored for
            use when an optimizer in the sequence fails.
        """
        if len(opts) == 1 and isinstance(opts[0], (list, tuple)):
            opts = opts[0]
        self[:] = opts
        self.failure_callback = kw.pop('failure_callback', None)

    def tracks(self):
        """Return the concatenation of what every sub-optimizer tracks."""
        t = []
        for sub_opt in self:
            tt = sub_opt.tracks()
            if tt:
                t.extend(tt)
        return t

    def transform(self, node):
        """Transform a subgraph whose output is `node`.

        Returns the result of the first sub-optimizer that applies:

        - False to indicate that no optimization can be applied to this
          `node`; or
        - <list of variables> to use in place of `node`'s outputs in the
          greater graph.
        - dict(old variables -> new variables). A dictionary that map
          from old variables to new variables to replace.

        :type node: an Apply instance
        """
        for sub_opt in self:
            ret = sub_opt.transform(node)
            if ret:
                return ret
        # No sub-optimizer applied.  Return False explicitly, as the
        # docstring promises (the previous implicit None was also falsy,
        # so callers that test truthiness behave identically).
        return False

    def add_requirements(self, fgraph):
        """
        If this local optimization wants to add some requirements to the
        fgraph, this is the place to do it: we forward the call to every
        sub-optimizer.
        """
        for sub_opt in self:
            sub_opt.add_requirements(fgraph)

    def print_summary(self, stream=sys.stdout, level=0, depth=-1):
        # Print one line for this optimizer, then recurse into members.
        name = getattr(self, 'name', None)
        print >> stream, "%s%s %s id=%i" % (
            (' ' * level), self.__class__.__name__, name, id(self))
        # This way, -1 will do all depth
        if depth != 0:
            depth -= 1
            for opt in self:
                opt.print_summary(stream, level=(level + 2), depth=depth)
class FromFunctionLocalOptimizer(LocalOptimizer):
"""WRITEME"""
def __init__(self, fn, tracks=None, requirements=()):
......
......@@ -223,6 +223,7 @@ class SequenceDB(DB):
other tags) fast_run and fast_compile optimizers are drawn is a SequenceDB.
"""
seq_opt = opt.SeqOptimizer
def __init__(self, failure_callback=opt.SeqOptimizer.warn):
super(SequenceDB, self).__init__()
......@@ -256,13 +257,13 @@ class SequenceDB(DB):
# the order we want.
opts.sort(key=lambda obj: obj.name)
opts.sort(key=lambda obj: self.__position__[obj.name])
ret = opt.SeqOptimizer(opts, failure_callback=self.failure_callback)
ret = self.seq_opt(opts, failure_callback=self.failure_callback)
if hasattr(tags[0], 'name'):
ret.name = tags[0].name
return ret
def print_summary(self, stream=sys.stdout):
print >> stream, "SequenceDB (id %i)" % id(self)
print >> stream, self.__class__.__name__ + " (id %i)" % id(self)
positions = self.__position__.items()
def c(a, b):
......@@ -279,6 +280,13 @@ class SequenceDB(DB):
return sio.getvalue()
class LocalSequenceDB(SequenceDB):
    """
    A SequenceDB whose query result is a local optimizer
    (LocalSeqOptimizer) instead of a global one.
    """
    # SequenceDB builds its result via ``self.seq_opt(opts, ...)``;
    # overriding this attribute is all that is needed to switch the
    # returned optimizer type.
    seq_opt = opt.LocalSeqOptimizer
class ProxyDB(DB):
"""
Wrap an existing proxy.
......
......@@ -992,7 +992,7 @@ def pydotprint_variables(vars,
if nd.owner:
plot_apply(nd.owner, depth)
try:
g.write_png(outfile, prog='dot')
g.write(outfile, prog='dot', format=format)
except pd.InvocationException, e:
# Some version of pydot are bugged/don't work correctly with
# empty label. Provide a better user error message.
......@@ -1006,6 +1006,7 @@ def pydotprint_variables(vars,
" Theano. Using another version of pydot could"
" fix this problem. The pydot error is: " +
e.message)
raise
print 'The output file is available at', outfile
......
差异被折叠。
......@@ -83,7 +83,7 @@ class TestConv2dFFT(unittest.TestCase):
# make sure we inserted the fft trickery
topo = f_fft.maker.fgraph.toposort()
assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
for n in topo) == 2
for n in topo) == 2, topo
res_ref = f_ref()
......@@ -112,7 +112,7 @@ class TestConv2dFFT(unittest.TestCase):
# make sure we inserted the fft trickery
topo = f_fft.maker.fgraph.toposort()
assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
for n in topo) == 2
for n in topo) == 2, topo
res_ref = f_ref()
res_fft = f_fft()
......
......@@ -306,6 +306,7 @@ def test_consistency_GPU_parallel():
samples = numpy.array(samples).flatten()
assert(numpy.allclose(samples, java_samples))
def test_GPU_nstreams_limit():
"""Verify that a ValueError is raised when n_streams
is greater than 2**20 on GPU. This is the value of
......@@ -316,13 +317,20 @@ def test_GPU_nstreams_limit():
seed = 12345
R = MRG_RandomStreams(seed=seed, use_cuda=True)
def eval_uniform(size, nstreams):
return R.uniform(size=size, nstreams=nstreams, dtype='float32').eval()
if theano.config.mode == "FAST_COMPILE":
mode = "FAST_RUN"
else:
mode = None
out = R.uniform(size=size, nstreams=nstreams, dtype='float32')
f = theano.function([], out, mode=mode)
return f()
eval_uniform((10,), 2**20)
assert_raises(ValueError, eval_uniform, (10,), 2**20 + 1)
def test_consistency_GPUA_serial():
'''Verify that the random numbers generated by GPUA_mrg_uniform, serially,
are the same as the reference (Java) implementation by L'Ecuyer et al.
......
......@@ -2016,26 +2016,26 @@ def zeros_like(model, dtype=None):
return fill(model, constant(0.0, dtype=dtype))
def zeros(shape, dtype=None):
    """
    Create a Tensor filled with zeros, closer to Numpy's syntax than ``alloc``.

    :param shape: a scalar or a list/tuple/TensorVariable of scalars
        giving the result shape; a lone scalar is treated as a 1-d shape.
    :param dtype: dtype of the result; defaults to ``config.floatX``.
    """
    # Accept a bare scalar as shorthand for a 1-d shape.
    if not isinstance(shape, (list, tuple, TensorVariable)):
        shape = [shape]
    if dtype is None:
        dtype = config.floatX
    return alloc(numpy.array(0, dtype=dtype), *shape)
def ones(shape, dtype=None):
    """
    Create a Tensor filled with ones, closer to Numpy's syntax than ``alloc``.

    :param shape: a scalar or a list/tuple/TensorVariable of scalars
        giving the result shape; a lone scalar is treated as a 1-d shape.
    :param dtype: dtype of the result; defaults to ``config.floatX``.
    """
    # Accept a bare scalar as shorthand for a 1-d shape.
    if not isinstance(shape, (list, tuple, TensorVariable)):
        shape = [shape]
    if dtype is None:
        dtype = config.floatX
    return alloc(numpy.array(1, dtype=dtype), *shape)
class Nonzero(gof.Op):
......
......@@ -182,10 +182,20 @@ class DimShuffle(Op):
input = as_tensor_variable(_input)
ib = tuple(input.type.broadcastable)
if not ib == self.input_broadcastable:
raise TypeError((
"The number of dimensions and/or broadcastable pattern of the "
"input is incorrect for this op. Expected %s, got %s."
% (self.input_broadcastable, ib)))
if len(ib) != len(self.input_broadcastable):
raise TypeError((
"The number of dimensions of the "
"input is incorrect for this op. Expected %s, got %s."
% (self.input_broadcastable, ib)))
for expected, b in zip(self.input_broadcastable, ib):
if expected is True and b is False:
raise TypeError((
"The broadcastable pattern of the "
"input is incorrect for this op. Expected %s, got %s."
% (self.input_broadcastable, ib)))
#else, expected == b or expected is False and b is True
# Both case are good.
ob = []
for value in self.new_order:
if value == 'x':
......
......@@ -1639,7 +1639,18 @@ def local_alloc_elemwise(node):
*[T.eq(i.shape[idx], cmp_op.shape[idx])
for idx in xrange(i.type.ndim)
if not i.type.broadcastable[idx]])
new_i.append(i.owner.inputs[0].owner.inputs[0])
alloc_input = i.owner.inputs[0].owner.inputs[0]
if alloc_input.ndim != i.owner.inputs[0].ndim:
# The alloc can add dimension to the value
# We add a dimshuffle to add them.
# We let later optimization merge the multiple dimshuffle
nb_dim_to_add = i.owner.inputs[0].ndim - alloc_input.ndim
alloc_input = alloc_input.dimshuffle(['x'] * nb_dim_to_add +
range(alloc_input.ndim))
# We need to keep the dimshuffle. It could swap axes or
# add dimensions anywhere.
new_i.append(i.owner.op(alloc_input))
else:
new_i.append(i)
new_i[assert_op_idx] = assert_op
......
......@@ -2687,6 +2687,21 @@ class Test_local_alloc_elemwise(unittest.TestCase):
self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 1)
def test_error(self):
    # NOTE(review): presumably a regression test for the
    # local_alloc_elemwise optimization when the alloc'd value is
    # broadcastable and the alloc result goes through a dimshuffle --
    # confirm against the commit history.
    t3fft = theano.tensor.tensor(dtype=self.dtype,
                                 broadcastable=(False, False, True))
    row = theano.tensor.row(dtype=self.dtype)
    o = T.alloc(row, 5, 5).dimshuffle(0, 1, 'x') + t3fft
    func = function(
        [t3fft, row],
        o,
        mode='FAST_RUN'
    )
    # The alloc must be optimized away and one assert inserted.
    self._verify_alloc_count(func, 0)
    self._verify_assert_count(func, 1)
    # Also run the compiled function to make sure it executes cleanly.
    d = numpy.random.rand(5, 5, 1).astype(self.dtype)
    r = numpy.random.rand(1, 5).astype(self.dtype)
    func(d, r)
def test_local_subtensor_of_alloc():
......
......@@ -11,6 +11,7 @@ import theano.tensor as tensor
from theano.printing import min_informative_str, debugprint
from theano.compat.six import StringIO
def test_pydotprint_cond_highlight():
"""
This is a REALLY PARTIAL TEST.
......@@ -44,6 +45,37 @@ def test_pydotprint_cond_highlight():
' is no IfElse node in the graph\n')
def test_pydotprint_variables():
    """This is a REALLY PARTIAL TEST.

    It only makes sure the code runs without raising; no output is
    checked.
    """
    # Skip test if pydot is not available.
    if not theano.printing.pydot_imported:
        raise SkipTest('pydot not available')

    x = tensor.dvector()

    # Redirect theano's logging into a buffer while printing.
    s = StringIO()
    new_handler = logging.StreamHandler(s)
    new_handler.setLevel(logging.DEBUG)
    orig_handler = theano.logging_default_handler

    # Swap the handlers exactly once (the original code performed the
    # removeHandler/addHandler pair twice, installing a duplicate
    # handler).
    theano.theano_logger.removeHandler(orig_handler)
    theano.theano_logger.addHandler(new_handler)
    try:
        theano.printing.pydotprint_variables(x * 2)
    finally:
        theano.theano_logger.addHandler(orig_handler)
        theano.theano_logger.removeHandler(new_handler)
def test_pydotprint_long_name():
"""This is a REALLY PARTIAL TEST.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论