Commit af804be5, authored by James Bergstra

merge

......@@ -197,6 +197,11 @@ class BadOptimization(DebugModeError):
print >> ssio, " Mean Abs Diff: ", numpy.mean(numpy.absolute(nv-ov))
print >> ssio, " Median Abs Diff: ", numpy.median(numpy.absolute(nv-ov))
print >> ssio, " Std Abs Diff: ", numpy.std(numpy.absolute(nv-ov))
reldiff = numpy.absolute(nv-ov) / (numpy.absolute(nv)+numpy.absolute(ov))
print >> ssio, " Max Rel Diff: ", numpy.max(reldiff)
print >> ssio, " Mean Rel Diff: ", numpy.mean(reldiff)
print >> ssio, " Median Rel Diff: ", numpy.median(reldiff)
print >> ssio, " Std Rel Diff: ", numpy.std(reldiff)
# only if all succeeds to we add anything to sio
print >> sio, ssio.getvalue()
except:
......@@ -349,14 +354,17 @@ def debugprint(r, prefix='', depth=-1, done=None, file=sys.stdout):
# this variable is the output of computation,
# so just print out the apply
a = r.owner
print >> file, prefix, a.op, id(a)
if len(a.outputs) == 1:
print >> file, '%s%s [@%i]' % (prefix, a.op, id(r))
else:
print >> file, '%s%s.%i [@%i]' % (prefix, a.op, a.outputs.index(r), id(r))
if id(a) not in done:
done.add(id(a))
for i in a.inputs:
debugprint(i, prefix+' ', depth=depth-1, done=done, file=file)
debugprint(i, prefix+' |', depth=depth-1, done=done, file=file)
else:
#this is a variable
print >> file, prefix, r, id(r)
print >> file, '%s%s [@%i]' % (prefix, r, id(r))
return file
......
......@@ -116,7 +116,7 @@ class AddDestroyHandler(gof.Optimizer):
for o in env.outputs:
try:
env.replace_validate(o, _output_guard(o), reason='output_guard')
_logger.warning("Output variable %s required output_guard,"
_logger.info("Output variable %s required output_guard,"
" how was this output left unprotected against destructive operations?"
% o)
except gof.InconsistencyError:
......@@ -127,12 +127,22 @@ class AddDestroyHandler(gof.Optimizer):
env.extend(gof.DestroyHandler())
# The canonical optimization pipeline.  Each entry is registered at a float
# position; positions fix the execution order regardless of which tags a
# query selects.  (The diff residue registered several names twice — once
# with the old positions and once with the new ones — which would collide;
# only the new registrations are kept.)
optdb = gof.SequenceDB()
optdb.register('merge1', gof.MergeOptimizer(),
        0, 'fast_run', 'fast_compile')
optdb.register('canonicalize', gof.EquilibriumDB(),  # rearranges elemwise expressions
        1, 'fast_run')
optdb.register('merge1.2', gof.MergeOptimizer(skip_const_merge=True),
        1.2, 'fast_run', 'fast_compile')
optdb.register('stabilize', gof.EquilibriumDB(),  # replace unstable subgraphs
        1.5, 'fast_run')
optdb.register('specialize', gof.EquilibriumDB(),  # misc special cases for speed
        2, 'fast_run')
optdb.register('merge2', gof.MergeOptimizer(),  # especially constant merge
        49, 'fast_run')
optdb.register('add_destroy_handler', AddDestroyHandler(),
        49.5, 'fast_run', 'inplace')
optdb.register('merge3', gof.MergeOptimizer(),  # final pass just to make sure
        100, 'fast_run')
class Mode(object):
......@@ -153,6 +163,12 @@ class Mode(object):
def __init__(self, linker = config.linker, optimizer = config.optimizer):
    # Delegates to __setstate__ so construction and unpickling share one path.
    self.__setstate__((linker, optimizer))

# NOTE(review) — attribute map, per the code visible here; confirm against the
# rest of the class:
#self.provided_optimizer - typically the `optimizer` arg.  But if the `optimizer` arg is
#    a keyword corresponding to a predefined Query, then this stores the query
#self._optimizer - typically same as provided_optimizer??
#self.__get_optimizer - returns self._optimizer (possibly querying optdb with self._optimizer)
#self.optimizer - property that returns __get_optimizer()

def __getstate__(self):
    # Pickled state mirrors the constructor arguments.
    return (self.provided_linker, self.provided_optimizer)
......@@ -218,7 +234,7 @@ predefined_modes = {'FAST_COMPILE': FAST_COMPILE,
def get_mode(string):
    """Return the Mode named by `string`.

    :param string: a key of `predefined_modes`, None (use config.mode),
        or an already-constructed Mode instance (returned unchanged).
    :raises Exception: if `string` is a str but not a predefined mode name.
    """
    if string is None:
        string = config.mode
    if not isinstance(string, str):
        return string  # it is hopefully already a mode...
    # `in` replaces the Python-2-only dict.has_key; also fixes the garbled
    # error message ("predefixed mode exist").
    if string not in predefined_modes:
        raise Exception("No predefined mode exists for string: %s" % string)
    return predefined_modes[string]
......
......@@ -197,12 +197,19 @@ class _metadict:
class MergeOptimizer(Optimizer):
"""WRITEME
Merges parts of the graph that are identical, i.e. parts that
take the same inputs and carry out the asme computations so we
can avoid doing them more than once. Also merges variables that
are constant.
"""
Merges parts of the graph that are identical and redundant.
The basic principle is that if two Applies have ops that compare equal, and identical
inputs, then they do not both need to be computed. The clients of one are transfered to
the other and one of them is removed from the graph. This procedure is carried out in
input->output order through the graph.
The first step of merging is constant-merging, so that all clients of an int(1) for example,
are transfered to a particular instance of int(1).
"""
def __init__(self, skip_const_merge=False):
    # skip_const_merge: when True, apply() will not run the constant-merge
    # pass (see apply()); node merging still runs.
    self.skip_const_merge = skip_const_merge

def add_requirements(self, env):
    # Merging works by replacing variables, so the env needs the
    # replace-with-validation feature installed.
    env.extend(toolbox.ReplaceValidate())
......@@ -230,41 +237,6 @@ class MergeOptimizer(Optimizer):
const_sig[c] = sig
const_sig_inv[sig] = c
def exptime_apply_node_merge(self, env):
    """Merge duplicate Apply nodes in `env` by structural identity.

    Each variable gets an identifier; an Apply's identifier is its op plus
    the identifiers of its inputs.  Two Applies with equal identifiers are
    duplicates: the outputs of one are replaced by the outputs of the other.
    (Name suggests exponential time — TODO confirm why; see node_cid tuples.)
    """
    # we clear the dicts because the Constants signatures are not necessarily hashable
    # and it's more efficient to give them an integer like the other Variables
    symbol_idx = {} #variable -> int
    symbol_idx_inv = {} #int -> variable (inverse of symbol_idx)

    #add all graph sources to the symbol_idx dictionaries (arbitrary order)
    for i, r in enumerate(r for r in env.variables if r.owner is None):
        symbol_idx[r] = i
        symbol_idx_inv[i] = r

    for node in _list_of_nodes(env):
        # identifier for this Apply: (op, identifiers of all inputs)
        node_cid = (node.op, tuple([symbol_idx[input] for input in node.inputs]))
        #print 'NODE', node, node_cid
        dup = symbol_idx_inv.get(node_cid, None)
        success = False
        if dup is not None:
            success = True
            pairs = zip(node.outputs, dup.outputs)
            for output, new_output in pairs:
                # keep a human-readable name if the survivor has none
                if output.name and not new_output.name:
                    new_output.name = output.name
            try:
                env.replace_all_validate(pairs, reason='Merge (exptime)')
            except InconsistencyError, e:
                # replacement rejected; fall through and register this node
                success = False
        if not success:
            # node was not merged: record it and its outputs so later
            # duplicates can be merged into it
            symbol_idx[node] = node_cid
            symbol_idx_inv[node_cid] = node
            for i, output in enumerate(node.outputs):
                ref = (i, node_cid)
                symbol_idx[output] = ref
                symbol_idx_inv[ref] = output
def apply_node_merge(self, env):
# we clear the dicts because the Constants signatures are not necessarily hashable
# and it's more efficient to give them an integer like the other Variables
......@@ -316,7 +288,8 @@ class MergeOptimizer(Optimizer):
#TODO: Consider splitting this into a separate optimizer (SeqOptimizer)
def apply(self, env):
    """Run the merge passes on `env`.

    Constant merging runs first, unless this optimizer was built with
    skip_const_merge=True; Apply-node merging always runs afterwards.
    (The diff residue contained both the old unconditional call and the new
    conditional one, which would have merged constants twice.)
    """
    if not self.skip_const_merge:
        self.apply_constant_merge(env)
    self.apply_node_merge(env)
merge_optimizer = MergeOptimizer()
......@@ -541,7 +514,7 @@ class PatternSub(LocalOptimizer):
PatternSub((subtract, (add, 'x', 'y'), 'y'), 'x')
PatternSub((power, 'x', Constant(double, 2.0)), (square, 'x'))
PatternSub((boggle, {'pattern': 'x',
'constraint': lambda env, expr: expr.type == scrabble}),
'constraint': lambda expr: expr.type == scrabble}),
(scrabble, 'x'))
"""
......@@ -789,7 +762,10 @@ class NavigatorOptimizer(Optimizer):
raise
if replacements is False or replacements is None:
return False
assert len(node.outputs) == len(replacements)
if not isinstance(replacements, (tuple, list)):
raise TypeError('Optimizer %s gave wrong type of replacement' % lopt)
if len(node.outputs) != len(replacements):
raise ValueError('Optimizer %s gave wrong number of replacements' % lopt)
repl_pairs = zip(node.outputs, replacements)
try:
env.replace_all_validate(repl_pairs, reason=lopt)
......@@ -904,8 +880,13 @@ class EquilibriumOptimizer(NavigatorOptimizer):
max_depth = None,
max_use_ratio = None):
"""
:param local_optimizers: list or set of local optimizations to apply until
equilibrium.
:param max_use_ratio: each optimizer can be applied at most (size of graph * this number)
:param max_depth: TODO what does this do? (EquilibriumDB sets it to 5)
"""
super(EquilibriumOptimizer, self).__init__(
......@@ -916,6 +897,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
self.local_optimizers = local_optimizers
self.max_depth = max_depth
self.max_use_ratio = max_use_ratio
assert self.max_use_ratio is not None, 'max_use_ratio has to be a number'
def apply(self, env, start_from = None):
if start_from is None:
......@@ -960,7 +942,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
changed |= lopt_change
finally:
self.detach_updater(env, u)
self.detach_updater(env, u)
self.detach_updater(env, u) #TODO: erase this line, it's redundant at best
if max_use_abort:
print >> sys.stderr, "WARNING: EquilibriumOptimizer max'ed out"
......
......@@ -26,7 +26,7 @@ class DB(object):
# It is an instance of a DB.In the tests for example,
# this is not always the case.
if not isinstance(obj, (DB, opt.Optimizer, opt.LocalOptimizer)):
raise Exception('Triing to register an optimizer that don\'t herite from theano.gof.opt.Optimizer or theano.gof.opt.LocalOptimizer', obj)
raise TypeError('Object cannot be registered in OptDB', obj)
if self.name is not None:
tags = tags + (self.name,)
......@@ -132,6 +132,18 @@ class Query(object):
class EquilibriumDB(DB):
"""A set of potential optimizations which should be applied in an arbitrary order until
equilibrium is reached.
Canonicalize, Stabilize, and Specialize are all equilibrium optimizations.
.. note::
It seems like this might be supposed to contain LocalOptimizer instances rather than
optimizer instances, because whatever is selected by the query is passed to
EquilibriumOptimizer and EquilibriumOptimizer requires LocalOptimizer instances.
"""
def query(self, *tags, **kwtags):
opts = super(EquilibriumDB, self).query(*tags, **kwtags)
......@@ -142,27 +154,45 @@ class EquilibriumDB(DB):
class SequenceDB(DB):
"""A sequence of potential optimizations.
Retrieve a sequence of optimizations (a SeqOptimizer) by calling query().
Each potential optimization is registered with a floating-point position.
No matter which optimizations are selected by a query, they are carried out in order of
increasing position.
The optdb itself (`theano.compile.mode.optdb`), from which (among many other tags) fast_run
and fast_compile optimizers are drawn is a SequenceDB.
"""
def __init__(self, failure_callback = opt.SeqOptimizer.warn):
super(SequenceDB, self).__init__()
self.__priority__ = {}
self.__position__ = {}
self.failure_callback = failure_callback
def register(self, name, obj, position, *tags):
    """Register `obj` under `name` at a floating-point `position`.

    `position` fixes where the optimization runs in the sequence returned
    by query(), no matter which tags selected it.  (Merged the braided
    old `priority` / new `position` variants of this method; only
    `__position__` is kept, matching query() below.)
    """
    super(SequenceDB, self).register(name, obj, *tags)
    self.__position__[name] = position
def query(self, *tags, **kwtags):
    """Return a SeqOptimizer of the selected optimizations, in position order.

    :type position_cutoff: float or int
    :param position_cutoff: only optimizations with position strictly less
        than the cutoff are returned (default: no cutoff).
    """
    position_cutoff = kwtags.pop('position_cutoff', float('inf'))
    opts = super(SequenceDB, self).query(*tags, **kwtags)
    # filter by cutoff, then order by registered position
    opts = [o for o in opts if self.__position__[o.name] < position_cutoff]
    opts.sort(key=lambda obj: self.__position__[obj.name])
    return opt.SeqOptimizer(opts, failure_callback=self.failure_callback)
def print_summary(self, stream=sys.stdout):
    """Write a human-readable dump of this DB to `stream`.

    (Dropped the stale `__priority__` line left over from the old version
    of this class; only `__position__` exists after the rename.)
    """
    print >> stream, "SequenceDB (id %i)" % id(self)
    print >> stream, "  position", self.__position__
    print >> stream, "  names", self._names
    print >> stream, "  db", self.__db__
def __str__(self):
sio = StringIO.StringIO()
self.print_summary(sio)
......
......@@ -7,9 +7,52 @@ import sys,os
from theano import config
from gof import Op, Apply
from theano.gof.python25 import any
from theano.compile import Function, debugmode
#We import the debugprint here to have all printing of graph available from this module
from theano.compile.debugmode import debugprint
def debugprint(obj, depth=-1, file=None):
    """Print a computation graph to `file`.

    :type obj: Variable, Apply, or Function instance
    :param obj: symbolic thing to print
    :type depth: integer
    :param depth: print graph to this depth (-1 for unlimited)
    :type file: None or file-like object
    :param file: print to this file (None means sys.stdout)
    :raises TypeError: if `obj` is none of the supported types
    :rtype: None or file-like object
    :returns: the `file` argument

    Each line printed represents a Variable in the graph.
    The indentation of each line corresponds to its depth in the symbolic
    graph.  The first part of the text identifies whether it is an input
    (if a name or type is printed) or the output of some Apply (in which
    case the Op is printed).  The second part of the text is the memory
    location of the Variable.
    If a Variable is encountered multiple times in the depth-first search,
    it is only printed recursively the first time.  Later, just the
    Variable and its memory location are printed.
    If an Apply has multiple outputs, then a '.N' suffix will be appended
    to the Apply's identifier, to indicate which output a line corresponds
    to.
    """
    _file = sys.stdout if file is None else file
    # `done` is shared across the outputs so each Apply is expanded once
    done = set()
    results_to_print = []
    if isinstance(obj, gof.Variable):
        results_to_print.append(obj)
    elif isinstance(obj, gof.Apply):
        results_to_print.extend(obj.outputs)
    elif isinstance(obj, Function):
        results_to_print.extend(obj.maker.env.outputs)
    else:
        # previously an unsupported object silently printed nothing
        raise TypeError('debugprint cannot print an object of this type', obj)
    for r in results_to_print:
        debugmode.debugprint(r, depth=depth, done=done, file=_file)
    if file is None:
        _file.flush()
    return file
class Print(Op):
"""This identity-like Op has the side effect of printing a message followed by its inputs
......@@ -329,7 +372,7 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn
if var.name is not None:
varstr = var.name
elif isinstance(var,gof.Constant):
varstr = str(var.data)
varstr = '%s [%s]'% (str(var.data) , str(var.type))
elif var in input_update and input_update[var].variable.name is not None:
varstr = input_update[var].variable.name
else:
......
......@@ -3,7 +3,7 @@ import numpy
import theano
from theano import Op, Type, Apply, Variable, Constant
from theano import tensor
from theano.compile import shared, SharedVariable, shared_constructor
from theano.compile import shared, SharedVariable
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import filter as type_support_filter
......@@ -68,6 +68,11 @@ CudaNdarrayType.SharedVariable = CudaNdarraySharedVariable
def cuda_shared_constructor(value, name, strict=False, broadcastable=None):
"""SharedVariable Constructor for TensorType"""
# THIS CONSTRUCTOR TRIES TO CAST VALUE TO A FLOAT32, WHICH THEN GOES ONTO THE CARD
# SO INT shared vars, float64 shared vars, etc. all end up on the card.
# THIS IS NOT THE DEFAULT BEHAVIOUR THAT WE WANT.
# SEE float32_shared_constructor
#TODO: what should strict mean in this context, since we always have to make a copy?
if strict:
_value = value
......
......@@ -20,8 +20,9 @@ Special cases:
Often a for loop can be expressed as a ``scan()`` operation, and ``scan`` is
the closest that theano comes to looping. The advantage of using ``scan``
over for loops is that it allows you to express the loop symbolically. The
Scan Op should always be used by applying the ``scan`` function.
over for loops is that it allows the number of iterations to be a part of the symbolic graph.
The Scan Op should always be used by applying the ``scan`` function.
"""
__docformat__ = 'restructedtext en'
......@@ -60,7 +61,8 @@ def hash_listsDictsTuples(x):
def scan(fn, sequences, initial_states, non_sequences, inplace_map={}, \
sequences_taps={}, outputs_taps = {}, n_steps = 0, \
truncate_gradient = -1, go_backwards = False, mode = 'FAST_RUN'):
truncate_gradient = -1, go_backwards = False,
mode = None):
'''Function that constructs and applies a Scan op
:param fn: Function that describes the operations involved in one step of scan
......
......@@ -4,7 +4,7 @@ __docformat__ = "restructuredtext en"
import __builtin__
import sys # for sys.maxint
from theano.configparser import config
from theano.configparser import config, AddConfigVar, BoolParam
import traceback #for overriding Op.__call__
if sys.version_info >= (2,5):
import functools
......@@ -570,7 +570,6 @@ class TensorType(Type):
# input received.
return """
%(name)s = NULL;
type_num_%(name)s = ((PyArrayObject*)py_%(name)s)->descr->type_num; //we expect %(type_num)s
if (py_%(name)s == Py_None) {
// We can either fail here or set %(name)s to NULL and rely on Ops using
// tensors to handle the NULL case, but if they fail to do so they'll end up
......@@ -578,18 +577,17 @@ class TensorType(Type):
PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
%(fail)s
}
else if (!PyArray_Check(py_%(name)s)) {
if (!PyArray_Check(py_%(name)s)) {
PyErr_SetString(PyExc_ValueError, "expected an ndarray");
%(fail)s
}
else if (type_num_%(name)s != %(type_num)s) {
type_num_%(name)s = ((PyArrayObject*)py_%(name)s)->descr->type_num; //we expect %(type_num)s
if (type_num_%(name)s != %(type_num)s) {
PyErr_SetString(PyExc_ValueError, "expected %(type_num)s");
%(fail)s
}
else {
%(name)s = (PyArrayObject*)(py_%(name)s);
Py_XINCREF(%(name)s);
}
%(name)s = (PyArrayObject*)(py_%(name)s);
Py_XINCREF(%(name)s);
""" % dict(sub, name = name, type_num = self.dtype_specs()[2])
def c_cleanup(self, name, sub):
......@@ -631,7 +629,7 @@ class TensorType(Type):
def c_code_cache_version(self):
    """Version tuple for the C code cache.

    Bumped to 2 in this commit, matching the change to c_extract (type_num
    is now read only after PyArray_Check succeeds).  Returns () — caching
    disabled — when the underlying scalar type is unversioned.
    """
    scalar_version = scal.Scalar(self.dtype).c_code_cache_version()
    if scalar_version:
        return (2,) + scalar_version
    else:
        return ()
......@@ -943,7 +941,13 @@ class _tensor_py_operators:
break
if advanced:
return AdvancedSubtensor(args)(self, *args)
if config.experimental.advanced_indexing:
if len(args) == 1:
return AdvancedSubtensor1()(self, *args)
else:
return AdvancedSubtensor(args)(self, *args)
else:
return AdvancedSubtensor(args)(self, *args)
else:
return Subtensor(args)(self, *Subtensor.collapse(args, lambda entry: isinstance(entry, Variable)))
......@@ -1029,6 +1033,7 @@ class TensorConstantSignature(tuple):
except:
return False
#N.B. compare shape to ensure no broadcasting in ==
#N.B. compare elementwise last because it is the most expensive check
return (t0 == t1) and (d0.shape == d1.shape) \
and (self.sum == other.sum) and (numpy.all(d0 == d1))
def __hash__(self):
......@@ -1294,9 +1299,15 @@ def shape(a):
pprint.assign(_shape, printing.MemberPrinter('shape'))
class MaxAndArgmax(Op):
"""Calculate the max and argmax over a given axis"""
"""Calculate the max and argmax over a given axis.
.. note::
If axis is None it means to calculate the max over the last dimension which is
DIFFERENT FROM NUMPY!!
"""
nin=2 # tensor, axis
nout=2 # max val, max idx
E_axis = 'invalid axis'
......@@ -1307,7 +1318,8 @@ class MaxAndArgmax(Op):
axis = x.type.ndim - 1
axis = _as_tensor_variable(axis)
inputs = [x, axis]
broadcastable = [False] * (x.type.ndim - 1) #TODO: be less conservative
#TODO: figure things out if axis is a constant
broadcastable = [False] * (x.type.ndim - 1)
outputs = [tensor(x.type.dtype, broadcastable,name='max'),
tensor('int32', broadcastable,name='argmax')]
return Apply(self, inputs, outputs)
......@@ -1666,60 +1678,113 @@ def zeros_like(model):
#return Zeros(model.type.ndim)(shape(model))
return fill(model, constant(0.0, dtype=model.type.dtype))
class Filler(gof.Op):
if 0:
## COMMENTED OUT FEB 17 2010
## TODO (DOCUMENT AND WRITE TESTS) OR DELETE
class Filler(gof.Op):
"""WRITEME"""
def __init__(self, value, ndim, dtype = 'float64'):
self.value = value
self.ndim = ndim
self.dtype = dtype
self.type = TensorType(dtype = dtype,
broadcastable = (False,)*ndim)
def make_node(self, dims):
dims = as_tensor_variable(dims)
return gof.Apply(self, [dims], [self.type()])
def perform(self, node, (dims,), (out,)):
if out[0] is not None:
out[0].resize(dims, refcheck = 0)
out[0].fill(self.value)
else:
if self.value == 0:
out[0] = numpy.zeros(dims, dtype = self.dtype)
elif self.value == 1:
out[0] = numpy.ones(dims, dtype = self.dtype)
else:
out[0] = numpy.ones(dims, dtype = self.dtype) * self.value
def grad(self, (dims,), (gout,)):
return None,
def __eq__(self, other):
return type(self) == type(other) and self.ndim == other.ndim and self.dtype == other.dtype
def __hash__(self):
return hash(self.ndim) ^ hash(self.dtype)
Zeros = partial(Filler, 0)
"""WRITEME"""
Ones = partial(Filler, 1)
"""WRITEME"""
def __init__(self, value, ndim, dtype = 'float64'):
self.value = value
self.ndim = ndim
self.dtype = dtype
self.type = TensorType(dtype = dtype,
broadcastable = (False,)*ndim)
def make_node(self, dims):
dims = as_tensor_variable(dims)
return gof.Apply(self, [dims], [self.type()])
@constructor
def zero():
"""
Return a scalar zero, e.g. for initializing sums.
"""
return Zeros(0)([])
def perform(self, node, (dims,), (out,)):
if out[0] is not None:
out[0].resize(dims, refcheck = 0)
out[0].fill(self.value)
else:
if self.value == 0:
out[0] = numpy.zeros(dims, dtype = self.dtype)
elif self.value == 1:
out[0] = numpy.ones(dims, dtype = self.dtype)
else:
out[0] = numpy.ones(dims, dtype = self.dtype) * self.value
@constructor
def one():
"""WRITEME"""
return Ones(0)([])
def grad(self, (dims,), (gout,)):
return None,
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Filler) and r.owner.op.value == 0, printing.FunctionPrinter('zeros'))
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Filler) and r.owner.op.value == 1, printing.FunctionPrinter('ones'))
def __eq__(self, other):
return type(self) == type(other) and self.ndim == other.ndim and self.dtype == other.dtype
class Alloc(gof.Op):
"""Create a Tensor from an initial value and a desired shape
def __hash__(self):
return hash(self.ndim) ^ hash(self.dtype)
alloc(value, shape0, shape1, ..., shapeN)
Zeros = partial(Filler, 0)
"""WRITEME"""
Returns an N-dimensional tensor initialized by `value` using something equivalent to
>>> z = numpy.zeros(shape, value.dtype)
>>> z += value
Ones = partial(Filler, 1)
"""WRITEME"""
The result has N dimensions, has the dtype of `value` and is obtained by broadcasting value
over the output ndarray.
@constructor
def zero():
"""
Return a scalar zero, e.g. for initializing sums.
This Op is used to replace fill() during optimizations because after shapes are lifted,
the first argument to fill can often be pruned from the graph.
"""
return Zeros(0)([])
def __init__(self, dtype):
self.dtype = dtype
@constructor
def one():
"""WRITEME"""
return Ones(0)([])
def __eq__(self, other):
return type(self) == type(other) and self.dtype == other.dtype
def __hash__(self):
return hash(type(self)) ^ hash(self.dtype)
def __str__(self):
return '%s{%s}' % (self.__class__.__name__, self.dtype)
def make_node(self, value, *shape):
v = as_tensor_variable(value)
sh = [as_tensor_variable(s) for s in shape]
bcast = []
for s in sh:
if s.type.dtype[:3] not in ('int', 'uin'):
raise TypeError('Shape arguments must be integers', s)
# if s is constant 1, then we're broadcastable in that dim
bcast.append(isinstance(s, TensorConstant) and (s.data == 1))
otype = TensorType(dtype=self.dtype, broadcastable=bcast)
return gof.Apply(self, [v]+sh, [otype()])
def perform(self, node, inputs, (out,)):
v = inputs[0]
sh = tuple([int(i) for i in inputs[1:]])
if out[0] is None or out[0].shape != sh:
out[0] = numpy.zeros(sh, dtype=self.dtype)
out[0][...] += v # broadcast v to fill us up
def grad(self, inputs, (gout,)):
return [None for i in inputs]
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Filler) and r.owner.op.value == 0, printing.FunctionPrinter('zeros'))
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Filler) and r.owner.op.value == 1, printing.FunctionPrinter('ones'))
@_redefine(elemwise.Elemwise(scal.identity))
def tensor_copy(a):
......@@ -1841,33 +1906,36 @@ def var(input, axis = None):
#return the mean sqr
return mean(centered_input**2, axis)
class Repeat(gof.Op):
def make_node(self, input, repeats, axis):
assert isinstance(input.type, TensorType)
assert repeats.type == iscalar
assert axis.type == iscalar
broadcastable = []
for i,x in enumerate(input.broadcastable):
if i==axis:
broadcastable += [False]
else:
broadcastable += [x]
type = TensorType(dtype = input.type.dtype, broadcastable = \
broadcastable)
#backport
#type = TensorType(dtype = input.type.dtype,
# broadcastable = [False if i==axis else x for i, x in enumerate(input.broadcastable)])
return gof.Apply(self, [inputs, repeats, axis], [type()])
def perform(self, node, (input, repeats, axis), (out, )):
out[0] = numpy.repeat(input, repeats, axis)
def grad(self, (input, repeats, axis), (gout, )):
return add.grad((input, gout), (gout,))[:1]
repeat = Repeat()
if 0:
## COMMENTED OUT FEB 17 2010
## TODO (DOCUMENT AND WRITE TESTS) OR DELETE
class Repeat(gof.Op):
def make_node(self, input, repeats, axis):
assert isinstance(input.type, TensorType)
assert repeats.type == iscalar
assert axis.type == iscalar
broadcastable = []
for i,x in enumerate(input.broadcastable):
if i==axis:
broadcastable += [False]
else:
broadcastable += [x]
type = TensorType(dtype = input.type.dtype, broadcastable = \
broadcastable)
#backport
#type = TensorType(dtype = input.type.dtype,
# broadcastable = [False if i==axis else x for i, x in enumerate(input.broadcastable)])
return gof.Apply(self, [inputs, repeats, axis], [type()])
def perform(self, node, (input, repeats, axis), (out, )):
out[0] = numpy.repeat(input, repeats, axis)
def grad(self, (input, repeats, axis), (gout, )):
return add.grad((input, gout), (gout,))[:1]
repeat = Repeat()
class Default(gof.Op):
"""
......@@ -2489,6 +2557,10 @@ class Join(Op):
join(2, x, y, z) # WRONG: the axis has to be an index into the shape
join(0, x, u) # WRONG: joined tensors must have the same rank
"""
def __eq__(self, other):
    # Join carries no parameters, so all instances are interchangeable:
    # compare by class only (enables graph merging of Join nodes).
    return type(self) == type(other)
def __hash__(self):
    # must be consistent with __eq__ above: hash on the class
    return hash(type(self))
def make_node(self, *axis_and_tensors):
"""
......@@ -2765,37 +2837,6 @@ else:
pass
class MakeVector(Op):
    """Pack N scalar inputs of a fixed scalar type into a rank-1 tensor."""
    def __init__(self, stype):
        # stype: the scalar Type that every input must match (asserted in
        # make_node)
        self.stype = stype
    def make_node(self, *inputs):
        inputs = map(as_tensor_variable, inputs)
        assert all(a.type == self.stype for a in inputs)
        return Apply(self, inputs, [TensorType(broadcastable = (False,),
            dtype = self.stype.dtype)()])
    def perform(self, node, inputs, (out,)):
        # stack the scalar values into a single 1-d ndarray
        out[0] = numpy.asarray(inputs)
    def grad(self, inputs, (gout,)):
        # gradient not defined for the scalar inputs
        return [None]*len(inputs)

make_lvector = MakeVector(lscalar)
"""Build an lvector (1-d int64 tensor) from lscalar arguments."""

class MakeVectorPrinter:
    # pretty-printer: renders a MakeVector apply as "[a, b, ...]"
    def process(self, r, pstate):
        if r.owner is None:
            raise TypeError("Can only print make_vector.")
        elif isinstance(r.owner.op, MakeVector):
            return "[%s]" % ", ".join(pstate.pprinter.process(input, pstate.clone(precedence = 1000)) for input in r.owner.inputs)
        else:
            raise TypeError("Can only print make_vector.")

pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, MakeVector), MakeVectorPrinter())
class Reshape(Op):
"""Perform a reshape operation of the input x to the new shape shp.
The number of dimensions to which to reshape to (ndim) must be known at graph
......@@ -3138,6 +3179,37 @@ def inverse_permutation(perm):
# Should reproduce numpy's behaviour:
# http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing
# Config flag gating AdvancedSubtensor1 (see __getitem__); off by default
# because the functionality is not yet well tested.
AddConfigVar('experimental.advanced_indexing',
        "enable not-well-tested advanced indexing functionality",
        BoolParam(False))
class AdvancedSubtensor1(Op):
    """Implement x[ilist] where ilist is a vector of integers."""

    def __hash__(self):
        return hash(type(self))

    def __eq__(self, other):
        # BUGFIX: the original was missing `return`, so __eq__ always
        # returned None (falsy) — no two instances ever compared equal,
        # defeating graph merging and breaking the __eq__/__hash__ contract.
        return type(self) == type(other)

    def make_node(self, x, ilist):
        """Build the Apply; x and ilist are coerced to tensor variables.

        :raises TypeError: if ilist is not an integer vector, or x is a
            scalar or broadcastable along its first dimension.
        """
        x_ = as_tensor_variable(x)
        ilist_ = as_tensor_variable(ilist)
        if ilist_.type.dtype[:3] not in ('int', 'uin'):
            raise TypeError('index must be integers')
        if ilist_.type.broadcastable != (False,):
            raise TypeError('index must be vector')
        if x_.type.ndim == 0:
            raise TypeError('cannot index into a scalar')
        if x_.type.broadcastable[0]:
            # the caller should have made a copy of x len(ilist) times
            raise TypeError('cannot index into a broadcastable dimension')
        return gof.Apply(self, [x_, ilist_], [x_.type()])

    def perform(self, node, inputs, output_storage):
        # numpy integer-array (advanced) indexing does the gather + copy.
        # (Signature modernized away from Py2 tuple-parameter unpacking;
        # call-compatible with the original.)
        x, i = inputs
        (out,) = output_storage
        out[0] = x[i]
class AdvancedSubtensor(Op):
"""Return a subtensor copy, using advanced indexing.
"""
......@@ -3308,6 +3380,18 @@ class Dot(Op):
rval = dot(gz, y.T), dot(x.T, gz)
return cast(rval[0], x.dtype), cast(rval[1], y.dtype)
def infer_shape(self, node, shapes):
    """Output shape of dot(x, y) for the supported 1-d/2-d combinations.

    :param shapes: (xshp, yshp) — the shapes of the two inputs
    :returns: a one-element list holding the output shape tuple
    :raises NotImplementedError: for ndim combinations other than 1/2 x 1/2

    (Signature modernized away from Py2 tuple-parameter unpacking; callers
    pass the shapes as one positional tuple, so this is call-compatible.)
    """
    xshp, yshp = shapes
    x, y = node.inputs
    if x.ndim == 2 and y.ndim == 2:
        return [(xshp[0], yshp[1])]   # matrix . matrix
    if x.ndim == 1 and y.ndim == 2:
        return [(yshp[1],)]           # vector . matrix
    if x.ndim == 2 and y.ndim == 1:
        return [(xshp[0],)]           # matrix . vector
    if x.ndim == 1 and y.ndim == 1:
        return [()]                   # vector . vector -> scalar
    raise NotImplementedError()
def __str__(self):
    # name used when printing graphs containing this op
    return "dot"
dot = Dot()
......
......@@ -6,10 +6,9 @@ import numpy.distutils
from theano.configparser import config, AddConfigVar, StrParam
from theano.gof import (utils, Op, Apply, view_roots, PatternSub, DestroyHandler,
SeqOptimizer, local_optimizer, Optimizer, LocalOptimizer, OpKeyOptimizer,
InconsistencyError, toolbox)
InconsistencyError, toolbox, SequenceDB, EquilibriumOptimizer)
from theano.printing import pprint, FunctionPrinter
from theano.tensor.opt import register_specialize, out2in, insert_inplace_optimizer
# opt.py
from theano.compile.mode import optdb
import basic as T
......@@ -30,7 +29,6 @@ AddConfigVar('blas.ldflags',
"lib[s] to include for [Fortran] level-3 blas implementation",
StrParam(default_blas_ldflags()))
_logger = logging.getLogger('theano.tensor.blas')
_logger.setLevel(logging.WARN)
def debug(*msg): _logger.debug(' '.join(str(m) for m in msg))
......@@ -391,12 +389,22 @@ class Gemm(GemmRelated):
def c_code_cache_version(self):
return (1,) + self.build_gemm_version()
gemm = Gemm()
class PseudoGemm(Op):
    # should be replaced by Gemm
    # Pure-python placeholder computing a*dot(x, y) + b*z via numpy;
    # the optimizer is expected to swap it for the real (inplace) Gemm.
    def __eq__(self, other):
        # no parameters: all instances are equivalent, compare by class
        return type(self) == type(other)
    def __hash__(self):
        # consistent with __eq__: hash on the class
        return hash(type(self))
    def make_node(self, *args):
        # output gets the type of the first input (z)
        inputs = [T.as_tensor_variable(i) for i in args]
        return Apply(self, inputs, [inputs[0].type()])
    def perform(self, node, (z, a, x, y, b), (zout, )):
        zout[0] = a * numpy.dot(x,y) + b * z
gemm_inplace = Gemm()
pprint.assign(gemm, FunctionPrinter('gemm'))
pprint.assign(gemm_inplace, FunctionPrinter('gemm_inplace'))
def res_is_a(node, op, maxclients=None):
if maxclients is not None:
retval = (len(node.clients) <= maxclients)
......@@ -597,6 +605,7 @@ class GemmOptimizer(Optimizer):
while did_something:
nodelist = list(env.toposort())
did_something = False
nodelist.reverse()
for node in nodelist:
new_outputs = _gemm_from_node(node)
if new_outputs:
......@@ -611,10 +620,6 @@ class GemmOptimizer(Optimizer):
#TODO: retry other applications of gemm (see comment in _gemm_from_node
pass
#neede to make the gemm optimisation(step 70) happen before the fusion of elemwise(step 71)
compile.optdb.register('inplace_gemm', GemmOptimizer(), 70.00, 'fast_run', 'inplace', 'gemm')
class Dot22(GemmRelated):
"""Compute a matrix-matrix product.
This is a specialization of the more general Dot()
......@@ -689,5 +694,34 @@ def local_dot_to_dot22(node):
info('Not optimizing dot with inputs', x, y, x.type, y.type)
else:
return False
register_specialize(local_dot_to_dot22)
@local_optimizer([gemm])
def local_inplace_gemm(node):
    # Replace a non-destructive gemm Apply with its inplace variant
    # (registered later under the 'inplace' tag, after the destroy handler).
    if node.op == gemm:
        return [gemm_inplace(*node.inputs)]
#################################
#
# Set up the BlasOpt optimizer
#
#################################
blas_optdb = SequenceDB()
# run after numerical stability optimizations (1.5)
optdb.register('BlasOpt', blas_optdb, 1.7, 'fast_run')
# run before specialize (2.0) because specialize is basically a free-for-all that makes the
# graph crazy.
blas_optdb.register('local_dot_to_dot22',
EquilibriumOptimizer([local_dot_to_dot22], max_use_ratio=5),
0, 'fast_run')
blas_optdb.register('local_dot_to_gemm', GemmOptimizer(), 10, 'fast_run')
# After destroyhandler is in but before we try to make elemwise things inplace
# Try to make gemm inplace
# Also, need to make the gemm optimisation(step 70) happen before the fusion of elemwise(step 71)
optdb.register('InplaceBlasOpt',
EquilibriumOptimizer([local_inplace_gemm], max_use_ratio=5),
70.0, 'fast_run', 'inplace')
......@@ -197,6 +197,18 @@ class DimShuffle(Op):
storage[0] = numpy.asarray(res) #asarray puts scalars back into array
def infer_shape(self, node, (ishp,)):
    """Compute the output shape of this DimShuffle from its input shape.

    `ishp` has one entry per input dimension.  The result mirrors what
    the Op does to the data: dropped dimensions are removed, the
    remainder is permuted by `self.shuffle`, and length-1 broadcastable
    dimensions are inserted at the `self.augment` positions.
    """
    ishp = list(ishp)
    # delete from the highest index first so earlier positions stay valid
    for drop in reversed(self.drop):
        del ishp[drop]
    # transpose
    rval = [ishp[i] for i in self.shuffle]
    # augment
    for augm in self.augment:
        rval.insert(augm, 1)
    return [rval]
def c_code(self, node, name, (input,), (res,), sub):
basename = input + '__view_or_copy'
......@@ -613,6 +625,25 @@ class Elemwise(Op):
# the following should be used instead of the previous loop, unfortunately it tends to segfault
# self.ufunc(*(ufunc_args+[s[0] for s in output_storage]))
def infer_shape(self, node, i_shapes):
    """Infer the shape of each output of an Elemwise apply.

    For every output dimension: if the output is broadcastable there,
    the length is exactly 1; otherwise some input must be
    non-broadcastable in that dimension, and we take its length.

    :param i_shapes: one shape tuple per input of `node`
    :returns: a list with one shape list per output
    """
    rval = []
    for o in node.outputs:
        oshp = []
        for dim, b in enumerate(o.type.broadcastable):
            b_dim = None
            if b:  # this is broadcastable
                b_dim = 1
            else:  # there must be some input that is not broadcastable
                for ishp, i in zip(i_shapes, node.inputs):
                    if not i.type.broadcastable[dim]:
                        b_dim = ishp[dim]
                        break
            # Use `is not None` rather than truthiness: a dimension of
            # legitimate length 0 (or a symbolic scalar whose truth value
            # is undefined) used to trip the old `assert b_dim` check.
            assert b_dim is not None, \
                    'could not infer length of output dimension %i' % dim
            oshp.append(b_dim)
        rval.append(oshp)
    return rval
def _c_all(self, node, name, inames, onames, sub):
_inames = inames
_onames = onames
......@@ -764,10 +795,14 @@ class CAReduce(Op):
if scalar_op.nin not in [-1, 2] or scalar_op.nout != 1:
raise NotImplementedError("CAReduce only supports binary functions with a single output.")
self.scalar_op = scalar_op
if isinstance(axis, int):
self.axis = [axis]
else:
if axis is None:
self.axis = axis
elif isinstance(axis, int):
self.axis = (axis,)
else:
self.axis = list(set(axis))
self.axis.sort()
self.axis = tuple(self.axis)
self.ufunc = numpy.frompyfunc(scalar_op.impl, 2, 1)
# CAReduce output views input when reducing scalars
......@@ -834,6 +869,13 @@ class CAReduce(Op):
else:
output[0] = numpy.copy(variable)
def infer_shape(self, node, (ishape,)):
    """Shape of the reduction: the input shape with reduced axes removed.

    When `self.axis` is None every axis is reduced, so the output is a
    scalar with an empty shape tuple.
    """
    axis = self.axis
    if axis is None:
        return (),
    # keep only the dimensions that are not reduced over
    return [ishape[i] for (i,b) in enumerate(node.inputs[0].type.broadcastable) if i not in axis],
def _c_all(self, node, name, inames, onames, sub):
input = node.inputs[0]
......
from nnet import *
from sigm import softplus, sigmoid, sigmoid_inplace, scalar_sigmoid
......@@ -4,89 +4,14 @@
"""
from theano import gof
from theano import scalar
from theano import printing
from theano.printing import pprint
from theano.tensor import basic as tensor
from theano.tensor import elemwise
from theano.tensor import opt
from theano.compile import optdb
import numpy
############
#
# SCALAR OPS
#
class ScalarSigmoid(scalar.UnaryScalarOp):
@staticmethod
def st_impl(x):
if x < -30.0:
return 0.0
if x > 30.0:
return 1.0
return 1.0 / (1.0 + numpy.exp(-x))
def impl(self, x):
return ScalarSigmoid.st_impl(x)
def grad(self, (x,), (gz,)):
y = scalar_sigmoid(x)
return [gz * y * (1.0 - y)]
def c_code(self, node, name, (x,), (z,), sub):
if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
# print i, repr( theano._asarray(1.0, dtype=dt) / (theano._asarray(1.0, dtype=dt) + numpy.exp(-theano._asarray([i,-i], dtype=dt))))
# the boundary checks prevent us from generating inf
return """%(z)s = %(x)s < -88.0f ? 0.0 : %(x)s > 15.0f ? 1.0f : 1.0f /(1.0f + exp(-%(x)s));""" % locals()
elif node.inputs[0].type == scalar.float64:
return """%(z)s = %(x)s < -709.0 ? 0.0 : %(x)s > 19.0 ? 1.0 : 1.0 /(1.0+exp(-%(x)s));""" % locals()
else:
raise NotImplementedError('only floatingpoint is implemented')
def c_code_cache_version(self):
v = super(ScalarSigmoid, self).c_code_cache_version()
if v:
return (2,) + v
else:
return v
scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')
pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid'))
class ScalarSoftplus(scalar.UnaryScalarOp):
@staticmethod
def static_impl(x):
if x < -30.0:
return 0.0
if x > 30.0:
return x
return numpy.log1p(numpy.exp(x))
def impl(self, x):
return ScalarSoftplus.static_impl(x)
def grad(self, (x,), (gz,)):
return [gz * scalar_sigmoid(x)]
def c_code(self, node, name, (x,), (z,), sub):
if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
# print i, repr( numpy.log1p(numpy.exp(theano._asarray([i,-i], dtype=dt))))
# the boundary checks prevent us from generating inf
return """%(z)s = %(x)s < -103.0f ? 0.0 : %(x)s > 14.0f ? %(x)s : log1p(exp(%(x)s));""" % locals()
elif node.inputs[0].type == scalar.float64:
return """%(z)s = %(x)s < -745.0 ? 0.0 : %(x)s > 16.0 ? %(x)s : log1p(exp(%(x)s));""" % locals()
else:
raise NotImplementedError('only floatingpoint is implemented')
def c_code_cache_version(self):
v = super(ScalarSoftplus, self).c_code_cache_version()
if v:
return (2,) + v
else:
return v
scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus')
softplus = elemwise.Elemwise(scalar_softplus, name='softplus')
pprint.assign(softplus, printing.FunctionPrinter('softplus'))
from .sigm import sigmoid, softplus
############
......@@ -1351,6 +1276,7 @@ def categorical_crossentropy(coding_dist, true_dist):
raise TypeError('rank mismatch between coding and true distributions')
from theano import scalar
class Prepend_scalar_constant_to_each_row(gof.Op):
def __init__(self, val = 0):
......@@ -1440,14 +1366,3 @@ prepend_scalar_to_each_row = Prepend_scalar_to_each_row()
prepend_0_to_each_row = Prepend_scalar_constant_to_each_row(0.)
prepend_1_to_each_row = Prepend_scalar_constant_to_each_row(1.)
logsigm_to_softplus = gof.PatternSub(
(tensor.log, (sigmoid, 'x')),
(tensor.neg, (softplus, (tensor.neg, 'x'))),
allow_multiple_clients = True)
log1msigm_to_softplus = gof.PatternSub(
(tensor.log, (tensor.sub, tensor.constant([[1.0]]), (sigmoid, 'x'))),
(tensor.neg, (softplus, 'x')),
allow_multiple_clients = True)
opt.register_specialize(logsigm_to_softplus, name = 'logsigm_to_softplus')
opt.register_specialize(log1msigm_to_softplus, name = 'log1msigm_to_softplus')
"""Ops and optimizations: sigmoid, softplus
These functions implement special cases of exp and log to improve numerical stability.
"""
import numpy
from theano import gof
from theano import scalar
from theano import printing
from theano.tensor import basic as tensor
from theano.printing import pprint
from theano.tensor import elemwise
from theano.tensor import opt
from theano.compile import optdb
############
#
# SCALAR OPS
#
class ScalarSigmoid(scalar.UnaryScalarOp):
    """Elementwise scalar logistic sigmoid: 1 / (1 + exp(-x)).

    The python implementation saturates far from zero so exp(-x) cannot
    overflow; the C implementation uses dtype-specific bounds for the
    same reason.
    """
    @staticmethod
    def st_impl(x):
        # saturate: sigmoid(-30) and sigmoid(30) are already 0/1 to
        # within float precision, and this avoids overflow in exp()
        if x < -30.0:
            return 0.0
        if x > 30.0:
            return 1.0
        return 1.0 / (1.0 + numpy.exp(-x))
    def impl(self, x):
        return ScalarSigmoid.st_impl(x)
    def grad(self, (x,), (gz,)):
        # d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x))
        y = scalar_sigmoid(x)
        return [gz * y * (1.0 - y)]
    def c_code(self, node, name, (x,), (z,), sub):
        if node.inputs[0].type == scalar.float32:
            # These constants were obtained by looking at the output of python commands like:
            # for i in xrange(750):
            #     print i, repr( theano._asarray(1.0, dtype=dt) / (theano._asarray(1.0, dtype=dt) + numpy.exp(-theano._asarray([i,-i], dtype=dt))))
            # the boundary checks prevent us from generating inf
            return """%(z)s = %(x)s < -88.0f ? 0.0 : %(x)s > 15.0f ? 1.0f : 1.0f /(1.0f + exp(-%(x)s));""" % locals()
        elif node.inputs[0].type == scalar.float64:
            return """%(z)s = %(x)s < -709.0 ? 0.0 : %(x)s > 19.0 ? 1.0 : 1.0 /(1.0+exp(-%(x)s));""" % locals()
        else:
            raise NotImplementedError('only floatingpoint is implemented')
    def c_code_cache_version(self):
        # bump the leading (2,) whenever the generated C code changes
        v = super(ScalarSigmoid, self).c_code_cache_version()
        if v:
            return (2,) + v
        else:
            return v
scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')
sigmoid_inplace = elemwise.Elemwise(
ScalarSigmoid(scalar.transfer_type(0)),
inplace_pattern={0:0},
name='sigmoid_inplace',
)
pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid'))
class ScalarSoftplus(scalar.UnaryScalarOp):
    """Elementwise scalar softplus: log(1 + exp(x)).

    Saturates to 0 for very negative x and to x itself for very large x,
    avoiding overflow in exp() while staying within float precision.
    """
    @staticmethod
    def static_impl(x):
        if x < -30.0:
            return 0.0
        if x > 30.0:
            return x
        return numpy.log1p(numpy.exp(x))
    def impl(self, x):
        return ScalarSoftplus.static_impl(x)
    def grad(self, (x,), (gz,)):
        # d/dx softplus(x) = sigmoid(x)
        return [gz * scalar_sigmoid(x)]
    def c_code(self, node, name, (x,), (z,), sub):
        if node.inputs[0].type == scalar.float32:
            # These constants were obtained by looking at the output of python commands like:
            # for i in xrange(750):
            #     print i, repr( numpy.log1p(numpy.exp(theano._asarray([i,-i], dtype=dt))))
            # the boundary checks prevent us from generating inf
            return """%(z)s = %(x)s < -103.0f ? 0.0 : %(x)s > 14.0f ? %(x)s : log1p(exp(%(x)s));""" % locals()
        elif node.inputs[0].type == scalar.float64:
            return """%(z)s = %(x)s < -745.0 ? 0.0 : %(x)s > 16.0 ? %(x)s : log1p(exp(%(x)s));""" % locals()
        else:
            raise NotImplementedError('only floatingpoint is implemented')
    def c_code_cache_version(self):
        # bump the leading (2,) whenever the generated C code changes
        v = super(ScalarSoftplus, self).c_code_cache_version()
        if v:
            return (2,) + v
        else:
            return v
scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus')
softplus = elemwise.Elemwise(scalar_softplus, name='softplus')
pprint.assign(softplus, printing.FunctionPrinter('softplus'))
logsigm_to_softplus = gof.PatternSub(
(tensor.log, (sigmoid, 'x')),
(tensor.neg, (softplus, (tensor.neg, 'x'))),
allow_multiple_clients = True)
def _is_1(expr):
    """Return True iff `expr` is a constant whose value is (close to) 1.

    :rtype: bool
    """
    try:
        value = opt.get_constant_value(expr)
    except TypeError:
        # `expr` is not a view of constant data
        return False
    return numpy.allclose(value, 1)
log1msigm_to_softplus = gof.PatternSub(
(tensor.log,
(tensor.sub,
dict(pattern='y', constraint = _is_1),
(sigmoid, 'x'))),
(tensor.neg, (softplus, 'x')),
allow_multiple_clients = True)
opt.register_stabilize(logsigm_to_softplus, name = 'logsigm_to_softplus')
opt.register_stabilize(log1msigm_to_softplus, name = 'log1msigm_to_softplus')
def is_1pexp(t):
    """If `t` has the form (1 + exp(x)), return (False, x); else None.

    The leading False is a "negated" flag kept for symmetry with
    `is_exp`, so both can feed `partition_num_or_denom`.
    """
    if t.owner and t.owner.op == tensor.add:
        scalars, scalar_inputs, nonconsts = \
                opt.scalarconsts_rest(t.owner.inputs)
        # scalar_inputs are potentially dimshuffled and fill'd scalars
        if len(nonconsts) == 1:
            maybe_exp = nonconsts[0]
            if maybe_exp.owner and maybe_exp.owner.op == tensor.exp:
                # BUGFIX: the constant part must actually sum to 1;
                # previously (c + exp(x)) matched for any constant c,
                # so e.g. 1/(2+exp(x)) was wrongly rewritten to sigm(-x).
                if numpy.allclose(numpy.sum(scalars), 1):
                    return False, maybe_exp.owner.inputs[0]
    return None
def is_exp(t):
    """If `t` is exp(x) or -exp(x), return (negated, x); otherwise None.

    `negated` tells whether the expression carried a leading negation.
    """
    negated = False
    if t.owner and t.owner.op == tensor.neg:
        negated = True
        t = t.owner.inputs[0]
    if t.owner and t.owner.op == tensor.exp:
        return negated, t.owner.inputs[0]
    return None
def partition_num_or_denom(r, f):
    """Split the multiplicative factors of `r` using classifier `f`.

    `f(term)` returns either None or a (negated, payload) pair.  Returns
    (payloads, unmatched_terms, sign_flipped) where `sign_flipped` is the
    xor of all the `negated` flags seen.
    """
    if r.owner and r.owner.op == tensor.mul:
        factors = r.owner.inputs
    else:
        factors = [r]

    payloads = []
    unmatched = []
    sign_flipped = False
    for factor in factors:
        verdict = f(factor)
        if verdict is None:
            unmatched.append(factor)
        else:
            negated, payload = verdict
            payloads.append(payload)
            sign_flipped ^= negated  # bit flip if negated is true
    return payloads, unmatched, sign_flipped
@opt.register_stabilize
@gof.local_optimizer([tensor.true_div])
def local_exp_over_1_plus_exp(node):
    """exp(x)/(1+exp(x)) -> sigm(x)
    c/(1+exp(x)) -> c*sigm(-x)
    """
    # this optimization should be done for numerical stability
    # so we don't care to check client counts
    if node.op == tensor.true_div:

        #find all the exp() terms in the numerator
        num, denom = node.inputs
        num_exp_x, num_rest, num_neg = partition_num_or_denom(num, is_exp)
        denom_1pexp, denom_rest, denom_neg = partition_num_or_denom(denom, is_1pexp)

        sigmoids = []
        for t in denom_1pexp:
            if t in num_exp_x:
                # case: exp(x) /(1+exp(x))
                sigmoids.append(sigmoid(t))
                del num_exp_x[num_exp_x.index(t)]
            else:
                # case: 1/(1+exp(x))
                sigmoids.append(sigmoid(-t))

        if not sigmoids: # we didn't find any.  abort
            return
        # put the new numerator together: the sigmoids, the leftover
        # (unmatched) exp factors, and the non-exp factors
        new_num = sigmoids + [tensor.exp(t) for t in num_exp_x] + num_rest
        if len(new_num) == 1:
            new_num = new_num[0]
        else:
            new_num = tensor.mul(*new_num)

        # an odd total number of negations flips the sign of the result
        if num_neg ^ denom_neg:
            new_num = -new_num

        # divide by whatever denominator factors were not (1+exp(..))
        if len(denom_rest) == 0:
            return [new_num]
        elif len(denom_rest) == 1:
            return [new_num / denom_rest[0]]
        else:
            return [new_num / tensor.mul(*denom_rest)]
@opt.register_stabilize
@gof.local_optimizer([tensor.mul])
def local_sigm_times_exp(node):
    """
    exp(x)*sigm(-x) -> sigm(x)
    exp(-x)*sigm(x) -> sigm(-x)

    (The old docstring said `-> -sigm(x)`, but exp(x)*sigm(-x) equals
    exp(x)/(1+exp(x)) = sigm(x); no sign flip is involved.)
    """
    # this is a numerical stability thing, so we dont check clients
    if node.op == tensor.mul:
        exp_x = []          # arguments u of factors exp(u)
        exp_minus_x = []    # arguments u of factors exp(-u)
        sigm_x = []         # arguments u of factors sigm(u)
        sigm_minus_x = []   # arguments u of factors sigm(-u)
        other = []
        neg = False
        for i in node.inputs:
            # strip any number of leading negations, tracking overall sign
            while i.owner and i.owner.op == tensor.neg:
                neg ^= True
                i = i.owner.inputs[0]
            if i.owner and i.owner.op == tensor.exp:
                exp_arg = i.owner.inputs[0]
                if exp_arg.owner and exp_arg.owner.op == tensor.neg:
                    exp_minus_x.append(exp_arg.owner.inputs[0])
                else:
                    exp_x.append(exp_arg)
            elif i.owner and i.owner.op == sigmoid:
                sigm_arg = i.owner.inputs[0]
                if sigm_arg.owner and sigm_arg.owner.op == tensor.neg:
                    sigm_minus_x.append(sigm_arg.owner.inputs[0])
                else:
                    sigm_x.append(sigm_arg)
            else:
                other.append(i)

        # remove matched pairs in exp_x and sigm_minus_x:
        # exp(x) * sigm(-x) == sigm(x)
        did_something = False
        for i in exp_x:
            if i in sigm_minus_x:
                del sigm_minus_x[sigm_minus_x.index(i)]
                other.append(sigmoid(i))
                did_something = True
            else:
                # BUGFIX: an unmatched factor must be re-wrapped in exp();
                # the old code appended the bare argument `i`, silently
                # replacing exp(i) by i in the rebuilt product.
                other.append(tensor.exp(i))

        # remove matched pairs in exp_minus_x and sigm_x:
        # exp(-x) * sigm(x) == sigm(-x)
        for i in exp_minus_x:
            if i in sigm_x:
                del sigm_x[sigm_x.index(i)]
                # BUGFIX: was `sigm(-i)` -- `sigm` is not defined in this
                # module, so this branch raised NameError whenever it ran.
                other.append(sigmoid(-i))
                did_something = True
            else:
                other.append(tensor.exp(-i))

        if did_something:
            terms = other + [sigmoid(x) for x in sigm_x] \
                    + [sigmoid(-x) for x in sigm_minus_x]
            if len(terms) > 1:
                rval = tensor.mul(*terms)
            else:
                rval = terms[0]
            if neg:
                return [-rval]
            else:
                return [rval]
@opt.register_stabilize
@gof.local_optimizer([tensor.inv])
def local_inv_1_plus_exp(node):
    """
    1/(1+exp(x)) -> sigm(-x)
    """
    # this optimization should be done for numerical stability
    # so we don't care to check client counts
    if node.op == tensor.inv:
        inv_arg = node.inputs[0]
        if inv_arg.owner and inv_arg.owner.op == tensor.add:
            scalars, scalar_inputs, nonconsts = \
                    opt.scalarconsts_rest(inv_arg.owner.inputs)
            # scalar_inputs are potentially dimshuffled and fill'd scalars
            if len(nonconsts) == 1:
                if nonconsts[0].owner and nonconsts[0].owner.op == tensor.exp:
                    # the additive constants must sum to exactly 1 for the
                    # sigmoid identity to hold
                    if scalars and numpy.allclose(numpy.sum(scalars), 1):
                        # re-apply any fill()s that wrapped the constants so
                        # the result keeps the original broadcast pattern
                        return opt._fill_chain(
                                sigmoid(tensor.neg(nonconsts[0].owner.inputs[0])),
                                scalar_inputs)
#@opt.register_canonicalize
@gof.local_optimizer([tensor.inv])
def local_1msigmoid(node):
"""
1-sigm(x) -> sigm(-x)
"""
if node.op == tensor.sub:
sub_l, sub_r = node.inputs
if len(sub_r.clients) > 1:
return # graph is using both sigm and 1-sigm
if sub_r.owner and sub_r.owner.op == sigmoid:
try:
val_l = opt.get_constant_value(sub_l)
except Exception, e:
return
if numpy.allclose(numpy.sum(val_l), 1):
return [sigmoid(-sub_r.owner.inputs[0])]
import unittest
import theano
from theano import tensor as T
from theano import gof
import numpy
from theano.tests import unittest_tools as utt
from theano.tensor.tests import test_basic as TT
from theano.tensor.nnet import *
class T_sigmoid(unittest.TestCase):
    """Numerically verify the gradient of the sigmoid elemwise op."""
    def setUp(self):
        # make verify_grad's random perturbations reproducible
        utt.seed_rng()

    def test_elemwise(self):
        utt.verify_grad(sigmoid, [numpy.random.rand(3,4)])
class T_softplus(unittest.TestCase):
    """Numerically verify the gradient of the softplus elemwise op."""
    def setUp(self):
        # make verify_grad's random perturbations reproducible
        utt.seed_rng()

    def test_elemwise(self):
        utt.verify_grad(softplus, [numpy.random.rand(3,4)])
class T_sigmoid_opts(unittest.TestCase):
    """Check that the sigmoid stabilization rewrites reduce each test
    graph to exactly the expected sequence of ops."""

    def test_exp_over_1_plus_exp(self):
        m = theano.config.mode
        if m == 'FAST_COMPILE':
            # the rewrites under test are tagged fast_run only
            m = 'FAST_RUN'
        x = T.dvector()

        # tests exp_over_1_plus_exp
        f = theano.function([x], T.exp(x)/(1+T.exp(x)), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid]

        # tests inv_1_plus_exp
        f = theano.function([x], T.fill(x,1.0) / (1+T.exp(-x)), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid]

        # tests inv_1_plus_exp with neg
        f = theano.function([x], T.fill(x,-1.0) / (1+T.exp(-x)), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid,
                T.inplace.neg_inplace]

        # tests double inv_1_plus_exp with neg
        f = theano.function([x], (T.fill(x,-1.0)*T.exp(x)) / ((1+T.exp(x))*(1+T.exp(-x))), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid,
                T.mul]

    def test_1msigmoid(self):
        m = theano.config.mode
        if m == 'FAST_COMPILE':
            m = 'FAST_RUN'
        x = T.fmatrix()

        # tests exp_over_1_plus_exp
        f = theano.function([x], 1 - T.exp(x)/(1+T.exp(x)), mode=m)
        theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [tensor.neg, sigmoid_inplace]

        # tests inv_1_plus_exp
        f = theano.function([x], 1 - T.fill(x,1.0) / (1+T.exp(-x)), mode=m)
        theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [tensor.neg,
                sigmoid_inplace]
......@@ -44,34 +44,65 @@ def _fill_chain(new_out, orig_inputs):
new_out = T.fill(i, new_out)
return [new_out]
def get_constant_value(v, fill=False):
"""return the constant value underlying variable `v`
def encompasses_broadcastable(b1, b2):
    """Return True iff a tensor with broadcastable pattern `b2` can be
    broadcast to the shape of one with pattern `b1`, and not vice versa.

    :param b1: the broadcastable attribute of a tensor type
    :param b2: the broadcastable attribute of a tensor type
    """
    if len(b2) > len(b1):
        return False
    # align b2 against the trailing dimensions of b1, numpy-style
    tail = b1[len(b1) - len(b2):]
    for v1, v2 in zip(tail, b2):
        # a dim broadcastable in b1 but not in b2 cannot be covered
        if v1 and not v2:
            return False
    return True
def merge_broadcastables(broadcastables):
    """Combine several broadcastable patterns dimension by dimension:
    a merged dim is broadcastable only if it is broadcastable in every
    pattern.  Truncates to the shortest pattern (zip semantics)."""
    merged = []
    for dims in zip(*broadcastables):
        merged.append(all(dims))
    return merged
def get_constant_value(v):
"""return the constant scalar(0-D) value underlying variable `v`
If v is the output of dimshuffles, fills, this function digs through them.
If `v` is not some view of constant data, then raise a TypeError.
if fill is True, then it returns (v, [...]) where the second term is a list of variables
that were used in the fill expressions
:note: There may be another function similar to this one in the code, but I'm not sure where it
is.
"""
if isinstance(v, gof.Constant):
if fill:
return v.data, []
return v.data
#TODO: consider checking for arrays of the form e.g. [1,1,1,1] where
# it is not a constant, but in some cases it *could* be replaced with one.
# Note that this would have an effect on the broadcasting of inputs and so on
try:
complex(v.data) #works for all numeric scalars
return v.data
except:
raise TypeError(v)
if v.owner and isinstance(v.owner.op, T.DimShuffle):
return get_constant_value(v.owner.inputs[0], fill=fill)
if fill:
if v.owner and v.owner.op == T.fill:
shape, val = v.owner.inputs
# fill(a,b) fills the shape of 'a' filled with 'b'
rval, rshapes = get_constant_value(val, fill=fill)
return rval, rshapes + [shape]
return get_constant_value(v.owner.inputs[0])
if v.owner and v.owner.op == T.fill:
shape, val = v.owner.inputs
# fill(a,b) fills the shape of 'a' filled with 'b'
return get_constant_value(val)
raise TypeError(v)
def scalarconsts_rest(inputs):
    """Partition a list of variables into two kinds:
    scalar constants, and the rest.

    :returns: (consts, origconsts, nonconsts) -- the unwrapped constant
        values, the original constant variables, and the remaining
        variables, all in input order.
    """
    consts = []
    origconsts = []
    nonconsts = []
    for i in inputs:
        try:
            v = get_constant_value(i)
            consts.append(v)
            origconsts.append(i)
        except Exception:
            # get_constant_value raises TypeError for non-constants; the
            # old bare `except:` also swallowed KeyboardInterrupt/SystemExit.
            nonconsts.append(i)
    return consts, origconsts, nonconsts
@gof.optimizer
def insert_inplace_optimizer(env):
"""
......@@ -124,6 +155,11 @@ def register_specialize(lopt, *tags, **kwargs):
compile.optdb['specialize'].register(name, lopt, 'fast_run', *tags)
return lopt
def register_stabilize(lopt, *tags, **kwargs):
    """Register `lopt` in the 'stabilize' stage of the optimizer database
    under the 'fast_run' tag (plus any extra `tags`), and return it so it
    can be used as a decorator.  An explicit name may be passed via the
    `name` keyword; otherwise the optimizer's __name__ is used.
    """
    # pop with a default: the old `(kwargs and kwargs.pop('name'))` raised
    # KeyError when other keyword arguments were passed without 'name'
    name = kwargs.pop('name', None) or lopt.__name__
    compile.optdb['stabilize'].register(name, lopt, 'fast_run', *tags)
    return lopt
######################
# DimShuffle lifters #
######################
......@@ -164,130 +200,321 @@ register_canonicalize(local_dimshuffle_lift)
#################
# Shape lifters #
#################
#####################################
# ShapeFeature, Shape optimizations
#####################################
@gof.local_optimizer([T._shape, None])
def local_shape_lift_elemwise(node):
"""
shape(elemwise_op(..., x, ...)) -> shape(x)
class MakeVector(T.Op):
"""Concatenate a number of scalars together into a vector
Where x contains the maximal shape information.
This is a simple version of stack() that introduces far less cruft into the graph.
"""
if not opt.check_chain(node, T._shape, T.Elemwise):
return False
output = node.inputs[0]
parent = output.owner
for input in parent.inputs:
if input.type.broadcastable == output.type.broadcastable:
return T._shape(input),
return False
register_canonicalize(local_shape_lift_elemwise, 'shape_lift')
register_specialize(local_shape_lift_elemwise, 'shape_lift')
def __init__(self, dtype='int64'):
self.dtype = dtype
def __eq__(self, other):
return type(self) == type(other) and self.dtype == other.dtype
def __hash__(self):
return hash(type(self)) ^ hash(self.dtype)
def make_node(self, *inputs):
inputs = map(T.as_tensor_variable, inputs)
if not all(a.type == inputs[0].type for a in inputs):
raise TypeError('This MakeVector instance requires inputs of same type %s' %
inputs[0].type)
if inputs:
dtype = inputs[0].type.dtype
else:
dtype = self.dtype
#bcastable = (len(inputs) == 1)
bcastable = False
otype = T.TensorType(
broadcastable=(bcastable,),
dtype=dtype)
return T.Apply(self, inputs, [otype()])
def __str__(self):
return self.__class__.__name__
def perform(self, node, inputs, (out,)):
out[0] = T.numpy.asarray(inputs)
make_vector = MakeVector()
class MakeVectorPrinter:
    """Pretty-printer rendering a make_vector apply as "[a, b, c]"."""
    def process(self, r, pstate):
        # only variables produced by a MakeVector apply can be printed
        if r.owner is not None and isinstance(r.owner.op, MakeVector):
            pieces = [pstate.pprinter.process(elem, pstate.clone(precedence = 1000))
                      for elem in r.owner.inputs]
            return "[%s]" % ", ".join(pieces)
        raise TypeError("Can only print make_vector.")
T.pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, MakeVector), MakeVectorPrinter())
@gof.local_optimizer([T._shape, None])
def local_shape_lift_sum(node):
class Shape_i(T.Op):
"""
shape(sum{n}(x)) -> [shape(x)[0], ..., shape(x)[n-1], shape(x)[n+1], ...]
"""
if not opt.check_chain(node, T._shape, T.Sum):
return False
L{Op} to return the shape of a matrix.
input = node.inputs[0].owner.inputs[0]
axis = node.inputs[0].owner.op.axis
if axis is None:# or len(axis) != 1:
axis = range(input.type.ndim)
@note: Non-differentiable.
"""
def __init__(self, i):
self.i = i
def __hash__(self):
return hash(type(self)) ^ self.i
def __eq__(self, other):
return type(self) == type(other) and self.i == other.i
def __str__(self):
return '%s{%i}'%(self.__class__.__name__, self.i)
def make_node(self, x):
x = T.as_tensor_variable(x)
if x.ndim <= self.i:
raise TypeError('x has too few dimensions for Shape_i', (x, self.i))
return T.Apply(self, [x], [T.lscalar()])
def perform(self, node, (x, ), (out, )):
out[0] = theano._asarray(x.shape[self.i], dtype = 'int64')
def grad(self, (x,), (gz,)):
return [None]
class ShapeFeature(object):
"""Graph optimizer for removing all calls to shape()
This optimizer replaces all Shapes and Subtensors of Shapes with Shape_i and MakeVector
Ops.
This optimizer has several goals:
1. to 'lift' Shapes to as close to the inputs as possible.
2. to infer the shape of every node in the graph in terms of the input shapes.
3. remove all fills (T.second, T.fill) from the graph
Lifting shapes as close to the inputs as possible is important for canonicalization because
it is very bad form to have to compute something just to know how big it will be. Firstly,
it is a waste of time to compute such outputs. But it is important to get rid of these
outputs as early as possible in the compilation process because the
extra computations make it appear as if many internal graph nodes have multiple clients.
Many optimizations refuse to work on nodes with multiple clients.
Lifting is done by using an `<Op>.infer_shape` function if one is present, or else using a
conservative default. An Op that supports shape-lifting should define a
infer_shape(self, node, input_shapes) function. The argument input_shapes is a tuple
of tuples... there is an interior tuple for each input to the node. The tuple has as many
elements as dimensions. The element in position i of tuple j represents the i'th shape
component of the j'th input. The function should return a tuple of tuples. One output
tuple for each node.output. Again, the i'th element of the j'th output tuple represents
the output[j].shape[i] of the function. If an output is not a TensorType, then None should
be returned instead of a tuple for that output.
For example the infer_shape for a matrix-matrix product would accept
input_shapes=((x0,x1), (y0,y1)) and return ((x0, y1),).
Inferring the shape of internal nodes in the graph is important for doing size-driven
optimizations. If we know how big various intermediate results will be, we can estimate
the cost of many Ops accurately, and generate c-code that is specific [e.g. unrolled] to
particular sizes.
ish = T._shape(input)
return T.make_lvector.make_node(*(ish[i] for i in xrange(input.type.ndim) if i not in axis)).outputs
# return T.vertical_stack.make_node(ish[:axis], ish[axis+1:]).outputs
.. note::
register_canonicalize(local_shape_lift_sum, 'shape_lift')
Right now there is only the ConvOp that can really take advantage of this shape
inference, but it is worth it even just for the ConvOp. All that's necessary to do
shape inference is 1) to mark shared inputs as having a particular shape,
either via a .tag or some similar hacking; and 2) to add an optional Param() argument
to promise that inputs will have a certain shape (or even to have certain shapes in
certain dimensions).
@gof.local_optimizer([T._shape, T.dot])
def local_shape_lift_dot(node):
"""
shape(dot(a, b)) -> [shape(a)[0], shape(b)[1]]
"""
if not opt.check_chain(node, T._shape, T.dot):
return False
a, b = node.inputs[0].owner.inputs
if a.type.ndim == 2 and b.type.ndim == 2:
return T.make_lvector.make_node(T._shape(a)[0], T._shape(b)[1]).outputs
elif a.type.ndim == 1 and b.type.ndim == 2:
return T.make_lvector.make_node(T._shape(b)[1]).outputs
elif a.type.ndim == 2 and b.type.ndim == 1:
return T.make_lvector.make_node(T._shape(a)[0]).outputs
elif a.type.ndim == 1 and b.type.ndim == 1:
return T.make_lvector.make_node().outputs
else:
return False
register_canonicalize(local_shape_lift_dot, 'shape_lift')
# local_shape_lift = opt.LocalOptGroup(local_shape_lift_elemwise,
# local_shape_lift_sum,
# local_shape_lift_dot)
def shape_i(self, i):
def op_deco(r):
if r.type.broadcastable[i]:
return self.lscalar_one
else:
return Shape_i(i)(r)
return op_deco
def shape_tuple(self, r):
return tuple([self.shape_i(i)(r) for i in xrange(r.ndim)])
################
# Fill lifters #
################
def default_infer_shape(self, node, i_shapes):
rval = []
for r in node.outputs:
try:
rval.append(self.shape_tuple(r))
except AttributeError:
rval.append(None)
return rval
def unpack(self, s_i):
# unpack the s_i that the Op returned
assert s_i is not None
if s_i == 1:
# don't make the optimizer merge a zillion ones together
return self.lscalar_one
if type(s_i) is int:
# this shape is a constant
assert s_i >= 0
return T.constant(s_i, dtype='int64')
if type(s_i) in (tuple,list):
# this dimension is the same as many of the inputs
# which tells us that if one of the inputs is known,
# the others all become known.
# TODO: should be implemented in Elemwise, and Dot
#
# worst case, we loop over shape_of and replace things
raise NotImplementedError(s_i)
elif s_i.type == T.lscalar:
return s_i
else:
raise TypeError('Unsupported shape element', s_i)
def encompasses_broadcastable(b1, b2):
"""
Returns True if the broadcastable patterns b1 and b2 are such that b2 is
broadcasted to b1's shape and not the opposite.
def set_shape(self, r, s):
assert r not in self.shape_of
if s is None:
self.shape_of[r] = s
else:
self.shape_of[r] = tuple([self.unpack(s_i) for s_i in s])
:param b1: the broadcastable attribute of a tensor type
:param b2: the broadcastable attribute of a tensor type
"""
if len(b1) < len(b2):
return False
b1 = b1[-len(b2):]
return not any(v1 and not v2 for v1, v2 in zip(b1, b2))
def make_vector_shape(self, r):
return make_vector(*self.shape_of[r])
#
#
# Feature inteface
#
#
def on_attach(self, env):
assert not hasattr(env, 'shape_feature')
env.shape_feature = self
self.shape_of = {} # Variable -> tuple(scalars) or None (All tensor vars map to tuple)
self.lscalar_one = T.constant(1, dtype='int64')
assert self.lscalar_one.type == T.lscalar
for node in env.toposort():
self.on_import(env, node)
def on_import(self, env, node):
if node.outputs[0] in self.shape_of:
# this is a revert, not really an import
for r in node.outputs + node.inputs:
assert r in self.shape_of
return
for i, r in enumerate(node.inputs):
# make sure we have shapes for the inputs
if r not in self.shape_of:
try:
self.set_shape(r, self.shape_tuple(r))
except AttributeError:
self.set_shape(r, None ) # not a TensorType variable
def merge_broadcastables(broadcastables):
return [all(bcast) for bcast in zip(*broadcastables)]
try:
shape_infer = node.op.infer_shape
except AttributeError:
shape_infer = self.default_infer_shape
@gof.local_optimizer([T.fill, None])
def local_fill_lift(node):
try:
o_shapes = shape_infer(node, [self.shape_of[r] for r in node.inputs])
except Exception, e:
_logger.error('Failed to infer_shape from Op %s (i_shapes=%s): %s %s'% (node.op,
[self.shape_of[r] for r in node.inputs],
type(e), str(e)))
o_shapes = default_infer_shape(node, [self.shape_of[r] for r in node.inputs])
# this is packed information
# an element of o_shapes is either None or a tuple
# elements of the tuple can be either strings, or ints
assert len(o_shapes) == len(node.outputs)
for r, s in zip(node.outputs, o_shapes):
self.set_shape(r, s)
def on_change_input(self, env, mode, i, r, new_r):
# TODO:
# This tells us that r and new_r must have the same shape
# if we didn't know that the shapes are related, now we do.
pass
class ShapeOptimizer(Optimizer):
"""Optimizer that serves to add ShapeFeature as an env feature.
"""
fill(f(a), b) -> fill(a, b)
If a.type == f(a).type.
def __init__(self):
Optimizer.__init__(self)
fill(a, b) -> b
If a.type == b.type.
"""
if not opt.check_chain(node, T.fill):
return False
def add_requirements(self, env):
env.extend(ShapeFeature())
model, filling = node.inputs
def apply(self, env):
pass
mb, fb = model.type.broadcastable, filling.type.broadcastable
if model.type.dtype == filling.type.dtype and encompasses_broadcastable(fb, mb):
return False# [filling]
# -1 should make it run right before the first merge
theano.compile.mode.optdb.register('ShapeOpt', ShapeOptimizer(), -1, 'fast_run', 'fast_compile')
parent = model.owner
if parent is None or not isinstance(parent, T.Elemwise):
return False
for input in parent.inputs:
if input.type == model.type:
return [T.fill(input, filling)]
@register_specialize
@register_canonicalize
@gof.local_optimizer([T.fill])
def local_fill_to_alloc(node):
"""fill(s,v) -> alloc(v, shape(s))
return False
This is an important optimization because with the shape_to_shape_i optimization, the
dependency on 's' is often removed.
"""
if node.op == T.fill:
r, v = node.inputs
if v.type == node.outputs[0].type:
# this is a useless fill, erase it.
rval = [v]
elif v.type.broadcastable == node.outputs[0].type.broadcastable:
# this is a cast
rval = [T.cast(v, node.outputs[0].type.dtype)]
else:
# we are broadcasting v somehow
shape_of = node.env.shape_feature.shape_of
# TODO: cut out un-necessary dimshuffles of v
rval = [T.Alloc(node.outputs[0].dtype)(v, *shape_of[node.outputs[0]])]
assert rval[0].type == node.outputs[0].type
return rval
register_canonicalize(local_fill_lift, 'fill_lift')
@register_specialize
@register_canonicalize
@gof.local_optimizer([T._shape])
def local_shape_to_shape_i(node):
    """Replace shape(x) with a make_vector of the per-dimension Shape_i
    variables recorded by the env's ShapeFeature."""
    if node.op != T._shape:
        return
    feature = node.env.shape_feature
    return [feature.make_vector_shape(node.inputs[0])]
@register_specialize
@register_canonicalize
@gof.local_optimizer([T.Subtensor])
def local_subtensor_make_vector(node):
    """Lift constant indexing through make_vector:
    [a,b,c][0] -> a and [a,b,c][0:2] -> [a,b], for python-int indices,
    constant tensor indices, and slices.
    """
    # replace all subtensor(make_vector) like:
    # [a,b,c][0] -> a
    # [a,b,c][0:2] -> [a,b]
    # we can do this for constant indexes
    if isinstance(node.op, T.Subtensor):
        # NOTE(review): shape_feature is fetched but never used here
        shape_feature = node.env.shape_feature
        x = node.inputs[0]
        if x.owner and x.owner.op == make_vector:
            try:
                idx, = node.op.idx_list
            except:
                #'how can you have multiple indexes into a shape?'
                raise
            if isinstance(idx, int):
                # a python int: pick out the corresponding element
                return [x.owner.inputs[idx]]
            elif isinstance(idx, T.TensorVariable):
                # if it is a constant we can do something with it
                try:
                    v = get_constant_value(idx)
                    return [x.owner.inputs[v]]
                except:
                    # not constant: leave the graph unchanged
                    pass
            else:
                # it is a slice of ints and/or Variables
                #TODO: check subtensor to see if it can contain constant variables,
                #      and if it can, then try to unpack them.
                try:
                    # rebuild a (smaller) make_vector from the sliced inputs
                    return [make_vector(*x.owner.inputs.__getitem__(idx))]
                except TypeError:
                    # slice contained non-int entries; leave unchanged
                    pass
                except:
                    _logger.error('failed to index with "%s"' % str(idx))
                    raise
##################
# Subtensor opts #
......@@ -306,38 +533,6 @@ def local_subtensor_unary(node):
x_idx = node.op(u.owner.inputs[0], *idx)
return [u.owner.op(x_idx)]
@gof.local_optimizer([None, None])
def local_subtensor_make_vector(node):
    """
    [a,b,c][0] -> a
    [a,b,c][0:2] -> [a,b]

    If the index or slice is constant.
    """
    if not opt.check_chain(node, T.Subtensor, T.MakeVector):
        return False
    joined_r = node.inputs[0]
    try:
        # check that join is being used to join scalars
        veclen = T.join.vec_length(joined_r)
    except Exception:
        return False
    idxlist = node.op.idx_list
    if len(idxlist) != 1:
        return False
    idx = idxlist[0]
    if isinstance(idx, int):
        return [joined_r.owner.inputs[idx]]
    try:
        # idx is a slice object: take the matching sub-list of scalars.
        # BUGFIX: was `node.owner.inputs[0]` (Apply nodes have no .owner ->
        # AttributeError) and `__getslice__(idx)` (wrong arity: always
        # raised TypeError, so this path never fired); also the result must
        # be wrapped in a list, as local optimizers return lists.
        return [T.make_vector(*joined_r.owner.inputs[idx])]
    except TypeError:
        # slice contains symbolic bounds; cannot unpack statically
        return False
register_canonicalize(local_subtensor_make_vector)
@register_canonicalize
@gof.local_optimizer([None])
def local_IncSubtensor_serialize(node):
......@@ -581,13 +776,18 @@ class Canonizer(gof.LocalOptimizer):
# the dtype of the 'input' argument. The leaf-Variables of the graph covered by the
# recursion may be of any Variable type.
if len(input.clients) > 1:
# this logic is too conservative, but doing it is better than not doing it.
#
# we don't want to canonize a subgraph that we will need to compute anyway for the other clients.
# This check is too conservative because if the other clients are also in the subgraph we are canonizing,
# then we should [probably?] recurse anyway.
return [input], []
if 0:
# UPDATE: This logic makes it impossible to recognize some important patterns
# (e.g. variants on the x/x)
# and it is screwing up the RBM free energy gradient.
#TODO: review this
if len(input.clients) > 1:
# this logic is too conservative, but doing it is better than not doing it.
#
# we don't want to canonize a subgraph that we will need to compute anyway for the other clients.
# This check is too conservative because if the other clients are also in the subgraph we are canonizing,
# then we should [probably?] recurse anyway.
return [input], []
if input.owner is None or input.owner.op not in [self.main, self.inverse, self.reciprocal]:
if input.owner and isinstance(input.owner.op, T.DimShuffle):
......@@ -835,8 +1035,8 @@ class Canonizer(gof.LocalOptimizer):
# Here we make the canonical version of the graph around this node
# See the documentation of get_num_denum and simplify
orig_num, orig_denum = self.get_num_denum(node.outputs[0])
num, denum = list(orig_num), list(orig_denum)
num, denum = self.simplify(num, denum)
num, denum = self.simplify(list(orig_num), list(orig_denum))
def same(x, y):
    """True iff x and y are equal-length sequences whose items are
    element-wise equal (items may be numpy arrays)."""
    if len(x) != len(y):
        return False
    return all(N.all(a == b) for a, b in zip(x, y))
......@@ -935,6 +1135,60 @@ def local_sum_mul_by_scalar(node):
if thing_summed.owner and thing_summed.owner.op == T.neg:
return [T.neg(node.op(thing_summed.owner.inputs[0]))]
@register_canonicalize
@gof.local_optimizer([])
def local_sum_all_to_none(node):
    """Sum{0,1,...N} -> Sum{}"""
    if not isinstance(node.op, T.Sum):
        return
    axis = node.op.axis
    if axis is None:
        return
    # if every axis is named explicitly, rewrite with axis=None as a
    # canonical shorthand; this permits more merging
    x = node.inputs[0]
    if set(axis) == set(range(x.type.ndim)):
        return [T.Sum(axis=None)(x)]
@register_canonicalize
@gof.local_optimizer([])
def local_sum_sum(node):
    """Sum(Sum(x)) -> Sum(x) with a combined axis argument."""
    if not isinstance(node.op, T.Sum):
        return
    summed, = node.inputs
    if len(summed.clients) != 1:
        # the inner sum is used elsewhere; merging would duplicate work
        return
    if not (summed.owner and isinstance(summed.owner.op, T.Sum)):
        return
    if summed.owner.op.axis is None:
        # special case of local_cut_useless_reduce
        return [T.Sum(None)(summed.owner.inputs[0])]
    if node.op.axis is None:
        # we're summing up everything anyway so lets
        # do it all at once
        return [T.Sum(None)(summed.owner.inputs[0])]
    # figure out which dimensions of the original input are preserved
    alldims = range(summed.owner.inputs[0].type.ndim)
    # trim out the dimensions that were removed by the first sum
    # BUGFIX: the preserved positions are those NOT in the first sum's axis
    alldims = [d for i, d in enumerate(alldims)
               if i not in summed.owner.op.axis]
    # trim out the dimensions removed by the second sum; here `i` indexes
    # positions of the already-reduced intermediate tensor
    alldims = [d for i, d in enumerate(alldims) if i not in node.op.axis]
    # the combined Sum removes every original dimension that did not survive
    newaxis = [i for i in xrange(summed.owner.inputs[0].type.ndim)
               if i not in alldims]
    combined_sum = T.Sum(newaxis)
    return [combined_sum(summed.owner.inputs[0])]
@register_canonicalize
@gof.local_optimizer([])
def local_cut_useless_reduce(node):
    """Sum(a, axis=[]) -> a """
    if not isinstance(node.op, T.CAReduce):
        return
    reduced, = node.inputs
    # if the reduce were doing anything, the output type (ndim) would differ
    if reduced.type == node.outputs[0].type:
        return [reduced]
@gof.local_optimizer([T.mul])
def local_mul_to_neg(node):
if node.op == T.mul and N.all(local_mul_canonizer.get_constant(node.inputs[0]) == -1.0):
......@@ -1143,30 +1397,23 @@ register_specialize(local_add_specialize)
mul_canonizer = in2out(gof.LocalOptGroup(local_mul_canonizer, local_fill_cut, local_fill_sink))
@register_specialize
@register_stabilize
@gof.local_optimizer([T.log])
def local_log1p(node):
    """log(1 + x) -> log1p(x), for numerical stability near x == 0."""
    if node.op == T.log:
        log_arg, = node.inputs
        if log_arg.owner and log_arg.owner.op == T.add:
            scalars, scalar_inputs, nonconsts = \
                    scalarconsts_rest(log_arg.owner.inputs)
            # scalar_inputs are potentially dimshuffled and fill'd scalars
            if scalars and numpy.allclose(numpy.sum(scalars), 1):
                if not nonconsts:
                    pass  # leave for constant-merge
                # BUGFIX: this must be elif — with no non-constant terms the
                # old code fell through and called T.add() with no arguments
                elif len(nonconsts) == 1:
                    return _fill_chain(T.log1p(nonconsts[0]), scalar_inputs)
                else:
                    return _fill_chain(T.log1p(T.add(*nonconsts)),
                                       scalar_inputs)
def add_calculate(num, denum, aslist = False, out_type=None):
......
......@@ -136,10 +136,7 @@ class RandomFunction(gof.Op):
draw.
"""
if shape == () or shape == []:
shape = tensor.as_tensor_variable(shape, dtype='int64')
else:
shape = tensor.as_tensor_variable(shape, ndim=1)
shape = tensor.as_tensor_variable(shape, ndim=1)
assert shape.type.ndim == 1
assert (shape.type.dtype == 'int64') or (shape.type.dtype == 'int32')
if not isinstance(r.type, RandomStateType):
......@@ -158,6 +155,22 @@ class RandomFunction(gof.Op):
[r, shape] + args,
[r.type(), self.outtype()])
def infer_shape(self, node, i_shapes):
    """Return shapes for the two outputs (new rng, samples).

    The rng output has no tensor shape, so None is returned for it; the
    samples' shape is taken from the symbolic 'shape' input when possible.
    """
    r, shp = node.inputs[0:2]
    # if shp is a constant array of len 0, then it means 'automatic shape'
    unknown_shape = len(getattr(shp, 'data', [0,1,2])) == 0
    # if ndim_added == 0 and shape != () then the sample shape is exactly
    # the symbolic shape input
    if self.ndim_added == 0 and not unknown_shape:
        sample_shp = shp
    else:
        # if shape == () then it will depend on args
        # if ndim_added != 0 and shape != () then it will depend on args
        # NOTE(review): falling back to the output's own .shape gives the
        # shape machinery no static information — presumably a deliberate
        # "unknown" answer; confirm.
        sample_shp = node.outputs[1].shape
    return [None, [sample_shp[i] for i in xrange(node.outputs[1].ndim)]]
def perform(self, node, inputs, (rout, out)):
# Use self.fn to draw shape worth of random numbers.
# Numbers are drawn from r if self.inplace is True, and from a copy of r if
......
......@@ -89,7 +89,6 @@ class test_greedy_distribute(unittest.TestCase):
g = Env([a,b,c,d,x,y,z], [e])
##print pprint(g.outputs[0])
mul_canonizer.optimize(g)
gof.TopoOptimizer(gof.LocalOptGroup(local_fill_cut, local_fill_lift), order = 'out_to_in').optimize(g)
gof.TopoOptimizer(gof.LocalOptGroup(local_greedy_distributor), order = 'out_to_in').optimize(g)
##print pprint(g.outputs[0])
......@@ -136,7 +135,6 @@ class test_canonize(unittest.TestCase):
g = Env([x, y, z, a, b, c, d], [e])
print pprint(g.outputs[0])
mul_canonizer.optimize(g)
gof.TopoOptimizer(gof.LocalOptGroup(local_fill_cut, local_fill_lift), order = 'out_to_in').optimize(g)
print pprint(g.outputs[0])
def test_elemwise_multiple_inputs_optimisation(self):
......@@ -296,17 +294,17 @@ class test_canonize(unittest.TestCase):
def test_multiple_case(self):
""" test those case take from the comment in Canonizer
x / x -> 1
(x * y) / x -> y
x / y / x -> 1 / y
x / y / z -> x / (y * z)
x / (y / z) -> (x * z) / y
(a / b) * (b / c) * (c / d) -> a / d
(2.0 * x) / (4.0 * y) -> (0.5 * x) / y
2 * x / 2 -> x
with and without DimShuffle
TODO: with DimShuffle
"""
x / x -> 1
(x * y) / x -> y
x / y / x -> 1 / y
x / y / z -> x / (y * z)
x / (y / z) -> (x * z) / y
(a / b) * (b / c) * (c / d) -> a / d
(2.0 * x) / (4.0 * y) -> (0.5 * x) / y
2 * x / 2 -> x
with and without DimShuffle
TODO: with DimShuffle
"""
import theano.tensor, theano.compile
shp=(3,3)
......@@ -331,6 +329,7 @@ class test_canonize(unittest.TestCase):
old_optimizer = mode._optimizer
try:
mode._optimizer=gof.Query(["canonicalize"])
mode._optimizer=mode._optimizer.including('ShapeOpt')
mode._optimizer=mode._optimizer.excluding('local_elemwise_fusion')
#test x / x -> 1
......@@ -344,10 +343,15 @@ class test_canonize(unittest.TestCase):
out = f(*val_inputs)
assert (out==numpy.ones(shp, dtype=out_dtype)).all()
topo=f.maker.env.toposort()
assert len(topo)==1
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,theano.scalar.basic.Second)
assert len(topo[0].inputs)==2
if sym_inputs[0].broadcastable[0]:
assert len(topo)==2
assert isinstance(topo[0].op, Shape_i)
assert isinstance(topo[1].op, TT.Alloc)
else:
assert len(topo)==3
assert isinstance(topo[0].op, Shape_i)
assert isinstance(topo[1].op, Shape_i)
assert isinstance(topo[2].op, TT.Alloc)
assert(out_dtype==out.dtype)
#test (x * y) / x -> y
......@@ -365,10 +369,16 @@ class test_canonize(unittest.TestCase):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert(out_dtype==out.dtype)
assert numpy.allclose(out,val_inputs[1])
topo=f.maker.env.toposort()
assert len(topo)==nb_elemwise
assert(out_dtype==out.dtype)
print "ID TOPO", id, topo, sym_inputs
for r,t in f.maker.env.shape_feature.shape_of.items():
print ' ', r, t
if topo:
for node in topo[:-1]:
assert isinstance(node.op, Shape_i)
assert isinstance(topo[-1].op, TT.Alloc)
#test x / y / x -> 1 / y
for id,(g, sym_inputs, val_inputs, nb_elemwise, out_dtype) in enumerate([
......@@ -378,19 +388,21 @@ class test_canonize(unittest.TestCase):
((fv/fy)/fv,[fv,fy],[fvv,fyv],1,'float32'),
#must broadcast as their is a dimshuffle in the computation
((dx/dv)/dx,[dx,dv],[dxv,dvv],2,'float64'),
#topo: [Elemwise{inv,no_inplace}(<TensorType(float64, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0)]
((fx/fv)/fx,[fx,fv],[fxv,fvv],2,'float32'),
#topo:[Elemwise{inv,no_inplace}(<TensorType(float32, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0)]
((dx/dv)/dx,[dx,dv],[dxv,dvv],1,'float64'),
#topo: [Shape_i, Shape_i, Elemwise{inv,no_inplace}(<TensorType(float64, row)>), Alloc(...)]
((fx/fv)/fx,[fx,fv],[fxv,fvv],1,'float32'),
#topo:[Shape_i, Shape_i, Elemwise{inv,no_inplace}(<TensorType(float32, row)>), Alloc(...)]
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,(1/val_inputs[1]))
topo=f.maker.env.toposort()
assert len(topo)==nb_elemwise
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,(theano.scalar.basic.Inv, theano.scalar.basic.TrueDiv))
print topo
elem = [t for t in topo if isinstance(t.op, T.Elemwise)]
assert len(elem)==nb_elemwise
assert isinstance(elem[0].op,(T.Elemwise,))
assert isinstance(elem[0].op.scalar_op,(theano.scalar.basic.Inv, theano.scalar.basic.TrueDiv))
assert(out_dtype==out.dtype)
#test (a / b) * (b / c) * (c / d) -> a / d
......@@ -529,29 +541,6 @@ def test_mixeddiv():
d = dscalar()
assert 0 == function([i,d], d*(i/(i+1)))(3, 1.0)
def test_local_shape_lift_dot():
    """Check that local_shape_lift_dot rewrites shape(dot(i, j)) into an
    expression over the inputs' shapes, for each vector/matrix combination."""
    # expected pretty-printed optimized graph for each (x, y) type pair
    args_to_result = {
        (fvector, fvector): "[]",
        (fvector, fmatrix): "[<TensorType(float32, matrix)>.shape[1]]",
        (fmatrix, fvector): "[<TensorType(float32, matrix)>.shape[0]]",
        (fmatrix, fmatrix): "[<TensorType(float32, matrix)>.shape[0], <TensorType(float32, matrix)>.shape[1]]",
    }
    for x in [fvector, fmatrix]:
        for y in [fvector, fmatrix]:
            i = x()
            j = y()
            print 'I SHAPE', i.type.shape
            print 'J SHAPE', j.type.shape
            d = shape(dot(i,j))
            if x is fvector and y is fvector:
                # vector . vector is a scalar, so its shape is the empty
                # tuple and there is no graph to optimize
                assert d == ()
            else:
                g = Env([i,j], [d])
                gof.TopoOptimizer(gof.LocalOptGroup(local_shape_lift_dot), order='out_to_in').optimize(g)
                print pprint(g.outputs[0]), args_to_result[(x,y)]
                assert pprint(g.outputs[0]) == args_to_result[(x,y)]
def test_const_type_in_mul_canonizer():
input = dmatrix()
w = dmatrix()
......@@ -915,11 +904,16 @@ def test_log1p():
# check trickier cases (and use different dtype)
y = fmatrix()
f = function([x,y], T.log(fill(y,1)+(x)), mode=m)
assert [node.op for node in f.maker.env.toposort()] == [T.DimShuffle([False], ['x', 0], True), T.log1p, T.fill]
print f.maker.env.toposort()
# the first three ops are Shape_i, Shape_i, and Dimshuffle
assert [node.op for node in f.maker.env.toposort()][3:] \
== [T.log1p, Alloc('float64')]
f = function([x,y], T.log(0+(x) + fill(y,1.0)), mode=m)
assert [node.op for node in f.maker.env.toposort()] == [T.DimShuffle([False], ['x', 0], True), T.log1p, T.fill]
assert [node.op for node in f.maker.env.toposort()][3:] \
== [T.log1p, Alloc('float64')]
f = function([x,y], T.log(2+(x) - fill(y,1.0)), mode=m)
assert [node.op for node in f.maker.env.toposort()] == [T.DimShuffle([False], ['x', 0], True), T.log1p, T.fill]
assert [node.op for node in f.maker.env.toposort()][3:] \
== [T.log1p, Alloc('float64')]
f([1e-7, 10], [[0, 0], [0, 0]]) #debugmode will verify values
......@@ -969,6 +963,51 @@ class test_local_subtensor_unary(unittest.TestCase):
f([[0,1],[2,3]], [4,5]) # let debugmode test something
def test_local_fill_useless():
    """Useless fill/second ops should be optimized away, leaving only the
    multiplication in the compiled graph."""
    m = theano.config.mode
    if m == 'FAST_COMPILE':
        m = 'FAST_RUN'
    x = dvector()
    y = dvector()
    z = lvector()
    # basic case
    f = function([x], T.fill(x,x)*2, mode=m)
    assert [node.op for node in f.maker.env.toposort()] == [T.mul]
    # basic case (second is the op underlying fill)
    f = function([x,y], T.second(y,x)*2, mode=m)
    assert [node.op for node in f.maker.env.toposort()] == [T.mul]
    # now with different type
    f = function([x,z], T.fill(z,x)*2, mode=m)
    assert [node.op for node in f.maker.env.toposort()] == [T.mul]
    # now cutting out the input ??
    f = function([x,y], T.fill(x,y)*2, mode=m)
    assert [node.op for node in f.maker.env.toposort()] == [T.mul]
    # now fill is serving as a cast
    # NOTE(review): this case is identical to the previous one — it was
    # probably meant to use inputs of differing dtypes; confirm intent.
    f = function([x,y], T.fill(x,y)*2, mode=m)
    assert [node.op for node in f.maker.env.toposort()] == [T.mul]
class test_shapeoptimizer(unittest.TestCase):
    """Checks that ShapeOptimizer answers shape queries without computing
    the underlying values."""
    def test0(self):
        # the shape of (v+m) is known from the inputs alone, so no `add`
        # node should survive in the compiled graph
        v = T.vector()
        m = T.matrix()
        f = function([v,m], (v+m).shape)
        for node in f.maker.env.toposort():
            assert node.op != T.add
    def test_constant(self):
        # a dimshuffled-in broadcastable dimension has constant length 1,
        # so the whole graph should fold away to a constant (empty toposort)
        v = T.vector()
        m = T.matrix()
        f = function([v,m], v.dimshuffle('x','x',0).shape[1])
        print f.maker.env.toposort()
        assert [] == f.maker.env.toposort()
if __name__ == '__main__':
    # unittest.main()
    # NOTE(review): only the memory-leak check runs directly; the name
    # 'tes_memory_leak' (missing 't') keeps it out of unittest discovery —
    # confirm this is intentional.
    test_fusion().tes_memory_leak()
......
......@@ -352,7 +352,7 @@ class T_SharedRandomStreams(unittest.TestCase):
def test_vector_arguments(self):
random = RandomStreams(utt.fetch_seed())
low = tensor.vector()
low = tensor.dvector()
out = random.uniform(low=low, high=1)
assert out.ndim == 1
f = function([low], out)
......@@ -402,8 +402,8 @@ class T_SharedRandomStreams(unittest.TestCase):
def test_broadcast_arguments(self):
random = RandomStreams(utt.fetch_seed())
low = tensor.vector()
high = tensor.col()
low = tensor.dvector()
high = tensor.dcol()
out = random.uniform(low=low, high=high)
assert out.ndim == 2
f = function([low, high], out)
......@@ -424,8 +424,8 @@ class T_SharedRandomStreams(unittest.TestCase):
def test_uniform_vector(self):
random = RandomStreams(utt.fetch_seed())
low = tensor.vector()
high = tensor.vector()
low = tensor.dvector()
high = tensor.dvector()
out = random.uniform(low=low, high=high)
assert out.ndim == 1
f = function([low, high], out)
......@@ -438,11 +438,15 @@ class T_SharedRandomStreams(unittest.TestCase):
# Arguments of size (3,)
val0 = f(low_val, high_val)
numpy_val0 = numpy_rng.uniform(low=low_val, high=high_val)
print 'THEANO', val0
print 'NUMPY', numpy_val0
assert numpy.all(val0 == numpy_val0)
# arguments of size (2,)
val1 = f(low_val[:-1], high_val[:-1])
numpy_val1 = numpy_rng.uniform(low=low_val[:-1], high=high_val[:-1])
print 'THEANO', val1
print 'NUMPY', numpy_val1
assert numpy.all(val1 == numpy_val1)
# Specifying the size explicitly
......@@ -486,8 +490,8 @@ class T_SharedRandomStreams(unittest.TestCase):
def test_normal_vector(self):
random = RandomStreams(utt.fetch_seed())
avg = tensor.vector()
std = tensor.vector()
avg = tensor.dvector()
std = tensor.dvector()
out = random.normal(avg=avg, std=std)
assert out.ndim == 1
f = function([avg, std], out)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论