提交 af804be5 authored 作者: James Bergstra's avatar James Bergstra

merge

...@@ -197,6 +197,11 @@ class BadOptimization(DebugModeError): ...@@ -197,6 +197,11 @@ class BadOptimization(DebugModeError):
print >> ssio, " Mean Abs Diff: ", numpy.mean(numpy.absolute(nv-ov)) print >> ssio, " Mean Abs Diff: ", numpy.mean(numpy.absolute(nv-ov))
print >> ssio, " Median Abs Diff: ", numpy.median(numpy.absolute(nv-ov)) print >> ssio, " Median Abs Diff: ", numpy.median(numpy.absolute(nv-ov))
print >> ssio, " Std Abs Diff: ", numpy.std(numpy.absolute(nv-ov)) print >> ssio, " Std Abs Diff: ", numpy.std(numpy.absolute(nv-ov))
reldiff = numpy.absolute(nv-ov) / (numpy.absolute(nv)+numpy.absolute(ov))
print >> ssio, " Max Rel Diff: ", numpy.max(reldiff)
print >> ssio, " Mean Rel Diff: ", numpy.mean(reldiff)
print >> ssio, " Median Rel Diff: ", numpy.median(reldiff)
print >> ssio, " Std Rel Diff: ", numpy.std(reldiff)
# only if all succeeds to we add anything to sio # only if all succeeds to we add anything to sio
print >> sio, ssio.getvalue() print >> sio, ssio.getvalue()
except: except:
...@@ -349,14 +354,17 @@ def debugprint(r, prefix='', depth=-1, done=None, file=sys.stdout): ...@@ -349,14 +354,17 @@ def debugprint(r, prefix='', depth=-1, done=None, file=sys.stdout):
# this variable is the output of computation, # this variable is the output of computation,
# so just print out the apply # so just print out the apply
a = r.owner a = r.owner
print >> file, prefix, a.op, id(a) if len(a.outputs) == 1:
print >> file, '%s%s [@%i]' % (prefix, a.op, id(r))
else:
print >> file, '%s%s.%i [@%i]' % (prefix, a.op, a.outputs.index(r), id(r))
if id(a) not in done: if id(a) not in done:
done.add(id(a)) done.add(id(a))
for i in a.inputs: for i in a.inputs:
debugprint(i, prefix+' ', depth=depth-1, done=done, file=file) debugprint(i, prefix+' |', depth=depth-1, done=done, file=file)
else: else:
#this is a variable #this is a variable
print >> file, prefix, r, id(r) print >> file, '%s%s [@%i]' % (prefix, r, id(r))
return file return file
......
...@@ -116,7 +116,7 @@ class AddDestroyHandler(gof.Optimizer): ...@@ -116,7 +116,7 @@ class AddDestroyHandler(gof.Optimizer):
for o in env.outputs: for o in env.outputs:
try: try:
env.replace_validate(o, _output_guard(o), reason='output_guard') env.replace_validate(o, _output_guard(o), reason='output_guard')
_logger.warning("Output variable %s required output_guard," _logger.info("Output variable %s required output_guard,"
" how was this output left unprotected against destructive operations?" " how was this output left unprotected against destructive operations?"
% o) % o)
except gof.InconsistencyError: except gof.InconsistencyError:
...@@ -127,12 +127,22 @@ class AddDestroyHandler(gof.Optimizer): ...@@ -127,12 +127,22 @@ class AddDestroyHandler(gof.Optimizer):
env.extend(gof.DestroyHandler()) env.extend(gof.DestroyHandler())
optdb = gof.SequenceDB() optdb = gof.SequenceDB()
optdb.register('merge1', gof.MergeOptimizer(), 0, 'fast_run', 'fast_compile') optdb.register('merge1', gof.MergeOptimizer(),
optdb.register('canonicalize', gof.EquilibriumDB(), 1, 'fast_run') 0, 'fast_run', 'fast_compile')
optdb.register('specialize', gof.EquilibriumDB(), 2, 'fast_run') optdb.register('canonicalize', gof.EquilibriumDB(), # rearranges elemwise expressions
optdb.register('merge2', gof.MergeOptimizer(), 49, 'fast_run') 1, 'fast_run')
optdb.register('add_destroy_handler', AddDestroyHandler(), 49.5, 'fast_run', 'inplace') optdb.register('merge1.2', gof.MergeOptimizer(skip_const_merge=True),
optdb.register('merge3', gof.MergeOptimizer(), 100, 'fast_run') 1.2, 'fast_run', 'fast_compile')
optdb.register('stabilize', gof.EquilibriumDB(), # replace unstable subgraphs
1.5, 'fast_run')
optdb.register('specialize', gof.EquilibriumDB(), # misc special cases for speed
2, 'fast_run')
optdb.register('merge2', gof.MergeOptimizer(), # especially constant merge
49, 'fast_run')
optdb.register('add_destroy_handler', AddDestroyHandler(),
49.5, 'fast_run', 'inplace')
optdb.register('merge3', gof.MergeOptimizer(), # final pass just to make sure
100, 'fast_run')
class Mode(object): class Mode(object):
...@@ -153,6 +163,12 @@ class Mode(object): ...@@ -153,6 +163,12 @@ class Mode(object):
def __init__(self, linker = config.linker, optimizer = config.optimizer): def __init__(self, linker = config.linker, optimizer = config.optimizer):
self.__setstate__((linker, optimizer)) self.__setstate__((linker, optimizer))
#self.provided_optimizer - typically the `optimizer` arg. But if the `optimizer` arg is
# keyword corresponding to a predefined Query, then this stores the query
#self._optimizer - typically same as provided_optimizer??
#self.__get_optimizer - returns self._optimizer (possibly querying optdb with self._optimizer)
#self.optimizer - property that returns __get_optimizer()
def __getstate__(self): def __getstate__(self):
return (self.provided_linker, self.provided_optimizer) return (self.provided_linker, self.provided_optimizer)
...@@ -218,7 +234,7 @@ predefined_modes = {'FAST_COMPILE': FAST_COMPILE, ...@@ -218,7 +234,7 @@ predefined_modes = {'FAST_COMPILE': FAST_COMPILE,
def get_mode(string): def get_mode(string):
if string is None: string = config.mode if string is None: string = config.mode
if not isinstance(string, str): return string #it is already a mode... if not isinstance(string, str): return string #it is hopefully already a mode...
if not predefined_modes.has_key(string): if not predefined_modes.has_key(string):
raise Exception("No predefixed mode exist for string: %s"%string) raise Exception("No predefixed mode exist for string: %s"%string)
return predefined_modes[string] return predefined_modes[string]
......
...@@ -197,12 +197,19 @@ class _metadict: ...@@ -197,12 +197,19 @@ class _metadict:
class MergeOptimizer(Optimizer): class MergeOptimizer(Optimizer):
"""WRITEME
Merges parts of the graph that are identical, i.e. parts that
take the same inputs and carry out the asme computations so we
can avoid doing them more than once. Also merges variables that
are constant.
""" """
Merges parts of the graph that are identical and redundant.
The basic principle is that if two Applies have ops that compare equal, and identical
inputs, then they do not both need to be computed. The clients of one are transfered to
the other and one of them is removed from the graph. This procedure is carried out in
input->output order through the graph.
The first step of merging is constant-merging, so that all clients of an int(1) for example,
are transfered to a particular instance of int(1).
"""
def __init__(self, skip_const_merge=False):
self.skip_const_merge = skip_const_merge
def add_requirements(self, env): def add_requirements(self, env):
env.extend(toolbox.ReplaceValidate()) env.extend(toolbox.ReplaceValidate())
...@@ -230,41 +237,6 @@ class MergeOptimizer(Optimizer): ...@@ -230,41 +237,6 @@ class MergeOptimizer(Optimizer):
const_sig[c] = sig const_sig[c] = sig
const_sig_inv[sig] = c const_sig_inv[sig] = c
def exptime_apply_node_merge(self, env):
# we clear the dicts because the Constants signatures are not necessarily hashable
# and it's more efficient to give them an integer like the other Variables
symbol_idx = {} #variable -> int
symbol_idx_inv = {} #int -> variable (inverse of symbol_idx)
#add all graph sources to the symbol_idx dictionaries (arbitrary order)
for i, r in enumerate(r for r in env.variables if r.owner is None):
symbol_idx[r] = i
symbol_idx_inv[i] = r
for node in _list_of_nodes(env):
node_cid = (node.op, tuple([symbol_idx[input] for input in node.inputs]))
#print 'NODE', node, node_cid
dup = symbol_idx_inv.get(node_cid, None)
success = False
if dup is not None:
success = True
pairs = zip(node.outputs, dup.outputs)
for output, new_output in pairs:
if output.name and not new_output.name:
new_output.name = output.name
try:
env.replace_all_validate(pairs, reason='Merge (exptime)')
except InconsistencyError, e:
success = False
if not success:
symbol_idx[node] = node_cid
symbol_idx_inv[node_cid] = node
for i, output in enumerate(node.outputs):
ref = (i, node_cid)
symbol_idx[output] = ref
symbol_idx_inv[ref] = output
def apply_node_merge(self, env): def apply_node_merge(self, env):
# we clear the dicts because the Constants signatures are not necessarily hashable # we clear the dicts because the Constants signatures are not necessarily hashable
# and it's more efficient to give them an integer like the other Variables # and it's more efficient to give them an integer like the other Variables
...@@ -316,7 +288,8 @@ class MergeOptimizer(Optimizer): ...@@ -316,7 +288,8 @@ class MergeOptimizer(Optimizer):
#TODO: Consider splitting this into a separate optimizer (SeqOptimizer) #TODO: Consider splitting this into a separate optimizer (SeqOptimizer)
def apply(self, env): def apply(self, env):
self.apply_constant_merge(env) if not self.skip_const_merge:
self.apply_constant_merge(env)
self.apply_node_merge(env) self.apply_node_merge(env)
merge_optimizer = MergeOptimizer() merge_optimizer = MergeOptimizer()
...@@ -541,7 +514,7 @@ class PatternSub(LocalOptimizer): ...@@ -541,7 +514,7 @@ class PatternSub(LocalOptimizer):
PatternSub((subtract, (add, 'x', 'y'), 'y'), 'x') PatternSub((subtract, (add, 'x', 'y'), 'y'), 'x')
PatternSub((power, 'x', Constant(double, 2.0)), (square, 'x')) PatternSub((power, 'x', Constant(double, 2.0)), (square, 'x'))
PatternSub((boggle, {'pattern': 'x', PatternSub((boggle, {'pattern': 'x',
'constraint': lambda env, expr: expr.type == scrabble}), 'constraint': lambda expr: expr.type == scrabble}),
(scrabble, 'x')) (scrabble, 'x'))
""" """
...@@ -789,7 +762,10 @@ class NavigatorOptimizer(Optimizer): ...@@ -789,7 +762,10 @@ class NavigatorOptimizer(Optimizer):
raise raise
if replacements is False or replacements is None: if replacements is False or replacements is None:
return False return False
assert len(node.outputs) == len(replacements) if not isinstance(replacements, (tuple, list)):
raise TypeError('Optimizer %s gave wrong type of replacement' % lopt)
if len(node.outputs) != len(replacements):
raise ValueError('Optimizer %s gave wrong number of replacements' % lopt)
repl_pairs = zip(node.outputs, replacements) repl_pairs = zip(node.outputs, replacements)
try: try:
env.replace_all_validate(repl_pairs, reason=lopt) env.replace_all_validate(repl_pairs, reason=lopt)
...@@ -904,8 +880,13 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -904,8 +880,13 @@ class EquilibriumOptimizer(NavigatorOptimizer):
max_depth = None, max_depth = None,
max_use_ratio = None): max_use_ratio = None):
""" """
:param local_optimizers: list or set of local optimizations to apply until
equilibrium.
:param max_use_ratio: each optimizer can be applied at most (size of graph * this number) :param max_use_ratio: each optimizer can be applied at most (size of graph * this number)
:param max_depth: TODO what does this do? (EquilibriumDB sets it to 5)
""" """
super(EquilibriumOptimizer, self).__init__( super(EquilibriumOptimizer, self).__init__(
...@@ -916,6 +897,7 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -916,6 +897,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
self.local_optimizers = local_optimizers self.local_optimizers = local_optimizers
self.max_depth = max_depth self.max_depth = max_depth
self.max_use_ratio = max_use_ratio self.max_use_ratio = max_use_ratio
assert self.max_use_ratio is not None, 'max_use_ratio has to be a number'
def apply(self, env, start_from = None): def apply(self, env, start_from = None):
if start_from is None: if start_from is None:
...@@ -960,7 +942,7 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -960,7 +942,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
changed |= lopt_change changed |= lopt_change
finally: finally:
self.detach_updater(env, u) self.detach_updater(env, u)
self.detach_updater(env, u) self.detach_updater(env, u) #TODO: erase this line, it's redundant at best
if max_use_abort: if max_use_abort:
print >> sys.stderr, "WARNING: EquilibriumOptimizer max'ed out" print >> sys.stderr, "WARNING: EquilibriumOptimizer max'ed out"
......
...@@ -26,7 +26,7 @@ class DB(object): ...@@ -26,7 +26,7 @@ class DB(object):
# It is an instance of a DB.In the tests for example, # It is an instance of a DB.In the tests for example,
# this is not always the case. # this is not always the case.
if not isinstance(obj, (DB, opt.Optimizer, opt.LocalOptimizer)): if not isinstance(obj, (DB, opt.Optimizer, opt.LocalOptimizer)):
raise Exception('Triing to register an optimizer that don\'t herite from theano.gof.opt.Optimizer or theano.gof.opt.LocalOptimizer', obj) raise TypeError('Object cannot be registered in OptDB', obj)
if self.name is not None: if self.name is not None:
tags = tags + (self.name,) tags = tags + (self.name,)
...@@ -132,6 +132,18 @@ class Query(object): ...@@ -132,6 +132,18 @@ class Query(object):
class EquilibriumDB(DB): class EquilibriumDB(DB):
"""A set of potential optimizations which should be applied in an arbitrary order until
equilibrium is reached.
Canonicalize, Stabilize, and Specialize are all equilibrium optimizations.
.. note::
It seems like this might be supposed to contain LocalOptimizer instances rather than
optimizer instances, because whatever is selected by the query is passed to
EquilibriumOptimizer and EquilibriumOptimizer requires LocalOptimizer instances.
"""
def query(self, *tags, **kwtags): def query(self, *tags, **kwtags):
opts = super(EquilibriumDB, self).query(*tags, **kwtags) opts = super(EquilibriumDB, self).query(*tags, **kwtags)
...@@ -142,27 +154,45 @@ class EquilibriumDB(DB): ...@@ -142,27 +154,45 @@ class EquilibriumDB(DB):
class SequenceDB(DB): class SequenceDB(DB):
"""A sequence of potential optimizations.
Retrieve a sequence of optimizations (a SeqOptimizer) by calling query().
Each potential optimization is registered with a floating-point position.
No matter which optimizations are selected by a query, they are carried out in order of
increasing position.
The optdb itself (`theano.compile.mode.optdb`), from which (among many other tags) fast_run
and fast_compile optimizers are drawn is a SequenceDB.
"""
def __init__(self, failure_callback = opt.SeqOptimizer.warn): def __init__(self, failure_callback = opt.SeqOptimizer.warn):
super(SequenceDB, self).__init__() super(SequenceDB, self).__init__()
self.__priority__ = {} self.__position__ = {}
self.failure_callback = failure_callback self.failure_callback = failure_callback
def register(self, name, obj, priority, *tags): def register(self, name, obj, position, *tags):
super(SequenceDB, self).register(name, obj, *tags) super(SequenceDB, self).register(name, obj, *tags)
self.__priority__[name] = priority self.__position__[name] = position
def query(self, *tags, **kwtags): def query(self, *tags, **kwtags):
"""
:type position_cutoff: float or int
:param position_cutoff: only optimizations with position less than the cutoff are returned.
"""
position_cutoff = kwtags.pop('position_cutoff', float('inf'))
opts = super(SequenceDB, self).query(*tags, **kwtags) opts = super(SequenceDB, self).query(*tags, **kwtags)
opts = list(opts) opts = [o for o in opts if self.__position__[o.name] < position_cutoff]
opts.sort(key = lambda obj: self.__priority__[obj.name]) opts.sort(key = lambda obj: self.__position__[obj.name])
return opt.SeqOptimizer(opts, failure_callback = self.failure_callback) return opt.SeqOptimizer(opts, failure_callback = self.failure_callback)
def print_summary(self, stream=sys.stdout): def print_summary(self, stream=sys.stdout):
print >> stream, "SequenceDB (id %i)"%id(self) print >> stream, "SequenceDB (id %i)"%id(self)
print >> stream, " priority", self.__priority__ print >> stream, " position", self.__position__
print >> stream, " names", self._names print >> stream, " names", self._names
print >> stream, " db", self.__db__ print >> stream, " db", self.__db__
def __str__(self): def __str__(self):
sio = StringIO.StringIO() sio = StringIO.StringIO()
self.print_summary(sio) self.print_summary(sio)
......
...@@ -7,9 +7,52 @@ import sys,os ...@@ -7,9 +7,52 @@ import sys,os
from theano import config from theano import config
from gof import Op, Apply from gof import Op, Apply
from theano.gof.python25 import any from theano.gof.python25 import any
from theano.compile import Function, debugmode
#We import the debugprint here to have all printing of graph available from this module
from theano.compile.debugmode import debugprint def debugprint(obj, depth=-1, file=None):
"""Print a computation graph to file
:type obj: Variable, Apply, or Function instance
:param obj: symbolic thing to print
:type depth: integer
:param depth: print graph to this depth (-1 for unlimited)
:type file: None or file-like object
:param file: print to this file (None means sys.stdout)
:rtype: None or file-like object
:returns: `file` argument
Each line printed represents a Variable in the graph.
The indentation of each line corresponds to its depth in the symbolic graph.
The first part of the text identifies whether it is an input (if a name or type is printed)
or the output of some Apply (in which case the Op is printed).
The second part of the text is the memory location of the Variable.
If a Variable is encountered multiple times in the depth-first search, it is only printed
recursively the first time. Later, just the Variable and its memory location are printed.
If an Apply has multiple outputs, then a '.N' suffix will be appended to the Apply's
identifier, to indicate which output a line corresponds to.
"""
if file is None:
_file = sys.stdout
else:
_file = file
done = set()
results_to_print = []
if isinstance(obj, gof.Variable):
results_to_print.append(obj)
elif isinstance(obj, gof.Apply):
results_to_print.extend(obj.outputs)
elif isinstance(obj, Function):
results_to_print.extend(obj.maker.env.outputs)
for r in results_to_print:
debugmode.debugprint(r, depth=depth, done=done, file=_file)
if file is None:
_file.flush()
return file
class Print(Op): class Print(Op):
"""This identity-like Op has the side effect of printing a message followed by its inputs """This identity-like Op has the side effect of printing a message followed by its inputs
...@@ -329,7 +372,7 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn ...@@ -329,7 +372,7 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn
if var.name is not None: if var.name is not None:
varstr = var.name varstr = var.name
elif isinstance(var,gof.Constant): elif isinstance(var,gof.Constant):
varstr = str(var.data) varstr = '%s [%s]'% (str(var.data) , str(var.type))
elif var in input_update and input_update[var].variable.name is not None: elif var in input_update and input_update[var].variable.name is not None:
varstr = input_update[var].variable.name varstr = input_update[var].variable.name
else: else:
......
...@@ -3,7 +3,7 @@ import numpy ...@@ -3,7 +3,7 @@ import numpy
import theano import theano
from theano import Op, Type, Apply, Variable, Constant from theano import Op, Type, Apply, Variable, Constant
from theano import tensor from theano import tensor
from theano.compile import shared, SharedVariable, shared_constructor from theano.compile import shared, SharedVariable
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import filter as type_support_filter from theano.sandbox.cuda import filter as type_support_filter
...@@ -68,6 +68,11 @@ CudaNdarrayType.SharedVariable = CudaNdarraySharedVariable ...@@ -68,6 +68,11 @@ CudaNdarrayType.SharedVariable = CudaNdarraySharedVariable
def cuda_shared_constructor(value, name, strict=False, broadcastable=None): def cuda_shared_constructor(value, name, strict=False, broadcastable=None):
"""SharedVariable Constructor for TensorType""" """SharedVariable Constructor for TensorType"""
# THIS CONSTRUCTOR TRIES TO CAST VALUE TO A FLOAT32, WHICH THEN GOES ONTO THE CARD
# SO INT shared vars, float64 shared vars, etc. all end up on the card.
# THIS IS NOT THE DEFAULT BEHAVIOUR THAT WE WANT.
# SEE float32_shared_constructor
#TODO: what should strict mean in this context, since we always have to make a copy? #TODO: what should strict mean in this context, since we always have to make a copy?
if strict: if strict:
_value = value _value = value
......
...@@ -20,8 +20,9 @@ Special cases: ...@@ -20,8 +20,9 @@ Special cases:
Often a for loop can be expressed as a ``scan()`` operation, and ``scan`` is Often a for loop can be expressed as a ``scan()`` operation, and ``scan`` is
the closest that theano comes to looping. The advantage of using ``scan`` the closest that theano comes to looping. The advantage of using ``scan``
over for loops is that it allows you to express the loop symbolically. The over for loops is that it allows the number of iterations to be a part of the symbolic graph.
Scan Op should always be used by applying the ``scan`` function.
The Scan Op should always be used by applying the ``scan`` function.
""" """
__docformat__ = 'restructedtext en' __docformat__ = 'restructedtext en'
...@@ -60,7 +61,8 @@ def hash_listsDictsTuples(x): ...@@ -60,7 +61,8 @@ def hash_listsDictsTuples(x):
def scan(fn, sequences, initial_states, non_sequences, inplace_map={}, \ def scan(fn, sequences, initial_states, non_sequences, inplace_map={}, \
sequences_taps={}, outputs_taps = {}, n_steps = 0, \ sequences_taps={}, outputs_taps = {}, n_steps = 0, \
truncate_gradient = -1, go_backwards = False, mode = 'FAST_RUN'): truncate_gradient = -1, go_backwards = False,
mode = None):
'''Function that constructs and applies a Scan op '''Function that constructs and applies a Scan op
:param fn: Function that describes the operations involved in one step of scan :param fn: Function that describes the operations involved in one step of scan
......
...@@ -4,7 +4,7 @@ __docformat__ = "restructuredtext en" ...@@ -4,7 +4,7 @@ __docformat__ = "restructuredtext en"
import __builtin__ import __builtin__
import sys # for sys.maxint import sys # for sys.maxint
from theano.configparser import config from theano.configparser import config, AddConfigVar, BoolParam
import traceback #for overriding Op.__call__ import traceback #for overriding Op.__call__
if sys.version_info >= (2,5): if sys.version_info >= (2,5):
import functools import functools
...@@ -570,7 +570,6 @@ class TensorType(Type): ...@@ -570,7 +570,6 @@ class TensorType(Type):
# input received. # input received.
return """ return """
%(name)s = NULL; %(name)s = NULL;
type_num_%(name)s = ((PyArrayObject*)py_%(name)s)->descr->type_num; //we expect %(type_num)s
if (py_%(name)s == Py_None) { if (py_%(name)s == Py_None) {
// We can either fail here or set %(name)s to NULL and rely on Ops using // We can either fail here or set %(name)s to NULL and rely on Ops using
// tensors to handle the NULL case, but if they fail to do so they'll end up // tensors to handle the NULL case, but if they fail to do so they'll end up
...@@ -578,18 +577,17 @@ class TensorType(Type): ...@@ -578,18 +577,17 @@ class TensorType(Type):
PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None"); PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
%(fail)s %(fail)s
} }
else if (!PyArray_Check(py_%(name)s)) { if (!PyArray_Check(py_%(name)s)) {
PyErr_SetString(PyExc_ValueError, "expected an ndarray"); PyErr_SetString(PyExc_ValueError, "expected an ndarray");
%(fail)s %(fail)s
} }
else if (type_num_%(name)s != %(type_num)s) { type_num_%(name)s = ((PyArrayObject*)py_%(name)s)->descr->type_num; //we expect %(type_num)s
if (type_num_%(name)s != %(type_num)s) {
PyErr_SetString(PyExc_ValueError, "expected %(type_num)s"); PyErr_SetString(PyExc_ValueError, "expected %(type_num)s");
%(fail)s %(fail)s
} }
else { %(name)s = (PyArrayObject*)(py_%(name)s);
%(name)s = (PyArrayObject*)(py_%(name)s); Py_XINCREF(%(name)s);
Py_XINCREF(%(name)s);
}
""" % dict(sub, name = name, type_num = self.dtype_specs()[2]) """ % dict(sub, name = name, type_num = self.dtype_specs()[2])
def c_cleanup(self, name, sub): def c_cleanup(self, name, sub):
...@@ -631,7 +629,7 @@ class TensorType(Type): ...@@ -631,7 +629,7 @@ class TensorType(Type):
def c_code_cache_version(self): def c_code_cache_version(self):
scalar_version = scal.Scalar(self.dtype).c_code_cache_version() scalar_version = scal.Scalar(self.dtype).c_code_cache_version()
if scalar_version: if scalar_version:
return (1,) + scalar_version return (2,) + scalar_version
else: else:
return () return ()
...@@ -943,7 +941,13 @@ class _tensor_py_operators: ...@@ -943,7 +941,13 @@ class _tensor_py_operators:
break break
if advanced: if advanced:
return AdvancedSubtensor(args)(self, *args) if config.experimental.advanced_indexing:
if len(args) == 1:
return AdvancedSubtensor1()(self, *args)
else:
return AdvancedSubtensor(args)(self, *args)
else:
return AdvancedSubtensor(args)(self, *args)
else: else:
return Subtensor(args)(self, *Subtensor.collapse(args, lambda entry: isinstance(entry, Variable))) return Subtensor(args)(self, *Subtensor.collapse(args, lambda entry: isinstance(entry, Variable)))
...@@ -1029,6 +1033,7 @@ class TensorConstantSignature(tuple): ...@@ -1029,6 +1033,7 @@ class TensorConstantSignature(tuple):
except: except:
return False return False
#N.B. compare shape to ensure no broadcasting in == #N.B. compare shape to ensure no broadcasting in ==
#N.B. compare elementwise last because it is the most expensive check
return (t0 == t1) and (d0.shape == d1.shape) \ return (t0 == t1) and (d0.shape == d1.shape) \
and (self.sum == other.sum) and (numpy.all(d0 == d1)) and (self.sum == other.sum) and (numpy.all(d0 == d1))
def __hash__(self): def __hash__(self):
...@@ -1294,9 +1299,15 @@ def shape(a): ...@@ -1294,9 +1299,15 @@ def shape(a):
pprint.assign(_shape, printing.MemberPrinter('shape')) pprint.assign(_shape, printing.MemberPrinter('shape'))
class MaxAndArgmax(Op): class MaxAndArgmax(Op):
"""Calculate the max and argmax over a given axis""" """Calculate the max and argmax over a given axis.
.. note::
If axis is None it means to calculate the max over the last dimension which is
DIFFERENT FROM NUMPY!!
"""
nin=2 # tensor, axis nin=2 # tensor, axis
nout=2 # max val, max idx nout=2 # max val, max idx
E_axis = 'invalid axis' E_axis = 'invalid axis'
...@@ -1307,7 +1318,8 @@ class MaxAndArgmax(Op): ...@@ -1307,7 +1318,8 @@ class MaxAndArgmax(Op):
axis = x.type.ndim - 1 axis = x.type.ndim - 1
axis = _as_tensor_variable(axis) axis = _as_tensor_variable(axis)
inputs = [x, axis] inputs = [x, axis]
broadcastable = [False] * (x.type.ndim - 1) #TODO: be less conservative #TODO: figure things out if axis is a constant
broadcastable = [False] * (x.type.ndim - 1)
outputs = [tensor(x.type.dtype, broadcastable,name='max'), outputs = [tensor(x.type.dtype, broadcastable,name='max'),
tensor('int32', broadcastable,name='argmax')] tensor('int32', broadcastable,name='argmax')]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
...@@ -1666,60 +1678,113 @@ def zeros_like(model): ...@@ -1666,60 +1678,113 @@ def zeros_like(model):
#return Zeros(model.type.ndim)(shape(model)) #return Zeros(model.type.ndim)(shape(model))
return fill(model, constant(0.0, dtype=model.type.dtype)) return fill(model, constant(0.0, dtype=model.type.dtype))
class Filler(gof.Op): if 0:
## COMMENTED OUT FEB 17 2010
## TODO (DOCUMENT AND WRITE TESTS) OR DELETE
class Filler(gof.Op):
"""WRITEME"""
def __init__(self, value, ndim, dtype = 'float64'):
self.value = value
self.ndim = ndim
self.dtype = dtype
self.type = TensorType(dtype = dtype,
broadcastable = (False,)*ndim)
def make_node(self, dims):
dims = as_tensor_variable(dims)
return gof.Apply(self, [dims], [self.type()])
def perform(self, node, (dims,), (out,)):
if out[0] is not None:
out[0].resize(dims, refcheck = 0)
out[0].fill(self.value)
else:
if self.value == 0:
out[0] = numpy.zeros(dims, dtype = self.dtype)
elif self.value == 1:
out[0] = numpy.ones(dims, dtype = self.dtype)
else:
out[0] = numpy.ones(dims, dtype = self.dtype) * self.value
def grad(self, (dims,), (gout,)):
return None,
def __eq__(self, other):
return type(self) == type(other) and self.ndim == other.ndim and self.dtype == other.dtype
def __hash__(self):
return hash(self.ndim) ^ hash(self.dtype)
Zeros = partial(Filler, 0)
"""WRITEME"""
Ones = partial(Filler, 1)
"""WRITEME""" """WRITEME"""
def __init__(self, value, ndim, dtype = 'float64'):
self.value = value
self.ndim = ndim
self.dtype = dtype
self.type = TensorType(dtype = dtype,
broadcastable = (False,)*ndim)
def make_node(self, dims): @constructor
dims = as_tensor_variable(dims) def zero():
return gof.Apply(self, [dims], [self.type()]) """
Return a scalar zero, e.g. for initializing sums.
"""
return Zeros(0)([])
def perform(self, node, (dims,), (out,)): @constructor
if out[0] is not None: def one():
out[0].resize(dims, refcheck = 0) """WRITEME"""
out[0].fill(self.value) return Ones(0)([])
else:
if self.value == 0:
out[0] = numpy.zeros(dims, dtype = self.dtype)
elif self.value == 1:
out[0] = numpy.ones(dims, dtype = self.dtype)
else:
out[0] = numpy.ones(dims, dtype = self.dtype) * self.value
def grad(self, (dims,), (gout,)): pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Filler) and r.owner.op.value == 0, printing.FunctionPrinter('zeros'))
return None, pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Filler) and r.owner.op.value == 1, printing.FunctionPrinter('ones'))
def __eq__(self, other): class Alloc(gof.Op):
return type(self) == type(other) and self.ndim == other.ndim and self.dtype == other.dtype """Create a Tensor from an initial value and a desired shape
def __hash__(self): alloc(value, shape0, shape1, ..., shapeN)
return hash(self.ndim) ^ hash(self.dtype)
Zeros = partial(Filler, 0) Returns an N-dimensional tensor initialized by `value` using something equivalent to
"""WRITEME""" >>> z = numpy.zeros(shape, value.dtype)
>>> z += value
Ones = partial(Filler, 1) The result has N dimensions, has the dtype of `value` and is obtained by broadcasting value
"""WRITEME""" over the output ndarray.
@constructor This Op is used to replace fill() during optimizations because after shapes are lifted,
def zero(): the first argument to fill can often be pruned from the graph.
"""
Return a scalar zero, e.g. for initializing sums.
""" """
return Zeros(0)([]) def __init__(self, dtype):
self.dtype = dtype
@constructor def __eq__(self, other):
def one(): return type(self) == type(other) and self.dtype == other.dtype
"""WRITEME"""
return Ones(0)([]) def __hash__(self):
return hash(type(self)) ^ hash(self.dtype)
def __str__(self):
return '%s{%s}' % (self.__class__.__name__, self.dtype)
def make_node(self, value, *shape):
v = as_tensor_variable(value)
sh = [as_tensor_variable(s) for s in shape]
bcast = []
for s in sh:
if s.type.dtype[:3] not in ('int', 'uin'):
raise TypeError('Shape arguments must be integers', s)
# if s is constant 1, then we're broadcastable in that dim
bcast.append(isinstance(s, TensorConstant) and (s.data == 1))
otype = TensorType(dtype=self.dtype, broadcastable=bcast)
return gof.Apply(self, [v]+sh, [otype()])
def perform(self, node, inputs, (out,)):
v = inputs[0]
sh = tuple([int(i) for i in inputs[1:]])
if out[0] is None or out[0].shape != sh:
out[0] = numpy.zeros(sh, dtype=self.dtype)
out[0][...] += v # broadcast v to fill us up
def grad(self, inputs, (gout,)):
return [None for i in inputs]
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Filler) and r.owner.op.value == 0, printing.FunctionPrinter('zeros'))
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Filler) and r.owner.op.value == 1, printing.FunctionPrinter('ones'))
@_redefine(elemwise.Elemwise(scal.identity)) @_redefine(elemwise.Elemwise(scal.identity))
def tensor_copy(a): def tensor_copy(a):
...@@ -1841,33 +1906,36 @@ def var(input, axis = None): ...@@ -1841,33 +1906,36 @@ def var(input, axis = None):
#return the mean sqr #return the mean sqr
return mean(centered_input**2, axis) return mean(centered_input**2, axis)
class Repeat(gof.Op): if 0:
## COMMENTED OUT FEB 17 2010
def make_node(self, input, repeats, axis): ## TODO (DOCUMENT AND WRITE TESTS) OR DELETE
assert isinstance(input.type, TensorType) class Repeat(gof.Op):
assert repeats.type == iscalar
assert axis.type == iscalar def make_node(self, input, repeats, axis):
broadcastable = [] assert isinstance(input.type, TensorType)
for i,x in enumerate(input.broadcastable): assert repeats.type == iscalar
if i==axis: assert axis.type == iscalar
broadcastable += [False] broadcastable = []
else: for i,x in enumerate(input.broadcastable):
broadcastable += [x] if i==axis:
broadcastable += [False]
type = TensorType(dtype = input.type.dtype, broadcastable = \ else:
broadcastable) broadcastable += [x]
#backport
#type = TensorType(dtype = input.type.dtype, type = TensorType(dtype = input.type.dtype, broadcastable = \
# broadcastable = [False if i==axis else x for i, x in enumerate(input.broadcastable)]) broadcastable)
return gof.Apply(self, [inputs, repeats, axis], [type()]) #backport
#type = TensorType(dtype = input.type.dtype,
def perform(self, node, (input, repeats, axis), (out, )): # broadcastable = [False if i==axis else x for i, x in enumerate(input.broadcastable)])
out[0] = numpy.repeat(input, repeats, axis) return gof.Apply(self, [inputs, repeats, axis], [type()])
def grad(self, (input, repeats, axis), (gout, )): def perform(self, node, (input, repeats, axis), (out, )):
return add.grad((input, gout), (gout,))[:1] out[0] = numpy.repeat(input, repeats, axis)
repeat = Repeat() def grad(self, (input, repeats, axis), (gout, )):
return add.grad((input, gout), (gout,))[:1]
repeat = Repeat()
class Default(gof.Op): class Default(gof.Op):
""" """
...@@ -2489,6 +2557,10 @@ class Join(Op): ...@@ -2489,6 +2557,10 @@ class Join(Op):
join(2, x, y, z) # WRONG: the axis has to be an index into the shape join(2, x, y, z) # WRONG: the axis has to be an index into the shape
join(0, x, u) # WRONG: joined tensors must have the same rank join(0, x, u) # WRONG: joined tensors must have the same rank
""" """
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def make_node(self, *axis_and_tensors): def make_node(self, *axis_and_tensors):
""" """
...@@ -2765,37 +2837,6 @@ else: ...@@ -2765,37 +2837,6 @@ else:
pass pass
class MakeVector(Op):
    """Build a rank-1 tensor (vector) out of scalar inputs.

    Every input must have exactly the TensorType `stype` given at
    construction; the output is a non-broadcastable vector with
    stype's dtype.
    """
    # NOTE(review): no __eq__/__hash__ defined, so instances compare by
    # identity; two MakeVector(stype) objects will not merge — confirm
    # whether that is intended.
    def __init__(self, stype):
        # stype: the TensorType required of every input
        self.stype = stype
    def make_node(self, *inputs):
        inputs = map(as_tensor_variable, inputs)
        assert all(a.type == self.stype for a in inputs)
        return Apply(self, inputs, [TensorType(broadcastable = (False,),
            dtype = self.stype.dtype)()])
    def perform(self, node, inputs, (out,)):
        # stack the scalar values into a 1-d ndarray
        out[0] = numpy.asarray(inputs)
    def grad(self, inputs, (gout,)):
        # not differentiable with respect to the inputs
        return [None]*len(inputs)
make_lvector = MakeVector(lscalar)
"""WRITEME"""
class MakeVectorPrinter:
    """Pretty-printer rendering a MakeVector apply as "[a, b, ...]"."""
    def process(self, r, pstate):
        if r.owner is not None and isinstance(r.owner.op, MakeVector):
            # print each input with high precedence so no extra parens appear
            parts = [pstate.pprinter.process(inp, pstate.clone(precedence = 1000))
                     for inp in r.owner.inputs]
            return "[%s]" % ", ".join(parts)
        raise TypeError("Can only print make_vector.")
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, MakeVector), MakeVectorPrinter())
class Reshape(Op): class Reshape(Op):
"""Perform a reshape operation of the input x to the new shape shp. """Perform a reshape operation of the input x to the new shape shp.
The number of dimensions to which to reshape to (ndim) must be known at graph The number of dimensions to which to reshape to (ndim) must be known at graph
...@@ -3138,6 +3179,37 @@ def inverse_permutation(perm): ...@@ -3138,6 +3179,37 @@ def inverse_permutation(perm):
# Should reproduce numpy's behaviour: # Should reproduce numpy's behaviour:
# http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing # http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing
AddConfigVar('experimental.advanced_indexing',
"enable not-well-tested advanced indexing functionality",
BoolParam(False))
class AdvancedSubtensor1(Op):
"""Implement x[ilist] where ilist is a vector of integers."""
def __hash__(self):
return hash(type(self))
def __eq__(self, other):
type(self) == type(other)
def make_node(self, x, ilist):
x_ = as_tensor_variable(x)
ilist_ = as_tensor_variable(ilist)
if ilist_.type.dtype[:3] not in ('int', 'uin'):
raise TypeError('index must be integers')
if ilist_.type.broadcastable != (False,):
raise TypeError('index must be vector')
if x_.type.ndim == 0:
raise TypeError('cannot index into a scalar')
if x_.type.broadcastable[0]:
# the caller should have made a copy of x len(ilist) times
raise TypeError('cannot index into a broadcastable dimension')
return gof.Apply(self, [x_, ilist_], [x_.type()])
def perform(self, node, (x,i), (out,)):
out[0] = x[i]
class AdvancedSubtensor(Op): class AdvancedSubtensor(Op):
"""Return a subtensor copy, using advanced indexing. """Return a subtensor copy, using advanced indexing.
""" """
...@@ -3308,6 +3380,18 @@ class Dot(Op): ...@@ -3308,6 +3380,18 @@ class Dot(Op):
rval = dot(gz, y.T), dot(x.T, gz) rval = dot(gz, y.T), dot(x.T, gz)
return cast(rval[0], x.dtype), cast(rval[1], y.dtype) return cast(rval[0], x.dtype), cast(rval[1], y.dtype)
def infer_shape(self, node, (xshp,yshp)):
x, y = node.inputs
if x.ndim == 2 and y.ndim == 2:
return [(xshp[0], yshp[1])]
if x.ndim == 1 and y.ndim == 2:
return [(yshp[1],)]
if x.ndim == 2 and y.ndim == 1:
return [(xshp[0],)]
if x.ndim == 1 and y.ndim == 1:
return [()]
raise NotImplementedError()
def __str__(self): def __str__(self):
return "dot" return "dot"
dot = Dot() dot = Dot()
......
...@@ -6,10 +6,9 @@ import numpy.distutils ...@@ -6,10 +6,9 @@ import numpy.distutils
from theano.configparser import config, AddConfigVar, StrParam from theano.configparser import config, AddConfigVar, StrParam
from theano.gof import (utils, Op, Apply, view_roots, PatternSub, DestroyHandler, from theano.gof import (utils, Op, Apply, view_roots, PatternSub, DestroyHandler,
SeqOptimizer, local_optimizer, Optimizer, LocalOptimizer, OpKeyOptimizer, SeqOptimizer, local_optimizer, Optimizer, LocalOptimizer, OpKeyOptimizer,
InconsistencyError, toolbox) InconsistencyError, toolbox, SequenceDB, EquilibriumOptimizer)
from theano.printing import pprint, FunctionPrinter from theano.printing import pprint, FunctionPrinter
from theano.tensor.opt import register_specialize, out2in, insert_inplace_optimizer from theano.compile.mode import optdb
# opt.py
import basic as T import basic as T
...@@ -30,7 +29,6 @@ AddConfigVar('blas.ldflags', ...@@ -30,7 +29,6 @@ AddConfigVar('blas.ldflags',
"lib[s] to include for [Fortran] level-3 blas implementation", "lib[s] to include for [Fortran] level-3 blas implementation",
StrParam(default_blas_ldflags())) StrParam(default_blas_ldflags()))
_logger = logging.getLogger('theano.tensor.blas') _logger = logging.getLogger('theano.tensor.blas')
_logger.setLevel(logging.WARN) _logger.setLevel(logging.WARN)
def debug(*msg): _logger.debug(' '.join(str(m) for m in msg)) def debug(*msg): _logger.debug(' '.join(str(m) for m in msg))
...@@ -391,12 +389,22 @@ class Gemm(GemmRelated): ...@@ -391,12 +389,22 @@ class Gemm(GemmRelated):
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) + self.build_gemm_version() return (1,) + self.build_gemm_version()
gemm = Gemm() class PseudoGemm(Op):
# should be replaced by Gemm
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def make_node(self, *args):
inputs = [T.as_tensor_variable(i) for i in args]
return Apply(self, inputs, [inputs[0].type()])
def perform(self, node, (z, a, x, y, b), (zout, )):
zout[0] = a * numpy.dot(x,y) + b * z
gemm = PseudoGemm()
gemm_inplace = Gemm()
pprint.assign(gemm, FunctionPrinter('gemm')) pprint.assign(gemm, FunctionPrinter('gemm'))
pprint.assign(gemm_inplace, FunctionPrinter('gemm_inplace'))
def res_is_a(node, op, maxclients=None): def res_is_a(node, op, maxclients=None):
if maxclients is not None: if maxclients is not None:
retval = (len(node.clients) <= maxclients) retval = (len(node.clients) <= maxclients)
...@@ -597,6 +605,7 @@ class GemmOptimizer(Optimizer): ...@@ -597,6 +605,7 @@ class GemmOptimizer(Optimizer):
while did_something: while did_something:
nodelist = list(env.toposort()) nodelist = list(env.toposort())
did_something = False did_something = False
nodelist.reverse()
for node in nodelist: for node in nodelist:
new_outputs = _gemm_from_node(node) new_outputs = _gemm_from_node(node)
if new_outputs: if new_outputs:
...@@ -611,10 +620,6 @@ class GemmOptimizer(Optimizer): ...@@ -611,10 +620,6 @@ class GemmOptimizer(Optimizer):
#TODO: retry other applications of gemm (see comment in _gemm_from_node #TODO: retry other applications of gemm (see comment in _gemm_from_node
pass pass
#neede to make the gemm optimisation(step 70) happen before the fusion of elemwise(step 71)
compile.optdb.register('inplace_gemm', GemmOptimizer(), 70.00, 'fast_run', 'inplace', 'gemm')
class Dot22(GemmRelated): class Dot22(GemmRelated):
"""Compute a matrix-matrix product. """Compute a matrix-matrix product.
This is a specialization of the more general Dot() This is a specialization of the more general Dot()
...@@ -689,5 +694,34 @@ def local_dot_to_dot22(node): ...@@ -689,5 +694,34 @@ def local_dot_to_dot22(node):
info('Not optimizing dot with inputs', x, y, x.type, y.type) info('Not optimizing dot with inputs', x, y, x.type, y.type)
else: else:
return False return False
register_specialize(local_dot_to_dot22)
@local_optimizer([gemm])
def local_inplace_gemm(node):
    """Replace a (non-destructive) gemm node by its in-place counterpart."""
    if node.op != gemm:
        return
    return [gemm_inplace(*node.inputs)]
#################################
#
# Set up the BlasOpt optimizer
#
#################################
blas_optdb = SequenceDB()
# run after numerical stability optimizations (1.5)
optdb.register('BlasOpt', blas_optdb, 1.7, 'fast_run')
# run before specialize (2.0) because specialize is basically a free-for-all that makes the
# graph crazy.
blas_optdb.register('local_dot_to_dot22',
EquilibriumOptimizer([local_dot_to_dot22], max_use_ratio=5),
0, 'fast_run')
blas_optdb.register('local_dot_to_gemm', GemmOptimizer(), 10, 'fast_run')
# After destroyhandler is in but before we try to make elemwise things inplace
# Try to make gemm inplace
# Also, need to make the gemm optimisation(step 70) happen before the fusion of elemwise(step 71)
optdb.register('InplaceBlasOpt',
EquilibriumOptimizer([local_inplace_gemm], max_use_ratio=5),
70.0, 'fast_run', 'inplace')
...@@ -197,6 +197,18 @@ class DimShuffle(Op): ...@@ -197,6 +197,18 @@ class DimShuffle(Op):
storage[0] = numpy.asarray(res) #asarray puts scalars back into array storage[0] = numpy.asarray(res) #asarray puts scalars back into array
def infer_shape(self, node, (ishp,)):
    """Map the input shape through this dimshuffle, mirroring perform():
    drop dims, transpose, then insert broadcast (length-1) dims."""
    ishp = list(ishp)
    # iterate in reverse so earlier drop indices stay valid while deleting
    for drop in reversed(self.drop):
        del ishp[drop]
    # transpose
    rval = [ishp[i] for i in self.shuffle]
    # augment: inserted dimensions always have length 1
    for augm in self.augment:
        rval.insert(augm, 1)
    return [rval]
def c_code(self, node, name, (input,), (res,), sub): def c_code(self, node, name, (input,), (res,), sub):
basename = input + '__view_or_copy' basename = input + '__view_or_copy'
...@@ -613,6 +625,25 @@ class Elemwise(Op): ...@@ -613,6 +625,25 @@ class Elemwise(Op):
# the following should be used instead of the previous loop, unfortunately it tends to segfault # the following should be used instead of the previous loop, unfortunately it tends to segfault
# self.ufunc(*(ufunc_args+[s[0] for s in output_storage])) # self.ufunc(*(ufunc_args+[s[0] for s in output_storage]))
def infer_shape(self, node, i_shapes):
    """Shape of each output of an elemwise apply.

    Broadcastable output dimensions have length 1; every other dimension
    takes its length from the first input that is not broadcastable there.

    :param i_shapes: one shape sequence per input, aligned with node.inputs
    :returns: a list with one shape list per output
    """
    rval = []
    for o in node.outputs:
        oshp = []
        for dim, b in enumerate(o.type.broadcastable):
            b_dim = None
            if b: # this output dim is broadcastable, so its length is 1
                b_dim = 1
            else: # some input must be non-broadcastable here; use its length
                for ishp, i in zip(i_shapes, node.inputs):
                    if not i.type.broadcastable[dim]:
                        b_dim = ishp[dim]
                        break
                # BUG FIX: was `assert b_dim`, which wrongly rejected a
                # legitimate dimension of length 0; only "no candidate
                # input found" (None) is an error
                assert b_dim is not None
            oshp.append(b_dim)
        rval.append(oshp)
    return rval
def _c_all(self, node, name, inames, onames, sub): def _c_all(self, node, name, inames, onames, sub):
_inames = inames _inames = inames
_onames = onames _onames = onames
...@@ -764,10 +795,14 @@ class CAReduce(Op): ...@@ -764,10 +795,14 @@ class CAReduce(Op):
if scalar_op.nin not in [-1, 2] or scalar_op.nout != 1: if scalar_op.nin not in [-1, 2] or scalar_op.nout != 1:
raise NotImplementedError("CAReduce only supports binary functions with a single output.") raise NotImplementedError("CAReduce only supports binary functions with a single output.")
self.scalar_op = scalar_op self.scalar_op = scalar_op
if isinstance(axis, int): if axis is None:
self.axis = [axis]
else:
self.axis = axis self.axis = axis
elif isinstance(axis, int):
self.axis = (axis,)
else:
self.axis = list(set(axis))
self.axis.sort()
self.axis = tuple(self.axis)
self.ufunc = numpy.frompyfunc(scalar_op.impl, 2, 1) self.ufunc = numpy.frompyfunc(scalar_op.impl, 2, 1)
# CAReduce output views input when reducing scalars # CAReduce output views input when reducing scalars
...@@ -834,6 +869,13 @@ class CAReduce(Op): ...@@ -834,6 +869,13 @@ class CAReduce(Op):
else: else:
output[0] = numpy.copy(variable) output[0] = numpy.copy(variable)
def infer_shape(self, node, (ishape,)):
    """Output shape: the input shape with the reduced axes removed."""
    axis = self.axis
    if axis is None:
        # reduction over all axes yields a scalar
        return (),
    # keep only the dimensions not reduced over
    return [ishape[i] for (i,b) in enumerate(node.inputs[0].type.broadcastable) if i not in axis],
def _c_all(self, node, name, inames, onames, sub): def _c_all(self, node, name, inames, onames, sub):
input = node.inputs[0] input = node.inputs[0]
......
from nnet import * from nnet import *
from sigm import softplus, sigmoid, sigmoid_inplace, scalar_sigmoid
...@@ -4,89 +4,14 @@ ...@@ -4,89 +4,14 @@
""" """
from theano import gof from theano import gof
from theano import scalar
from theano import printing from theano import printing
from theano.printing import pprint
from theano.tensor import basic as tensor from theano.tensor import basic as tensor
from theano.tensor import elemwise from theano.tensor import elemwise
from theano.tensor import opt from theano.tensor import opt
from theano.compile import optdb from theano.compile import optdb
import numpy import numpy
############ from .sigm import sigmoid, softplus
#
# SCALAR OPS
#
class ScalarSigmoid(scalar.UnaryScalarOp):
@staticmethod
def st_impl(x):
if x < -30.0:
return 0.0
if x > 30.0:
return 1.0
return 1.0 / (1.0 + numpy.exp(-x))
def impl(self, x):
return ScalarSigmoid.st_impl(x)
def grad(self, (x,), (gz,)):
y = scalar_sigmoid(x)
return [gz * y * (1.0 - y)]
def c_code(self, node, name, (x,), (z,), sub):
if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
# print i, repr( theano._asarray(1.0, dtype=dt) / (theano._asarray(1.0, dtype=dt) + numpy.exp(-theano._asarray([i,-i], dtype=dt))))
# the boundary checks prevent us from generating inf
return """%(z)s = %(x)s < -88.0f ? 0.0 : %(x)s > 15.0f ? 1.0f : 1.0f /(1.0f + exp(-%(x)s));""" % locals()
elif node.inputs[0].type == scalar.float64:
return """%(z)s = %(x)s < -709.0 ? 0.0 : %(x)s > 19.0 ? 1.0 : 1.0 /(1.0+exp(-%(x)s));""" % locals()
else:
raise NotImplementedError('only floatingpoint is implemented')
def c_code_cache_version(self):
v = super(ScalarSigmoid, self).c_code_cache_version()
if v:
return (2,) + v
else:
return v
scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')
pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid'))
class ScalarSoftplus(scalar.UnaryScalarOp):
@staticmethod
def static_impl(x):
if x < -30.0:
return 0.0
if x > 30.0:
return x
return numpy.log1p(numpy.exp(x))
def impl(self, x):
return ScalarSoftplus.static_impl(x)
def grad(self, (x,), (gz,)):
return [gz * scalar_sigmoid(x)]
def c_code(self, node, name, (x,), (z,), sub):
if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
# print i, repr( numpy.log1p(numpy.exp(theano._asarray([i,-i], dtype=dt))))
# the boundary checks prevent us from generating inf
return """%(z)s = %(x)s < -103.0f ? 0.0 : %(x)s > 14.0f ? %(x)s : log1p(exp(%(x)s));""" % locals()
elif node.inputs[0].type == scalar.float64:
return """%(z)s = %(x)s < -745.0 ? 0.0 : %(x)s > 16.0 ? %(x)s : log1p(exp(%(x)s));""" % locals()
else:
raise NotImplementedError('only floatingpoint is implemented')
def c_code_cache_version(self):
v = super(ScalarSoftplus, self).c_code_cache_version()
if v:
return (2,) + v
else:
return v
scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus')
softplus = elemwise.Elemwise(scalar_softplus, name='softplus')
pprint.assign(softplus, printing.FunctionPrinter('softplus'))
############ ############
...@@ -1351,6 +1276,7 @@ def categorical_crossentropy(coding_dist, true_dist): ...@@ -1351,6 +1276,7 @@ def categorical_crossentropy(coding_dist, true_dist):
raise TypeError('rank mismatch between coding and true distributions') raise TypeError('rank mismatch between coding and true distributions')
from theano import scalar
class Prepend_scalar_constant_to_each_row(gof.Op): class Prepend_scalar_constant_to_each_row(gof.Op):
def __init__(self, val = 0): def __init__(self, val = 0):
...@@ -1440,14 +1366,3 @@ prepend_scalar_to_each_row = Prepend_scalar_to_each_row() ...@@ -1440,14 +1366,3 @@ prepend_scalar_to_each_row = Prepend_scalar_to_each_row()
prepend_0_to_each_row = Prepend_scalar_constant_to_each_row(0.) prepend_0_to_each_row = Prepend_scalar_constant_to_each_row(0.)
prepend_1_to_each_row = Prepend_scalar_constant_to_each_row(1.) prepend_1_to_each_row = Prepend_scalar_constant_to_each_row(1.)
logsigm_to_softplus = gof.PatternSub(
(tensor.log, (sigmoid, 'x')),
(tensor.neg, (softplus, (tensor.neg, 'x'))),
allow_multiple_clients = True)
log1msigm_to_softplus = gof.PatternSub(
(tensor.log, (tensor.sub, tensor.constant([[1.0]]), (sigmoid, 'x'))),
(tensor.neg, (softplus, 'x')),
allow_multiple_clients = True)
opt.register_specialize(logsigm_to_softplus, name = 'logsigm_to_softplus')
opt.register_specialize(log1msigm_to_softplus, name = 'log1msigm_to_softplus')
"""Ops and optimizations: sigmoid, softplus
These functions implement special cases of exp and log to improve numerical stability.
"""
import numpy
from theano import gof
from theano import scalar
from theano import printing
from theano.tensor import basic as tensor
from theano.printing import pprint
from theano.tensor import elemwise
from theano.tensor import opt
from theano.compile import optdb
############
#
# SCALAR OPS
#
class ScalarSigmoid(scalar.UnaryScalarOp):
    """Elementwise logistic sigmoid 1/(1+exp(-x)) as a scalar op.

    The Python implementation saturates to exactly 0.0 / 1.0 outside
    [-30, 30] to avoid overflow in exp(); the C implementation uses
    dtype-specific cutoffs chosen so exp() never produces inf.
    """
    @staticmethod
    def st_impl(x):
        # saturate: outside [-30, 30] the true value rounds to 0/1 anyway
        if x < -30.0:
            return 0.0
        if x > 30.0:
            return 1.0
        return 1.0 / (1.0 + numpy.exp(-x))
    def impl(self, x):
        return ScalarSigmoid.st_impl(x)
    def grad(self, (x,), (gz,)):
        # d/dx sigm(x) = sigm(x) * (1 - sigm(x))
        y = scalar_sigmoid(x)
        return [gz * y * (1.0 - y)]
    def c_code(self, node, name, (x,), (z,), sub):
        if node.inputs[0].type == scalar.float32:
            # These constants were obtained by looking at the output of python commands like:
            # for i in xrange(750):
            #     print i, repr( theano._asarray(1.0, dtype=dt) / (theano._asarray(1.0, dtype=dt) + numpy.exp(-theano._asarray([i,-i], dtype=dt))))
            # the boundary checks prevent us from generating inf
            return """%(z)s = %(x)s < -88.0f ? 0.0 : %(x)s > 15.0f ? 1.0f : 1.0f /(1.0f + exp(-%(x)s));""" % locals()
        elif node.inputs[0].type == scalar.float64:
            return """%(z)s = %(x)s < -709.0 ? 0.0 : %(x)s > 19.0 ? 1.0 : 1.0 /(1.0+exp(-%(x)s));""" % locals()
        else:
            raise NotImplementedError('only floatingpoint is implemented')
    def c_code_cache_version(self):
        # prefix a local version onto the parent's; a falsy parent version
        # means "do not cache" and is passed through unchanged
        v = super(ScalarSigmoid, self).c_code_cache_version()
        if v:
            return (2,) + v
        else:
            return v
scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')
sigmoid_inplace = elemwise.Elemwise(
ScalarSigmoid(scalar.transfer_type(0)),
inplace_pattern={0:0},
name='sigmoid_inplace',
)
pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid'))
class ScalarSoftplus(scalar.UnaryScalarOp):
    """Elementwise softplus log(1 + exp(x)) as a scalar op.

    The Python implementation saturates to 0.0 below -30 and to x above 30;
    the C implementation uses dtype-specific cutoffs chosen so exp() never
    produces inf.
    """
    @staticmethod
    def static_impl(x):
        if x < -30.0:
            return 0.0
        if x > 30.0:
            return x
        # log1p keeps precision for small exp(x)
        return numpy.log1p(numpy.exp(x))
    def impl(self, x):
        return ScalarSoftplus.static_impl(x)
    def grad(self, (x,), (gz,)):
        # d/dx softplus(x) = sigm(x)
        return [gz * scalar_sigmoid(x)]
    def c_code(self, node, name, (x,), (z,), sub):
        if node.inputs[0].type == scalar.float32:
            # These constants were obtained by looking at the output of python commands like:
            # for i in xrange(750):
            #     print i, repr( numpy.log1p(numpy.exp(theano._asarray([i,-i], dtype=dt))))
            # the boundary checks prevent us from generating inf
            return """%(z)s = %(x)s < -103.0f ? 0.0 : %(x)s > 14.0f ? %(x)s : log1p(exp(%(x)s));""" % locals()
        elif node.inputs[0].type == scalar.float64:
            return """%(z)s = %(x)s < -745.0 ? 0.0 : %(x)s > 16.0 ? %(x)s : log1p(exp(%(x)s));""" % locals()
        else:
            raise NotImplementedError('only floatingpoint is implemented')
    def c_code_cache_version(self):
        # prefix a local version onto the parent's; a falsy parent version
        # means "do not cache" and is passed through unchanged
        v = super(ScalarSoftplus, self).c_code_cache_version()
        if v:
            return (2,) + v
        else:
            return v
scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus')
softplus = elemwise.Elemwise(scalar_softplus, name='softplus')
pprint.assign(softplus, printing.FunctionPrinter('softplus'))
logsigm_to_softplus = gof.PatternSub(
(tensor.log, (sigmoid, 'x')),
(tensor.neg, (softplus, (tensor.neg, 'x'))),
allow_multiple_clients = True)
def _is_1(expr):
    """rtype bool. True iff `expr` is (a view of) a constant close to 1."""
    try:
        value = opt.get_constant_value(expr)
    except TypeError:
        # not a view of constant data
        return False
    return numpy.allclose(value, 1)
log1msigm_to_softplus = gof.PatternSub(
(tensor.log,
(tensor.sub,
dict(pattern='y', constraint = _is_1),
(sigmoid, 'x'))),
(tensor.neg, (softplus, 'x')),
allow_multiple_clients = True)
opt.register_stabilize(logsigm_to_softplus, name = 'logsigm_to_softplus')
opt.register_stabilize(log1msigm_to_softplus, name = 'log1msigm_to_softplus')
def is_1pexp(t):
    """If `t` has the form (scalar constants + exp(x)) with exactly one
    non-constant term that is an exp, return (False, x); otherwise None.

    NOTE(review): the constant part is not checked to sum to 1 — confirm
    against callers before relying on the name.
    """
    owner = t.owner
    if not (owner and owner.op == tensor.add):
        return None
    # scalar_inputs are potentially dimshuffled and fill'd scalars
    scalars, scalar_inputs, nonconsts = opt.scalarconsts_rest(owner.inputs)
    if len(nonconsts) != 1:
        return None
    candidate = nonconsts[0]
    if candidate.owner and candidate.owner.op == tensor.exp:
        return False, candidate.owner.inputs[0]
    return None
def is_exp(t):
    """If `t` is exp(x) return (False, x); if it is -exp(x) (one leading
    negation) return (True, x); otherwise return None."""
    negated = False
    candidate = t
    # strip at most one leading negation, remembering it in the flag
    if candidate.owner and candidate.owner.op == tensor.neg:
        negated = True
        candidate = candidate.owner.inputs[0]
    if candidate.owner and candidate.owner.op == tensor.exp:
        return negated, candidate.owner.inputs[0]
    return None
def partition_num_or_denom(r, f):
    """Split the multiplicative factors of `r` using classifier `f`.

    `f(term)` returns None (no match) or a pair (neg_flag, extracted).
    Returns (matched, unmatched, flip_sign) where flip_sign is the xor of
    all neg flags seen.
    """
    if r.owner and r.owner.op == tensor.mul:
        factors = r.owner.inputs
    else:
        factors = [r]
    matched = []
    unmatched = []
    flip_sign = False
    for factor in factors:
        verdict = f(factor)
        if verdict is None:
            unmatched.append(factor)
        else:
            factor_neg, extracted = verdict
            matched.append(extracted)
            flip_sign ^= factor_neg   # bit flip if factor_neg is true
    return matched, unmatched, flip_sign
@opt.register_stabilize
@gof.local_optimizer([tensor.true_div])
def local_exp_over_1_plus_exp(node):
    """exp(x)/(1+exp(x)) -> sigm(x)
    c/(1+exp(x)) -> c*sigm(-x)
    """
    # this optimization should be done for numerical stability
    # so we don't care to check client counts
    if node.op == tensor.true_div:
        #find all the exp() terms in the numerator
        num, denom = node.inputs
        num_exp_x, num_rest, num_neg = partition_num_or_denom(num, is_exp)
        # ... and all the (1+exp()) terms in the denominator
        denom_1pexp, denom_rest, denom_neg = partition_num_or_denom(denom, is_1pexp)
        sigmoids = []
        for t in denom_1pexp:
            if t in num_exp_x:
                # case: exp(x) /(1+exp(x))
                sigmoids.append(sigmoid(t))
                del num_exp_x[num_exp_x.index(t)]
            else:
                # case: 1/(1+exp(x))
                sigmoids.append(sigmoid(-t))
        if not sigmoids: # we didn't find any. abort
            return
        # put the new numerator together
        new_num = sigmoids + [tensor.exp(t) for t in num_exp_x] + num_rest
        if len(new_num) == 1:
            new_num = new_num[0]
        else:
            new_num = tensor.mul(*new_num)
        # only the parity of the stripped negations matters
        if num_neg ^ denom_neg:
            new_num = -new_num
        # divide by whatever denominator factors were not (1+exp(x))
        if len(denom_rest) == 0:
            return [new_num]
        elif len(denom_rest) == 1:
            return [new_num / denom_rest[0]]
        else:
            return [new_num / tensor.mul(*denom_rest)]
@opt.register_stabilize
@gof.local_optimizer([tensor.mul])
def local_sigm_times_exp(node):
    """
    exp(x) * sigm(-x) -> sigm(x)
    exp(-x) * sigm(x) -> sigm(-x)

    (Docstring fixed: the previous claim `exp(x)*sigm(-x) -> -sigm(x)`
    did not match the identity nor what the code builds.)
    """
    # this is a numerical stability thing, so we dont check clients
    if node.op == tensor.mul:
        # classify every factor, stripping (and counting) leading negations
        exp_x = []
        exp_minus_x = []
        sigm_x = []
        sigm_minus_x = []
        other = []
        neg = False
        for i in node.inputs:
            while i.owner and i.owner.op == tensor.neg:
                neg ^= True
                i = i.owner.inputs[0]
            if i.owner and i.owner.op == tensor.exp:
                exp_arg = i.owner.inputs[0]
                if exp_arg.owner and exp_arg.owner.op == tensor.neg:
                    exp_minus_x.append(exp_arg.owner.inputs[0])
                else:
                    exp_x.append(exp_arg)
            elif i.owner and i.owner.op == sigmoid:
                sigm_arg = i.owner.inputs[0]
                if sigm_arg.owner and sigm_arg.owner.op == tensor.neg:
                    sigm_minus_x.append(sigm_arg.owner.inputs[0])
                else:
                    sigm_x.append(sigm_arg)
            else:
                other.append(i)
        # remove matched pairs in exp_x and sigm_minus_x
        did_something = False
        for i in exp_x:
            if i in sigm_minus_x:
                # exp(x) * sigm(-x) = sigm(x)
                del sigm_minus_x[sigm_minus_x.index(i)]
                other.append(sigmoid(i))
                did_something = True
            else:
                # BUG FIX: was `other.append(i)`, which dropped the exp
                # wrapper and rebuilt the product with the bare argument
                other.append(tensor.exp(i))
        # remove matched pairs in exp_minus_x and sigm_x
        for i in exp_minus_x:
            if i in sigm_x:
                # exp(-x) * sigm(x) = sigm(-x)
                del sigm_x[sigm_x.index(i)]
                # BUG FIX: was `sigm(-i)` — `sigm` is not defined anywhere
                # in this module, so this branch raised NameError
                other.append(sigmoid(-i))
                did_something = True
            else:
                # BUG FIX: restore the exp(-x) factor, not its bare argument
                other.append(tensor.exp(-i))
        if did_something:
            terms = other + [sigmoid(x) for x in sigm_x] \
                    + [sigmoid(-x) for x in sigm_minus_x]
            if len(terms) > 1:
                rval = tensor.mul(*terms)
            else:
                rval = terms[0]
            # re-apply the parity of the stripped negations
            if neg:
                return [-rval]
            else:
                return [rval]
@opt.register_stabilize
@gof.local_optimizer([tensor.inv])
def local_inv_1_plus_exp(node):
    """
    1/(1+exp(x)) -> sigm(-x)
    """
    # this optimization should be done for numerical stability
    # so we don't care to check client counts
    if node.op == tensor.inv:
        inv_arg = node.inputs[0]
        if inv_arg.owner and inv_arg.owner.op == tensor.add:
            scalars, scalar_inputs, nonconsts = \
                    opt.scalarconsts_rest(inv_arg.owner.inputs)
            # scalar_inputs are potentially dimshuffled and fill'd scalars
            if len(nonconsts) == 1:
                if nonconsts[0].owner and nonconsts[0].owner.op == tensor.exp:
                    # only rewrite when the constant part sums (numerically) to 1
                    if scalars and numpy.allclose(numpy.sum(scalars), 1):
                        # rebuild the fill()s around the sigmoid so the
                        # broadcast pattern of the original graph is kept
                        return opt._fill_chain(
                                sigmoid(tensor.neg(nonconsts[0].owner.inputs[0])),
                                scalar_inputs)
#@opt.register_canonicalize
@gof.local_optimizer([tensor.sub])
def local_1msigmoid(node):
    """
    1-sigm(x) -> sigm(-x)
    """
    # BUG FIX: the tracker list was [tensor.inv] (copied from
    # local_inv_1_plus_exp) while the body matches tensor.sub nodes, so
    # this optimizer could never fire once registered.
    if node.op == tensor.sub:
        sub_l, sub_r = node.inputs
        if len(sub_r.clients) > 1:
            return # graph is using both sigm and 1-sigm
        if sub_r.owner and sub_r.owner.op == sigmoid:
            try:
                val_l = opt.get_constant_value(sub_l)
            except Exception:
                # left operand is not a constant view; nothing to do
                return
            if numpy.allclose(numpy.sum(val_l), 1):
                return [sigmoid(-sub_r.owner.inputs[0])]
import unittest
import theano
from theano import tensor as T
from theano import gof
import numpy
from theano.tests import unittest_tools as utt
from theano.tensor.tests import test_basic as TT
from theano.tensor.nnet import *
class T_sigmoid(unittest.TestCase):
    """Gradient check for the elemwise sigmoid op."""
    def setUp(self):
        # make verify_grad deterministic across runs
        utt.seed_rng()
    def test_elemwise(self):
        utt.verify_grad(sigmoid, [numpy.random.rand(3,4)])
class T_softplus(unittest.TestCase):
    """Gradient check for the elemwise softplus op."""
    def setUp(self):
        # make verify_grad deterministic across runs
        utt.seed_rng()
    def test_elemwise(self):
        utt.verify_grad(softplus, [numpy.random.rand(3,4)])
class T_sigmoid_opts(unittest.TestCase):
    """Check that the sigmoid stabilization optimizations rewrite graphs
    into the expected op sequences."""
    def test_exp_over_1_plus_exp(self):
        m = theano.config.mode
        if m == 'FAST_COMPILE':
            # the optimizations under test are tagged 'fast_run' only
            m = 'FAST_RUN'
        x = T.dvector()
        # tests exp_over_1_plus_exp
        f = theano.function([x], T.exp(x)/(1+T.exp(x)), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid]
        # tests inv_1_plus_exp
        f = theano.function([x], T.fill(x,1.0) / (1+T.exp(-x)), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid]
        # tests inv_1_plus_exp with neg
        f = theano.function([x], T.fill(x,-1.0) / (1+T.exp(-x)), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid,
                T.inplace.neg_inplace]
        # tests double inv_1_plus_exp with neg
        f = theano.function([x], (T.fill(x,-1.0)*T.exp(x)) / ((1+T.exp(x))*(1+T.exp(-x))), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid,
                T.mul]
    def test_1msigmoid(self):
        m = theano.config.mode
        if m == 'FAST_COMPILE':
            m = 'FAST_RUN'
        x = T.fmatrix()
        # tests exp_over_1_plus_exp
        f = theano.function([x], 1 - T.exp(x)/(1+T.exp(x)), mode=m)
        theano.printing.debugprint(f)
        # NOTE(review): `tensor` is not imported under that name in this
        # module — presumably provided via `from theano.tensor.nnet import *`;
        # verify, otherwise these asserts raise NameError.
        assert [node.op for node in f.maker.env.toposort()] == [tensor.neg, sigmoid_inplace]
        # tests inv_1_plus_exp
        f = theano.function([x], 1 - T.fill(x,1.0) / (1+T.exp(-x)), mode=m)
        theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [tensor.neg,
                sigmoid_inplace]
...@@ -44,34 +44,65 @@ def _fill_chain(new_out, orig_inputs): ...@@ -44,34 +44,65 @@ def _fill_chain(new_out, orig_inputs):
new_out = T.fill(i, new_out) new_out = T.fill(i, new_out)
return [new_out] return [new_out]
def get_constant_value(v, fill=False): def encompasses_broadcastable(b1, b2):
"""return the constant value underlying variable `v` """
Returns True if the broadcastable patterns b1 and b2 are such that b2 is
broadcasted to b1's shape and not the opposite.
:param b1: the broadcastable attribute of a tensor type
:param b2: the broadcastable attribute of a tensor type
"""
if len(b1) < len(b2):
return False
b1 = b1[-len(b2):]
return not any(v1 and not v2 for v1, v2 in zip(b1, b2))
def merge_broadcastables(broadcastables):
return [all(bcast) for bcast in zip(*broadcastables)]
def get_constant_value(v):
"""return the constant scalar(0-D) value underlying variable `v`
If v is the output of dimshuffles, fills, this function digs through them. If v is the output of dimshuffles, fills, this function digs through them.
If `v` is not some view of constant data, then raise a TypeError. If `v` is not some view of constant data, then raise a TypeError.
if fill is True, then it returns (v, [...]) where the second term is a list of variables
that were used in the fill expressions
:note: There may be another function similar to this one in the code, but I'm not sure where it :note: There may be another function similar to this one in the code, but I'm not sure where it
is. is.
""" """
if isinstance(v, gof.Constant): if isinstance(v, gof.Constant):
if fill: #TODO: consider checking for arrays of the form e.g. [1,1,1,1] where
return v.data, [] # it is not a constant, but in some cases it *could* be replaced with one.
return v.data # Note that this would have an effect on the broadcasting of inputs and so on
try:
complex(v.data) #works for all numeric scalars
return v.data
except:
raise TypeError(v)
if v.owner and isinstance(v.owner.op, T.DimShuffle): if v.owner and isinstance(v.owner.op, T.DimShuffle):
return get_constant_value(v.owner.inputs[0], fill=fill) return get_constant_value(v.owner.inputs[0])
if fill: if v.owner and v.owner.op == T.fill:
if v.owner and v.owner.op == T.fill: shape, val = v.owner.inputs
shape, val = v.owner.inputs # fill(a,b) fills the shape of 'a' filled with 'b'
# fill(a,b) fills the shape of 'a' filled with 'b' return get_constant_value(val)
rval, rshapes = get_constant_value(val, fill=fill)
return rval, rshapes + [shape]
raise TypeError(v) raise TypeError(v)
def scalarconsts_rest(inputs):
    """Partition a list of variables into two kinds:
    scalar constants, and the rest.

    :returns: (consts, origconsts, nonconsts) — extracted constant values,
        the corresponding original variables, and everything else.
    """
    consts = []
    origconsts = []
    nonconsts = []
    for i in inputs:
        try:
            v = get_constant_value(i)
            consts.append(v)
            origconsts.append(i)
        # get_constant_value raises TypeError for non-constant variables;
        # the previous bare `except:` also swallowed KeyboardInterrupt
        # and SystemExit
        except TypeError:
            nonconsts.append(i)
    return consts, origconsts, nonconsts
@gof.optimizer @gof.optimizer
def insert_inplace_optimizer(env): def insert_inplace_optimizer(env):
""" """
...@@ -124,6 +155,11 @@ def register_specialize(lopt, *tags, **kwargs): ...@@ -124,6 +155,11 @@ def register_specialize(lopt, *tags, **kwargs):
compile.optdb['specialize'].register(name, lopt, 'fast_run', *tags) compile.optdb['specialize'].register(name, lopt, 'fast_run', *tags)
return lopt return lopt
def register_stabilize(lopt, *tags, **kwargs):
name = (kwargs and kwargs.pop('name')) or lopt.__name__
compile.optdb['stabilize'].register(name, lopt, 'fast_run', *tags)
return lopt
###################### ######################
# DimShuffle lifters # # DimShuffle lifters #
###################### ######################
...@@ -164,130 +200,321 @@ register_canonicalize(local_dimshuffle_lift) ...@@ -164,130 +200,321 @@ register_canonicalize(local_dimshuffle_lift)
################# #####################################
# Shape lifters # # ShapeFeature, Shape optimizations
################# #####################################
@gof.local_optimizer([T._shape, None]) class MakeVector(T.Op):
def local_shape_lift_elemwise(node): """Concatenate a number of scalars together into a vector
"""
shape(elemwise_op(..., x, ...)) -> shape(x)
Where x contains the maximal shape information. This is a simple version of stack() that introduces far less cruft into the graph.
""" """
if not opt.check_chain(node, T._shape, T.Elemwise): def __init__(self, dtype='int64'):
return False self.dtype = dtype
def __eq__(self, other):
output = node.inputs[0] return type(self) == type(other) and self.dtype == other.dtype
parent = output.owner def __hash__(self):
return hash(type(self)) ^ hash(self.dtype)
for input in parent.inputs: def make_node(self, *inputs):
if input.type.broadcastable == output.type.broadcastable: inputs = map(T.as_tensor_variable, inputs)
return T._shape(input), if not all(a.type == inputs[0].type for a in inputs):
raise TypeError('This MakeVector instance requires inputs of same type %s' %
return False inputs[0].type)
if inputs:
register_canonicalize(local_shape_lift_elemwise, 'shape_lift') dtype = inputs[0].type.dtype
register_specialize(local_shape_lift_elemwise, 'shape_lift') else:
dtype = self.dtype
#bcastable = (len(inputs) == 1)
bcastable = False
otype = T.TensorType(
broadcastable=(bcastable,),
dtype=dtype)
return T.Apply(self, inputs, [otype()])
def __str__(self):
return self.__class__.__name__
def perform(self, node, inputs, (out,)):
out[0] = T.numpy.asarray(inputs)
make_vector = MakeVector()
class MakeVectorPrinter:
def process(self, r, pstate):
if r.owner is None:
raise TypeError("Can only print make_vector.")
elif isinstance(r.owner.op, MakeVector):
return "[%s]" % ", ".join(pstate.pprinter.process(input, pstate.clone(precedence = 1000)) for input in r.owner.inputs)
else:
raise TypeError("Can only print make_vector.")
T.pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, MakeVector), MakeVectorPrinter())
@gof.local_optimizer([T._shape, None]) class Shape_i(T.Op):
def local_shape_lift_sum(node):
""" """
shape(sum{n}(x)) -> [shape(x)[0], ..., shape(x)[n-1], shape(x)[n+1], ...] L{Op} to return the shape of a matrix.
"""
if not opt.check_chain(node, T._shape, T.Sum):
return False
input = node.inputs[0].owner.inputs[0] @note: Non-differentiable.
axis = node.inputs[0].owner.op.axis """
if axis is None:# or len(axis) != 1: def __init__(self, i):
axis = range(input.type.ndim) self.i = i
def __hash__(self):
return hash(type(self)) ^ self.i
def __eq__(self, other):
return type(self) == type(other) and self.i == other.i
def __str__(self):
return '%s{%i}'%(self.__class__.__name__, self.i)
def make_node(self, x):
x = T.as_tensor_variable(x)
if x.ndim <= self.i:
raise TypeError('x has too few dimensions for Shape_i', (x, self.i))
return T.Apply(self, [x], [T.lscalar()])
def perform(self, node, (x, ), (out, )):
out[0] = theano._asarray(x.shape[self.i], dtype = 'int64')
def grad(self, (x,), (gz,)):
return [None]
class ShapeFeature(object):
"""Graph optimizer for removing all calls to shape()
This optimizer replaces all Shapes and Subtensors of Shapes with Shape_i and MakeVector
Ops.
This optimizer has several goals:
1. to 'lift' Shapes to as close to the inputs as possible.
2. to infer the shape of every node in the graph in terms of the input shapes.
3. remove all fills (T.second, T.fill) from the graph
Lifting shapes as close to the inputs as possible is important for canonicalization because
it is very bad form to have to compute something just to know how big it will be. Firstly,
it is a waste of time to compute such outputs. But it is important to get rid of these
outputs as early as possible in the compilation process because the
extra computations make it appear as if many internal graph nodes have multiple clients.
Many optimizations refuse to work on nodes with multiple clients.
Lifting is done by using an `<Op>.infer_shape` function if one is present, or else using a
conservative default. An Op that supports shape-lifting should define a
infer_shape(self, node, input_shapes) function. The argument input_shapes is a tuple
of tuples... there is an interior tuple for each input to the node. The tuple has as many
elements as dimensions. The element in position i of tuple j represents the i'th shape
component of the j'th input. The function should return a tuple of tuples. One output
tuple for each node.output. Again, the i'th element of the j'th output tuple represents
the output[j].shape[i] of the function. If an output is not a TensorType, then None should
be returned instead of a tuple for that output.
For example the infer_shape for a matrix-matrix product would accept
input_shapes=((x0,x1), (y0,y1)) and return ((x0, y1),).
Inferring the shape of internal nodes in the graph is important for doing size-driven
optimizations. If we know how big various intermediate results will be, we can estimate
the cost of many Ops accurately, and generate c-code that is specific [e.g. unrolled] to
particular sizes.
ish = T._shape(input) .. note::
return T.make_lvector.make_node(*(ish[i] for i in xrange(input.type.ndim) if i not in axis)).outputs
# return T.vertical_stack.make_node(ish[:axis], ish[axis+1:]).outputs
register_canonicalize(local_shape_lift_sum, 'shape_lift') Right now there is only the ConvOp that can really take advantage of this shape
inference, but it is worth it even just for the ConvOp. All that's necessary to do
shape inference is 1) to mark shared inputs as having a particular shape,
either via a .tag or some similar hacking; and 2) to add an optional Param() argument
to promise that inputs will have a certain shape (or even to have certain shapes in
certain dimensions).
@gof.local_optimizer([T._shape, T.dot])
def local_shape_lift_dot(node):
""" """
shape(dot(a, b)) -> [shape(a)[0], shape(b)[1]] def shape_i(self, i):
""" def op_deco(r):
if not opt.check_chain(node, T._shape, T.dot): if r.type.broadcastable[i]:
return False return self.lscalar_one
a, b = node.inputs[0].owner.inputs else:
if a.type.ndim == 2 and b.type.ndim == 2: return Shape_i(i)(r)
return T.make_lvector.make_node(T._shape(a)[0], T._shape(b)[1]).outputs return op_deco
elif a.type.ndim == 1 and b.type.ndim == 2:
return T.make_lvector.make_node(T._shape(b)[1]).outputs
elif a.type.ndim == 2 and b.type.ndim == 1:
return T.make_lvector.make_node(T._shape(a)[0]).outputs
elif a.type.ndim == 1 and b.type.ndim == 1:
return T.make_lvector.make_node().outputs
else:
return False
register_canonicalize(local_shape_lift_dot, 'shape_lift')
# local_shape_lift = opt.LocalOptGroup(local_shape_lift_elemwise,
# local_shape_lift_sum,
# local_shape_lift_dot)
def shape_tuple(self, r):
return tuple([self.shape_i(i)(r) for i in xrange(r.ndim)])
################ def default_infer_shape(self, node, i_shapes):
# Fill lifters # rval = []
################ for r in node.outputs:
try:
rval.append(self.shape_tuple(r))
except AttributeError:
rval.append(None)
return rval
def unpack(self, s_i):
# unpack the s_i that the Op returned
assert s_i is not None
if s_i == 1:
# don't make the optimizer merge a zillion ones together
return self.lscalar_one
if type(s_i) is int:
# this shape is a constant
assert s_i >= 0
return T.constant(s_i, dtype='int64')
if type(s_i) in (tuple,list):
# this dimension is the same as many of the inputs
# which tells us that if one of the inputs is known,
# the others all become known.
# TODO: should be implemented in Elemwise, and Dot
#
# worst case, we loop over shape_of and replace things
raise NotImplementedError(s_i)
elif s_i.type == T.lscalar:
return s_i
else:
raise TypeError('Unsupported shape element', s_i)
def encompasses_broadcastable(b1, b2): def set_shape(self, r, s):
""" assert r not in self.shape_of
Returns True if the broadcastable patterns b1 and b2 are such that b2 is if s is None:
broadcasted to b1's shape and not the opposite. self.shape_of[r] = s
else:
self.shape_of[r] = tuple([self.unpack(s_i) for s_i in s])
:param b1: the broadcastable attribute of a tensor type def make_vector_shape(self, r):
:param b2: the broadcastable attribute of a tensor type return make_vector(*self.shape_of[r])
""" #
if len(b1) < len(b2): #
return False # Feature inteface
b1 = b1[-len(b2):] #
return not any(v1 and not v2 for v1, v2 in zip(b1, b2)) #
def on_attach(self, env):
assert not hasattr(env, 'shape_feature')
env.shape_feature = self
self.shape_of = {} # Variable -> tuple(scalars) or None (All tensor vars map to tuple)
self.lscalar_one = T.constant(1, dtype='int64')
assert self.lscalar_one.type == T.lscalar
for node in env.toposort():
self.on_import(env, node)
def on_import(self, env, node):
if node.outputs[0] in self.shape_of:
# this is a revert, not really an import
for r in node.outputs + node.inputs:
assert r in self.shape_of
return
for i, r in enumerate(node.inputs):
# make sure we have shapes for the inputs
if r not in self.shape_of:
try:
self.set_shape(r, self.shape_tuple(r))
except AttributeError:
self.set_shape(r, None ) # not a TensorType variable
def merge_broadcastables(broadcastables): try:
return [all(bcast) for bcast in zip(*broadcastables)] shape_infer = node.op.infer_shape
except AttributeError:
shape_infer = self.default_infer_shape
@gof.local_optimizer([T.fill, None]) try:
def local_fill_lift(node): o_shapes = shape_infer(node, [self.shape_of[r] for r in node.inputs])
except Exception, e:
_logger.error('Failed to infer_shape from Op %s (i_shapes=%s): %s %s'% (node.op,
[self.shape_of[r] for r in node.inputs],
type(e), str(e)))
o_shapes = default_infer_shape(node, [self.shape_of[r] for r in node.inputs])
# this is packed information
# an element of o_shapes is either None or a tuple
# elements of the tuple can be either strings, or ints
assert len(o_shapes) == len(node.outputs)
for r, s in zip(node.outputs, o_shapes):
self.set_shape(r, s)
def on_change_input(self, env, mode, i, r, new_r):
# TODO:
# This tells us that r and new_r must have the same shape
# if we didn't know that the shapes are related, now we do.
pass
class ShapeOptimizer(Optimizer):
"""Optimizer that serves to add ShapeFeature as an env feature.
""" """
fill(f(a), b) -> fill(a, b) def __init__(self):
If a.type == f(a).type. Optimizer.__init__(self)
fill(a, b) -> b def add_requirements(self, env):
If a.type == b.type. env.extend(ShapeFeature())
"""
if not opt.check_chain(node, T.fill):
return False
model, filling = node.inputs def apply(self, env):
pass
mb, fb = model.type.broadcastable, filling.type.broadcastable # -1 should make it run right before the first merge
if model.type.dtype == filling.type.dtype and encompasses_broadcastable(fb, mb): theano.compile.mode.optdb.register('ShapeOpt', ShapeOptimizer(), -1, 'fast_run', 'fast_compile')
return False# [filling]
parent = model.owner @register_specialize
if parent is None or not isinstance(parent, T.Elemwise): @register_canonicalize
return False @gof.local_optimizer([T.fill])
for input in parent.inputs: def local_fill_to_alloc(node):
if input.type == model.type: """fill(s,v) -> alloc(v, shape(s))
return [T.fill(input, filling)]
return False This is an important optimization because with the shape_to_shape_i optimization, the
dependency on 's' is often removed.
"""
if node.op == T.fill:
r, v = node.inputs
if v.type == node.outputs[0].type:
# this is a useless fill, erase it.
rval = [v]
elif v.type.broadcastable == node.outputs[0].type.broadcastable:
# this is a cast
rval = [T.cast(v, node.outputs[0].type.dtype)]
else:
# we are broadcasting v somehow
shape_of = node.env.shape_feature.shape_of
# TODO: cut out un-necessary dimshuffles of v
rval = [T.Alloc(node.outputs[0].dtype)(v, *shape_of[node.outputs[0]])]
assert rval[0].type == node.outputs[0].type
return rval
register_canonicalize(local_fill_lift, 'fill_lift') @register_specialize
@register_canonicalize
@gof.local_optimizer([T._shape])
def local_shape_to_shape_i(node):
if node.op == T._shape:
shape_feature = node.env.shape_feature
return [shape_feature.make_vector_shape(node.inputs[0])]
@register_specialize
@register_canonicalize
@gof.local_optimizer([T.Subtensor])
def local_subtensor_make_vector(node):
# replace all subtensor(make_vector) like:
# [a,b,c][0] -> a
# [a,b,c][0:2] -> [a,b]
# we can do this for constant indexes
if isinstance(node.op, T.Subtensor):
shape_feature = node.env.shape_feature
x = node.inputs[0]
if x.owner and x.owner.op == make_vector:
try:
idx, = node.op.idx_list
except:
#'how can you have multiple indexes into a shape?'
raise
if isinstance(idx, int):
return [x.owner.inputs[idx]]
elif isinstance(idx, T.TensorVariable):
# if it is a constant we can do something with it
try:
v = get_constant_value(idx)
return [x.owner.inputs[v]]
except:
pass
else:
# it is a slice of ints and/or Variables
#TODO: check subtensor to see if it can contain constant variables,
# and if it can, then try to unpack them.
try:
return [make_vector(*x.owner.inputs.__getitem__(idx))]
except TypeError:
pass
except:
_logger.error('failed to index with "%s"' % str(idx))
raise
################## ##################
# Subtensor opts # # Subtensor opts #
...@@ -306,38 +533,6 @@ def local_subtensor_unary(node): ...@@ -306,38 +533,6 @@ def local_subtensor_unary(node):
x_idx = node.op(u.owner.inputs[0], *idx) x_idx = node.op(u.owner.inputs[0], *idx)
return [u.owner.op(x_idx)] return [u.owner.op(x_idx)]
@gof.local_optimizer([None, None])
def local_subtensor_make_vector(node):
"""
[a,b,c][0] -> a
[a,b,c][0:2] -> [a,b]
If the index or slice is constant.
"""
if not opt.check_chain(node, T.Subtensor, T.MakeVector):
return False
joined_r = node.inputs[0]
try:
#check that join is being used to join scalars
veclen = T.join.vec_length(joined_r)
except:
return False
idxlist = node.op.idx_list
if len(idxlist) != 1:
return False
idx = idxlist[0]
if isinstance(idx, int):
return [node.inputs[0].owner.inputs[idx]]
try:
return T.make_vector(*(node.owner.inputs[0].owner.inputs.__getslice__(idx)))
except TypeError:
return False
register_canonicalize(local_subtensor_make_vector)
@register_canonicalize @register_canonicalize
@gof.local_optimizer([None]) @gof.local_optimizer([None])
def local_IncSubtensor_serialize(node): def local_IncSubtensor_serialize(node):
...@@ -581,13 +776,18 @@ class Canonizer(gof.LocalOptimizer): ...@@ -581,13 +776,18 @@ class Canonizer(gof.LocalOptimizer):
# the dtype of the 'input' argument. The leaf-Variables of the graph covered by the # the dtype of the 'input' argument. The leaf-Variables of the graph covered by the
# recursion may be of any Variable type. # recursion may be of any Variable type.
if len(input.clients) > 1: if 0:
# this logic is too conservative, but doing it is better than not doing it. # UPDATE: This logic makes it impossible to recognize some important patterns
# # (e.g. variants on the x/x)
# we don't want to canonize a subgraph that we will need to compute anyway for the other clients. # and it is screwing up the RBM free energy gradient.
# This check is too conservative because if the other clients are also in the subgraph we are canonizing, #TODO: review this
# then we should [probably?] recurse anyway. if len(input.clients) > 1:
return [input], [] # this logic is too conservative, but doing it is better than not doing it.
#
# we don't want to canonize a subgraph that we will need to compute anyway for the other clients.
# This check is too conservative because if the other clients are also in the subgraph we are canonizing,
# then we should [probably?] recurse anyway.
return [input], []
if input.owner is None or input.owner.op not in [self.main, self.inverse, self.reciprocal]: if input.owner is None or input.owner.op not in [self.main, self.inverse, self.reciprocal]:
if input.owner and isinstance(input.owner.op, T.DimShuffle): if input.owner and isinstance(input.owner.op, T.DimShuffle):
...@@ -835,8 +1035,8 @@ class Canonizer(gof.LocalOptimizer): ...@@ -835,8 +1035,8 @@ class Canonizer(gof.LocalOptimizer):
# Here we make the canonical version of the graph around this node # Here we make the canonical version of the graph around this node
# See the documentation of get_num_denum and simplify # See the documentation of get_num_denum and simplify
orig_num, orig_denum = self.get_num_denum(node.outputs[0]) orig_num, orig_denum = self.get_num_denum(node.outputs[0])
num, denum = list(orig_num), list(orig_denum) num, denum = self.simplify(list(orig_num), list(orig_denum))
num, denum = self.simplify(num, denum)
def same(x, y): def same(x, y):
return len(x) == len(y) and all(N.all(xe == ye) for xe, ye in zip(x, y)) return len(x) == len(y) and all(N.all(xe == ye) for xe, ye in zip(x, y))
...@@ -935,6 +1135,60 @@ def local_sum_mul_by_scalar(node): ...@@ -935,6 +1135,60 @@ def local_sum_mul_by_scalar(node):
if thing_summed.owner and thing_summed.owner.op == T.neg: if thing_summed.owner and thing_summed.owner.op == T.neg:
return [T.neg(node.op(thing_summed.owner.inputs[0]))] return [T.neg(node.op(thing_summed.owner.inputs[0]))]
@register_canonicalize
@gof.local_optimizer([])
def local_sum_all_to_none(node):
"""Sum{0,1,...N} -> Sum{}"""
if isinstance(node.op, T.Sum):
# if all the axes are named, then use None as a shorthand
# this permits more merging
if node.op.axis is None:
return
if set(node.op.axis) == set(range(node.inputs[0].type.ndim)):
return [T.Sum(axis=None)(node.inputs[0])]
@register_canonicalize
@gof.local_optimizer([])
def local_sum_sum(node):
"""Sum(Sum()) -> Sum"""
if isinstance(node.op, T.Sum):
summed, = node.inputs
if len(summed.clients) == 1:
if summed.owner and isinstance(summed.owner.op, T.Sum):
if summed.owner.op.axis is None:
# special case of local_cut_useless_reduce
return [T.Sum(None)(summed.owner.inputs[0])]
if node.op.axis is None:
# we're summing up everything anyway so lets
# do it all at once
return [T.Sum(None)(summed.owner.inputs[0])]
# figure out which dimensions of the original input are preserved
alldims = range(summed.owner.inputs[0].type.ndim)
# trim out the dimensions that were removed by the first sum
alldims = [d for i,d in enumerate(alldims) if i in summed.owner.op.axis]
# trim out the dimensions removed by second sum
alldims = [d for i,d in enumerate(alldims) if i in node.op.axis]
# figure out an axis argument that combines the effect of both
newaxis = [i for i in xrange(summed.owner.inputs[0].type.ndim)
if i not in alldims]
combined_sum = T.Sum(newaxis)
return [combined_sum(summed.owner.inputs[0])]
@register_canonicalize
@gof.local_optimizer([])
def local_cut_useless_reduce(node):
"""Sum(a, axis=[]) -> a """
if isinstance(node.op, T.CAReduce):
summed, = node.inputs
# if reduce were doing anything, the output ndim would be reduced
if summed.type == node.outputs[0].type:
return [summed]
@gof.local_optimizer([T.mul]) @gof.local_optimizer([T.mul])
def local_mul_to_neg(node): def local_mul_to_neg(node):
if node.op == T.mul and N.all(local_mul_canonizer.get_constant(node.inputs[0]) == -1.0): if node.op == T.mul and N.all(local_mul_canonizer.get_constant(node.inputs[0]) == -1.0):
...@@ -1143,30 +1397,23 @@ register_specialize(local_add_specialize) ...@@ -1143,30 +1397,23 @@ register_specialize(local_add_specialize)
mul_canonizer = in2out(gof.LocalOptGroup(local_mul_canonizer, local_fill_cut, local_fill_sink)) mul_canonizer = in2out(gof.LocalOptGroup(local_mul_canonizer, local_fill_cut, local_fill_sink))
@register_specialize @register_stabilize
@gof.local_optimizer([T.log]) @gof.local_optimizer([T.log])
def local_log1p(node): def local_log1p(node):
# log(1+exp(x)) -> log1p(x) # log(1+exp(x)) -> log1p(x)
if node.op == T.log: if node.op == T.log:
log_arg, = node.inputs log_arg, = node.inputs
if log_arg.owner and log_arg.owner.op == T.add: if log_arg.owner and log_arg.owner.op == T.add:
add_inputs = log_arg.owner.inputs scalars, scalar_inputs, nonconsts = \
consts = [0] scalarconsts_rest(log_arg.owner.inputs)
fills = [] # scalar_inputs are potentially dimshuffled and fill'd scalars
nonconsts = [] if scalars and numpy.allclose(numpy.sum(scalars), 1):
for add_in in add_inputs: if not nonconsts:
try: pass # leave for constant-merge
v, f = get_constant_value(add_in, fill=True) if len(nonconsts)==1:
consts.append(v) return _fill_chain(T.log1p(nonconsts[0]), scalar_inputs)
fills.extend(f) else:
except: return _fill_chain(T.log1p(T.add(*nonconsts)), scalar_inputs)
nonconsts.append(add_in)
if nonconsts:
if numpy.allclose(numpy.sum(consts), 1):
if len(nonconsts)==1:
return _fill_chain(T.log1p(nonconsts[0]), fills)
else:
return _fill_chain(T.log1p(T.add(*nonconsts)), fills)
def add_calculate(num, denum, aslist = False, out_type=None): def add_calculate(num, denum, aslist = False, out_type=None):
......
...@@ -136,10 +136,7 @@ class RandomFunction(gof.Op): ...@@ -136,10 +136,7 @@ class RandomFunction(gof.Op):
draw. draw.
""" """
if shape == () or shape == []: shape = tensor.as_tensor_variable(shape, ndim=1)
shape = tensor.as_tensor_variable(shape, dtype='int64')
else:
shape = tensor.as_tensor_variable(shape, ndim=1)
assert shape.type.ndim == 1 assert shape.type.ndim == 1
assert (shape.type.dtype == 'int64') or (shape.type.dtype == 'int32') assert (shape.type.dtype == 'int64') or (shape.type.dtype == 'int32')
if not isinstance(r.type, RandomStateType): if not isinstance(r.type, RandomStateType):
...@@ -158,6 +155,22 @@ class RandomFunction(gof.Op): ...@@ -158,6 +155,22 @@ class RandomFunction(gof.Op):
[r, shape] + args, [r, shape] + args,
[r.type(), self.outtype()]) [r.type(), self.outtype()])
def infer_shape(self, node, i_shapes):
r, shp = node.inputs[0:2]
#if shp is a constant array of len 0, then it means 'automatic shape'
unknown_shape = len(getattr(shp, 'data', [0,1,2])) == 0
# if ndim_added == 0 and shape != () then shape
if self.ndim_added == 0 and not unknown_shape:
sample_shp = shp
else:
# if shape == () then it will depend on args
# if ndim_added != 0 and shape != () then it will depend on args
sample_shp = node.outputs[1].shape
return [None, [sample_shp[i] for i in xrange(node.outputs[1].ndim)]]
def perform(self, node, inputs, (rout, out)): def perform(self, node, inputs, (rout, out)):
# Use self.fn to draw shape worth of random numbers. # Use self.fn to draw shape worth of random numbers.
# Numbers are drawn from r if self.inplace is True, and from a copy of r if # Numbers are drawn from r if self.inplace is True, and from a copy of r if
......
...@@ -89,7 +89,6 @@ class test_greedy_distribute(unittest.TestCase): ...@@ -89,7 +89,6 @@ class test_greedy_distribute(unittest.TestCase):
g = Env([a,b,c,d,x,y,z], [e]) g = Env([a,b,c,d,x,y,z], [e])
##print pprint(g.outputs[0]) ##print pprint(g.outputs[0])
mul_canonizer.optimize(g) mul_canonizer.optimize(g)
gof.TopoOptimizer(gof.LocalOptGroup(local_fill_cut, local_fill_lift), order = 'out_to_in').optimize(g)
gof.TopoOptimizer(gof.LocalOptGroup(local_greedy_distributor), order = 'out_to_in').optimize(g) gof.TopoOptimizer(gof.LocalOptGroup(local_greedy_distributor), order = 'out_to_in').optimize(g)
##print pprint(g.outputs[0]) ##print pprint(g.outputs[0])
...@@ -136,7 +135,6 @@ class test_canonize(unittest.TestCase): ...@@ -136,7 +135,6 @@ class test_canonize(unittest.TestCase):
g = Env([x, y, z, a, b, c, d], [e]) g = Env([x, y, z, a, b, c, d], [e])
print pprint(g.outputs[0]) print pprint(g.outputs[0])
mul_canonizer.optimize(g) mul_canonizer.optimize(g)
gof.TopoOptimizer(gof.LocalOptGroup(local_fill_cut, local_fill_lift), order = 'out_to_in').optimize(g)
print pprint(g.outputs[0]) print pprint(g.outputs[0])
def test_elemwise_multiple_inputs_optimisation(self): def test_elemwise_multiple_inputs_optimisation(self):
...@@ -296,17 +294,17 @@ class test_canonize(unittest.TestCase): ...@@ -296,17 +294,17 @@ class test_canonize(unittest.TestCase):
def test_multiple_case(self): def test_multiple_case(self):
""" test those case take from the comment in Canonizer """ test those case take from the comment in Canonizer
x / x -> 1 x / x -> 1
(x * y) / x -> y (x * y) / x -> y
x / y / x -> 1 / y x / y / x -> 1 / y
x / y / z -> x / (y * z) x / y / z -> x / (y * z)
x / (y / z) -> (x * z) / y x / (y / z) -> (x * z) / y
(a / b) * (b / c) * (c / d) -> a / d (a / b) * (b / c) * (c / d) -> a / d
(2.0 * x) / (4.0 * y) -> (0.5 * x) / y (2.0 * x) / (4.0 * y) -> (0.5 * x) / y
2 * x / 2 -> x 2 * x / 2 -> x
with and without DimShuffle with and without DimShuffle
TODO: with DimShuffle TODO: with DimShuffle
""" """
import theano.tensor, theano.compile import theano.tensor, theano.compile
shp=(3,3) shp=(3,3)
...@@ -331,6 +329,7 @@ class test_canonize(unittest.TestCase): ...@@ -331,6 +329,7 @@ class test_canonize(unittest.TestCase):
old_optimizer = mode._optimizer old_optimizer = mode._optimizer
try: try:
mode._optimizer=gof.Query(["canonicalize"]) mode._optimizer=gof.Query(["canonicalize"])
mode._optimizer=mode._optimizer.including('ShapeOpt')
mode._optimizer=mode._optimizer.excluding('local_elemwise_fusion') mode._optimizer=mode._optimizer.excluding('local_elemwise_fusion')
#test x / x -> 1 #test x / x -> 1
...@@ -344,10 +343,15 @@ class test_canonize(unittest.TestCase): ...@@ -344,10 +343,15 @@ class test_canonize(unittest.TestCase):
out = f(*val_inputs) out = f(*val_inputs)
assert (out==numpy.ones(shp, dtype=out_dtype)).all() assert (out==numpy.ones(shp, dtype=out_dtype)).all()
topo=f.maker.env.toposort() topo=f.maker.env.toposort()
assert len(topo)==1 if sym_inputs[0].broadcastable[0]:
assert isinstance(topo[0].op,(T.Elemwise,)) assert len(topo)==2
assert isinstance(topo[0].op.scalar_op,theano.scalar.basic.Second) assert isinstance(topo[0].op, Shape_i)
assert len(topo[0].inputs)==2 assert isinstance(topo[1].op, TT.Alloc)
else:
assert len(topo)==3
assert isinstance(topo[0].op, Shape_i)
assert isinstance(topo[1].op, Shape_i)
assert isinstance(topo[2].op, TT.Alloc)
assert(out_dtype==out.dtype) assert(out_dtype==out.dtype)
#test (x * y) / x -> y #test (x * y) / x -> y
...@@ -365,10 +369,16 @@ class test_canonize(unittest.TestCase): ...@@ -365,10 +369,16 @@ class test_canonize(unittest.TestCase):
f = compile.function(list(sym_inputs), g, f = compile.function(list(sym_inputs), g,
mode=mode) mode=mode)
out = f(*val_inputs) out = f(*val_inputs)
assert(out_dtype==out.dtype)
assert numpy.allclose(out,val_inputs[1]) assert numpy.allclose(out,val_inputs[1])
topo=f.maker.env.toposort() topo=f.maker.env.toposort()
assert len(topo)==nb_elemwise print "ID TOPO", id, topo, sym_inputs
assert(out_dtype==out.dtype) for r,t in f.maker.env.shape_feature.shape_of.items():
print ' ', r, t
if topo:
for node in topo[:-1]:
assert isinstance(node.op, Shape_i)
assert isinstance(topo[-1].op, TT.Alloc)
#test x / y / x -> 1 / y #test x / y / x -> 1 / y
for id,(g, sym_inputs, val_inputs, nb_elemwise, out_dtype) in enumerate([ for id,(g, sym_inputs, val_inputs, nb_elemwise, out_dtype) in enumerate([
...@@ -378,19 +388,21 @@ class test_canonize(unittest.TestCase): ...@@ -378,19 +388,21 @@ class test_canonize(unittest.TestCase):
((fv/fy)/fv,[fv,fy],[fvv,fyv],1,'float32'), ((fv/fy)/fv,[fv,fy],[fvv,fyv],1,'float32'),
#must broadcast as their is a dimshuffle in the computation #must broadcast as their is a dimshuffle in the computation
((dx/dv)/dx,[dx,dv],[dxv,dvv],2,'float64'), ((dx/dv)/dx,[dx,dv],[dxv,dvv],1,'float64'),
#topo: [Elemwise{inv,no_inplace}(<TensorType(float64, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0)] #topo: [Shape_i, Shape_i, Elemwise{inv,no_inplace}(<TensorType(float64, row)>), Alloc(...)]
((fx/fv)/fx,[fx,fv],[fxv,fvv],2,'float32'), ((fx/fv)/fx,[fx,fv],[fxv,fvv],1,'float32'),
#topo:[Elemwise{inv,no_inplace}(<TensorType(float32, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0)] #topo:[Shape_i, Shape_i, Elemwise{inv,no_inplace}(<TensorType(float32, row)>), Alloc(...)]
]): ]):
f = compile.function(list(sym_inputs), g, f = compile.function(list(sym_inputs), g,
mode=mode) mode=mode)
out = f(*val_inputs) out = f(*val_inputs)
assert numpy.allclose(out,(1/val_inputs[1])) assert numpy.allclose(out,(1/val_inputs[1]))
topo=f.maker.env.toposort() topo=f.maker.env.toposort()
assert len(topo)==nb_elemwise print topo
assert isinstance(topo[0].op,(T.Elemwise,)) elem = [t for t in topo if isinstance(t.op, T.Elemwise)]
assert isinstance(topo[0].op.scalar_op,(theano.scalar.basic.Inv, theano.scalar.basic.TrueDiv)) assert len(elem)==nb_elemwise
assert isinstance(elem[0].op,(T.Elemwise,))
assert isinstance(elem[0].op.scalar_op,(theano.scalar.basic.Inv, theano.scalar.basic.TrueDiv))
assert(out_dtype==out.dtype) assert(out_dtype==out.dtype)
#test (a / b) * (b / c) * (c / d) -> a / d #test (a / b) * (b / c) * (c / d) -> a / d
...@@ -529,29 +541,6 @@ def test_mixeddiv(): ...@@ -529,29 +541,6 @@ def test_mixeddiv():
d = dscalar() d = dscalar()
assert 0 == function([i,d], d*(i/(i+1)))(3, 1.0) assert 0 == function([i,d], d*(i/(i+1)))(3, 1.0)
def test_local_shape_lift_dot():
args_to_result = {
(fvector, fvector): "[]",
(fvector, fmatrix): "[<TensorType(float32, matrix)>.shape[1]]",
(fmatrix, fvector): "[<TensorType(float32, matrix)>.shape[0]]",
(fmatrix, fmatrix): "[<TensorType(float32, matrix)>.shape[0], <TensorType(float32, matrix)>.shape[1]]",
}
for x in [fvector, fmatrix]:
for y in [fvector, fmatrix]:
i = x()
j = y()
print 'I SHAPE', i.type.shape
print 'J SHAPE', j.type.shape
d = shape(dot(i,j))
if x is fvector and y is fvector:
assert d == ()
else:
g = Env([i,j], [d])
gof.TopoOptimizer(gof.LocalOptGroup(local_shape_lift_dot), order='out_to_in').optimize(g)
print pprint(g.outputs[0]), args_to_result[(x,y)]
assert pprint(g.outputs[0]) == args_to_result[(x,y)]
def test_const_type_in_mul_canonizer(): def test_const_type_in_mul_canonizer():
input = dmatrix() input = dmatrix()
w = dmatrix() w = dmatrix()
...@@ -915,11 +904,16 @@ def test_log1p(): ...@@ -915,11 +904,16 @@ def test_log1p():
# check trickier cases (and use different dtype) # check trickier cases (and use different dtype)
y = fmatrix() y = fmatrix()
f = function([x,y], T.log(fill(y,1)+(x)), mode=m) f = function([x,y], T.log(fill(y,1)+(x)), mode=m)
assert [node.op for node in f.maker.env.toposort()] == [T.DimShuffle([False], ['x', 0], True), T.log1p, T.fill] print f.maker.env.toposort()
# the first three ops are Shape_i, Shape_i, and Dimshuffle
assert [node.op for node in f.maker.env.toposort()][3:] \
== [T.log1p, Alloc('float64')]
f = function([x,y], T.log(0+(x) + fill(y,1.0)), mode=m) f = function([x,y], T.log(0+(x) + fill(y,1.0)), mode=m)
assert [node.op for node in f.maker.env.toposort()] == [T.DimShuffle([False], ['x', 0], True), T.log1p, T.fill] assert [node.op for node in f.maker.env.toposort()][3:] \
== [T.log1p, Alloc('float64')]
f = function([x,y], T.log(2+(x) - fill(y,1.0)), mode=m) f = function([x,y], T.log(2+(x) - fill(y,1.0)), mode=m)
assert [node.op for node in f.maker.env.toposort()] == [T.DimShuffle([False], ['x', 0], True), T.log1p, T.fill] assert [node.op for node in f.maker.env.toposort()][3:] \
== [T.log1p, Alloc('float64')]
f([1e-7, 10], [[0, 0], [0, 0]]) #debugmode will verify values f([1e-7, 10], [[0, 0], [0, 0]]) #debugmode will verify values
...@@ -969,6 +963,51 @@ class test_local_subtensor_unary(unittest.TestCase): ...@@ -969,6 +963,51 @@ class test_local_subtensor_unary(unittest.TestCase):
f([[0,1],[2,3]], [4,5]) # let debugmode test something f([[0,1],[2,3]], [4,5]) # let debugmode test something
def test_local_fill_useless():
    # Check the optimization that removes useless fill/second ops: in every
    # compiled graph below, the fill should be optimized away so that the
    # toposort contains exactly one node, the elementwise mul.
    m = theano.config.mode
    if m == 'FAST_COMPILE':
        # FAST_COMPILE does not run the optimization under test; use FAST_RUN.
        m = 'FAST_RUN'
    x = dvector()
    y = dvector()
    z = lvector()  # int64 vector, for the cross-dtype case below
    # basic case
    f = function([x], T.fill(x,x)*2, mode=m)
    assert [node.op for node in f.maker.env.toposort()] == [T.mul]
    # basic case
    f = function([x,y], T.second(y,x)*2, mode=m)
    assert [node.op for node in f.maker.env.toposort()] == [T.mul]
    # now with different type: shape template z is int64, value x is float64
    f = function([x,z], T.fill(z,x)*2, mode=m)
    assert [node.op for node in f.maker.env.toposort()] == [T.mul]
    # now cutting out the input ??
    f = function([x,y], T.fill(x,y)*2, mode=m)
    assert [node.op for node in f.maker.env.toposort()] == [T.mul]
    # now fill is serving as a cast
    # NOTE(review): this case is byte-identical to the previous one and
    # involves no cast (x and y are both dvectors). The intent was probably
    # something like T.fill(x,z), casting the int64 value to float64 --
    # confirm against the optimization and fix.
    f = function([x,y], T.fill(x,y)*2, mode=m)
    assert [node.op for node in f.maker.env.toposort()] == [T.mul]
class test_shapeoptimizer(unittest.TestCase):
    """Tests for the shape optimizer: shape-only graphs must be computed
    from the operands' shapes without keeping the elementwise ops."""

    def test0(self):
        # The shape of (v + m) depends only on the operands' shapes, so the
        # optimized graph must not contain the add node itself.
        v = T.vector()
        m = T.matrix()
        f = function([v, m], (v + m).shape)
        for node in f.maker.env.toposort():
            assert node.op != T.add

    def test_constant(self):
        # dimshuffle('x','x',0) prepends two broadcastable dims, so shape[1]
        # is the constant 1: the whole graph should constant-fold away,
        # leaving an empty toposort.
        # NOTE(review): removed a leftover debug ``print`` of the toposort.
        v = T.vector()
        m = T.matrix()
        f = function([v, m], v.dimshuffle('x', 'x', 0).shape[1])
        assert f.maker.env.toposort() == []
# Script entry point. NOTE(review): this span was doubled by diff extraction
# (old and new line concatenated); reconstructed as a single copy.
if __name__ == '__main__':
    # unittest.main()
    test_fusion().tes_memory_leak()
......
...@@ -352,7 +352,7 @@ class T_SharedRandomStreams(unittest.TestCase): ...@@ -352,7 +352,7 @@ class T_SharedRandomStreams(unittest.TestCase):
def test_vector_arguments(self): def test_vector_arguments(self):
random = RandomStreams(utt.fetch_seed()) random = RandomStreams(utt.fetch_seed())
low = tensor.vector() low = tensor.dvector()
out = random.uniform(low=low, high=1) out = random.uniform(low=low, high=1)
assert out.ndim == 1 assert out.ndim == 1
f = function([low], out) f = function([low], out)
...@@ -402,8 +402,8 @@ class T_SharedRandomStreams(unittest.TestCase): ...@@ -402,8 +402,8 @@ class T_SharedRandomStreams(unittest.TestCase):
def test_broadcast_arguments(self): def test_broadcast_arguments(self):
random = RandomStreams(utt.fetch_seed()) random = RandomStreams(utt.fetch_seed())
low = tensor.vector() low = tensor.dvector()
high = tensor.col() high = tensor.dcol()
out = random.uniform(low=low, high=high) out = random.uniform(low=low, high=high)
assert out.ndim == 2 assert out.ndim == 2
f = function([low, high], out) f = function([low, high], out)
...@@ -424,8 +424,8 @@ class T_SharedRandomStreams(unittest.TestCase): ...@@ -424,8 +424,8 @@ class T_SharedRandomStreams(unittest.TestCase):
def test_uniform_vector(self): def test_uniform_vector(self):
random = RandomStreams(utt.fetch_seed()) random = RandomStreams(utt.fetch_seed())
low = tensor.vector() low = tensor.dvector()
high = tensor.vector() high = tensor.dvector()
out = random.uniform(low=low, high=high) out = random.uniform(low=low, high=high)
assert out.ndim == 1 assert out.ndim == 1
f = function([low, high], out) f = function([low, high], out)
...@@ -438,11 +438,15 @@ class T_SharedRandomStreams(unittest.TestCase): ...@@ -438,11 +438,15 @@ class T_SharedRandomStreams(unittest.TestCase):
# Arguments of size (3,) # Arguments of size (3,)
val0 = f(low_val, high_val) val0 = f(low_val, high_val)
numpy_val0 = numpy_rng.uniform(low=low_val, high=high_val) numpy_val0 = numpy_rng.uniform(low=low_val, high=high_val)
print 'THEANO', val0
print 'NUMPY', numpy_val0
assert numpy.all(val0 == numpy_val0) assert numpy.all(val0 == numpy_val0)
# arguments of size (2,) # arguments of size (2,)
val1 = f(low_val[:-1], high_val[:-1]) val1 = f(low_val[:-1], high_val[:-1])
numpy_val1 = numpy_rng.uniform(low=low_val[:-1], high=high_val[:-1]) numpy_val1 = numpy_rng.uniform(low=low_val[:-1], high=high_val[:-1])
print 'THEANO', val1
print 'NUMPY', numpy_val1
assert numpy.all(val1 == numpy_val1) assert numpy.all(val1 == numpy_val1)
# Specifying the size explicitly # Specifying the size explicitly
...@@ -486,8 +490,8 @@ class T_SharedRandomStreams(unittest.TestCase): ...@@ -486,8 +490,8 @@ class T_SharedRandomStreams(unittest.TestCase):
def test_normal_vector(self): def test_normal_vector(self):
random = RandomStreams(utt.fetch_seed()) random = RandomStreams(utt.fetch_seed())
avg = tensor.vector() avg = tensor.dvector()
std = tensor.vector() std = tensor.dvector()
out = random.normal(avg=avg, std=std) out = random.normal(avg=avg, std=std)
assert out.ndim == 1 assert out.ndim == 1
f = function([avg, std], out) f = function([avg, std], out)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论