提交 af804be5 authored 作者: James Bergstra's avatar James Bergstra

merge

......@@ -197,6 +197,11 @@ class BadOptimization(DebugModeError):
print >> ssio, " Mean Abs Diff: ", numpy.mean(numpy.absolute(nv-ov))
print >> ssio, " Median Abs Diff: ", numpy.median(numpy.absolute(nv-ov))
print >> ssio, " Std Abs Diff: ", numpy.std(numpy.absolute(nv-ov))
reldiff = numpy.absolute(nv-ov) / (numpy.absolute(nv)+numpy.absolute(ov))
print >> ssio, " Max Rel Diff: ", numpy.max(reldiff)
print >> ssio, " Mean Rel Diff: ", numpy.mean(reldiff)
print >> ssio, " Median Rel Diff: ", numpy.median(reldiff)
print >> ssio, " Std Rel Diff: ", numpy.std(reldiff)
# only if everything above succeeds do we add anything to sio
print >> sio, ssio.getvalue()
except:
......@@ -349,14 +354,17 @@ def debugprint(r, prefix='', depth=-1, done=None, file=sys.stdout):
# this variable is the output of computation,
# so just print out the apply
a = r.owner
print >> file, prefix, a.op, id(a)
if len(a.outputs) == 1:
print >> file, '%s%s [@%i]' % (prefix, a.op, id(r))
else:
print >> file, '%s%s.%i [@%i]' % (prefix, a.op, a.outputs.index(r), id(r))
if id(a) not in done:
done.add(id(a))
for i in a.inputs:
debugprint(i, prefix+' ', depth=depth-1, done=done, file=file)
debugprint(i, prefix+' |', depth=depth-1, done=done, file=file)
else:
#this is a variable
print >> file, prefix, r, id(r)
print >> file, '%s%s [@%i]' % (prefix, r, id(r))
return file
......
......@@ -116,7 +116,7 @@ class AddDestroyHandler(gof.Optimizer):
for o in env.outputs:
try:
env.replace_validate(o, _output_guard(o), reason='output_guard')
_logger.warning("Output variable %s required output_guard,"
_logger.info("Output variable %s required output_guard,"
" how was this output left unprotected against destructive operations?"
% o)
except gof.InconsistencyError:
......@@ -127,12 +127,22 @@ class AddDestroyHandler(gof.Optimizer):
env.extend(gof.DestroyHandler())
optdb = gof.SequenceDB()
optdb.register('merge1', gof.MergeOptimizer(), 0, 'fast_run', 'fast_compile')
optdb.register('canonicalize', gof.EquilibriumDB(), 1, 'fast_run')
optdb.register('specialize', gof.EquilibriumDB(), 2, 'fast_run')
optdb.register('merge2', gof.MergeOptimizer(), 49, 'fast_run')
optdb.register('add_destroy_handler', AddDestroyHandler(), 49.5, 'fast_run', 'inplace')
optdb.register('merge3', gof.MergeOptimizer(), 100, 'fast_run')
optdb.register('merge1', gof.MergeOptimizer(),
0, 'fast_run', 'fast_compile')
optdb.register('canonicalize', gof.EquilibriumDB(), # rearranges elemwise expressions
1, 'fast_run')
optdb.register('merge1.2', gof.MergeOptimizer(skip_const_merge=True),
1.2, 'fast_run', 'fast_compile')
optdb.register('stabilize', gof.EquilibriumDB(), # replace unstable subgraphs
1.5, 'fast_run')
optdb.register('specialize', gof.EquilibriumDB(), # misc special cases for speed
2, 'fast_run')
optdb.register('merge2', gof.MergeOptimizer(), # especially constant merge
49, 'fast_run')
optdb.register('add_destroy_handler', AddDestroyHandler(),
49.5, 'fast_run', 'inplace')
optdb.register('merge3', gof.MergeOptimizer(), # final pass just to make sure
100, 'fast_run')
class Mode(object):
......@@ -153,6 +163,12 @@ class Mode(object):
def __init__(self, linker = config.linker, optimizer = config.optimizer):
self.__setstate__((linker, optimizer))
#self.provided_optimizer - typically the `optimizer` arg. But if the `optimizer` arg is
# keyword corresponding to a predefined Query, then this stores the query
#self._optimizer - typically same as provided_optimizer??
#self.__get_optimizer - returns self._optimizer (possibly querying optdb with self._optimizer)
#self.optimizer - property that returns __get_optimizer()
def __getstate__(self):
return (self.provided_linker, self.provided_optimizer)
......@@ -218,7 +234,7 @@ predefined_modes = {'FAST_COMPILE': FAST_COMPILE,
def get_mode(string):
    """Resolve a mode specification to a mode object.

    :param string: None (meaning: use ``config.mode``), the name of a
        predefined mode, or any object that is not a str, which is handed
        back untouched (it is hopefully already a mode).
    :returns: the entry of ``predefined_modes`` named by `string`, or
        `string` itself when it is not a str.
    :raises Exception: if `string` is a str that names no predefined mode.
    """
    if string is None:
        string = config.mode
    if not isinstance(string, str):
        # it is hopefully already a mode...
        return string
    if string not in predefined_modes:
        raise Exception("No predefined mode exists for string: %s" % string)
    return predefined_modes[string]
......
......@@ -197,12 +197,19 @@ class _metadict:
class MergeOptimizer(Optimizer):
"""WRITEME
Merges parts of the graph that are identical, i.e. parts that
take the same inputs and carry out the asme computations so we
can avoid doing them more than once. Also merges variables that
are constant.
"""
Merges parts of the graph that are identical and redundant.
The basic principle is that if two Applies have ops that compare equal, and identical
inputs, then they do not both need to be computed. The clients of one are transferred to
the other and one of them is removed from the graph. This procedure is carried out in
input->output order through the graph.
The first step of merging is constant-merging, so that all clients of an int(1) for example,
are transferred to a particular instance of int(1).
"""
def __init__(self, skip_const_merge=False):
self.skip_const_merge = skip_const_merge
def add_requirements(self, env):
env.extend(toolbox.ReplaceValidate())
......@@ -230,41 +237,6 @@ class MergeOptimizer(Optimizer):
const_sig[c] = sig
const_sig_inv[sig] = c
def exptime_apply_node_merge(self, env):
# Merge duplicate Apply nodes of `env`.  Every node gets a structural
# identifier built from its op and the identifiers of its inputs; when a
# later node has an identifier that was already recorded, its outputs are
# replaced by the outputs of the recorded node.
# NOTE(review): judging by the name, this is the exponential-time variant of
# apply_node_merge -- confirm before relying on it.
# we clear the dicts because the Constants signatures are not necessarily hashable
# and it's more efficient to give them an integer like the other Variables
symbol_idx = {} #variable -> int
symbol_idx_inv = {} #int -> variable (inverse of symbol_idx)
#add all graph sources to the symbol_idx dictionaries (arbitrary order)
for i, r in enumerate(r for r in env.variables if r.owner is None):
symbol_idx[r] = i
symbol_idx_inv[i] = r
# visit the Apply nodes in the order produced by _list_of_nodes(env)
for node in _list_of_nodes(env):
# identifier of this node: its op plus the identifiers of all its inputs
node_cid = (node.op, tuple([symbol_idx[input] for input in node.inputs]))
#print 'NODE', node, node_cid
# a previously recorded node with the same identifier, if there is one
dup = symbol_idx_inv.get(node_cid, None)
success = False
if dup is not None:
success = True
pairs = zip(node.outputs, dup.outputs)
# carry a name over to the surviving output when it has none of its own
for output, new_output in pairs:
if output.name and not new_output.name:
new_output.name = output.name
try:
env.replace_all_validate(pairs, reason='Merge (exptime)')
except InconsistencyError, e:
# the replacement was rejected; fall through and record this node instead
success = False
if not success:
# no duplicate (or the merge failed): record the node and each of its
# outputs so that nodes visited later can refer to them
symbol_idx[node] = node_cid
symbol_idx_inv[node_cid] = node
for i, output in enumerate(node.outputs):
ref = (i, node_cid)
symbol_idx[output] = ref
symbol_idx_inv[ref] = output
def apply_node_merge(self, env):
# we clear the dicts because the Constants signatures are not necessarily hashable
# and it's more efficient to give them an integer like the other Variables
......@@ -316,7 +288,8 @@ class MergeOptimizer(Optimizer):
#TODO: Consider splitting this into a separate optimizer (SeqOptimizer)
def apply(self, env):
self.apply_constant_merge(env)
if not self.skip_const_merge:
self.apply_constant_merge(env)
self.apply_node_merge(env)
merge_optimizer = MergeOptimizer()
......@@ -541,7 +514,7 @@ class PatternSub(LocalOptimizer):
PatternSub((subtract, (add, 'x', 'y'), 'y'), 'x')
PatternSub((power, 'x', Constant(double, 2.0)), (square, 'x'))
PatternSub((boggle, {'pattern': 'x',
'constraint': lambda env, expr: expr.type == scrabble}),
'constraint': lambda expr: expr.type == scrabble}),
(scrabble, 'x'))
"""
......@@ -789,7 +762,10 @@ class NavigatorOptimizer(Optimizer):
raise
if replacements is False or replacements is None:
return False
assert len(node.outputs) == len(replacements)
if not isinstance(replacements, (tuple, list)):
raise TypeError('Optimizer %s gave wrong type of replacement' % lopt)
if len(node.outputs) != len(replacements):
raise ValueError('Optimizer %s gave wrong number of replacements' % lopt)
repl_pairs = zip(node.outputs, replacements)
try:
env.replace_all_validate(repl_pairs, reason=lopt)
......@@ -904,8 +880,13 @@ class EquilibriumOptimizer(NavigatorOptimizer):
max_depth = None,
max_use_ratio = None):
"""
:param local_optimizers: list or set of local optimizations to apply until
equilibrium.
:param max_use_ratio: each optimizer can be applied at most (size of graph * this number)
:param max_depth: TODO what does this do? (EquilibriumDB sets it to 5)
"""
super(EquilibriumOptimizer, self).__init__(
......@@ -916,6 +897,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
self.local_optimizers = local_optimizers
self.max_depth = max_depth
self.max_use_ratio = max_use_ratio
assert self.max_use_ratio is not None, 'max_use_ratio has to be a number'
def apply(self, env, start_from = None):
if start_from is None:
......@@ -960,7 +942,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
changed |= lopt_change
finally:
self.detach_updater(env, u)
self.detach_updater(env, u)
self.detach_updater(env, u) #TODO: erase this line, it's redundant at best
if max_use_abort:
print >> sys.stderr, "WARNING: EquilibriumOptimizer max'ed out"
......
......@@ -26,7 +26,7 @@ class DB(object):
# It is an instance of a DB.In the tests for example,
# this is not always the case.
if not isinstance(obj, (DB, opt.Optimizer, opt.LocalOptimizer)):
raise Exception('Triing to register an optimizer that don\'t herite from theano.gof.opt.Optimizer or theano.gof.opt.LocalOptimizer', obj)
raise TypeError('Object cannot be registered in OptDB', obj)
if self.name is not None:
tags = tags + (self.name,)
......@@ -132,6 +132,18 @@ class Query(object):
class EquilibriumDB(DB):
"""A set of potential optimizations which should be applied in an arbitrary order until
equilibrium is reached.
Canonicalize, Stabilize, and Specialize are all equilibrium optimizations.
.. note::
It seems like this might be supposed to contain LocalOptimizer instances rather than
optimizer instances, because whatever is selected by the query is passed to
EquilibriumOptimizer and EquilibriumOptimizer requires LocalOptimizer instances.
"""
def query(self, *tags, **kwtags):
opts = super(EquilibriumDB, self).query(*tags, **kwtags)
......@@ -142,27 +154,45 @@ class EquilibriumDB(DB):
class SequenceDB(DB):
"""A sequence of potential optimizations.
Retrieve a sequence of optimizations (a SeqOptimizer) by calling query().
Each potential optimization is registered with a floating-point position.
No matter which optimizations are selected by a query, they are carried out in order of
increasing position.
The optdb itself (`theano.compile.mode.optdb`), from which (among many other tags) fast_run
and fast_compile optimizers are drawn is a SequenceDB.
"""
def __init__(self, failure_callback = opt.SeqOptimizer.warn):
super(SequenceDB, self).__init__()
self.__priority__ = {}
self.__position__ = {}
self.failure_callback = failure_callback
def register(self, name, obj, priority, *tags):
def register(self, name, obj, position, *tags):
super(SequenceDB, self).register(name, obj, *tags)
self.__priority__[name] = priority
self.__position__[name] = position
def query(self, *tags, **kwtags):
"""
:type position_cutoff: float or int
:param position_cutoff: only optimizations with position less than the cutoff are returned.
"""
position_cutoff = kwtags.pop('position_cutoff', float('inf'))
opts = super(SequenceDB, self).query(*tags, **kwtags)
opts = list(opts)
opts.sort(key = lambda obj: self.__priority__[obj.name])
opts = [o for o in opts if self.__position__[o.name] < position_cutoff]
opts.sort(key = lambda obj: self.__position__[obj.name])
return opt.SeqOptimizer(opts, failure_callback = self.failure_callback)
def print_summary(self, stream=sys.stdout):
print >> stream, "SequenceDB (id %i)"%id(self)
print >> stream, " priority", self.__priority__
print >> stream, " position", self.__position__
print >> stream, " names", self._names
print >> stream, " db", self.__db__
def __str__(self):
sio = StringIO.StringIO()
self.print_summary(sio)
......
......@@ -7,9 +7,52 @@ import sys,os
from theano import config
from gof import Op, Apply
from theano.gof.python25 import any
from theano.compile import Function, debugmode
#We import the debugprint here to have all printing of graph available from this module
from theano.compile.debugmode import debugprint
def debugprint(obj, depth=-1, file=None):
    """Print a computation graph to file

    :type obj: Variable, Apply, or Function instance
    :param obj: symbolic thing to print
    :type depth: integer
    :param depth: print graph to this depth (-1 for unlimited)
    :type file: None or file-like object
    :param file: print to this file (None means sys.stdout)

    :rtype: None or file-like object
    :returns: `file` argument
    :raises TypeError: if `obj` is not a Variable, an Apply or a Function

    Each line printed represents a Variable in the graph.
    The indentation of each line corresponds to its depth in the symbolic graph.
    The first part of the text identifies whether it is an input (if a name or type is printed)
    or the output of some Apply (in which case the Op is printed).
    The second part of the text is the memory location of the Variable.

    If a Variable is encountered multiple times in the depth-first search, it is only printed
    recursively the first time.  Later, just the Variable and its memory location are printed.

    If an Apply has multiple outputs, then a '.N' suffix will be appended to the Apply's
    identifier, to indicate which output a line corresponds to.
    """
    if file is None:
        _file = sys.stdout
    else:
        _file = file
    # shared across all printed results so that a sub-graph reached from
    # several outputs is only expanded once
    done = set()
    results_to_print = []
    if isinstance(obj, gof.Variable):
        results_to_print.append(obj)
    elif isinstance(obj, gof.Apply):
        results_to_print.extend(obj.outputs)
    elif isinstance(obj, Function):
        results_to_print.extend(obj.maker.env.outputs)
    else:
        # previously an unsupported object silently printed nothing
        raise TypeError('debugprint cannot print an object of this type', obj)
    for r in results_to_print:
        debugmode.debugprint(r, depth=depth, done=done, file=_file)
    if file is None:
        _file.flush()
    return file
class Print(Op):
"""This identity-like Op has the side effect of printing a message followed by its inputs
......@@ -329,7 +372,7 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn
if var.name is not None:
varstr = var.name
elif isinstance(var,gof.Constant):
varstr = str(var.data)
varstr = '%s [%s]'% (str(var.data) , str(var.type))
elif var in input_update and input_update[var].variable.name is not None:
varstr = input_update[var].variable.name
else:
......
......@@ -3,7 +3,7 @@ import numpy
import theano
from theano import Op, Type, Apply, Variable, Constant
from theano import tensor
from theano.compile import shared, SharedVariable, shared_constructor
from theano.compile import shared, SharedVariable
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import filter as type_support_filter
......@@ -68,6 +68,11 @@ CudaNdarrayType.SharedVariable = CudaNdarraySharedVariable
def cuda_shared_constructor(value, name, strict=False, broadcastable=None):
"""SharedVariable Constructor for TensorType"""
# THIS CONSTRUCTOR TRIES TO CAST VALUE TO A FLOAT32, WHICH THEN GOES ONTO THE CARD
# SO INT shared vars, float64 shared vars, etc. all end up on the card.
# THIS IS NOT THE DEFAULT BEHAVIOUR THAT WE WANT.
# SEE float32_shared_constructor
#TODO: what should strict mean in this context, since we always have to make a copy?
if strict:
_value = value
......
......@@ -20,8 +20,9 @@ Special cases:
Often a for loop can be expressed as a ``scan()`` operation, and ``scan`` is
the closest that theano comes to looping. The advantage of using ``scan``
over for loops is that it allows you to express the loop symbolically. The
Scan Op should always be used by applying the ``scan`` function.
over for loops is that it allows the number of iterations to be a part of the symbolic graph.
The Scan Op should always be used by applying the ``scan`` function.
"""
__docformat__ = 'restructedtext en'
......@@ -60,7 +61,8 @@ def hash_listsDictsTuples(x):
def scan(fn, sequences, initial_states, non_sequences, inplace_map={}, \
sequences_taps={}, outputs_taps = {}, n_steps = 0, \
truncate_gradient = -1, go_backwards = False, mode = 'FAST_RUN'):
truncate_gradient = -1, go_backwards = False,
mode = None):
'''Function that constructs and applies a Scan op
:param fn: Function that describes the operations involved in one step of scan
......
差异被折叠。
......@@ -6,10 +6,9 @@ import numpy.distutils
from theano.configparser import config, AddConfigVar, StrParam
from theano.gof import (utils, Op, Apply, view_roots, PatternSub, DestroyHandler,
SeqOptimizer, local_optimizer, Optimizer, LocalOptimizer, OpKeyOptimizer,
InconsistencyError, toolbox)
InconsistencyError, toolbox, SequenceDB, EquilibriumOptimizer)
from theano.printing import pprint, FunctionPrinter
from theano.tensor.opt import register_specialize, out2in, insert_inplace_optimizer
# opt.py
from theano.compile.mode import optdb
import basic as T
......@@ -30,7 +29,6 @@ AddConfigVar('blas.ldflags',
"lib[s] to include for [Fortran] level-3 blas implementation",
StrParam(default_blas_ldflags()))
_logger = logging.getLogger('theano.tensor.blas')
_logger.setLevel(logging.WARN)
def debug(*msg): _logger.debug(' '.join(str(m) for m in msg))
......@@ -391,12 +389,22 @@ class Gemm(GemmRelated):
def c_code_cache_version(self):
return (1,) + self.build_gemm_version()
gemm = Gemm()
class PseudoGemm(Op):
# should be replaced by Gemm
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def make_node(self, *args):
inputs = [T.as_tensor_variable(i) for i in args]
return Apply(self, inputs, [inputs[0].type()])
def perform(self, node, (z, a, x, y, b), (zout, )):
zout[0] = a * numpy.dot(x,y) + b * z
gemm = PseudoGemm()
gemm_inplace = Gemm()
pprint.assign(gemm, FunctionPrinter('gemm'))
pprint.assign(gemm_inplace, FunctionPrinter('gemm_inplace'))
def res_is_a(node, op, maxclients=None):
if maxclients is not None:
retval = (len(node.clients) <= maxclients)
......@@ -597,6 +605,7 @@ class GemmOptimizer(Optimizer):
while did_something:
nodelist = list(env.toposort())
did_something = False
nodelist.reverse()
for node in nodelist:
new_outputs = _gemm_from_node(node)
if new_outputs:
......@@ -611,10 +620,6 @@ class GemmOptimizer(Optimizer):
#TODO: retry other applications of gemm (see comment in _gemm_from_node
pass
#neede to make the gemm optimisation(step 70) happen before the fusion of elemwise(step 71)
compile.optdb.register('inplace_gemm', GemmOptimizer(), 70.00, 'fast_run', 'inplace', 'gemm')
class Dot22(GemmRelated):
"""Compute a matrix-matrix product.
This is a specialization of the more general Dot()
......@@ -689,5 +694,34 @@ def local_dot_to_dot22(node):
info('Not optimizing dot with inputs', x, y, x.type, y.type)
else:
return False
register_specialize(local_dot_to_dot22)
@local_optimizer([gemm])
def local_inplace_gemm(node):
    """Replace an application of `gemm` by `gemm_inplace` on the same inputs.

    Returns a one-element list holding the replacement, or None when
    `node` does not apply `gemm`.
    """
    # deliberately spelled with `==`: the comparison goes through the op's __eq__
    is_gemm = (node.op == gemm)
    if not is_gemm:
        return None
    return [gemm_inplace(*node.inputs)]
#################################
#
# Set up the BlasOpt optimizer
#
#################################
blas_optdb = SequenceDB()
# run after numerical stability optimizations (1.5)
optdb.register('BlasOpt', blas_optdb, 1.7, 'fast_run')
# run before specialize (2.0) because specialize is basically a free-for-all that makes the
# graph crazy.
blas_optdb.register('local_dot_to_dot22',
EquilibriumOptimizer([local_dot_to_dot22], max_use_ratio=5),
0, 'fast_run')
blas_optdb.register('local_dot_to_gemm', GemmOptimizer(), 10, 'fast_run')
# After destroyhandler is in but before we try to make elemwise things inplace
# Try to make gemm inplace
# Also, need to make the gemm optimisation(step 70) happen before the fusion of elemwise(step 71)
optdb.register('InplaceBlasOpt',
EquilibriumOptimizer([local_inplace_gemm], max_use_ratio=5),
70.0, 'fast_run', 'inplace')
......@@ -197,6 +197,18 @@ class DimShuffle(Op):
storage[0] = numpy.asarray(res) #asarray puts scalars back into array
def infer_shape(self, node, (ishp,)):
ishp = list(ishp)
for drop in reversed(self.drop):
del ishp[drop]
# transpose
rval = [ishp[i] for i in self.shuffle]
# augment
for augm in self.augment:
rval.insert(augm, 1)
return [rval]
def c_code(self, node, name, (input,), (res,), sub):
basename = input + '__view_or_copy'
......@@ -613,6 +625,25 @@ class Elemwise(Op):
# the following should be used instead of the previous loop, unfortunately it tends to segfault
# self.ufunc(*(ufunc_args+[s[0] for s in output_storage]))
def infer_shape(self, node, i_shapes):
    """Derive the shape of every output from the shapes of the inputs.

    :param node: the Apply being considered; only ``node.inputs``,
        ``node.outputs`` and their ``type.broadcastable`` are read.
    :param i_shapes: one shape per input, in the order of ``node.inputs``.
    :returns: a list holding one shape (a list of dimensions) per output.
        A broadcastable output dimension is 1; any other dimension is taken
        from the first input that is not broadcastable in that dimension.
    :raises AssertionError: if a non-broadcastable output dimension cannot
        be determined from the inputs.
    """
    rval = []
    for o in node.outputs:
        oshp = []
        for dim, b in enumerate(o.type.broadcastable):
            if b:
                # this is broadcastable
                b_dim = 1
            else:
                # there must be some input that is not broadcastable
                b_dim = None
                for ishp, i in zip(i_shapes, node.inputs):
                    if not i.type.broadcastable[dim]:
                        b_dim = ishp[dim]
                        # compare against None instead of testing truthiness,
                        # so that a zero-length dimension is accepted
                        assert b_dim is not None, (
                            'shape of a non-broadcastable input is unknown'
                            ' in dimension %i' % dim)
                        break
                assert b_dim is not None, (
                    'no input is non-broadcastable in dimension %i' % dim)
            oshp.append(b_dim)
        rval.append(oshp)
    return rval
def _c_all(self, node, name, inames, onames, sub):
_inames = inames
_onames = onames
......@@ -764,10 +795,14 @@ class CAReduce(Op):
if scalar_op.nin not in [-1, 2] or scalar_op.nout != 1:
raise NotImplementedError("CAReduce only supports binary functions with a single output.")
self.scalar_op = scalar_op
if isinstance(axis, int):
self.axis = [axis]
else:
if axis is None:
self.axis = axis
elif isinstance(axis, int):
self.axis = (axis,)
else:
self.axis = list(set(axis))
self.axis.sort()
self.axis = tuple(self.axis)
self.ufunc = numpy.frompyfunc(scalar_op.impl, 2, 1)
# CAReduce output views input when reducing scalars
......@@ -834,6 +869,13 @@ class CAReduce(Op):
else:
output[0] = numpy.copy(variable)
def infer_shape(self, node, (ishape,)):
axis = self.axis
if axis is None:
return (),
return [ishape[i] for (i,b) in enumerate(node.inputs[0].type.broadcastable) if i not in axis],
def _c_all(self, node, name, inames, onames, sub):
input = node.inputs[0]
......
from nnet import *
from sigm import softplus, sigmoid, sigmoid_inplace, scalar_sigmoid
......@@ -4,89 +4,14 @@
"""
from theano import gof
from theano import scalar
from theano import printing
from theano.printing import pprint
from theano.tensor import basic as tensor
from theano.tensor import elemwise
from theano.tensor import opt
from theano.compile import optdb
import numpy
############
#
# SCALAR OPS
#
class ScalarSigmoid(scalar.UnaryScalarOp):
@staticmethod
def st_impl(x):
if x < -30.0:
return 0.0
if x > 30.0:
return 1.0
return 1.0 / (1.0 + numpy.exp(-x))
def impl(self, x):
return ScalarSigmoid.st_impl(x)
def grad(self, (x,), (gz,)):
y = scalar_sigmoid(x)
return [gz * y * (1.0 - y)]
def c_code(self, node, name, (x,), (z,), sub):
if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
# print i, repr( theano._asarray(1.0, dtype=dt) / (theano._asarray(1.0, dtype=dt) + numpy.exp(-theano._asarray([i,-i], dtype=dt))))
# the boundary checks prevent us from generating inf
return """%(z)s = %(x)s < -88.0f ? 0.0 : %(x)s > 15.0f ? 1.0f : 1.0f /(1.0f + exp(-%(x)s));""" % locals()
elif node.inputs[0].type == scalar.float64:
return """%(z)s = %(x)s < -709.0 ? 0.0 : %(x)s > 19.0 ? 1.0 : 1.0 /(1.0+exp(-%(x)s));""" % locals()
else:
raise NotImplementedError('only floatingpoint is implemented')
def c_code_cache_version(self):
v = super(ScalarSigmoid, self).c_code_cache_version()
if v:
return (2,) + v
else:
return v
scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')
pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid'))
class ScalarSoftplus(scalar.UnaryScalarOp):
@staticmethod
def static_impl(x):
if x < -30.0:
return 0.0
if x > 30.0:
return x
return numpy.log1p(numpy.exp(x))
def impl(self, x):
return ScalarSoftplus.static_impl(x)
def grad(self, (x,), (gz,)):
return [gz * scalar_sigmoid(x)]
def c_code(self, node, name, (x,), (z,), sub):
if node.inputs[0].type == scalar.float32:
# These constants were obtained by looking at the output of python commands like:
# for i in xrange(750):
# print i, repr( numpy.log1p(numpy.exp(theano._asarray([i,-i], dtype=dt))))
# the boundary checks prevent us from generating inf
return """%(z)s = %(x)s < -103.0f ? 0.0 : %(x)s > 14.0f ? %(x)s : log1p(exp(%(x)s));""" % locals()
elif node.inputs[0].type == scalar.float64:
return """%(z)s = %(x)s < -745.0 ? 0.0 : %(x)s > 16.0 ? %(x)s : log1p(exp(%(x)s));""" % locals()
else:
raise NotImplementedError('only floatingpoint is implemented')
def c_code_cache_version(self):
v = super(ScalarSoftplus, self).c_code_cache_version()
if v:
return (2,) + v
else:
return v
scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus')
softplus = elemwise.Elemwise(scalar_softplus, name='softplus')
pprint.assign(softplus, printing.FunctionPrinter('softplus'))
from .sigm import sigmoid, softplus
############
......@@ -1351,6 +1276,7 @@ def categorical_crossentropy(coding_dist, true_dist):
raise TypeError('rank mismatch between coding and true distributions')
from theano import scalar
class Prepend_scalar_constant_to_each_row(gof.Op):
def __init__(self, val = 0):
......@@ -1440,14 +1366,3 @@ prepend_scalar_to_each_row = Prepend_scalar_to_each_row()
prepend_0_to_each_row = Prepend_scalar_constant_to_each_row(0.)
prepend_1_to_each_row = Prepend_scalar_constant_to_each_row(1.)
logsigm_to_softplus = gof.PatternSub(
(tensor.log, (sigmoid, 'x')),
(tensor.neg, (softplus, (tensor.neg, 'x'))),
allow_multiple_clients = True)
log1msigm_to_softplus = gof.PatternSub(
(tensor.log, (tensor.sub, tensor.constant([[1.0]]), (sigmoid, 'x'))),
(tensor.neg, (softplus, 'x')),
allow_multiple_clients = True)
opt.register_specialize(logsigm_to_softplus, name = 'logsigm_to_softplus')
opt.register_specialize(log1msigm_to_softplus, name = 'log1msigm_to_softplus')
差异被折叠。
import unittest
import theano
from theano import tensor as T
from theano import gof
import numpy
from theano.tests import unittest_tools as utt
from theano.tensor.tests import test_basic as TT
from theano.tensor.nnet import *
class T_sigmoid(unittest.TestCase):
    # Gradient check of the elementwise sigmoid on a random 3x4 input.

    def setUp(self):
        utt.seed_rng()

    def test_elemwise(self):
        sample = numpy.random.rand(3, 4)
        utt.verify_grad(sigmoid, [sample])
class T_softplus(unittest.TestCase):
    # Gradient check of the elementwise softplus on a random 3x4 input.

    def setUp(self):
        utt.seed_rng()

    def test_elemwise(self):
        sample = numpy.random.rand(3, 4)
        utt.verify_grad(softplus, [sample])
class T_sigmoid_opts(unittest.TestCase):
    # Checks that hand-written sigmoid expressions are compiled to graphs
    # made of the sigmoid op (plus at most one extra elementwise op).

    def test_exp_over_1_plus_exp(self):
        # FAST_COMPILE is swapped for FAST_RUN before compiling
        m = theano.config.mode
        if m == 'FAST_COMPILE':
            m = 'FAST_RUN'

        x = T.dvector()

        # tests exp_over_1_plus_exp
        f = theano.function([x], T.exp(x)/(1+T.exp(x)), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid]

        # tests inv_1_plus_exp
        f = theano.function([x], T.fill(x,1.0) / (1+T.exp(-x)), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid]

        # tests inv_1_plus_exp with neg
        f = theano.function([x], T.fill(x,-1.0) / (1+T.exp(-x)), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid,
                T.inplace.neg_inplace]

        # tests double inv_1_plus_exp with neg
        f = theano.function([x], (T.fill(x,-1.0)*T.exp(x)) / ((1+T.exp(x))*(1+T.exp(-x))), mode=m)
        #theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [sigmoid,
                T.mul]

    def test_1msigmoid(self):
        # FAST_COMPILE is swapped for FAST_RUN before compiling
        m = theano.config.mode
        if m == 'FAST_COMPILE':
            m = 'FAST_RUN'

        x = T.fmatrix()

        # `T` is the only alias of theano.tensor imported by this module, so
        # the expected ops are spelled through it rather than through a
        # `tensor` name that a star import might or might not provide.

        # tests 1 - exp_over_1_plus_exp
        f = theano.function([x], 1 - T.exp(x)/(1+T.exp(x)), mode=m)
        theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [T.neg, sigmoid_inplace]

        # tests 1 - inv_1_plus_exp
        f = theano.function([x], 1 - T.fill(x,1.0) / (1+T.exp(-x)), mode=m)
        theano.printing.debugprint(f)
        assert [node.op for node in f.maker.env.toposort()] == [T.neg,
                sigmoid_inplace]
差异被折叠。
......@@ -136,10 +136,7 @@ class RandomFunction(gof.Op):
draw.
"""
if shape == () or shape == []:
shape = tensor.as_tensor_variable(shape, dtype='int64')
else:
shape = tensor.as_tensor_variable(shape, ndim=1)
shape = tensor.as_tensor_variable(shape, ndim=1)
assert shape.type.ndim == 1
assert (shape.type.dtype == 'int64') or (shape.type.dtype == 'int32')
if not isinstance(r.type, RandomStateType):
......@@ -158,6 +155,22 @@ class RandomFunction(gof.Op):
[r, shape] + args,
[r.type(), self.outtype()])
def infer_shape(self, node, i_shapes):
    """Return the shapes of the two outputs of `node`.

    The first entry is None.  The second lists the dimensions of the
    sample: they are read from the shape input when ``self.ndim_added`` is 0
    and that input is not a zero-length constant, and from the shape of the
    second output otherwise.
    """
    rstate, shp = node.inputs[0:2]

    #if shp is a constant array of len 0, then it means 'automatic shape'
    automatic_shape = (len(getattr(shp, 'data', [0, 1, 2])) == 0)

    if self.ndim_added != 0 or automatic_shape:
        # if shape == () then it will depend on args
        # if ndim_added != 0 and shape != () then it will depend on args
        sample_shp = node.outputs[1].shape
    else:
        # if ndim_added == 0 and shape != () then shape
        sample_shp = shp

    dims = []
    for i in range(node.outputs[1].ndim):
        dims.append(sample_shp[i])
    return [None, dims]
def perform(self, node, inputs, (rout, out)):
# Use self.fn to draw shape worth of random numbers.
# Numbers are drawn from r if self.inplace is True, and from a copy of r if
......
......@@ -89,7 +89,6 @@ class test_greedy_distribute(unittest.TestCase):
g = Env([a,b,c,d,x,y,z], [e])
##print pprint(g.outputs[0])
mul_canonizer.optimize(g)
gof.TopoOptimizer(gof.LocalOptGroup(local_fill_cut, local_fill_lift), order = 'out_to_in').optimize(g)
gof.TopoOptimizer(gof.LocalOptGroup(local_greedy_distributor), order = 'out_to_in').optimize(g)
##print pprint(g.outputs[0])
......@@ -136,7 +135,6 @@ class test_canonize(unittest.TestCase):
g = Env([x, y, z, a, b, c, d], [e])
print pprint(g.outputs[0])
mul_canonizer.optimize(g)
gof.TopoOptimizer(gof.LocalOptGroup(local_fill_cut, local_fill_lift), order = 'out_to_in').optimize(g)
print pprint(g.outputs[0])
def test_elemwise_multiple_inputs_optimisation(self):
......@@ -296,17 +294,17 @@ class test_canonize(unittest.TestCase):
def test_multiple_case(self):
""" test those cases taken from the comment in Canonizer
x / x -> 1
(x * y) / x -> y
x / y / x -> 1 / y
x / y / z -> x / (y * z)
x / (y / z) -> (x * z) / y
(a / b) * (b / c) * (c / d) -> a / d
(2.0 * x) / (4.0 * y) -> (0.5 * x) / y
2 * x / 2 -> x
with and without DimShuffle
TODO: with DimShuffle
"""
x / x -> 1
(x * y) / x -> y
x / y / x -> 1 / y
x / y / z -> x / (y * z)
x / (y / z) -> (x * z) / y
(a / b) * (b / c) * (c / d) -> a / d
(2.0 * x) / (4.0 * y) -> (0.5 * x) / y
2 * x / 2 -> x
with and without DimShuffle
TODO: with DimShuffle
"""
import theano.tensor, theano.compile
shp=(3,3)
......@@ -331,6 +329,7 @@ class test_canonize(unittest.TestCase):
old_optimizer = mode._optimizer
try:
mode._optimizer=gof.Query(["canonicalize"])
mode._optimizer=mode._optimizer.including('ShapeOpt')
mode._optimizer=mode._optimizer.excluding('local_elemwise_fusion')
#test x / x -> 1
......@@ -344,10 +343,15 @@ class test_canonize(unittest.TestCase):
out = f(*val_inputs)
assert (out==numpy.ones(shp, dtype=out_dtype)).all()
topo=f.maker.env.toposort()
assert len(topo)==1
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,theano.scalar.basic.Second)
assert len(topo[0].inputs)==2
if sym_inputs[0].broadcastable[0]:
assert len(topo)==2
assert isinstance(topo[0].op, Shape_i)
assert isinstance(topo[1].op, TT.Alloc)
else:
assert len(topo)==3
assert isinstance(topo[0].op, Shape_i)
assert isinstance(topo[1].op, Shape_i)
assert isinstance(topo[2].op, TT.Alloc)
assert(out_dtype==out.dtype)
#test (x * y) / x -> y
......@@ -365,10 +369,16 @@ class test_canonize(unittest.TestCase):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert(out_dtype==out.dtype)
assert numpy.allclose(out,val_inputs[1])
topo=f.maker.env.toposort()
assert len(topo)==nb_elemwise
assert(out_dtype==out.dtype)
print "ID TOPO", id, topo, sym_inputs
for r,t in f.maker.env.shape_feature.shape_of.items():
print ' ', r, t
if topo:
for node in topo[:-1]:
assert isinstance(node.op, Shape_i)
assert isinstance(topo[-1].op, TT.Alloc)
#test x / y / x -> 1 / y
for id,(g, sym_inputs, val_inputs, nb_elemwise, out_dtype) in enumerate([
......@@ -378,19 +388,21 @@ class test_canonize(unittest.TestCase):
((fv/fy)/fv,[fv,fy],[fvv,fyv],1,'float32'),
#must broadcast as there is a dimshuffle in the computation
((dx/dv)/dx,[dx,dv],[dxv,dvv],2,'float64'),
#topo: [Elemwise{inv,no_inplace}(<TensorType(float64, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0)]
((fx/fv)/fx,[fx,fv],[fxv,fvv],2,'float32'),
#topo:[Elemwise{inv,no_inplace}(<TensorType(float32, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0)]
((dx/dv)/dx,[dx,dv],[dxv,dvv],1,'float64'),
#topo: [Shape_i, Shape_i, Elemwise{inv,no_inplace}(<TensorType(float64, row)>), Alloc(...)]
((fx/fv)/fx,[fx,fv],[fxv,fvv],1,'float32'),
#topo:[Shape_i, Shape_i, Elemwise{inv,no_inplace}(<TensorType(float32, row)>), Alloc(...)]
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,(1/val_inputs[1]))
topo=f.maker.env.toposort()
assert len(topo)==nb_elemwise
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,(theano.scalar.basic.Inv, theano.scalar.basic.TrueDiv))
print topo
elem = [t for t in topo if isinstance(t.op, T.Elemwise)]
assert len(elem)==nb_elemwise
assert isinstance(elem[0].op,(T.Elemwise,))
assert isinstance(elem[0].op.scalar_op,(theano.scalar.basic.Inv, theano.scalar.basic.TrueDiv))
assert(out_dtype==out.dtype)
#test (a / b) * (b / c) * (c / d) -> a / d
......@@ -529,29 +541,6 @@ def test_mixeddiv():
d = dscalar()
assert 0 == function([i,d], d*(i/(i+1)))(3, 1.0)
def test_local_shape_lift_dot():
    """Exercise local_shape_lift_dot on shape(dot(i, j)) for every pairing
    of fvector and fmatrix inputs.

    For the fvector/fvector pairing the symbolic shape is asserted to equal
    the empty tuple directly.  For every other pairing the optimizer is run
    on an Env and the pretty-printed output is compared with the expected
    string from the table below.
    """
    # Expected pprint() rendering of the optimized shape, keyed by the
    # (left, right) input constructors.
    expected_by_types = {
        (fvector, fvector): "[]",
        (fvector, fmatrix): "[<TensorType(float32, matrix)>.shape[1]]",
        (fmatrix, fvector): "[<TensorType(float32, matrix)>.shape[0]]",
        (fmatrix, fmatrix): "[<TensorType(float32, matrix)>.shape[0], <TensorType(float32, matrix)>.shape[1]]",
        }
    constructors = [fvector, fmatrix]
    for make_lhs in constructors:
        for make_rhs in constructors:
            i = make_lhs()
            j = make_rhs()
            print('I SHAPE %s' % (i.type.shape,))
            print('J SHAPE %s' % (j.type.shape,))
            d = shape(dot(i, j))

            if make_lhs is fvector and make_rhs is fvector:
                # vector . vector: no graph is built or optimized here,
                # the shape is compared with () directly.
                assert d == ()
                continue

            g = Env([i, j], [d])
            local_group = gof.LocalOptGroup(local_shape_lift_dot)
            optimizer = gof.TopoOptimizer(local_group, order='out_to_in')
            optimizer.optimize(g)

            rendered = pprint(g.outputs[0])
            expected = args_expected = expected_by_types[(make_lhs, make_rhs)]
            print('%s %s' % (rendered, expected))
            assert rendered == expected
def test_const_type_in_mul_canonizer():
input = dmatrix()
w = dmatrix()
......@@ -915,11 +904,16 @@ def test_log1p():
# check trickier cases (and use different dtype)
y = fmatrix()
f = function([x,y], T.log(fill(y,1)+(x)), mode=m)
assert [node.op for node in f.maker.env.toposort()] == [T.DimShuffle([False], ['x', 0], True), T.log1p, T.fill]
print f.maker.env.toposort()
# the first three ops are Shape_i, Shape_i, and Dimshuffle
assert [node.op for node in f.maker.env.toposort()][3:] \
== [T.log1p, Alloc('float64')]
f = function([x,y], T.log(0+(x) + fill(y,1.0)), mode=m)
assert [node.op for node in f.maker.env.toposort()] == [T.DimShuffle([False], ['x', 0], True), T.log1p, T.fill]
assert [node.op for node in f.maker.env.toposort()][3:] \
== [T.log1p, Alloc('float64')]
f = function([x,y], T.log(2+(x) - fill(y,1.0)), mode=m)
assert [node.op for node in f.maker.env.toposort()] == [T.DimShuffle([False], ['x', 0], True), T.log1p, T.fill]
assert [node.op for node in f.maker.env.toposort()][3:] \
== [T.log1p, Alloc('float64')]
f([1e-7, 10], [[0, 0], [0, 0]]) #debugmode will verify values
......@@ -969,6 +963,51 @@ class test_local_subtensor_unary(unittest.TestCase):
f([[0,1],[2,3]], [4,5]) # let debugmode test something
def test_local_fill_useless():
    """Compile several fill/second expressions multiplied by 2 and assert
    that each compiled graph consists of a single T.mul node.

    The configured mode is used, except that 'FAST_COMPILE' is replaced by
    'FAST_RUN'.
    """
    configured = theano.config.mode
    mode = 'FAST_RUN' if configured == 'FAST_COMPILE' else configured

    def assert_only_mul(inputs, output):
        # Compile the expression and require the toposorted graph to be
        # exactly [T.mul].
        compiled = function(inputs, output, mode=mode)
        assert [node.op for node in compiled.maker.env.toposort()] == [T.mul]

    x = dvector()
    y = dvector()
    z = lvector()

    # basic case: fill a vector with itself
    assert_only_mul([x], T.fill(x, x) * 2)

    # basic case, spelled with second()
    assert_only_mul([x, y], T.second(y, x) * 2)

    # the shape-giving argument has a different type (lvector)
    assert_only_mul([x, z], T.fill(z, x) * 2)

    # now cutting out the input ??
    assert_only_mul([x, y], T.fill(x, y) * 2)

    # now fill is serving as a cast
    # NOTE(review): this is the same expression as the previous case, and
    # both operands are dvectors -- confirm which inputs were intended.
    assert_only_mul([x, y], T.fill(x, y) * 2)
class test_shapeoptimizer(unittest.TestCase):
    """Checks on the compiled graph when only a shape is requested."""

    def test0(self):
        """The graph computing (v+m).shape must contain no T.add node."""
        vec = T.vector()
        mat = T.matrix()
        compiled = function([vec, mat], (vec + mat).shape)
        topo = compiled.maker.env.toposort()
        assert all(node.op != T.add for node in topo)

    def test_constant(self):
        """The graph for the size of an 'x' dimension added by dimshuffle
        must toposort to an empty list."""
        vec = T.vector()
        mat = T.matrix()  # passed as an input but not used by the expression
        shuffled = vec.dimshuffle('x', 'x', 0)
        compiled = function([vec, mat], shuffled.shape[1])
        topo = compiled.maker.env.toposort()
        print(topo)
        assert topo == []
if __name__ == '__main__':
# unittest.main()
test_fusion().tes_memory_leak()
......
......@@ -352,7 +352,7 @@ class T_SharedRandomStreams(unittest.TestCase):
def test_vector_arguments(self):
random = RandomStreams(utt.fetch_seed())
low = tensor.vector()
low = tensor.dvector()
out = random.uniform(low=low, high=1)
assert out.ndim == 1
f = function([low], out)
......@@ -402,8 +402,8 @@ class T_SharedRandomStreams(unittest.TestCase):
def test_broadcast_arguments(self):
random = RandomStreams(utt.fetch_seed())
low = tensor.vector()
high = tensor.col()
low = tensor.dvector()
high = tensor.dcol()
out = random.uniform(low=low, high=high)
assert out.ndim == 2
f = function([low, high], out)
......@@ -424,8 +424,8 @@ class T_SharedRandomStreams(unittest.TestCase):
def test_uniform_vector(self):
random = RandomStreams(utt.fetch_seed())
low = tensor.vector()
high = tensor.vector()
low = tensor.dvector()
high = tensor.dvector()
out = random.uniform(low=low, high=high)
assert out.ndim == 1
f = function([low, high], out)
......@@ -438,11 +438,15 @@ class T_SharedRandomStreams(unittest.TestCase):
# Arguments of size (3,)
val0 = f(low_val, high_val)
numpy_val0 = numpy_rng.uniform(low=low_val, high=high_val)
print 'THEANO', val0
print 'NUMPY', numpy_val0
assert numpy.all(val0 == numpy_val0)
# arguments of size (2,)
val1 = f(low_val[:-1], high_val[:-1])
numpy_val1 = numpy_rng.uniform(low=low_val[:-1], high=high_val[:-1])
print 'THEANO', val1
print 'NUMPY', numpy_val1
assert numpy.all(val1 == numpy_val1)
# Specifying the size explicitly
......@@ -486,8 +490,8 @@ class T_SharedRandomStreams(unittest.TestCase):
def test_normal_vector(self):
random = RandomStreams(utt.fetch_seed())
avg = tensor.vector()
std = tensor.vector()
avg = tensor.dvector()
std = tensor.dvector()
out = random.normal(avg=avg, std=std)
assert out.ndim == 1
f = function([avg, std], out)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论