提交 cd644635 authored 作者: James Bergstra's avatar James Bergstra

merge

Debugging with a customized so-called StepMode
==============================================
One convenient trick I've found for debugging programs that run with Theano is to
use what I call a 'StepMode'. There is no such StepMode in the standard library because
its purpose is to be customized ('hacked') to investigate what your own particular program is doing.
.. code-block:: python
from theano.gof.link import WrapLinkerMany
from theano.compile.mode import (Mode, register_mode, predefined_modes, predefined_linkers,
predefined_optimizers, default_linker, default_optimizer)
class StepMode(Mode):
def __init__(self, linker=default_linker, optimizer=default_optimizer):
def blah(i, node, th):
# This function will be run for each node in your compiled program.
# here you can inspect all the values as they are computed,
# ... you can even change them !
# 'i' is the execution position in the serialized graph
# node is the symbolic Apply instance
# th is a callable thing that will compute the node.
print i, node, len(th.inputs)
# the symbolic inputs of the node are in node.inputs
# the j'th non-symbolic input of the node is in th.inputs[j][0]
th() # call the function to actually 'run' the graph
# the symbolic outputs of the node are in node.outputs
# the j'th non-symbolic output of the node is in th.outputs[j][0]
print type(th.outputs[0][0])
if i == 39:
print 'this node is weird...', th.outputs[0][0]
self.provided_linker = linker
self.provided_optimizer = optimizer
if isinstance(linker, str) or linker is None:
linker = predefined_linkers[linker]
self.linker = WrapLinkerMany([linker], [blah])
if isinstance(optimizer, str) or optimizer is None:
optimizer = predefined_optimizers[optimizer]
self._optimizer = optimizer
The way to use it is like this:
.. code-block:: python
fn = function(inputs, outputs, mode=StepMode())
When you call fn, your function in the stepmode will be called for each node in the compiled
program. You can print out some or all of the values, you can change them in mid-execution.
You can see where bizarre values are first occurring in your computations. It's a very
powerful way to understand your program's execution.
Remember, if you give names to your variables, then printing nodes will give you a better idea of
where in the calculations you are.
......@@ -15,4 +15,5 @@ Advanced Topics (under construction)
ccodegen
function
module
debugging_with_stepmode
......@@ -22,7 +22,59 @@ from io import *
import logging
_logger = logging.getLogger('theano.compile.function_module')
def view_map_root(v):
    """Return the variable that v is ultimately a view of.

    Follows view_map/destroy_map annotations backward through the graph,
    starting at v, until reaching a variable that is not a view of any
    other variable, and returns that root variable.
    """
    while v.owner is not None:
        apply_node = v.owner
        out_index = apply_node.outputs.index(v)
        vmap = getattr(apply_node.op, 'view_map', {})
        dmap = getattr(apply_node.op, 'destroy_map', {})
        viewed_inputs = vmap.get(out_index, []) + dmap.get(out_index, [])
        if len(viewed_inputs) > 1:
            # an output that views/destroys several inputs is not supported
            raise NotImplementedError()
        if not viewed_inputs:
            # v is not a view of anything: it is the root
            return v
        # step to the input that v is a view of, and keep walking
        v = apply_node.inputs[viewed_inputs[0]]
    return v
def view_tree_set(v, treeset):
"""Add to `treeset` all variables that are views of v, given that v is not a view"""
treeset.add(v)
for cl, v_input_pos_to_cl in v.clients:
if cl == 'output':
continue
vmap = getattr(cl.op, 'view_map', {})
dmap = getattr(cl.op, 'destroy_map', {})
for opos, iposlist in vmap.items() + dmap.items():
if v_input_pos_to_cl in iposlist:
if cl.outputs[opos] not in treeset:
view_tree_set(cl.outputs[opos], treeset)
def infer_reuse_pattern(env, outputs_to_disown):
    """
    Given an env and a list of variables, return the set of all variables which may
    share the same underlying data storage as any of the specified variables.  Used
    internally by function, FunctionMaker.

    This set is also referred to as no_recycling sometimes, especially by linker code.

    :param env: the Env containing the graph (unused directly here, kept for the
        established call signature)
    :param outputs_to_disown: variables whose storage must not be recycled
    :rtype: set of Variable
    """
    rval = set()
    for o in outputs_to_disown:
        # collect every variable in the view-tree rooted at o's view root
        view_tree_set(view_map_root(o), rval)
    # remove from rval all of the inputs, constants, values: they have no owner,
    # and their storage is managed by the caller rather than by the linker.
    rval = set(r for r in rval if r.owner is not None)
    # NOTE: an always-on 'if 1:' debug block that cross-checked the result against
    # the deprecated _old_infer_reuse_pattern on every call has been removed;
    # it added overhead to every function compilation.
    return rval
def _old_infer_reuse_pattern(env, outputs_to_disown):
"""
Given an env and a list of variables, returns the list of all
variables which may share the same underlying data storage as any of
......@@ -39,18 +91,8 @@ def infer_reuse_pattern(env, outputs_to_disown):
do_not_reuse.append(r)
node = r.owner
op = node.op
if hasattr(op, 'destroy_map'):
dmap = op.destroy_map
else:
dmap = {}
if hasattr(op, 'view_map'):
vmap = op.view_map
else:
vmap = {}
#backport
#dmap = op.destroy_map if hasattr(op, 'destroy_map') else {}
#vmap = op.view_map if hasattr(op, 'view_map') else {}
dmap = getattr(op, 'destroy_map', {})
vmap = getattr(op, 'view_map', {})
for l in dmap.values() + vmap.values():
for i in l:
walk(node.inputs[i])
......@@ -515,6 +557,7 @@ class SanityCheckFunction(Function):
super(SanityCheckFunction, self).__init__(*args, **kwargs)
self.others = others
self.check_equal = check_equal
# DEPRECATED? Is this just for DualLinker?
def __setitem__(self, item, value):
super(SanityCheckFunction, self).__setitem__(item, value)
......@@ -739,6 +782,7 @@ class FunctionMaker(object):
input_storage_lists.append([input_storage_i])
defaults.append((self.required[i], self.refeed[i], input_storage_i))
# Get a function instance
_fn, _i, _o = self.linker.make_thunk(input_storage = input_storage_lists)
fn = self.function_builder(_fn, _i, _o, self.indices, self.outputs, defaults, self.unpack_single, self.return_none, self)
......@@ -791,7 +835,7 @@ def register_checker(checker):
def function(inputs, outputs, mode=None, accept_inplace = False):
"""
Return a function calculating the outputs from the inputs.
Return a Function that will calculate the outputs from the inputs.
:param inputs: list of `SymbolicInput` or `In` instances
......@@ -804,61 +848,41 @@ def function(inputs, outputs, mode=None, accept_inplace = False):
Currently, the library provides the following mode strings:
- SANITY_CHECK TODO: NotImplemented
- FAST_COMPILE (apply only optimization that are fast to apply)
- FAST_RUN (default) (optimize without too much time)
- EXPENSIVE_OPTIMIZATION TODO: NotImplemented
- FAST_COMPILE (minimal optimization)
- PROFILE_MODE : allow to print a profile mode with mode.print_summary
- DEBUG_MODE : make all the check that we taught of(compare python and c,...)
- DEBUG_MODE : verify many internal conditions that are normally assumed (SLOW)
:param accept_inplace: True iff the graph can contain inplace operations prior to the
optimization phase (default is False)
Every element of the input list will be upgraded to an `In` instance if necessary,
using the rules implemented by the `convert_function_input` function.
Similarly, every element of the output list will be upgraded to an
`Out` instance if necessary:
* a `Variable` instance r will be upgraded like `Out`(r)
Random Numbers
--------------
"""
If your computation involves random numbers, then you have to pass the `RandomKit` as an
input argument. That RandomKit must have a name to be able to seed the generator. To seed
the generator, use the `__setitem__` method:
#Every element of the input list will be upgraded to an `In` instance if necessary,
#using the rules implemented by the `convert_function_input` function.
..code-block: python
f[<kitname>] = seed #re-seed the elements of a RandomKit
#Similarly, every element of the output list will be upgraded to an
#`Out` instance if necessary:
"""
t1 = time.time()
if mode is None:
mode = mode_module.default_mode
#backport
#mode = mode if mode is not None else mode_module.default_mode
mode = mode_module.default_mode
inputs = map(convert_function_input, inputs)
if outputs is not None:
if isinstance(outputs, (list, tuple)):
outputs = map(FunctionMaker.wrap_out, outputs)
else:
outputs = FunctionMaker.wrap_out(outputs)
#backport
#outputs = map(FunctionMaker.wrap_out, outputs) if isinstance(outputs, (list, tuple)) else FunctionMaker.wrap_out(outputs)
if isinstance(outputs, (list, tuple)):
outputs = map(FunctionMaker.wrap_out, outputs)
else:
outputs = FunctionMaker.wrap_out(outputs)
defaults = [getattr(input, 'value', None) for input in inputs]
mode = mode_module.predefined_modes.get(mode, mode)
if isinstance(mode, (list, tuple)): # "mode comparison" semantics
_logger.warning('Passing multiple modes is deprecated (20091019)')
if not mode:
raise ValueError("Please provide at least one mode.")
elif len(mode) == 1:
......
"""WRITEME
"""
import os, logging
import numpy
import os
import scipy.sparse as sp
from theano import gof
_logger = logging.getLogger('theano.compile.mode')
def check_equal(x, y):
"""
Returns True iff x[0] and y[0] are equal (checks the dtype and
......@@ -74,9 +79,51 @@ def register_optimizer(name, opt):
raise ValueError('Optimizer name already taken: %s' % name)
predefined_optimizers[name] = opt
class OutputGuard(gof.Op):
    """Identity Op that declares (via destroy_map) that it destroys its input.

    Inserted in front of graph outputs (see AddDestroyHandler) so that the
    DestroyHandler will reject any later attempt by an inplace Op to destroy
    those outputs.  At runtime it simply passes its input through unchanged.
    """
    # claims to destroy input 0 and to view input 0 in output 0
    destroy_map = {0:[0]}
    view_map = {0:[0]}
    def make_node(self, x):
        # the output has exactly the same type as the input
        return gof.Apply(self, [x], [x.type()])
    def __eq__(self, other):
        # all OutputGuard instances are interchangeable
        return type(self) == type(other)
    def __hash__(self):
        return hash(type(self))
    def perform(self, node, (x,), (z,)):
        # identity: forward the input object to the output storage
        z[0] = x
    def __str__(self):
        return '%s' % self.__class__.__name__
    def c_code(self, node, nodename, (x,), (z,), sub):
        # C implementation of the identity: hand the Python reference from x to z
        return """
Py_XDECREF(%(z)s);
%(z)s = %(x)s;
Py_XINCREF(%(z)s);
""" %locals()
    def c_code_cache_version(self):
        return (1,)
_output_guard = OutputGuard()
class AddDestroyHandler(gof.Optimizer):
    """This optimizer performs two important functions:
    1) It has a 'requirement' of the destroyhandler.  This means that the env will include it
    as a feature for this optimization, and keep this feature enabled for subsequent
    optimizations.  All optimizations that work inplace on any of their inputs must run *after*
    this optimization to ensure that the DestroyHandler has been included in the env.
    2) It tries to replace each output with an Op that purports to destroy it (but it won't, I
    promise).  If this replacement succeeds it means that there is a bug in theano.  It should
    not be possible to destroy outputs.
    """
    def apply(self, env):
        pass  # NOTE(review): stray no-op -- presumably leftover from an earlier empty body; confirm
        for o in env.outputs:
            try:
                # if the DestroyHandler accepts this replacement, the output was
                # destroyable, which should never happen: warn loudly.
                env.replace_validate(o, _output_guard(o), reason='output_guard')
                _logger.warning("Output variable %s required output_guard,"
                        " how was this output left unprotected against destructive operations?"
                        % o)
            except gof.InconsistencyError:
                #this output is already impossible to destroy. no guard necessary
                pass
    def add_requirements(self, env):
        # attach the DestroyHandler feature to the env so it stays active for
        # all subsequent (inplace) optimizations
        super(AddDestroyHandler, self).add_requirements(env)
        env.extend(gof.DestroyHandler())
......
......@@ -21,7 +21,7 @@ class ProfileMode(Mode):
op_time, op_cimpl, op_call, compile_time))
def __getstate__(self):
print "__getstate__",self.provided_linker,self.provided_optimizer
#print "__getstate__",self.provided_linker,self.provided_optimizer
return (self.provided_linker, self.provided_optimizer, self.local_time,
self.apply_time, self.apply_call,
self.op_time, self.op_cimpl, self.op_call, self.compile_time)
......@@ -255,7 +255,8 @@ def atexit_print_default_profile_mode():
THEANO_DEFAULT_MODE=PROFILE_MODE
"""
prof_mode=predefined_modes["PROFILE_MODE"]
if prof_mode.local_time[0]>0: prof_mode.print_summary()
if prof_mode.local_time[0]>0:
prof_mode.print_summary()
#Register atexit_print_default_profile_mode to have the summary of the
#predefined mode PROFILE_MODE if it is used printed when the program terminate.
......
......@@ -737,18 +737,19 @@ def test_pickle_aliased_memory():
m.x[0,0] = 3.14
assert m.y[0,0] == 3.14
import StringIO
import StringIO, logging
sio = StringIO.StringIO()
handler = logging.StreamHandler(sio)
logging.getLogger('theano.compile.function_module').addHandler(handler)
try:
m.f.pickle_aliased_memory_strategy = 'warn'
m.g.pickle_aliased_memory_strategy = 'warn'
m_dup = cPickle.loads(cPickle.dumps(m))
assert sio.getvalue().startswith('aliased relat')
finally:
logging.getLogger('theano.compile.function_module').removeHandler(handler)
old_stderr = sys.stderr
sys.stderr = sio
m.f.pickle_aliased_memory_strategy = 'warn'
m.g.pickle_aliased_memory_strategy = 'warn'
m_dup = cPickle.loads(cPickle.dumps(m))
sys.stderr = old_stderr
assert sio.getvalue().startswith('WARNING: aliased relat')
try:
m.f.pickle_aliased_memory_strategy = 'raise'
m.g.pickle_aliased_memory_strategy = 'raise'
......
......@@ -585,9 +585,12 @@ class CLinker(link.Linker):
"""
ret = ["-O3"]
# this is the param the -ffast-math activate. I put the explicitly as FillMissing must disable some of them. Putting -ffast-math would make it disable all other parameter at the same time.
ret += ["-fno-math-errno", "-funsafe-math-optimizations",
"-fno-signaling-nans", "-fcx-limited-range",
"-fno-rounding-math", "-ffinite-math-only",
ret += ["-fno-math-errno",
#"-funsafe-math-optimizations",
#"-fno-signaling-nans",
#"-fcx-limited-range",
#"-fno-rounding-math",
#"-ffinite-math-only",
"-Wno-unused-label",#the current code generate label event if they are not used. Could use gcc attribute for those label only
"-Wno-unused-variable",#idem as the precedent
"-Wno-write-strings",#generated by our code generator...
......@@ -758,38 +761,61 @@ class CLinker(link.Linker):
return res
def cmodule_key(self):
"""Return a complete hashable signature of the module we compiled
"""Return a complete hashable signature of the module we compiled.
This function must have the property that no two programs that compute different things
yield the same key.
The key returned by this function is of the form (version, signature)
The signature has the following form:
{{{
'CLinker.cmodule_key', compilation args, libraries,
op0, (input0.type, input1.type, input0 pos, input1 pos)
op1, (...)
(op0, input_signature0, output_signature0),
(op1, input_signature1, output_signature1),
...
opK, (...)
}}}
(opK, input_signatureK, output_signatureK),
}}}
The signature is a tuple, some elements of which are sub-tuples.
The signature is a tuple of tuples.
The outer tuple has a brief header, followed by elements for every node in the
topological ordering of `self.env`.
If the Op of any Apply in the Env does not have c_code_cache_ok()==True, then this
function raises a KeyError exception.
The outer tuple has one element for every node in the topological ordering of
`self.env`.
Input Signature
---------------
The inner tuple has one element for the op used at that node, and one element for the
inputs to that node. The inputs are identified by their type and "graph position"
Each input signature is a tuple with an element for each input to the corresponding
Apply node.
Each element identifies the type of the node input, and the nature of that input in the
graph.
The graph position of a typical variable is encoded by integer pairs ``(a,b)``:
The nature of a typical variable is encoded by integer pairs ``((a,b),c)``:
``a`` is the topological position of the input's owner (-1 for graph inputs),
``b`` is the index of the variable in the owner's output list.
``c`` is a flag indicating whether the variable is in the no_recycling set.
If a variable is also a graph output, then its position in the outputs list is also
bundled with this tuple (after the b).
The graph position of a Constant instance is defined as its signature, together with
The nature of a Constant instance is defined as its signature, together with
two integers: the topological position of the first Apply using that Constant instance,
and the lowest index into that Apply's inputs that refers to that Constant. (These two
integers are a surrogate for the id() of the Constant. The integers are important
because merge-able constants have the same signature, but require separate containers
in C code.)
in C code.) The membership in no_recycling is also included in the signature.
If the Op of any Apply in the Env does not have c_code_cache_ok()==True, then this
function raises a KeyError exception.
Output Signature
----------------
The outputs of a node are entirely determined by the node's Op and the nature of the
inputs, but the set of outputs that may be re-used by the computation (the elements of
self.no_recycling) can affect the code that is generated.
The format of each Op's output signature is simply a list of booleans, indicating
whether each output is in the no_recycling set.
"""
return self.cmodule_key_(self.env, self.no_recycling,
......@@ -797,68 +823,81 @@ class CLinker(link.Linker):
libraries=self.libraries()
)
@staticmethod
def cmodule_key_(env, no_recycling, compile_args=None, libraries=None):
def cmodule_key_(env, no_recycling, compile_args=[], libraries=[]):
"""
Do the actual computation of cmodule_key in a static method
to allow it to be reused in scalar.Composite.__eq__
"""
order = list(env.toposort())
env_computed_set = set()
env_inputs_dict = dict((i, [-1, pos]) for pos, i in enumerate(env.inputs))
#set of variables that have been computed by nodes we have
# seen 'so far' in the loop below
env_computed_set = set()
env_inputs_dict = dict((i, (-1, pos)) for pos, i in enumerate(env.inputs))
constant_ids = dict()
op_pos = {} # Apply -> topological position
rval = ['CLinker.cmodule_key'] # will be cast to tuple on return
if compile_args is not None: rval.append(tuple(compile_args))
if libraries is not None: rval.append(tuple(libraries))
version = []
# assert that every input to every node is one of'
# - an env input
# - an output from a node in the Env
# - a Constant
# first we put the header, compile_args, library names into the signature
sig = ['CLinker.cmodule_key'] # will be cast to tuple on return
if compile_args is not None: sig.append(tuple(compile_args))
if libraries is not None: sig.append(tuple(libraries))
def in_sig(i, topological_pos, i_idx):
# assert that every input to every node is one of'
# - an env input
# - an output from a node in the Env
# - a Constant
# It is important that a variable (i)
# yield a 'position' that reflects its role in code_gen()
def graphpos(i, topological_pos, i_idx):
rval = []
# It is important that a variable (i)
# yield a 'position' that reflects its role in code_gen()
if isinstance(i, graph.Constant): #orphans
if id(i) not in constant_ids:
constant_ids[id(i)] = [i.signature(), topological_pos, i_idx]
rval += constant_ids[id(i)]
constant_ids[id(i)] = (i.signature(), topological_pos, i_idx)
isig = constant_ids[id(i)]
#print 'SIGNATURE', i.signature()
#return i.signature()
elif i in env_inputs_dict: #inputs
rval += env_inputs_dict[i]
isig = env_inputs_dict[i]
else:
if i.owner is None:
assert all( all(out is not None for out in o.outputs) for o in order)
assert all( input.owner is None for input in env.inputs)
raise Exception('what is this?', (i, type(i), i.clients, env))
if i in env.outputs:
rval += [op_pos[i.owner], # outputs
isig = (op_pos[i.owner], # outputs
i.owner.outputs.index(i),
env.outputs.index(i)]
env.outputs.index(i))
else:
rval += [op_pos[i.owner], i.owner.outputs.index(i)] # temps
assert rval
rval.append(i in no_recycling)
return tuple(rval)
isig = (op_pos[i.owner], i.owner.outputs.index(i)) # temps
return (isig, i in no_recycling)
version = []
for node_pos, node in enumerate(order):
version.append(node.op.c_code_cache_version_apply(node))
for i in node.inputs:
version.append(i.type.c_code_cache_version())
for o in node.outputs:
version.append(o.type.c_code_cache_version())
rval.append((node.op, tuple((i.type, graphpos(i, node_pos, ipos))
for ipos,i in enumerate(node.inputs))))
#add the signature for this node
sig.append((
node.op,
tuple((i.type, in_sig(i, node_pos, ipos))
for ipos,i in enumerate(node.inputs)),
tuple(o in no_recycling for o in node.outputs)))
op_pos[node] = node_pos
env_computed_set.update(node.outputs)
#crystalize the signature and version
sig = tuple(sig)
version = tuple(version)
for v in version:
if not v: #one of the ops or types here is unversioned
return ((), tuple(rval))
return tuple(version), tuple(rval)
if not v:
# one of the ops or types here is unversioned,
# so this env is entirely unversioned
return ((), sig)
return version, sig
def compile_cmodule(self, location=None):
"""
......
......@@ -257,9 +257,13 @@ class ModuleCache(object):
warning(("The __eq__ and __hash__ functions are broken for some element"
" in the following two keys. The cache mechanism will say that"
" graphs like this need recompiling, when they could have been"
" retrieved):"))
" retrieved:"))
warning("Key 0:", k0)
warning("Entry 0:", self.entry_from_key[k0])
warning("hash 0:", hash(k0))
warning("Key 1:", k1)
warning("Entry 1:", self.entry_from_key[k1])
warning("hash 1:", hash(k1))
def refresh(self):
"""Update self.entry_from_key by walking the cache directory structure.
......
......@@ -260,15 +260,15 @@ def streamline(env, thunks, order, post_thunk_old_storage = None, no_recycling =
(len(thunks), len(post_thunk_old_storage)))
def streamline_default_f():
for x in no_recycling:
x[0] = None
try:
for thunk, node, old_storage in zip(thunks, order, post_thunk_old_storage):
thunk()
for old_s in old_storage:
old_s[0] = None
except:
raise_with_op(node)
for x in no_recycling:
x[0] = None
try:
for thunk, node, old_storage in zip(thunks, order, post_thunk_old_storage):
thunk()
for old_s in old_storage:
old_s[0] = None
except:
raise_with_op(node)
f = streamline_default_f
elif nice_errors:
thunk_node_list = zip(thunks, order)
......
......@@ -787,19 +787,17 @@ class Pow(BinaryScalarOp):
return "%(z)s = pow(%(x)s, %(y)s);" % locals()
def grad(self, (x, y), (gz, )):
if x.type in grad_types:
first_part = gz * y * x**(y - 1)
first_part = gz * y * x**(y - 1)
else:
first_part = None
first_part = None
if y.type in grad_types:
second_part = gz * log(x) * x**y
second_part = gz * log(x) * x**y
else:
second_part = None
second_part = None
return (first_part, second_part)
#return (gz * y * x**(y - 1) if x.type in grad_types else None,
# gz * log(x) * x**y if y.type in grad_types else None)
pow = Pow(upcast_out, name = 'pow')
class Clip(ScalarOp):
......
......@@ -180,7 +180,24 @@ def constant_or_value(x, rtype, name=None, ndim=None, dtype=None):
assert len(bcastable) == ndim
try:
return rtype(TensorType(dtype = x_.dtype, broadcastable = bcastable), x_, name=name)
if rtype is TensorConstant:
if 0:
# put the shape into the type
# This is disabled because if a tensor has shape, then the following fails:
# theano.lvector == as_tensor_variable([0,1]).type
# I think the solution is that we should implement something more like
# compatability instead of equality in our Type comparisons... but we're not
# there yet.
x_shape = x_.shape
else:
x_shape = None
return rtype(
TensorType(dtype = x_.dtype, broadcastable = bcastable, shape=x_shape),
x_, name=name)
else:
# leave the shape out of the type
return rtype(TensorType(dtype = x_.dtype, broadcastable = bcastable), x_, name=name)
except:
raise TypeError("Could not convert %s to TensorType" % x, type(x))
......@@ -236,7 +253,7 @@ class TensorType(Type):
When this is True, strict filtering rejects data containing NaN or Inf entries. (Used in `DebugMode`)
"""
def __init__(self, dtype, broadcastable, name = None):
def __init__(self, dtype, broadcastable, name = None, shape=None):
"""Initialize self.dtype and self.broadcastable.
:Parameters:
......@@ -256,6 +273,20 @@ class TensorType(Type):
self.broadcastable = tuple(broadcastable)
self.dtype_specs() # error checking is done there
self.name = name
if shape is None:
self.shape = tuple((1 if b else None) for b in self.broadcastable)
else:
self.shape = tuple(shape)
if len(self.shape) != len(self.broadcastable):
raise ValueError('shape and broadcastable must have equal lengths', (self.shape,
self.broadcastable))
def __setstate__(self, dct):
self.__dict__.update(dct)
#add shape when unpickling old pickled things
if 'shape' not in dct:
self.shape = tuple(1 if b else None for b in self.broadcastable)
def filter(self, data, strict = False):
"""Convert `data` to something which can be associated to a `TensorVariable`.
......@@ -273,6 +304,11 @@ class TensorType(Type):
raise TypeError("%s expected a ndarray object with %s dimensions (got %s)." % (self, self.ndim, data.ndim))
if self.filter_checks_isfinite and (not numpy.all(numpy.isfinite(data))):
raise TypeError("non-finite elements not allowed")
for si, di in zip(self.shape, data.shape):
if not (si is None or si == di):
raise TypeError('%s requires ndarray with shape matching %s (got %s)'%(
self, self.shape, data.shape))
return data
else:
data = numpy.asarray(data, dtype = self.dtype)
......@@ -311,7 +347,9 @@ class TensorType(Type):
def __eq__(self, other):
"""Compare True iff other is the same kind of TensorType"""
return type(self) == type(other) and other.dtype == self.dtype and other.broadcastable == self.broadcastable
return type(self) == type(other) and other.dtype == self.dtype \
and other.broadcastable == self.broadcastable \
and other.shape == self.shape
@staticmethod
def values_eq(a, b):
......@@ -382,7 +420,7 @@ class TensorType(Type):
def __hash__(self):
"""Hash equal for same kinds of TensorType"""
return hashtype(self) ^ hash(self.dtype) ^ hash(self.broadcastable)
return hashtype(self) ^ hash(self.dtype) ^ hash(self.broadcastable) ^ hash(self.shape)
ndim = property(lambda self: len(self.broadcastable), doc = "number of dimensions")
"""Number of dimensions
......@@ -405,6 +443,8 @@ class TensorType(Type):
def __str__(self):
if self.name:
return self.name
elif not all(None == si for si in self.shape):
return 'TensorType{%s, %s}' % (self.dtype, self.shape)
else:
b = self.broadcastable
named_broadcastable = {(): 'scalar',
......@@ -782,7 +822,6 @@ class _tensor_py_operators:
dtype = property(lambda self: self.type.dtype)
""" The dtype of this tensor. """
#extra pseudo-operator symbols
def __dot__(left, right): return dot(left, right)
def __rdot__(right, left): return dot(left, right)
......@@ -806,6 +845,14 @@ class _tensor_py_operators:
"""See `theano.tensor.var`"""
return var(self, axis)
def min(self, axis=None):
"""See `theano.tensor.min`"""
return min(self, axis)
def max(self, axis=None):
"""See `theano.tensor.max`"""
return max(self, axis)
#TO TRUMP NUMPY OPERATORS
__array_priority__ = 1000
......@@ -1051,11 +1098,25 @@ class Shape(Op):
out[0] = numpy.asarray(x.shape, dtype = 'int64')
def grad(self, (x,), (gz,)):
return [None]
@_redefine_asRoutine(Shape())
_shape = Shape()
@constructor
def shape(a):
pass
"""Return the shape tuple of a TensorType Variable, it may be either symbolic or nonsymbolic.
pprint.assign(shape, printing.MemberPrinter('shape'))
If the shape of the expression is not known at graph-construction time, then a symbolic
lvector will be returned, corresponding to the actual shape at graph-execution time.
"""
va = as_tensor_variable(a)
#print 'HERE', va, va.type
if None in va.type.shape:
# Some shape components are unknown at this time
return _shape(va)
else:
# all shape components are known at compile time, so we return
# a tuple directly. This tuple is like the numpy.ndarray.shape tuple.
return va.type.shape
pprint.assign(_shape, printing.MemberPrinter('shape'))
class MaxAndArgmax(Op):
......@@ -2352,7 +2413,7 @@ def get_vector_length(v):
return join.vec_length(v)
except ValueError:
pass
if v.owner and v.owner.op == shape:
if v.owner and v.owner.op == _shape:
return v.owner.inputs[0].type.ndim
raise ValueError("length not known")
......@@ -2806,6 +2867,11 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False):
if not isinstance(cost, TensorVariable):
raise TypeError('In tensor.grad(), cost argument should be a TensorVariable.', cost)
if cost.type.ndim:
_warn('the passing of a non-scalar cost to theano.tensor.grad() is deprecated.'
' Use the lower-level '
'theano.gradient if you really want to do this')
if g_cost is None:
g_cost = ones_like(cost)
inputs = gof.graph.inputs([cost])
......
......@@ -18,6 +18,7 @@ from theano import compile #to register the optimizer built by this file
from theano.tensor.blas_headers import cblas_header_text, blas_header_text
_logger = logging.getLogger('theano.tensor.blas')
_logger.setLevel(logging.INFO)
def debug(*msg):
    """Log the space-joined string forms of *msg* at DEBUG level."""
    _logger.debug(' '.join(str(m) for m in msg))


def info(*msg):
    """Log the space-joined string forms of *msg* at INFO level."""
    _logger.info(' '.join(str(m) for m in msg))


def warn(*msg):
    """Log the space-joined string forms of *msg* at WARNING level."""
    _logger.warn(' '.join(str(m) for m in msg))
......@@ -604,10 +605,15 @@ class Dot22(GemmRelated):
This is a specialization of the more general Dot()
"""
def make_node(self, x, y):
assert _is_real_matrix(x)
assert y.type == x.type #makes sure y is a matrix
if not _is_real_matrix(x):
raise TypeError(x)
if not _is_real_matrix(x):
raise TypeError(y)
if y.type.dtype != x.type.dtype:
raise TypeError('dtype mismatch to Dot22')
out_shape = (x.type.shape[0], y.type.shape[1])
bz = [False, False]
outputs = [T.tensor(x.type.dtype, bz)]
outputs = [T.tensor(x.type.dtype, bz, shape=out_shape)]
return Apply(self, [x,y], outputs)
def perform(self, node, (x, y), (z, )):
......@@ -660,10 +666,10 @@ _dot22 = Dot22()
def local_dot_to_dot22(node):
if node.op == T.dot:
x,y = node.inputs
if _is_real_matrix(x) and y.type == x.type:
if _is_real_matrix(x) and _is_real_matrix(y) and y.type.dtype == x.type.dtype:
return [_dot22(*node.inputs)]
else:
info('Not optimizing dot with inputs', x, y)
info('Not optimizing dot with inputs', x, y, x.type, y.type)
else:
return False
register_specialize(local_dot_to_dot22)
......
......@@ -142,9 +142,6 @@ class SoftmaxWithBias(gof.Op):
return ['<iostream>','<cmath>']
@staticmethod
def c_code_cache_version():
return (4,)
@staticmethod
def c_code_template():
# this implementation was lifted from
# /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx
......@@ -180,7 +177,7 @@ class SoftmaxWithBias(gof.Op):
}
if ((%(x)s->dimensions[1] != %(b)s->dimensions[0]))
{
PyErr_Format(PyExc_ValueError, "number of columns in x (%%i) does not match length of b (%%i)",
PyErr_Format(PyExc_ValueError, "number of columns in x (%%zi) does not match length of b (%%zi)",
%(x)s->dimensions[1], %(b)s->dimensions[0]);
%(fail)s;
}
......@@ -236,20 +233,6 @@ class SoftmaxWithBias(gof.Op):
sum += sm_ij;
sm_i[j * Ssm] = sm_ij;
}
//std::cout << "\\n";
if (std::isinf(sum))
{
//that was our best...
PyErr_SetString(PyExc_ValueError, "softmax is impossible (inf)!");
%(fail)s;
}
if (0.0 == sum)
{
//that was our best...
PyErr_SetString(PyExc_ValueError, "softmax is impossible (zero)!");
%(fail)s;
}
//cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
double sum_inv = 1.0 / sum;
......@@ -271,6 +254,10 @@ class SoftmaxWithBias(gof.Op):
code_template = ''.join(self.c_code_template())
return code_template % dict(locals(), **sub)
@staticmethod
def c_code_cache_version():
return (5,)
softmax_with_bias = SoftmaxWithBias()
......
......@@ -196,20 +196,20 @@ def local_shape_lift_sum(node):
register_canonicalize(local_shape_lift_sum, 'shape_lift')
@gof.local_optimizer([T.shape, T.dot])
@gof.local_optimizer([T._shape, T.dot])
def local_shape_lift_dot(node):
"""
shape(dot(a, b)) -> [shape(a)[0], shape(b)[1]]
"""
if not opt.check_chain(node, T.shape, T.dot):
if not opt.check_chain(node, T._shape, T.dot):
return False
a, b = node.inputs[0].owner.inputs
if a.type.ndim == 2 and b.type.ndim == 2:
return T.make_lvector.make_node(T.shape(a)[0], T.shape(b)[1]).outputs
return T.make_lvector.make_node(T._shape(a)[0], T._shape(b)[1]).outputs
elif a.type.ndim == 1 and b.type.ndim == 2:
return T.make_lvector.make_node(T.shape(b)[1]).outputs
return T.make_lvector.make_node(T._shape(b)[1]).outputs
elif a.type.ndim == 2 and b.type.ndim == 1:
return T.make_lvector.make_node(T.shape(a)[0]).outputs
return T.make_lvector.make_node(T._shape(a)[0]).outputs
elif a.type.ndim == 1 and b.type.ndim == 1:
return T.make_lvector.make_node().outputs
else:
......
......@@ -163,7 +163,8 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(env)
assert env.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert str(env.outputs[0].owner.op) == 'OutputGuard'
assert env.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
def test_softmax_optimizations_w_bias(self):
x = tensor.matrix('x')
......@@ -186,9 +187,10 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(env)
assert len(env.toposort()) == 1
assert len(env.toposort()) == 2
assert env.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert str(env.outputs[0].owner.op) == 'OutputGuard'
assert env.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
def test_softmax_grad_optimizations(self):
......@@ -249,7 +251,7 @@ def test_argmax_pushdown():
#print 'AFTER'
#for node in env.toposort():
#print node.op
assert len(env.toposort()) == 1
assert len(env.toposort()) == 2 # an output_guard is second
assert env.toposort()[0].op == tensor._max_and_argmax
def test_argmax_pushdown_bias():
......@@ -263,10 +265,14 @@ def test_argmax_pushdown_bias():
theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(env)
#print 'AFTER'
#for node in env.toposort():
#print node.op
assert len(env.toposort()) == 3
print 'AFTER'
for node in env.toposort():
print node.op
assert len(env.toposort()) == 4
assert isinstance(env.toposort()[0].op, tensor.DimShuffle)
assert isinstance(env.toposort()[1].op, tensor.Elemwise)
assert isinstance(env.toposort()[2].op, tensor.MaxAndArgmax)
assert str(env.toposort()[3].op) == 'OutputGuard'
def test_asymptotic_32():
"""
......
......@@ -246,16 +246,20 @@ class test_canonize(unittest.TestCase):
#We must be sure that the Canonizer is working, but that we don't have other
# optimisation that could hide bug in the Canonizer as local_elemwise_fusion
mode=compile.mode.predefined_modes[compile.mode.default_mode]
mode._optimizer=gof.Query(["canonicalize"])
mode._optimizer=mode._optimizer.excluding('local_elemwise_fusion')
for id, [g, sym_inputs, val_inputs, nb_elemwise, out_dtype] in enumerate(cases):
f = compile.function(list(sym_inputs), g,
#we need the optimisation enabled, debug do this.
mode=mode)
out = f(*val_inputs)
assert(len(f.maker.env.toposort())==nb_elemwise)
assert(out_dtype==out.dtype)
old_optimizer = mode._optimizer
try:
mode._optimizer=gof.Query(["canonicalize"])
mode._optimizer=mode._optimizer.excluding('local_elemwise_fusion')
for id, [g, sym_inputs, val_inputs, nb_elemwise, out_dtype] in enumerate(cases):
f = compile.function(list(sym_inputs), g,
#we need the optimisation enabled, debug do this.
mode=mode)
out = f(*val_inputs)
assert(len(f.maker.env.toposort())==nb_elemwise)
assert(out_dtype==out.dtype)
finally:
mode._optimizer = old_optimizer
def test_elemwise_multiple_inputs_optimisation2(self):
"""
......@@ -367,130 +371,134 @@ class test_canonize(unittest.TestCase):
#We must be sure that the Canonizer is working, but that we don't have other
# optimisation that could hide bug in the Canonizer as local_elemwise_fusion
mode=compile.mode.predefined_modes[compile.mode.default_mode]
mode._optimizer=gof.Query(["canonicalize"])
mode._optimizer=mode._optimizer.excluding('local_elemwise_fusion')
#test x / x -> 1
for id, (g, sym_inputs, val_inputs, out_dtype) in enumerate([(fx/fx,[fx],[fxv],'float32'),
(dx/dx,[dx],[dxv],'float64'),
(fv/fv,[fv],[fvv],'float32'),
(dv/dv,[dv],[dvv],'float64'),
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert (out==numpy.ones(shp, dtype=out_dtype)).all()
topo=f.maker.env.toposort()
assert len(topo)==1
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,theano.scalar.basic.Second)
assert len(topo[0].inputs)==2
assert(out_dtype==out.dtype)
#test (x * y) / x -> y
for id,(g, sym_inputs, val_inputs, nb_elemwise, out_dtype) in enumerate([
((dx*dy)/dx,[dx,dy],[dxv,dyv],0,'float64'),
((fx*fy)/fx,[fx,fy],[fxv,fyv],0,'float32'),
((dv*dy)/dv,[dv,dy],[dvv,dyv],0,'float64'),
((fv*fy)/fv,[fv,fy],[fvv,fyv],0,'float32'),
#must broadcast as their is a dimshuffle in the computation
((dx*dv)/dx,[dx,dv],[dxv,dvv],1,'float64'),
#topo: [Elemwise{second,no_inplace}(x, <TensorType(float64, row)>)]
((fx*fv)/fx,[fx,fv],[fxv,fvv],1,'float32')
#topo: [Elemwise{second,no_inplace}(x, <TensorType(float32, row)>)]
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,val_inputs[1])
topo=f.maker.env.toposort()
assert len(topo)==nb_elemwise
assert(out_dtype==out.dtype)
#test x / y / x -> 1 / y
for id,(g, sym_inputs, val_inputs, nb_elemwise, out_dtype) in enumerate([
((dx/dy)/dx,[dx,dy],[dxv,dyv],1,'float64'),
((fx/fy)/fx,[fx,fy],[fxv,fyv],1,'float32'),
((dv/dy)/dv,[dv,dy],[dvv,dyv],1,'float64'),
((fv/fy)/fv,[fv,fy],[fvv,fyv],1,'float32'),
#must broadcast as their is a dimshuffle in the computation
((dx/dv)/dx,[dx,dv],[dxv,dvv],2,'float64'),
#topo: [Elemwise{inv,no_inplace}(<TensorType(float64, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0)]
((fx/fv)/fx,[fx,fv],[fxv,fvv],2,'float32'),
#topo:[Elemwise{inv,no_inplace}(<TensorType(float32, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0)]
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,(1/val_inputs[1]))
topo=f.maker.env.toposort()
assert len(topo)==nb_elemwise
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,(theano.scalar.basic.Inv, theano.scalar.basic.TrueDiv))
assert(out_dtype==out.dtype)
#test (a / b) * (b / c) * (c / d) -> a / d
for id,(g, sym_inputs, val_inputs, out_dtype) in enumerate([
((dx / dy) * (dy / dz) * (dz / dw),[dx,dy,dz,dw],[dxv,dyv,dzv,dwv],'float64'),
((fx / fy) * (fy / fz) * (fz / fw),[fx,fy,fz,fw],[fxv,fyv,fzv,fwv],'float32'),
((dv / dy) * (dy / dz) * (dz / dw),[dv,dy,dz,dw],[dvv,dyv,dzv,dwv],'float64'),
((fv / fy) * (fy / fz) * (fz / fw),[fv,fy,fz,fw],[fvv,fyv,fzv,fwv],'float32'),
((dx / dv) * (dv / dz) * (dz / dw),[dx,dv,dz,dw],[dxv,dvv,dzv,dwv],'float64'),
((fx / fv) * (fv / fz) * (fz / fw),[fx,fv,fz,fw],[fxv,fvv,fzv,fwv],'float32'),
((dx / dy) * (dy / dv) * (dv / dw),[dx,dy,dv,dw],[dxv,dyv,dvv,dwv],'float64'),
((fx / fy) * (fy / fv) * (fv / fw),[fx,fy,fv,fw],[fxv,fyv,fvv,fwv],'float32'),
((dx / dy) * (dy / dz) * (dz / dv),[dx,dy,dz,dv],[dxv,dyv,dzv,dvv],'float64'),
((fx / fy) * (fy / fz) * (fz / fv),[fx,fy,fz,fv],[fxv,fyv,fzv,fvv],'float32'),
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,(val_inputs[0]/val_inputs[3]))
topo=f.maker.env.toposort()
assert len(topo)==1
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,theano.scalar.basic.TrueDiv)
assert len(topo[0].inputs)==2
assert(out_dtype==out.dtype)
#test (2.0 * x) / (4.0 * y) -> (0.5 * x) / y
for id,(g, sym_inputs, val_inputs, out_dtype) in enumerate([
(((2.0*dx)/(4.0*dy)),[dx,dy],[dxv,dyv],'float64'),
(((2.0*fx)/(4.0*fy)),[fx,fy],[fxv,fyv],'float32'),
(((2.0*dv)/(4.0*dy)),[dv,dy],[dvv,dyv],'float64'),
(((2.0*fv)/(4.0*fy)),[fv,fy],[fvv,fyv],'float32'),
(((2.0*dx)/(4.0*dv)),[dx,dv],[dxv,dvv],'float64'),
(((2.0*fx)/(4.0*fv)),[fx,fv],[fxv,fvv],'float32'),
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,(0.5*val_inputs[0]/val_inputs[1]))
topo=f.maker.env.toposort()
assert len(topo)==2
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,theano.scalar.basic.Mul)
assert len(topo[0].inputs)==2
assert isinstance(topo[1].op,(T.Elemwise,))
assert isinstance(topo[1].op.scalar_op,theano.scalar.basic.TrueDiv)
assert len(topo[1].inputs)==2
assert(out_dtype==out.dtype)
#test 2 * x / 2 -> x
for id,(g, sym_inputs, val_inputs, out_dtype) in enumerate([
((2*dx)/2,[dx],[dxv],'float64'),
((2*fx)/2,[fx],[fxv],'float32'),
((2*dv)/2,[dv],[dvv],'float64'),
((2*fv)/2,[fv],[fvv],'float32'),
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,val_inputs[0])
topo=f.maker.env.toposort()
assert len(topo)==0
assert(out_dtype==out.dtype)
old_optimizer = mode._optimizer
try:
mode._optimizer=gof.Query(["canonicalize"])
mode._optimizer=mode._optimizer.excluding('local_elemwise_fusion')
#test x / x -> 1
for id, (g, sym_inputs, val_inputs, out_dtype) in enumerate([(fx/fx,[fx],[fxv],'float32'),
(dx/dx,[dx],[dxv],'float64'),
(fv/fv,[fv],[fvv],'float32'),
(dv/dv,[dv],[dvv],'float64'),
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert (out==numpy.ones(shp, dtype=out_dtype)).all()
topo=f.maker.env.toposort()
assert len(topo)==1
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,theano.scalar.basic.Second)
assert len(topo[0].inputs)==2
assert(out_dtype==out.dtype)
#test (x * y) / x -> y
for id,(g, sym_inputs, val_inputs, nb_elemwise, out_dtype) in enumerate([
((dx*dy)/dx,[dx,dy],[dxv,dyv],0,'float64'),
((fx*fy)/fx,[fx,fy],[fxv,fyv],0,'float32'),
((dv*dy)/dv,[dv,dy],[dvv,dyv],0,'float64'),
((fv*fy)/fv,[fv,fy],[fvv,fyv],0,'float32'),
#must broadcast as their is a dimshuffle in the computation
((dx*dv)/dx,[dx,dv],[dxv,dvv],1,'float64'),
#topo: [Elemwise{second,no_inplace}(x, <TensorType(float64, row)>)]
((fx*fv)/fx,[fx,fv],[fxv,fvv],1,'float32')
#topo: [Elemwise{second,no_inplace}(x, <TensorType(float32, row)>)]
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,val_inputs[1])
topo=f.maker.env.toposort()
assert len(topo)==nb_elemwise
assert(out_dtype==out.dtype)
#test x / y / x -> 1 / y
for id,(g, sym_inputs, val_inputs, nb_elemwise, out_dtype) in enumerate([
((dx/dy)/dx,[dx,dy],[dxv,dyv],1,'float64'),
((fx/fy)/fx,[fx,fy],[fxv,fyv],1,'float32'),
((dv/dy)/dv,[dv,dy],[dvv,dyv],1,'float64'),
((fv/fy)/fv,[fv,fy],[fvv,fyv],1,'float32'),
#must broadcast as their is a dimshuffle in the computation
((dx/dv)/dx,[dx,dv],[dxv,dvv],2,'float64'),
#topo: [Elemwise{inv,no_inplace}(<TensorType(float64, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0)]
((fx/fv)/fx,[fx,fv],[fxv,fvv],2,'float32'),
#topo:[Elemwise{inv,no_inplace}(<TensorType(float32, row)>), Elemwise{second,no_inplace}(x, Elemwise{inv,no_inplace}.0)]
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,(1/val_inputs[1]))
topo=f.maker.env.toposort()
assert len(topo)==nb_elemwise
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,(theano.scalar.basic.Inv, theano.scalar.basic.TrueDiv))
assert(out_dtype==out.dtype)
#test (a / b) * (b / c) * (c / d) -> a / d
for id,(g, sym_inputs, val_inputs, out_dtype) in enumerate([
((dx / dy) * (dy / dz) * (dz / dw),[dx,dy,dz,dw],[dxv,dyv,dzv,dwv],'float64'),
((fx / fy) * (fy / fz) * (fz / fw),[fx,fy,fz,fw],[fxv,fyv,fzv,fwv],'float32'),
((dv / dy) * (dy / dz) * (dz / dw),[dv,dy,dz,dw],[dvv,dyv,dzv,dwv],'float64'),
((fv / fy) * (fy / fz) * (fz / fw),[fv,fy,fz,fw],[fvv,fyv,fzv,fwv],'float32'),
((dx / dv) * (dv / dz) * (dz / dw),[dx,dv,dz,dw],[dxv,dvv,dzv,dwv],'float64'),
((fx / fv) * (fv / fz) * (fz / fw),[fx,fv,fz,fw],[fxv,fvv,fzv,fwv],'float32'),
((dx / dy) * (dy / dv) * (dv / dw),[dx,dy,dv,dw],[dxv,dyv,dvv,dwv],'float64'),
((fx / fy) * (fy / fv) * (fv / fw),[fx,fy,fv,fw],[fxv,fyv,fvv,fwv],'float32'),
((dx / dy) * (dy / dz) * (dz / dv),[dx,dy,dz,dv],[dxv,dyv,dzv,dvv],'float64'),
((fx / fy) * (fy / fz) * (fz / fv),[fx,fy,fz,fv],[fxv,fyv,fzv,fvv],'float32'),
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,(val_inputs[0]/val_inputs[3]))
topo=f.maker.env.toposort()
assert len(topo)==1
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,theano.scalar.basic.TrueDiv)
assert len(topo[0].inputs)==2
assert(out_dtype==out.dtype)
#test (2.0 * x) / (4.0 * y) -> (0.5 * x) / y
for id,(g, sym_inputs, val_inputs, out_dtype) in enumerate([
(((2.0*dx)/(4.0*dy)),[dx,dy],[dxv,dyv],'float64'),
(((2.0*fx)/(4.0*fy)),[fx,fy],[fxv,fyv],'float32'),
(((2.0*dv)/(4.0*dy)),[dv,dy],[dvv,dyv],'float64'),
(((2.0*fv)/(4.0*fy)),[fv,fy],[fvv,fyv],'float32'),
(((2.0*dx)/(4.0*dv)),[dx,dv],[dxv,dvv],'float64'),
(((2.0*fx)/(4.0*fv)),[fx,fv],[fxv,fvv],'float32'),
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,(0.5*val_inputs[0]/val_inputs[1]))
topo=f.maker.env.toposort()
assert len(topo)==2
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,theano.scalar.basic.Mul)
assert len(topo[0].inputs)==2
assert isinstance(topo[1].op,(T.Elemwise,))
assert isinstance(topo[1].op.scalar_op,theano.scalar.basic.TrueDiv)
assert len(topo[1].inputs)==2
assert(out_dtype==out.dtype)
#test 2 * x / 2 -> x
for id,(g, sym_inputs, val_inputs, out_dtype) in enumerate([
((2*dx)/2,[dx],[dxv],'float64'),
((2*fx)/2,[fx],[fxv],'float32'),
((2*dv)/2,[dv],[dvv],'float64'),
((2*fv)/2,[fv],[fvv],'float32'),
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,val_inputs[0])
topo=f.maker.env.toposort()
assert len(topo)==0
assert(out_dtype==out.dtype)
finally:
mode._optimizer = old_optimizer
def test_multiple_case_that_fail(self):
......@@ -510,43 +518,48 @@ class test_canonize(unittest.TestCase):
#We must be sure that the Canonizer is working, but that we don't have other
# optimisation that could hide bug in the Canonizer as local_elemwise_fusion
mode=compile.mode.predefined_modes[compile.mode.default_mode]
mode._optimizer=gof.Query(["canonicalize"])
mode._optimizer=mode._optimizer.excluding('local_elemwise_fusion')
#test fail!
#test x / y / z -> x / (y * z)
for (g, sym_inputs, val_inputs, out_dtype) in [
((dx/dy)/dz,[dx,dy,dz],[dxv,dyv,dzv],'float64'),
((fx/fy)/fz,[fx,fy,fz],[fxv,fyv,fzv],'float32')
]:
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,val_inputs[0]/val_inputs[1]/val_inputs[2])
topo=f.maker.env.toposort()
print topo
assert len(topo)==2
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,theano.scalar.basic.Inv)
assert len(topo[0].inputs)==1
assert(out_dtype==out.dtype)
#test x / (y / z) -> (x * z) / y
for (g, sym_inputs, val_inputs, out_dtype) in [
(dx/(dy/dz),[dx,dy,dz],[dxv,dyv,dzv],'float64'),
(fx/(fy/fz),[fx,fy,fz],[fxv,fyv,fzv],'float32')
]:
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,val_inputs[0]/(val_inputs[1]/val_inputs[2]))
topo=f.maker.env.toposort()
print topo
assert len(topo)==2
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,theano.scalar.basic.Inv)
assert len(topo[0].inputs)==1
assert(out_dtype==out.dtype)
old_optimizer = mode._optimizer
try:
mode._optimizer=gof.Query(["canonicalize"])
mode._optimizer=mode._optimizer.excluding('local_elemwise_fusion')
#test fail!
#test x / y / z -> x / (y * z)
for (g, sym_inputs, val_inputs, out_dtype) in [
((dx/dy)/dz,[dx,dy,dz],[dxv,dyv,dzv],'float64'),
((fx/fy)/fz,[fx,fy,fz],[fxv,fyv,fzv],'float32')
]:
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,val_inputs[0]/val_inputs[1]/val_inputs[2])
topo=f.maker.env.toposort()
print topo
assert len(topo)==2
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,theano.scalar.basic.Inv)
assert len(topo[0].inputs)==1
assert(out_dtype==out.dtype)
#test x / (y / z) -> (x * z) / y
for (g, sym_inputs, val_inputs, out_dtype) in [
(dx/(dy/dz),[dx,dy,dz],[dxv,dyv,dzv],'float64'),
(fx/(fy/fz),[fx,fy,fz],[fxv,fyv,fzv],'float32')
]:
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
assert numpy.allclose(out,val_inputs[0]/(val_inputs[1]/val_inputs[2]))
topo=f.maker.env.toposort()
print topo
assert len(topo)==2
assert isinstance(topo[0].op,(T.Elemwise,))
assert isinstance(topo[0].op.scalar_op,theano.scalar.basic.Inv)
assert len(topo[0].inputs)==1
assert(out_dtype==out.dtype)
finally:
mode._optimizer = old_optimizer
def test_dont_merge_if_multiple_client(self):
""" test those case take from the comment in Canonizer
......@@ -571,10 +584,16 @@ def test_local_shape_lift_dot():
for y in [fvector, fmatrix]:
i = x()
j = y()
print 'I SHAPE', i.type.shape
print 'J SHAPE', j.type.shape
d = shape(dot(i,j))
g = Env([i,j], [d])
gof.TopoOptimizer(gof.LocalOptGroup(local_shape_lift_dot), order='out_to_in').optimize(g)
assert pprint(g.outputs[0]) == args_to_result[(x,y)]
if x is fvector and y is fvector:
assert d == ()
else:
g = Env([i,j], [d])
gof.TopoOptimizer(gof.LocalOptGroup(local_shape_lift_dot), order='out_to_in').optimize(g)
print pprint(g.outputs[0]), args_to_result[(x,y)]
assert pprint(g.outputs[0]) == args_to_result[(x,y)]
# def test_plusmin(self):
# x, y, z = inputs()
......@@ -982,23 +1001,27 @@ class test_fusion(unittest.TestCase):
#Follow up. Clinker do the same... second cause?
mode2=compile.Mode(linker(), copy.copy(compile.mode.OPT_FAST_RUN))
# mode2=copy.copy(compile.mode.predefined_modes['FAST_RUN'])
mode2._optimizer=mode2._optimizer.excluding('local_elemwise_fusion')
# mode2=compile.Mode(gof.OpWiseCLinker(allow_gc=True), compile.mode.OPT_FAST_COMPILE)
if s is None:
s=slice(0,49)
#s=slice(49,59)
nb_repeat=10
print "test with linker", str(linker)
times1=self.do(mode1, shared_fn, shp, gpu=gpu, nb_repeat=nb_repeat, assert_len_topo=False,slice=s)
times2=self.do(mode2, shared_fn, shp, gpu=gpu, nb_repeat=nb_repeat, assert_len_topo=False,slice=s)
print "times1 FAST_RUN optimisation"
print times1, times1.min(), times1.max(), times1.sum()
print "times2 FAST_RUN optimisation without local_elemwise_fusion"
print times2, times2.min(), times2.max(), times2.sum()
d=times2/times1
# d.sort()
print "times2/times1",d,d.min(), d.max(), d.mean(), d.std()
old_optimizer = mode2._optimizer
try:
mode2._optimizer=mode2._optimizer.excluding('local_elemwise_fusion')
# mode2=compile.Mode(gof.OpWiseCLinker(allow_gc=True), compile.mode.OPT_FAST_COMPILE)
if s is None:
s=slice(0,49)
#s=slice(49,59)
nb_repeat=10
print "test with linker", str(linker)
times1=self.do(mode1, shared_fn, shp, gpu=gpu, nb_repeat=nb_repeat, assert_len_topo=False,slice=s)
times2=self.do(mode2, shared_fn, shp, gpu=gpu, nb_repeat=nb_repeat, assert_len_topo=False,slice=s)
print "times1 FAST_RUN optimisation"
print times1, times1.min(), times1.max(), times1.sum()
print "times2 FAST_RUN optimisation without local_elemwise_fusion"
print times2, times2.min(), times2.max(), times2.sum()
d=times2/times1
# d.sort()
print "times2/times1",d,d.min(), d.max(), d.mean(), d.std()
finally:
mode2._optimizer = old_optimizer
def speed_fusion_gpu(self):
import theano_cuda_ndarray as tcn
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论