Commit 4cf7afb4 authored by Frédéric Bastien

Merge pull request #2952 from abergeron/flake8

Flake8 work
......@@ -15,7 +15,9 @@ class OpFromGraph(gof.Op):
TODO:
- examples for a multi-layer mlp. where?
- __hash__, __eq__ otherwise won't merge, try gof.opt.is_same_graph_with_merge(op1.new_outputs, op2, new_outputs)
- __hash__, __eq__ otherwise won't merge, try
gof.opt.is_same_graph_with_merge(op1.new_outputs, op2,
new_outputs)
- c_code() to remove the double overhead?
- opt to unfold it, work inplace on inputs
- grad() make it support DisconnectedType and the new interface
......@@ -68,7 +70,7 @@ class OpFromGraph(gof.Op):
for i in inputs + outputs:
if not isinstance(i, gof.Variable):
raise TypeError(
'inputs and outputs must be Variable instances', i)
'inputs and outputs must be Variable instances', i)
if 'updates' in kwargs:
raise TypeError('updates are not allowed in kwargs')
......@@ -76,8 +78,6 @@ class OpFromGraph(gof.Op):
# not see them. Otherwise their is problem with the gradient.
self.shared_inputs = [var for var in gof.graph.inputs(outputs)
if isinstance(var, SharedVariable)]
used_inputs = [var for var in gof.graph.inputs(outputs)
if not isinstance(var, gof.Constant)]
shared_vars = [var.type() for var in self.shared_inputs]
new = rebuild_collect_shared(outputs, inputs=inputs + shared_vars,
replace=dict(zip(self.shared_inputs,
......@@ -110,8 +110,8 @@ class OpFromGraph(gof.Op):
def make_node(self, *inputs):
for input, type in zip(inputs, self.input_types):
if not type == input.type:
raise TypeError("Wrong type, expected %s but got %s"
% (type, input.type))
raise TypeError("Wrong type, expected %s but got %s" %
(type, input.type))
return gof.Apply(self,
list(inputs) + self.shared_inputs,
[type() for type in self.output_types])
......@@ -143,9 +143,10 @@ class OpFromGraph(gof.Op):
grad_ops = self.grad_ops
else:
gs = theano.gradient.grad(cost=None,
known_grads=dict(zip(self.new_outputs, output_grads)),
wrt=self.new_inputs,
disconnected_inputs='ignore')
known_grads=dict(zip(self.new_outputs,
output_grads)),
wrt=self.new_inputs,
disconnected_inputs='ignore')
grad_ops = []
for g in gs:
......
......@@ -5,10 +5,12 @@
"""
from __future__ import print_function
__docformat__ = "restructuredtext en"
import copy, sys, copy_reg, gc
import copy
import sys
import copy_reg
import gc
from itertools import izip
import logging
import numpy
......@@ -16,10 +18,9 @@ import theano
from theano import gof
from theano.compat import get_unbound_function, product as itertools_product
from theano.compat.six import StringIO
from theano.gof import (FunctionGraph, graph, utils, link,
from theano.gof import (graph, utils, link,
ops_with_inner_function)
from theano.gof.link import raise_with_op
from theano.gof.cc import CLinker
from theano.configparser import (config, AddConfigVar, BoolParam, IntParam,
StrParam)
from theano.compile.function_module import (
......@@ -29,38 +30,40 @@ from theano.compile.function_module import (
from theano.compile.mode import Mode, register_mode
from theano.compile.ops import OutputGuard
__docformat__ = "restructuredtext en"
AddConfigVar('DebugMode.patience',
"Optimize graph this many times to detect inconsistency",
IntParam(10, lambda i: i > 0),
in_c_key=False)
"Optimize graph this many times to detect inconsistency",
IntParam(10, lambda i: i > 0),
in_c_key=False)
AddConfigVar('DebugMode.check_c',
"Run C implementations where possible",
BoolParam(bool(theano.config.cxx)),
in_c_key=False)
"Run C implementations where possible",
BoolParam(bool(theano.config.cxx)),
in_c_key=False)
AddConfigVar('DebugMode.check_py',
"Run Python implementations where possible",
BoolParam(True),
in_c_key=False)
"Run Python implementations where possible",
BoolParam(True),
in_c_key=False)
AddConfigVar('DebugMode.check_finite',
"True -> complain about NaN/Inf results",
BoolParam(True),
in_c_key=False)
"True -> complain about NaN/Inf results",
BoolParam(True),
in_c_key=False)
AddConfigVar('DebugMode.check_strides',
("Check that Python- and C-produced ndarrays have same strides. "
"On difference: (0) - ignore, (1) warn, or (2) raise error"),
IntParam(1, lambda i: i in (0, 1, 2)),
in_c_key=False)
("Check that Python- and C-produced ndarrays have same strides. "
"On difference: (0) - ignore, (1) warn, or (2) raise error"),
IntParam(1, lambda i: i in (0, 1, 2)),
in_c_key=False)
AddConfigVar('DebugMode.warn_input_not_reused',
("Generate a warning when destroy_map or view_map says that an "
"op works inplace, but the op did not reuse the input for its output."
),
BoolParam(True),
in_c_key=False)
("Generate a warning when destroy_map or view_map says that an "
"op works inplace, but the op did not reuse the input for its "
"output."),
BoolParam(True),
in_c_key=False)
def is_valid_check_preallocated_output_param(param):
......@@ -74,27 +77,26 @@ def is_valid_check_preallocated_output_param(param):
return True
AddConfigVar('DebugMode.check_preallocated_output',
('Test thunks with pre-allocated memory as output storage. '
'This is a list of strings separated by ":". Valid values are: '
'"initial" (initial storage in storage map, happens with Scan),'
'"previous" (previously-returned memory), '
'"c_contiguous", "f_contiguous", '
'"strided" (positive and negative strides), '
'"wrong_size" (larger and smaller dimensions), and '
'"ALL" (all of the above).'),
StrParam('', is_valid=is_valid_check_preallocated_output_param),
in_c_key=False)
('Test thunks with pre-allocated memory as output storage. '
'This is a list of strings separated by ":". Valid values are: '
'"initial" (initial storage in storage map, happens with Scan),'
'"previous" (previously-returned memory), '
'"c_contiguous", "f_contiguous", '
'"strided" (positive and negative strides), '
'"wrong_size" (larger and smaller dimensions), and '
'"ALL" (all of the above).'),
StrParam('', is_valid=is_valid_check_preallocated_output_param),
in_c_key=False)
AddConfigVar('DebugMode.check_preallocated_output_ndim',
('When testing with "strided" preallocated output memory, '
'test all combinations of strides over that number of '
'(inner-most) dimensions. You may want to reduce that number '
'to reduce memory or time usage, but it is advised to keep a '
'minimum of 2.'),
IntParam(4, lambda i: i > 0),
in_c_key=False)
('When testing with "strided" preallocated output memory, '
'test all combinations of strides over that number of '
'(inner-most) dimensions. You may want to reduce that number '
'to reduce memory or time usage, but it is advised to keep a '
'minimum of 2.'),
IntParam(4, lambda i: i > 0),
in_c_key=False)
import logging
_logger = logging.getLogger("theano.compile.debugmode")
......@@ -148,7 +150,7 @@ class BadThunkOutput(DebugModeError):
def __init__(self, r, thunk1, val1, thunk2, val2, inputs_val=()):
"""Initialize members"""
DebugModeError.__init__(self) # to be compatible with python2.4
super(BadThunkOutput, self).__init__()
self.r = r
self.thunk1 = thunk1
self.val1 = val1
......@@ -173,12 +175,14 @@ class BadThunkOutput(DebugModeError):
print(" op :", self.offending_op(), file=sio)
print(" Outputs Type:", self.r.type, file=sio)
print(" Outputs Shape:", getattr(self.val1, 'shape', None), file=sio)
print(" Outputs Strides:", getattr(self.val1, 'strides', None), file=sio)
print(" Inputs Type :", [i.type for i in self.r.owner.inputs], file=sio)
print(" Outputs Strides:", getattr(self.val1, 'strides', None),
file=sio)
print(" Inputs Type :", [i.type for i in self.r.owner.inputs],
file=sio)
print(" Inputs Shape:", [getattr(val, 'shape', None)
for val in self.inputs_val], file=sio)
for val in self.inputs_val], file=sio)
print(" Inputs Strides:", [getattr(val, 'strides', None)
for val in self.inputs_val], file=sio)
for val in self.inputs_val], file=sio)
print(" Bad Variable:", self.r, file=sio)
print(" thunk1 :", self.thunk1, file=sio)
print(" thunk2 :", self.thunk2, file=sio)
......@@ -226,7 +230,7 @@ class BadOptimization(DebugModeError):
def __init__(self, old_r, new_r, old_r_val, new_r_val, reason,
old_graph, new_graph):
"""Initialize members"""
DebugModeError.__init__(self) # to be compatible with python2.4
super(BadOptimization, self).__init__()
self.old_r = old_r
self.new_r = new_r
self.old_r_val = old_r_val
......@@ -244,7 +248,7 @@ class BadOptimization(DebugModeError):
sio = StringIO()
val_str_len_limit = 800
print("BadOptimization Error", super(BadOptimization,
self).__str__(), file=sio)
self).__str__(), file=sio)
print(" Variable: id", id(self.new_r), self.new_r, file=sio)
print(" Op", self.new_r.owner, file=sio)
print(" Value Type:", type(self.new_r_val), file=sio)
......@@ -287,25 +291,21 @@ class BadOptimization(DebugModeError):
ov = numpy.asarray(self.old_r_val)
nv = numpy.asarray(self.new_r_val)
ssio = StringIO()
print(" Max Abs Diff: ", numpy.max(numpy.absolute(nv -
ov)), file=ssio)
print(" Mean Abs Diff: ", numpy.mean(numpy.absolute(nv -
ov)), file=ssio)
print(" Median Abs Diff: ", numpy.median(numpy.absolute(
nv - ov)), file=ssio)
print(" Std Abs Diff: ", numpy.std(numpy.absolute(
nv - ov)), file=ssio)
arg_max_val = numpy.argmax(numpy.absolute(nv - ov))
abs_diff = numpy.absolute(nv - ov)
print(" Max Abs Diff: ", numpy.max(abs_diff), file=ssio)
print(" Mean Abs Diff: ", numpy.mean(abs_diff), file=ssio)
print(" Median Abs Diff: ", numpy.median(abs_diff), file=ssio)
print(" Std Abs Diff: ", numpy.std(abs_diff), file=ssio)
arg_max_val = numpy.argmax(abs_diff)
values_at_max = (nv.flatten()[arg_max_val],
ov.flatten()[arg_max_val])
print(" Value at Max Diff: ", values_at_max, file=ssio)
# N.B. the maximum(..., 1e-8) protects against div by 0 when
# nv == ov == 0
reldiff = (numpy.absolute(nv - ov)
/ numpy.maximum(
numpy.absolute(nv) + numpy.absolute(ov),
1e-8))
reldiff = (abs_diff /
numpy.maximum(numpy.absolute(nv) + numpy.absolute(ov),
1e-8))
print(" Max Rel Diff: ", numpy.max(reldiff), file=ssio)
print(" Mean Rel Diff: ", numpy.mean(reldiff), file=ssio)
print(" Median Rel Diff: ", numpy.median(reldiff), file=ssio)
......@@ -325,8 +325,10 @@ class BadOptimization(DebugModeError):
print(" New Graph:", file=sio)
print(self.new_graph, file=sio)
print("", file=sio)
print("Hint: relax the tolerance by setting tensor.cmp_sloppy=1", file=sio)
print(" or even tensor.cmp_sloppy=2 for less-strict comparison", file=sio)
print("Hint: relax the tolerance by setting tensor.cmp_sloppy=1",
file=sio)
print(" or even tensor.cmp_sloppy=2 for less-strict comparison",
file=sio)
return sio.getvalue()
......@@ -334,8 +336,7 @@ class BadDestroyMap(DebugModeError):
"""Exception: Some perform() or c_code() modified an input that
wasn't in the destroy_map"""
def __init__(self, node, idx, old_val, new_val, perform):
#super(BadDestroyMap, self).__init__()
DebugModeError.__init__(self) # to be compatible with python2.4
super(BadDestroyMap, self).__init__()
self.node = node
self.idx = idx
self.old_val = old_val
......@@ -347,30 +348,42 @@ class BadDestroyMap(DebugModeError):
print(" node:", self.node, file=sio)
print(" perform:", self.perform, file=sio)
print(" node.inputs:", [(str(i), id(i))
for i in self.node.inputs], file=sio)
for i in self.node.inputs], file=sio)
print(" destroy_map:", getattr(self.node.op,
'destroy_map', {}), file=sio)
'destroy_map', {}), file=sio)
print(" changed input idx:", self.idx, file=sio)
print(" changed input type:", self.node.inputs[self.idx].type, file=sio)
print(" changed input type:", self.node.inputs[self.idx].type,
file=sio)
print(" repr (old val):", repr(self.old_val), file=sio)
print(" repr (new val):", repr(self.new_val), file=sio)
try:
npy_old_val = numpy.asarray(self.old_val)
npy_new_val = numpy.asarray(self.new_val)
print(" value dtype (new <space> old):", npy_new_val.dtype, npy_old_val.dtype, file=sio)
print(" value shape (new <space> old):", npy_new_val.shape, npy_old_val.shape, file=sio)
print(" value min (new <space> old):", npy_new_val.min(), npy_old_val.min(), file=sio)
print(" value max (new <space> old):", npy_new_val.max(), npy_old_val.max(), file=sio)
print(" value dtype (new <space> old):", npy_new_val.dtype,
npy_old_val.dtype, file=sio)
print(" value shape (new <space> old):", npy_new_val.shape,
npy_old_val.shape, file=sio)
print(" value min (new <space> old):", npy_new_val.min(),
npy_old_val.min(), file=sio)
print(" value max (new <space> old):", npy_new_val.max(),
npy_old_val.max(), file=sio)
delta = npy_new_val - npy_old_val
print(" value min (new-old):", delta.min(), file=sio)
print(" value max (new-old):", delta.max(), file=sio)
print(" value argmin (new-old):", numpy.unravel_index(delta.argmin(), npy_new_val.shape), file=sio)
print(" value argmax (new-old):", numpy.unravel_index(delta.argmax(), npy_new_val.shape), file=sio)
print(" location of first 10 mismatches:", numpy.transpose(numpy.nonzero(delta))[:10], file=sio)
print(" value argmin (new-old):",
numpy.unravel_index(delta.argmin(), npy_new_val.shape),
file=sio)
print(" value argmax (new-old):",
numpy.unravel_index(delta.argmax(), npy_new_val.shape),
file=sio)
print(" location of first 10 mismatches:",
numpy.transpose(numpy.nonzero(delta))[:10], file=sio)
print("", file=sio)
except Exception as e:
print("(Numpy-hints failed with: %s)" % str(e), file=sio)
print(" Hint: this can also be caused by a deficient values_eq_approx() or __eq__() implementation [which compared input values]", file=sio)
print(" Hint: this can also be caused by a deficient "
"values_eq_approx() or __eq__() implementation "
"[which compared input values]", file=sio)
return sio.getvalue()
......@@ -379,8 +392,7 @@ class BadViewMap(DebugModeError):
that wasn't in the view_map"""
def __init__(self, node, output_idx, out_storage,
in_alias_idx=None, out_alias_idx=None):
#super(BadViewMap, self).__init__()
DebugModeError.__init__(self) # to be compatible with python2.4
super(BadViewMap, self).__init__()
self.node = node
self.output_idx = output_idx
self.out_storage = out_storage
......@@ -391,12 +403,12 @@ class BadViewMap(DebugModeError):
sio = StringIO()
print(" node:", self.node, file=sio)
print(" node.inputs:", [(str(i), id(i))
for i in self.node.inputs], file=sio)
for i in self.node.inputs], file=sio)
print(" node.outputs:", [(str(i), id(i))
for i in self.node.outputs], file=sio)
for i in self.node.outputs], file=sio)
print(" view_map:", getattr(self.node.op, 'view_map', {}), file=sio)
print(" destroy_map:", getattr(self.node.op,
'destroy_map', {}), file=sio)
'destroy_map', {}), file=sio)
print(" aliased output:", self.output_idx, file=sio)
print(" aliased output storage:", self.out_storage, file=sio)
if self.in_alias_idx:
......@@ -425,8 +437,7 @@ class InvalidValueError(DebugModeError):
the Type of that output"""
def __init__(self, r, v, client_node=None, hint='none',
specific_hint='none'):
#super(InvalidValueError, self).__init__()
DebugModeError.__init__(self) # to be compatible with python2.4
super(InvalidValueError, self).__init__()
self.r = r
self.v = v
self.client_node = client_node
......@@ -454,7 +465,8 @@ class InvalidValueError(DebugModeError):
client_node = self.client_node
hint = self.hint
specific_hint = self.specific_hint
context = debugprint(r, prefix=' ', depth=12, file=StringIO()).getvalue()
context = debugprint(r, prefix=' ', depth=12,
file=StringIO()).getvalue()
return """InvalidValueError
type(variable) = %(type_r)s
variable = %(r)s
......@@ -512,7 +524,8 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
and their associated printed ids
:param print_type: whether to print the Variable type after the other infos
:param file: file-like object to which to print
:param print_destroy_map: whether to print the op destroy_map after other info
:param print_destroy_map: whether to print the op destroy_map after
other info
:param print_view_map: whether to print the op view_map after other info
:param order: If not empty will print the index in the toposort.
:param ids: How do we print the identifier of the variable
......@@ -592,23 +605,23 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
already_printed = a in done # get_id_str put it in the dict
id_str = get_id_str(a)
if profile == None or a not in profile.apply_time:
if profile is None or a not in profile.apply_time:
if len(a.outputs) == 1:
print('%s%s %s%s \'%s\' %s %s %s' % (prefix, a.op,
id_str,
type_str,
r_name,
destroy_map_str,
view_map_str,
o), file=file)
id_str,
type_str,
r_name,
destroy_map_str,
view_map_str,
o), file=file)
else:
print('%s%s.%i %s%s \'%s\' %s %s %s' % (prefix, a.op,
a.outputs.index(r),
id_str, type_str,
r_name,
destroy_map_str,
view_map_str,
o), file=file)
a.outputs.index(r),
id_str, type_str,
r_name,
destroy_map_str,
view_map_str,
o), file=file)
else:
op_time = profile.apply_time[a]
op_time_percent = (op_time / profile.fct_call_time) * 100
......@@ -617,33 +630,35 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
tot_time_percent = (tot_time_dict[a] / profile.fct_call_time) * 100
if len(a.outputs) == 1:
print('%s%s %s%s \'%s\' %s %s %s --> %8.2es %4.1f%% %8.2es %4.1f%%'\
% (prefix, a.op,
id_str,
type_str,
r_name,
destroy_map_str,
view_map_str,
o, op_time,
op_time_percent,
tot_time,
tot_time_percent), file=file)
print("%s%s %s%s '%s' %s %s %s --> "
"%8.2es %4.1f%% %8.2es %4.1f%%"
% (prefix, a.op,
id_str,
type_str,
r_name,
destroy_map_str,
view_map_str,
o, op_time,
op_time_percent,
tot_time,
tot_time_percent), file=file)
else:
print('%s%s.%i %s%s \'%s\' %s %s %s --> %8.2es %4.1f%% %8.2es %4.1f%%'\
% (prefix, a.op,
a.outputs.index(r),
id_str, type_str,
r_name,
destroy_map_str,
view_map_str,
o, op_time,
op_time_percent,
tot_time,
tot_time_percent), file=file)
print("%s%s.%i %s%s '%s' %s %s %s --> "
"%8.2es %4.1f%% %8.2es %4.1f%%"
% (prefix, a.op,
a.outputs.index(r),
id_str, type_str,
r_name,
destroy_map_str,
view_map_str,
o, op_time,
op_time_percent,
tot_time,
tot_time_percent), file=file)
if not already_printed:
if (not stop_on_name or
not (hasattr(r, 'name') and r.name is not None)):
not (hasattr(r, 'name') and r.name is not None)):
new_prefix = prefix_child + ' |'
new_prefix_child = prefix_child + ' |'
......@@ -652,14 +667,15 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
new_prefix_child = prefix_child + ' '
if hasattr(i, 'owner') and hasattr(i.owner, 'op'):
if isinstance(i.owner.op, theano.scan_module.scan_op.Scan):
if isinstance(i.owner.op,
theano.scan_module.scan_op.Scan):
scan_ops.append(i)
debugprint(i, new_prefix, depth=depth - 1, done=done,
print_type=print_type, file=file, order=order,
ids=ids, stop_on_name=stop_on_name,
prefix_child=new_prefix_child, scan_ops=scan_ops,
profile=profile)
prefix_child=new_prefix_child,
scan_ops=scan_ops, profile=profile)
else:
# this is an input variable
......@@ -679,7 +695,8 @@ def _optcheck_fgraph(input_specs, output_specs, accept_inplace=False):
:param accept_inplace: are inplace ops permitted in the original graph?
:type accept_inplace: Bool
:rtype: `FunctionGraph`
:returns: a new FunctionGraph with a cloned graph, with debugging `Feature` instances already installed.
:returns: a new FunctionGraph with a cloned graph, with debugging
`Feature` instances already installed.
"""
orig_inputs = [spec.variable for spec in input_specs]
updates = [spec.update for spec in input_specs if spec.update]
......@@ -687,15 +704,15 @@ def _optcheck_fgraph(input_specs, output_specs, accept_inplace=False):
equivalence_tracker = _VariableEquivalenceTracker()
fgraph = gof.fg.FunctionGraph(orig_inputs, orig_outputs,
# DestroyHandler may not be needed yet, as there is usually no
# inplace operation in the graph at this stage. DestroyHandler
# will be installed by an optimization after canonicalization,
# before the inplace operations are applied.
# This results in a big speed gain.
# If inplace operations are accepted and present, however,
# DestroyHandler will be inserted in the loop below.
# features=[equivalence_tracker, gof.DestroyHandler(do_imports_on_attach=False)])
features=[equivalence_tracker])
features=[equivalence_tracker])
# DestroyHandler may not be needed yet, as there is usually no
# inplace operation in the graph at this stage. DestroyHandler
# will be installed by an optimization after canonicalization,
# before the inplace operations are applied. This results in a big
# speed gain.
#
# If inplace operations are accepted and present, however,
# DestroyHandler will be inserted in the loop below.
if not accept_inplace:
for node in fgraph.apply_nodes:
......@@ -711,9 +728,10 @@ def _optcheck_fgraph(input_specs, output_specs, accept_inplace=False):
break
# We need to protect all immutable inputs from inplace operations.
fgraph.attach_feature(Supervisor(input for spec, input in zip(input_specs, fgraph.inputs)
if not (spec.mutable or (hasattr(fgraph, 'destroyers')
and fgraph.destroyers(input)))))
fgraph.attach_feature(Supervisor(
input for spec, input in zip(input_specs, fgraph.inputs)
if not (spec.mutable or (hasattr(fgraph, 'destroyers') and
fgraph.destroyers(input)))))
for feature in std_fgraph.features:
fgraph.attach_feature(feature())
......@@ -762,7 +780,7 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
out_var = storage_map[var][0]
in_var = storage_map[node.inputs[ii[0]]][0]
if (hasattr(var.type, 'may_share_memory') and
var.type.may_share_memory(out_var, in_var)):
var.type.may_share_memory(out_var, in_var)):
actually_inplace_outputs.append(node.outputs[oo])
if warn_input_not_reused and destroyed_res_list:
......@@ -772,8 +790,8 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
continue
if out_var is not in_var:
_logger.warning("Optimization Warning: input idx %d marked "
"as destroyed was not changed for node '%s'",
ii[0], str(node))
"as destroyed was not changed for node '%s'",
ii[0], str(node))
vmap = getattr(node.op, 'view_map', {})
for oo, ii in vmap.iteritems():
......@@ -797,8 +815,8 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
continue
if not may_share:
_logger.warning("Optimization Warning: input idx %d marked "
"as viewed but new memory allocated by node '%s'",
ii[0], str(node))
"as viewed but new memory allocated by node "
"'%s'", ii[0], str(node))
for r_idx, r in enumerate(node.inputs):
if not r.type.values_eq(r_vals[r], storage_map[r][0]):
......@@ -808,10 +826,12 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
# ok, we expected r to be destroyed
if node in active_nodes:
if dr_vals.get(r, (0, node))[1] is not node:
# bad: there should only be one active node that destroys any variable
# bad: there should only be one active node
# that destroys any variable
raise Exception('failure in topological ordering')
if clobber_dr_vals:
dr_vals[r] = (storage_map[r][0], node) # no copy, this is the last use of this variable
# no copy, this is the last use of this variable
dr_vals[r] = (storage_map[r][0], node)
# make sure that dr_vals[r] doesn't get used again
storage_map[r][0] = data_destroyed
else:
......@@ -823,8 +843,10 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
def _check_viewmap(node, storage_map):
"""
This functions raises a BadViewMap exception when it detects the following:
- output node storages aliased to input storage, with no declaration in view_map
This functions raises a BadViewMap exception when it detects the
following:
- output node storages aliased to input storage, with no declaration
in view_map
- if not aliased to an input, check if two outputs are aliased together
and used subsequently in the graph
"""
......@@ -851,14 +873,14 @@ def _check_viewmap(node, storage_map):
# useless check.
continue
if (hasattr(inode.type, 'may_share_memory') and
inode.type.may_share_memory(outstorage, in_storage)):
inode.type.may_share_memory(outstorage, in_storage)):
nodeid = id(inode)
bad_alias[nodeid] = ii
# check that the aliasing was declared in [view|destroy]_map
if ([ii] == view_map.get(oi, None) or
[ii] == destroy_map.get(oi, None)):
[ii] == destroy_map.get(oi, None)):
good_alias[nodeid] = bad_alias.pop(nodeid)
......@@ -970,12 +992,12 @@ def _find_bad_optimizations0(order, reasons, r_vals):
check = r.type.values_eq_approx(r_val, new_r_val)
if not check:
raise BadOptimization(old_r=r,
new_r=new_r,
old_r_val=r_val,
new_r_val=new_r_val,
reason=reason,
old_graph=old_graph_str,
new_graph=new_graph_str)
new_r=new_r,
old_r_val=r_val,
new_r_val=new_r_val,
reason=reason,
old_graph=old_graph_str,
new_graph=new_graph_str)
def _find_bad_optimizations1(order, reasons, r_vals):
......@@ -993,7 +1015,7 @@ def _find_bad_optimizations1(order, reasons, r_vals):
equivalence_sets.setdefault(new_r, set([new_r]))
for reason, r, old_graph_str, new_graph_str in reasons[new_r]:
equivalence_sets[new_r].update(equivalence_sets.setdefault(
r, set([r])))
r, set([r])))
for er in equivalence_sets[r]:
equivalence_sets[er] = equivalence_sets[new_r]
......@@ -1050,28 +1072,25 @@ def _find_bad_optimizations2(order, reasons, r_vals):
if (r.type != new_r.type) or (not r.type.values_eq_approx(
r_val, new_r_val)):
raise BadOptimization(old_r=r,
new_r=new_r,
old_r_val=r_val,
new_r_val=new_r_val,
reason=reason,
old_graph=old_graph_str,
new_graph=new_graph_str)
new_r=new_r,
old_r_val=r_val,
new_r_val=new_r_val,
reason=reason,
old_graph=old_graph_str,
new_graph=new_graph_str)
def check_variable(r):
if r in checked_variables:
return
checked_variables.add(r)
# (recursively) first check all the variables that could make r look bad:
# (recursively) first check all the variables that could make
# r look bad:
list_of_vars = [old_r for (reason, old_r, olds, news) in reasons[r]]
if (None is not r.owner):
list_of_vars += r.owner.inputs
for var_that_could_make_r_look_bad in \
list_of_vars:
# backport
#[old_r for (reason, old_r, olds, news) in reasons[r]] \
#+ ([] if (None is r.owner) else r.owner.inputs):
for var_that_could_make_r_look_bad in list_of_vars:
check_variable(var_that_could_make_r_look_bad)
check_variable_norec(r)
......@@ -1087,8 +1106,8 @@ _find_bad_optimizations = _find_bad_optimizations0
def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
storage_map, r_vals, dr_vals, perform, active_order_set,
inplace_outs, init_outputs):
storage_map, r_vals, dr_vals, perform,
active_order_set, inplace_outs, init_outputs):
'''Preallocate outputs in different memory layouts'''
# To avoid circular imports
......@@ -1137,7 +1156,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
# inplace.
if isinstance(r.type, (TensorType, CudaNdarrayType)):
reuse_outputs[r][...] = numpy.asarray(
def_val).astype(r.type.dtype)
def_val).astype(r.type.dtype)
if reuse_outputs:
yield ('previous', reuse_outputs)
......@@ -1169,16 +1188,16 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
for r in considered_outputs:
if isinstance(r.type, (TensorType, CudaNdarrayType)):
new_buf = numpy.zeros(
shape=r_vals[r].shape,
dtype=r_vals[r].dtype,
order='F')
shape=r_vals[r].shape,
dtype=r_vals[r].dtype,
order='F')
new_buf[...] = def_val
if isinstance(r.type, CudaNdarrayType):
# When the CudaNdarray is built, the underlying memory
# is c-contiguous, so we transpose it before and after.
new_buf = CudaNdarray(new_buf.T)
new_buf = cuda_dimshuffle(new_buf,
range(new_buf.ndim)[::-1])
range(new_buf.ndim)[::-1])
f_cont_outputs[r] = new_buf
......@@ -1195,8 +1214,8 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
# Dimensions should be align by the innermost index, so we iterate
# from the end of shapes.
if ('strided' in prealloc_modes or
'wrong_size' in prealloc_modes or
'ALL' in prealloc_modes):
'wrong_size' in prealloc_modes or
'ALL' in prealloc_modes):
max_ndim = 0
rev_out_broadcastable = []
for r in considered_outputs:
......@@ -1297,11 +1316,11 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
if isinstance(r.type, (TensorType, CudaNdarrayType)):
r_shape_diff = shape_diff[:r.ndim]
out_shape = [max((s + sd), 0)
for s, sd in zip(r_vals[r].shape,
r_shape_diff)]
for s, sd in zip(r_vals[r].shape,
r_shape_diff)]
new_buf = r.type.value_zeros(out_shape)
new_buf[...] = numpy.asarray(
def_val).astype(r.type.dtype)
def_val).astype(r.type.dtype)
wrong_size[r] = new_buf
if wrong_size:
......@@ -1310,8 +1329,8 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
def _check_preallocated_output(node, thunk, prealloc_modes, def_val,
storage_map, r_vals, dr_vals, perform, active_order_set,
inplace_outs, init_outputs):
storage_map, r_vals, dr_vals, perform,
active_order_set, inplace_outs, init_outputs):
'''Try to apply thunk() on different output storages'''
# If node has an inner compiled Theano function with mode DebugMode,
......@@ -1321,11 +1340,11 @@ def _check_preallocated_output(node, thunk, prealloc_modes, def_val,
if type(getattr(node, 'op', None)) in ops_with_inner_function:
fn_attr_name = ops_with_inner_function[type(node.op)]
fn = getattr(node.op, fn_attr_name, None)
if (not fn
or not hasattr(fn, 'maker')
or not hasattr(fn.maker, 'mode')):
if (not fn or
not hasattr(fn, 'maker') or
not hasattr(fn.maker, 'mode')):
_logger.warn('Expected theano function not found in %s.%s',
node.op, fn_attr_name)
node.op, fn_attr_name)
else:
if isinstance(fn.maker.mode, DebugMode):
backup_mode = fn.maker.mode
......@@ -1378,9 +1397,10 @@ def _check_preallocated_output(node, thunk, prealloc_modes, def_val,
# Check outputs
for r in node.outputs:
if not r.type.is_valid_value(storage_map[r][0]):
raise InvalidValueError(r, storage_map[r][0],
hint=thunk_name,
specific_hint=r.type.value_validity_msg(
raise InvalidValueError(
r, storage_map[r][0],
hint=thunk_name,
specific_hint=r.type.value_validity_msg(
storage_map[r][0]))
_check_inputs(node, storage_map, r_vals, dr_vals, active_order_set,
......@@ -1393,11 +1413,14 @@ def _check_preallocated_output(node, thunk, prealloc_modes, def_val,
for r in node.outputs:
if not check_eq(r, r_vals[r], storage_map[r][0]):
# TODO: indicate it is not a C/Py problem
inputs_val = [storage_map[inp][0] for inp in r.owner.inputs]
inputs_val = [storage_map[inp][0] for inp in
r.owner.inputs]
raise BadThunkOutput(r,
thunk1='Reference value', val1=r_vals[r],
thunk2=thunk_name, val2=storage_map[r][0],
inputs_val=inputs_val)
thunk1='Reference value',
val1=r_vals[r],
thunk2=thunk_name,
val2=storage_map[r][0],
inputs_val=inputs_val)
# Clear storage_map
for r in node.outputs:
......@@ -1451,12 +1474,10 @@ class _FunctionGraphEvent(object):
msg = ''
return ' '.join(['change',
self.reason,
str(self.op),
str(self.idx),
msg])
# backport
# str(len(self.node.inputs)) if (self.op != 'output') else ''])
self.reason,
str(self.op),
str(self.idx),
msg])
else:
return str(self.__dict__)
......@@ -1475,7 +1496,8 @@ class _FunctionGraphEvent(object):
class _VariableEquivalenceTracker(object):
"""A FunctionGraph Feature that keeps tabs on an FunctionGraph and tries to detect problems."""
"""A FunctionGraph Feature that keeps tabs on an FunctionGraph and
tries to detect problems."""
fgraph = None
"""WRITEME"""
......@@ -1524,7 +1546,6 @@ class _VariableEquivalenceTracker(object):
def on_prune(self, fgraph, node, reason):
self.event_list.append(_FunctionGraphEvent('prune', node,
reason=reason))
# print 'PRUNING NODE', node, id(node)
assert node in self.active_nodes
assert node not in self.inactive_nodes
self.active_nodes.remove(node)
......@@ -1534,7 +1555,6 @@ class _VariableEquivalenceTracker(object):
self.event_list.append(_FunctionGraphEvent('import', node,
reason=reason))
# print 'NEW NODE', node, id(node)
assert node not in self.active_nodes
self.active_nodes.add(node)
......@@ -1554,9 +1574,8 @@ class _VariableEquivalenceTracker(object):
self.replaced_by.setdefault(r, [])
def on_change_input(self, fgraph, node, i, r, new_r, reason=None):
# print 'CHANGE by', reason, 'to use', new_r, type(new_r)
self.event_list.append(_FunctionGraphEvent('change', node,
reason=str(reason), idx=i))
reason=str(reason), idx=i))
self.reasons.setdefault(new_r, [])
self.replaced_by.setdefault(new_r, [])
......@@ -1570,12 +1589,13 @@ class _VariableEquivalenceTracker(object):
# N.B. compute the debugprint now, because future
# optimizations will change the graph
done = dict()
self.reasons[new_r].append((reason,
r,
debugprint(r, prefix=' ', depth=6,
file=StringIO(), done=done).getvalue(),
debugprint(new_r, prefix=' ', depth=6,
file=StringIO(), done=done).getvalue()))
self.reasons[new_r].append(
(reason,
r,
debugprint(r, prefix=' ', depth=6,
file=StringIO(), done=done).getvalue(),
debugprint(new_r, prefix=' ', depth=6,
file=StringIO(), done=done).getvalue()))
self.replaced_by[r].append((reason, new_r))
if r in self.equiv:
......@@ -1647,26 +1667,28 @@ class _Linker(gof.link.LocalLinker):
self.no_recycling = no_recycling
return self
def make_all(self, profiler=None, input_storage=None
, output_storage=None):
def make_all(self, profiler=None, input_storage=None,
output_storage=None):
# can't import at toplevel because of circular import TODO:
# don't do this ugly hacky way of setting the
# filter_checks_isfinite
from theano.tensor import TensorType # to set filter_check_isfinite
if 1:
# can't import at toplevel because of circular import TODO:
# don't do this ugly hacky way of setting the
# filter_checks_isfinite
from theano.tensor import TensorType # to set filter_check_isfinite
fgraph = self.fgraph
input_storage_ = input_storage
output_storage_ = output_storage
#order = self.schedule(fgraph)
# Compute a topological ordering that IGNORES the destroy_map of destructive Ops.
# This will be OK, because every thunk is evaluated on a copy of its input.
order_outputs = copy.copy(fgraph.equivalence_tracker.all_variables_ever)
# Compute a topological ordering that IGNORES the destroy_map
# of destructive Ops. This will be OK, because every thunk is
# evaluated on a copy of its input.
fgraph_equiv = fgraph.equivalence_tracker
order_outputs = copy.copy(fgraph_equiv.all_variables_ever)
del fgraph_equiv
order_outputs.reverse()
order = graph.io_toposort(fgraph.inputs, order_outputs)
active_order = self.schedule(fgraph) # an ordering of just the active nodes
# an ordering of just the active nodes
active_order = self.schedule(fgraph)
active_order_set = set(active_order)
# Disable no_recycling, in order to be able to use
......@@ -1682,9 +1704,6 @@ class _Linker(gof.link.LocalLinker):
thunks_c = [] # c thunks
for node in order:
node_input_storage = [storage_map[r] for r in node.inputs]
node_output_storage = [storage_map[r] for r in node.outputs]
compute_map = {}
for k in node.inputs:
compute_map[k] = [True]
......@@ -1696,7 +1715,8 @@ class _Linker(gof.link.LocalLinker):
# the compilation of some dependency is triggered there.
thunk_other = None
if get_unbound_function(node.op.make_thunk) not in default_make_thunk:
if (get_unbound_function(node.op.make_thunk) not in
default_make_thunk):
thunk = node.op.make_thunk(node,
storage_map,
compute_map,
......@@ -1725,7 +1745,8 @@ class _Linker(gof.link.LocalLinker):
# raises an not implemented exception), so in those cases we
# consider that we don't have a python implementation
if ((self.maker.mode.check_py_code or thunks_c[-1] is None) and
node.op.perform.func_code != gof.op.PureOp.perform.func_code):
(node.op.perform.func_code !=
gof.op.PureOp.perform.func_code)):
thunk = node.op.make_py_thunk(node, storage_map, compute_map,
no_recycling)
thunks_py.append(thunk)
......@@ -1739,7 +1760,9 @@ class _Linker(gof.link.LocalLinker):
elif thunks_c[-1] is None:
thunks_c[-1] = thunk_other
else:
_logger.warn("We won't check the perform function of node '%s' but we will check its make_thunk function" % node)
_logger.warn("We won't check the perform function "
"of node '%s' but we will check its "
"make_thunk function" % node)
thunks_py[-1] = thunk_other
# Use self.no_recycling (that was passed in accept()) to always
......@@ -1747,10 +1770,11 @@ class _Linker(gof.link.LocalLinker):
# function's outputs. no_recycling_map will be used in f() below.
if self.no_recycling is True:
no_recycling_map = storage_map.values()
no_recycling_map = utils.difference(no_recycling_map, input_storage)
no_recycling_map = utils.difference(no_recycling_map,
input_storage)
else:
no_recycling_map = [storage_map[r] for r in self.no_recycling
if r not in fgraph.inputs]
if r not in fgraph.inputs]
# Precompute some things for storage pre-allocation
try:
......@@ -1769,7 +1793,7 @@ class _Linker(gof.link.LocalLinker):
#####
_logger.debug("starting a DebugMode call")
_logger.debug("self.maker.mode.check_preallocated_output: %s",
self.maker.mode.check_preallocated_output)
self.maker.mode.check_preallocated_output)
for x in no_recycling_map:
x[0] = None
......@@ -1784,14 +1808,17 @@ class _Linker(gof.link.LocalLinker):
# the evaluation of this function, even when the graph
# has destructive ops in it
#
# This dictionary is used to populate the storage_map as necessary
# This dictionary is used to populate the storage_map
# as necessary
r_vals = {}
# dr_vals are the values taken by variables after being destroyed
# dr_vals are the values taken by variables after
# being destroyed
dr_vals = {}
assert len(thunks_py) == len(order)
# transfer the initial values from the storage_map to the r_vals
# transfer the initial values from the storage_map to
# the r_vals
_logger.debug("DEBUGMODE: transfer initial values")
# r_vals_initialized keeps track of the values that have
# actually been transferred from storage_map to r_vals
......@@ -1803,17 +1830,20 @@ class _Linker(gof.link.LocalLinker):
# for a Generic object). We only want to raise
# an error if it is not valid.
if (storage_map[r][0] is None):
raise InvalidValueError(r, storage_map[r][0],
hint="Graph Input '%s' is missing" % str(r))
raise InvalidValueError(r, storage_map[r][0],
hint=("Graph Input '%s' has invalid value "
"%s" % (r, storage_map[r][0])))
raise InvalidValueError(
r, storage_map[r][0],
hint=("Graph Input '%s' is missing" %
str(r)))
raise InvalidValueError(
r, storage_map[r][0],
hint=("Graph Input '%s' has invalid value "
"%s" % (r, storage_map[r][0])))
r_vals[r] = storage_map[r][0]
storage_map[r][0] = None
r_vals_initialized.append(r)
# store preallocated outputs in another map, and test the thunks on
# them as output storages.
# store preallocated outputs in another map, and test
# the thunks on them as output storages.
init_outputs = {}
for r in storage_map:
if r in fgraph.outputs:
......@@ -1835,8 +1865,6 @@ class _Linker(gof.link.LocalLinker):
for i, (thunk_py, thunk_c, node) in enumerate(zip(thunks_py,
thunks_c,
order)):
this_node_destroyed_variables = set()
_logger.debug("%i - starting node %i %s", i, i, node)
# put a copy of each input into the storage_map
......@@ -1844,7 +1872,6 @@ class _Linker(gof.link.LocalLinker):
for r in node.inputs:
assert isinstance(r, gof.Variable)
assert r in r_vals
# print >> sys.stderr,i, "DEBUGMODE: deepcopy input ", r
storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
if not r.type.is_valid_value(storage_map[r][0]):
raise InvalidValueError(r, storage_map[r][0],
......@@ -1854,7 +1881,7 @@ class _Linker(gof.link.LocalLinker):
# storage will be None
if thunk_py:
_logger.debug("%i - running thunk_py with None as "
"output storage", i)
"output storage", i)
try:
thunk_py()
except utils.MethodNotDefined:
......@@ -1872,10 +1899,12 @@ class _Linker(gof.link.LocalLinker):
raise
opt = str(reason[0][0])
msg = (
"An optimization (probably %s ) inserted an apply node that raise an error." % opt +
"\nThe information we have about this optimizations is:" + str(reason[0][1]) +
"\n" + reason[0][2] +
"\n\nThe original exception: \n" + str(e))
"An optimization (probably %s) inserted an "
"apply node that raise an error." % opt +
"\nThe information we have about this "
"optimizations is:" + str(reason[0][1]) +
"\n" + reason[0][2] +
"\n\nThe original exception: \n" + str(e))
new_e = e.__class__(msg)
exc_type, exc_value, exc_trace = sys.exc_info()
exc_value = new_e
......@@ -1891,45 +1920,42 @@ class _Linker(gof.link.LocalLinker):
raise InvalidValueError(r, storage_map[r][0],
hint='perform output',
specific_hint=hint2)
warn_inp = config.DebugMode.warn_input_not_reused
py_inplace_outs = _check_inputs(
node, storage_map, r_vals, dr_vals,
active_order_set,
clobber_dr_vals=True, perform='py',
warn_input_not_reused=config.DebugMode.warn_input_not_reused)
node, storage_map, r_vals, dr_vals,
active_order_set,
clobber_dr_vals=True, perform='py',
warn_input_not_reused=warn_inp)
_check_viewmap(node, storage_map)
# Retrieve each output from the storage_map
# The return values of this first run will be the reference ones
# Retrieve each output from the storage_map.
# The return values of this first run will be
# the reference ones
for r in node.outputs:
assert r not in r_vals
# print >> sys.stderr, i, "DEBUGMODE storing reference output %x" % id(storage_map[r][0])
r_vals[r] = storage_map[r][0]
# clear the storage_map of outputs for the thunk_c
storage_map[r][0] = None
if self.maker.mode.check_preallocated_output:
prealloc_modes = \
self.maker.mode.check_preallocated_output
self.maker.mode.check_preallocated_output
_logger.debug(
'%i - calling _check_preallocated_output '
'with thunk_py', i)
'%i - calling _check_preallocated_output '
'with thunk_py', i)
_check_preallocated_output(
node=node,
thunk=thunk_py,
prealloc_modes=prealloc_modes,
def_val=def_val,
storage_map=storage_map,
r_vals=r_vals,
dr_vals=dr_vals,
perform='py',
active_order_set=active_order_set,
inplace_outs=py_inplace_outs,
init_outputs=init_outputs)
# print >> sys.stderr, i, "DEBUGMODE thunk_py %100s %50s %30s" % (node,
#[(id(o), numpy.asarray(storage_map[o][0])[0,0]) for o in node.inputs],
#[(id(o), numpy.asarray(storage_map[o][0])[0,0]) for o in node.outputs])
node=node,
thunk=thunk_py,
prealloc_modes=prealloc_modes,
def_val=def_val,
storage_map=storage_map,
r_vals=r_vals,
dr_vals=dr_vals,
perform='py',
active_order_set=active_order_set,
inplace_outs=py_inplace_outs,
init_outputs=init_outputs)
sys.stdout.flush()
if thunk_c:
......@@ -1939,18 +1965,22 @@ class _Linker(gof.link.LocalLinker):
dmap = getattr(node.op, 'destroy_map', {})
vmap = getattr(node.op, 'view_map', {})
for i, r in enumerate(node.inputs):
# if thunk_py ran, and we still got this far,
# it means that the destroy_map of the Op (and view_map) are
# accurate
# so we can assume that inputs not marked as destroyed have in
# fact not been destroyed.
# Therefore... we only need to overwrite inputs that *have*
# been marked as destroyed.
# Inputs marked as viewd are unsafe too,
# because the corresponding output can
# be destroyed.
if any(i in v for v in (dmap.values() + vmap.values())):
storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
# if thunk_py ran, and we still got
# this far, it means that the
# destroy_map of the Op (and view_map)
# are accurate so we can assume that
# inputs not marked as destroyed have
# in fact not been destroyed.
# Therefore... we only need to
# overwrite inputs that *have* been
# marked as destroyed. Inputs marked
# as viewd are unsafe too, because the
# corresponding output can be
# destroyed.
if any(i in v for v in (dmap.values() +
vmap.values())):
storage_map[r][0] = _lessbroken_deepcopy(
r_vals[r])
clobber = False
......@@ -1969,10 +1999,12 @@ class _Linker(gof.link.LocalLinker):
raise
opt = str(reason[0][0])
msg = (
"An optimization (probably %s ) inserted an apply node that raise an error." % opt +
"\nThe information we have about this optimizations is:" + str(reason[0][1]) +
"\n" + reason[0][2] +
"\n\nThe original exception: \n" + str(e))
"An optimization (probably %s) inserted "
"an apply node that raise an error." % opt +
"\nThe information we have about this "
"optimizations is:" + str(reason[0][1]) +
"\n" + reason[0][2] +
"\n\nThe original exception: \n" + str(e))
new_e = e.__class__(msg)
exc_type, exc_value, exc_trace = sys.exc_info()
exc_value = new_e
......@@ -1982,21 +2014,26 @@ class _Linker(gof.link.LocalLinker):
for r in node.outputs:
# check output values for type-correctness
if not r.type.is_valid_value(storage_map[r][0]):
raise InvalidValueError(r, storage_map[r][0], hint='c output')
raise InvalidValueError(r, storage_map[r][0],
hint='c output')
if thunk_py:
assert r in r_vals # because we put it in during the thunk_py branch
# check for stride correctness (may raise exception)
_check_strides_match(r_vals[r],
storage_map[r][0],
# because we put it in during the
# thunk_py branch
assert r in r_vals
# check for stride correctness (may
# raise exception)
_check_strides_match(
r_vals[r], storage_map[r][0],
self.maker.mode.require_matching_strides,
node.op)
warn_inp = config.DebugMode.warn_input_not_reused
c_inplace_outs = _check_inputs(
node, storage_map, r_vals,
dr_vals, active_order_set,
clobber_dr_vals=clobber, perform='c',
warn_input_not_reused=config.DebugMode.warn_input_not_reused)
node, storage_map, r_vals,
dr_vals, active_order_set,
clobber_dr_vals=clobber, perform='c',
warn_input_not_reused=warn_inp)
_check_viewmap(node, storage_map)
......@@ -2006,11 +2043,14 @@ class _Linker(gof.link.LocalLinker):
# compares the version from thunk_py
# (in r_vals) to the version produced
# by thunk_c (in storage_map)
if not check_eq(r, r_vals[r], storage_map[r][0]):
inputs_val = [storage_map[inp][0] for inp in r.owner.inputs]
if not check_eq(r, r_vals[r],
storage_map[r][0]):
inputs_val = [storage_map[inp][0]
for inp in r.owner.inputs]
raise BadThunkOutput(
r, thunk1='perform', val1=r_vals[r],
thunk2='c_code', val2=storage_map[r][0],
thunk2='c_code',
val2=storage_map[r][0],
inputs_val=inputs_val)
else:
# retrieve each output from the storage_map
......@@ -2020,37 +2060,34 @@ class _Linker(gof.link.LocalLinker):
if self.maker.mode.check_preallocated_output:
prealloc_modes = \
self.maker.mode.check_preallocated_output
self.maker.mode.check_preallocated_output
def thunk():
try:
thunk_c()
except Exception:
raise_with_op(node, thunk_c)
_logger.debug(
'%i - calling _check_preallocated_output '
'with thunk_c', i)
'%i - calling _check_preallocated_output '
'with thunk_c', i)
_check_preallocated_output(
node=node,
thunk=thunk,
prealloc_modes=prealloc_modes,
def_val=def_val,
storage_map=storage_map,
r_vals=r_vals,
dr_vals=dr_vals,
perform='c code',
active_order_set=active_order_set,
inplace_outs=c_inplace_outs,
init_outputs=init_outputs)
# print >> sys.stderr, i, "DEBUGMODE thunk_c %100s %50s %30s" % (node,
#[(id(o), numpy.asarray(storage_map[o][0])[0,0]) for o in node.inputs],
#[(id(o), numpy.asarray(storage_map[o][0])[0,0]) for o in node.outputs])
node=node,
thunk=thunk,
prealloc_modes=prealloc_modes,
def_val=def_val,
storage_map=storage_map,
r_vals=r_vals,
dr_vals=dr_vals,
perform='c code',
active_order_set=active_order_set,
inplace_outs=c_inplace_outs,
init_outputs=init_outputs)
sys.stdout.flush()
# we're done with this thunk
# clear everything out of the storage_map
for r in node.inputs:
#print >> sys.stderr, i, "DEBUGMODE clearing input", r
storage_map[r][0] = None
_logger.debug("%i - done with node", i)
......@@ -2059,7 +2096,8 @@ class _Linker(gof.link.LocalLinker):
# But it is very slow and it is not sure it will help.
gc.collect()
_find_bad_optimizations(order, fgraph.equivalence_tracker.reasons,
_find_bad_optimizations(order,
fgraph.equivalence_tracker.reasons,
r_vals)
#####
......@@ -2081,18 +2119,24 @@ class _Linker(gof.link.LocalLinker):
for r in r_vals:
if r.owner is None:
if r in fgraph.inputs:
assert storage_map[r] is input_storage[fgraph.inputs.index(r)]
assert (storage_map[r] is
input_storage[fgraph.inputs.index(r)])
storage_map[r][0] = r_vals[r]
# if an input was destroyed, the destroyed value should be returned
# if an input was destroyed, the destroyed value
# should be returned
for r in dr_vals:
assert dr_vals[r][0] is not None
if r.owner is None:
assert r in fgraph.inputs
# HACK TO LOOK LIKE A REAL DESTRUCTIVE ACTION TOOK PLACE
if type(dr_vals[r][0]) in (numpy.ndarray, numpy.memmap) \
and dr_vals[r][0].dtype == storage_map[r][0].dtype \
and dr_vals[r][0].shape == storage_map[r][0].shape:
# HACK TO LOOK LIKE A REAL DESTRUCTIVE ACTION
# TOOK PLACE
if ((type(dr_vals[r][0]) in
(numpy.ndarray, numpy.memmap)) and
(dr_vals[r][0].dtype ==
storage_map[r][0].dtype) and
(dr_vals[r][0].shape ==
storage_map[r][0].shape)):
if len(dr_vals[r][0].shape):
storage_map[r][0][:] = dr_vals[r][0]
else:
......@@ -2111,10 +2155,6 @@ class _Linker(gof.link.LocalLinker):
storage_map[r][0] = None
raise
# print ""
# print output_storage
# print dr_vals
# print storage_map
for r in storage_map:
if (r.owner is None):
if not r.type.is_valid_value(None):
......@@ -2130,12 +2170,14 @@ class _Linker(gof.link.LocalLinker):
# so it will screw up if we are trying to use
# multiple modes at once.
old_filter_checks_isfinite = TensorType.filter_checks_isfinite
TensorType.filter_checks_isfinite = self.maker.mode.check_isfinite
TensorType.filter_checks_isfinite = \
self.maker.mode.check_isfinite
try:
return f()
finally:
# put back the filter_checks_isfinite
TensorType.filter_checks_isfinite = old_filter_checks_isfinite
TensorType.filter_checks_isfinite = \
old_filter_checks_isfinite
return deco
f = run_with_tensortype_filter_check(f)
......@@ -2143,12 +2185,12 @@ class _Linker(gof.link.LocalLinker):
f.allow_gc = True
assert len(fgraph.inputs) == len(input_storage)
assert len(fgraph.outputs) == len(output_storage)
# print 'make_all returning output', [id(z) for z in output_storage]
return f, [link.Container(input, storage, readonly=False)
for input, storage in zip(fgraph.inputs, input_storage)], \
[link.Container(output, storage, readonly=True)
for output, storage in zip(fgraph.outputs, output_storage)], \
thunks_py, order
return (f,
[link.Container(input, storage, readonly=False)
for input, storage in zip(fgraph.inputs, input_storage)],
[link.Container(output, storage, readonly=True)
for output, storage in zip(fgraph.outputs, output_storage)],
thunks_py, order)
_NODEFAULT = ['NODEFAULT']
......@@ -2162,33 +2204,37 @@ class _Maker(FunctionMaker): # inheritance buys a few helper functions
0: silent)"""
def __init__(self, inputs, outputs, optimizer, mode,
accept_inplace=False,
function_builder=Function,
profile=None,
on_unused_input=None,
output_keys=None):
accept_inplace=False,
function_builder=Function,
profile=None,
on_unused_input=None,
output_keys=None):
"""
:type inputs: a list of SymbolicInput instances
:type outputs: a list of SymbolicOutput instances
outputs may also be a single Variable (not a list), in which
case the functions produced by FunctionMaker will return
their output value directly
:type outputs: a list of SymbolicOutput instances outputs may
also be a single Variable (not a list), in
which case the functions produced by
FunctionMaker will return their output value
directly
:param accept_inplace: True iff it is acceptable to have
inplace operations in the graph from the inputs to
the outputs
:param on_unused_input: What to do if a variable in the 'inputs' list is
not used in the graph. Possible values are 'raise', 'warn', and 'ignore'.
:param on_unused_input: What to do if a variable in the
'inputs' list is not used in the
graph. Possible values are 'raise',
'warn', and 'ignore'.
:param output_keys: If the outputs argument for theano.function was a
list, then output_keys is None. If the outputs argument was a dict,
then output_keys is a sorted list of the keys from that dict.
:param output_keys: If the outputs argument for
theano.function was a list, then
output_keys is None. If the outputs
argument was a dict, then output_keys is a
sorted list of the keys from that dict.
:note: this function sets TensorType.filter_checks_isfinite
when `mode.check_isfinite` is True
when `mode.check_isfinite` is True
"""
self.profile = profile
# Handle the case where inputs and/or outputs is a single
......@@ -2205,7 +2251,8 @@ class _Maker(FunctionMaker): # inheritance buys a few helper functions
inputs = [inputs]
# Wrap them in In or Out instances if needed.
inputs, outputs = map(self.wrap_in, inputs), map(self.wrap_out, outputs)
inputs = map(self.wrap_in, inputs)
outputs = map(self.wrap_out, outputs)
_inputs = gof.graph.inputs([o.variable for o in outputs] +
[i.update for i in inputs
if getattr(i, 'update', False)])
......@@ -2213,9 +2260,11 @@ class _Maker(FunctionMaker): # inheritance buys a few helper functions
# Check if some input variables are unused
self._check_unused_inputs(inputs, outputs, on_unused_input)
# Make a list of (SymbolicInput|SymblicInputKits, indices, [SymbolicInput,...]), one
# tuple for each input. (See Function.indices for more details)
indices = [[input] + self.expand_in(input, _inputs) for input in inputs]
# Make a list of (SymbolicInput|SymblicInputKits, indices,
# [SymbolicInput,...]), one tuple for each input. (See
# Function.indices for more details)
indices = [[input] + self.expand_in(input, _inputs)
for input in inputs]
# make the fgraph
for i in xrange(mode.stability_patience):
......@@ -2226,58 +2275,53 @@ class _Maker(FunctionMaker): # inheritance buys a few helper functions
# optimize the fgraph
compute_test_value_orig = theano.config.compute_test_value
try:
theano.config.compute_test_value = theano.config.compute_test_value_opt
theano.config.compute_test_value = \
theano.config.compute_test_value_opt
optimizer(fgraph)
theano.compile.function_module.insert_deepcopy(fgraph, inputs,
outputs + additional_outputs)
theano.compile.function_module.insert_deepcopy(
fgraph, inputs, outputs + additional_outputs)
finally:
theano.config.compute_test_value = compute_test_value_orig
if i:
if i == 0:
fgraph0 = fgraph
else:
li = fgraph.equivalence_tracker.event_list
l0 = fgraph0.equivalence_tracker.event_list
if li != l0 :
if li != l0:
infolog = StringIO()
print("WARNING: Optimization process is unstable...", file=infolog)
print(" (HINT: Ops that the nodes point to must compare equal)", file=infolog)
print("(event index) (one event trace) (other event trace)", file=infolog)
print("-----------------------------------------------------", file=infolog)
print("WARNING: Optimization process is unstable...",
file=infolog)
print(" (HINT: Ops that the nodes point to must compare "
"equal)", file=infolog)
print("(event index) (one event trace) (other event "
"trace)", file=infolog)
print("-------------------------------------------------"
"----", file=infolog)
for j in xrange(max(len(li), len(l0))):
if j >= len(li):
print('trailing event in optimization 0 :', j, file=infolog)
print('trailing event in optimization 0 :', j,
file=infolog)
print(' ', str(l0[j]), file=infolog)
elif j >= len(l0):
print('trailing event in optimization', i, ':', j, file=infolog)
print('trailing event in optimization', i, ':',
j, file=infolog)
print(' ', str(li[j]), file=infolog)
elif li[j] != l0[j]:
print('non-equal optimization events', i, ':', j, file=infolog)
print('non-equal optimization events', i, ':',
j, file=infolog)
print(' ', str(l0[j]), file=infolog)
print(' ', str(li[j]), file=infolog)
#print >> infolog, "* ", j,
# if j < len(li):
# msg = str(li[j])
# else:
# msg = '-'
#print >> infolog, " ", msg
# if j < len(l0):
# msg = str(l0[j])
# else:
# msg = '-'
#print >> infolog, " ", msg
else:
pass
raise StochasticOrder(infolog.getvalue())
else:
if self.verbose:
print("OPTCHECK: optimization", i, \
"of", len(li), "events was stable.", file=sys.stderr)
else:
fgraph0 = fgraph
del fgraph0
print("OPTCHECK: optimization", i,
"of", len(li), "events was stable.",
file=sys.stderr)
self.fgraph = fgraph
# equivalence_tracker.printstuff()
linker = _Linker(self)
......@@ -2285,14 +2329,14 @@ class _Maker(FunctionMaker): # inheritance buys a few helper functions
# the internal storage pointer.
no_borrow = [
output
for output, spec in izip(fgraph.outputs,
outputs + additional_outputs)
if not spec.borrow]
output
for output, spec in izip(fgraph.outputs,
outputs + additional_outputs)
if not spec.borrow]
if no_borrow:
self.linker = linker.accept(
fgraph,
no_recycling=infer_reuse_pattern(fgraph, no_borrow))
fgraph,
no_recycling=infer_reuse_pattern(fgraph, no_borrow))
else:
self.linker = linker.accept(fgraph)
......@@ -2351,8 +2395,8 @@ class _Maker(FunctionMaker): # inheritance buys a few helper functions
input_storage += [default[i].storage for i in indices]
else:
raise ValueError(
'Not enough storage for SymbolicInputKit',
input, indices, default)
'Not enough storage for SymbolicInputKit',
input, indices, default)
default = _NODEFAULT
else:
input_storage += [[None] for i in indices]
......@@ -2389,8 +2433,8 @@ class _Maker(FunctionMaker): # inheritance buys a few helper functions
else:
# This might catch some bugs early
raise ValueError(
"A default (initial) value is required for an "
"input which can update itself.", input)
"A default (initial) value is required for an "
"input which can update itself.", input)
else:
_defaults.append((False, False, default))
else:
......@@ -2512,14 +2556,14 @@ class DebugMode(Mode):
return _Maker(i, o, self.optimizer, self, *args, **kwargs)
def __init__(self,
optimizer='fast_run',
stability_patience=None,
check_c_code=None,
check_py_code=None,
check_isfinite=None,
check_preallocated_output=None,
require_matching_strides=None,
linker=_DummyLinker()):
optimizer='fast_run',
stability_patience=None,
check_c_code=None,
check_py_code=None,
check_isfinite=None,
check_preallocated_output=None,
require_matching_strides=None,
linker=_DummyLinker()):
"""Initialize member variables.
If any of these arguments (except optimizer) is not None, it overrides
......@@ -2532,9 +2576,8 @@ class DebugMode(Mode):
raise Exception("DebugMode can only use its own linker! You "
"should not provide one.", linker)
super(DebugMode, self).__init__(
optimizer=optimizer,
linker=linker)
super(DebugMode, self).__init__(optimizer=optimizer,
linker=linker)
if stability_patience is not None:
self.stability_patience = stability_patience
......@@ -2561,6 +2604,6 @@ class DebugMode(Mode):
def __str__(self):
return "DebugMode(linker=%s, optimizer=%s)" % (
self.provided_linker, self.provided_optimizer)
self.provided_linker, self.provided_optimizer)
register_mode('DEBUG_MODE', DebugMode(optimizer='fast_run'))
"""Define the `function` function
"""
__docformat__ = "restructuredtext en"
import cPickle
import logging
_logger = logging.getLogger('theano.compile.function')
import traceback as tb
import re
......@@ -14,9 +11,11 @@ from theano.compile.function_module import orig_function
from theano.compile.pfunc import pfunc
from numpy import any
import warnings
from theano import gof
from theano import compat
__docformat__ = "restructuredtext en"
_logger = logging.getLogger('theano.compile.function')
def function_dump(filename, inputs, outputs=None, mode=None, updates=None,
givens=None,
......@@ -63,61 +62,74 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
:param inputs: function parameters, these are not allowed to be shared
variables
:type outputs: list or dict of Variables or Out instances. If it is a
dict, the keys must be strings
:type outputs: list or dict of Variables or Out instances. If it is a
dict, the keys must be strings
:param outputs: expressions to compute
:type mode: string or `Mode` instance.
:param mode: compilation mode
:type updates: iterable over pairs (shared_variable, new_expression). List, tuple or OrderedDict.
:param updates: update the values for SharedVariable inputs according to these expressions
:type givens: iterable over pairs (Var1, Var2) of Variables. List, tuple or dict. The Var1
and Var2 in each pair must have the same Type.
:type updates: iterable over pairs (shared_variable, new_expression).
List, tuple or OrderedDict.
:param updates: update the values for SharedVariable inputs
according to these expressions
:param givens: specific substitutions to make in the computation graph (Var2 replaces
Var1).
:type givens: iterable over pairs (Var1, Var2) of Variables. List,
tuple or dict. The Var1 and Var2 in each pair must
have the same Type.
:param givens: specific substitutions to make in the computation
graph (Var2 replaces Var1).
:type no_default_updates: either bool or list of Variables
:param no_default_updates: if True, do not perform any automatic update on Variables.
If False (default), perform them all. Else, perform automatic updates on all Variables
that are neither in "updates" nor in "no_default_updates".
:param name: an optional name for this function. The profile mode will print the time spent in this function.
:param rebuild_strict: True (Default) is the safer and better tested setting, in which case
`givens` must substitute new variables with the same Type as the variables they replace.
False is a you-better-know-what-you-are-doing setting, that permits `givens` to replace
variables with new variables of any Type. The consequence of changing a Type is that all
results depending on that variable may have a different Type too (the graph is rebuilt from
inputs to outputs). If one of the new types does not make sense for one of the Ops in the
graph, an Exception will be raised.
:param no_default_updates: if True, do not perform any automatic
update on Variables. If False (default), perform them
all. Else, perform automatic updates on all Variables that are
neither in "updates" nor in "no_default_updates".
:param name: an optional name for this function. The profile mode
will print the time spent in this function.
:param rebuild_strict: True (Default) is the safer and better
tested setting, in which case `givens` must substitute new
variables with the same Type as the variables they replace.
False is a you-better-know-what-you-are-doing setting, that
permits `givens` to replace variables with new variables of
any Type. The consequence of changing a Type is that all
results depending on that variable may have a different Type
too (the graph is rebuilt from inputs to outputs). If one of
the new types does not make sense for one of the Ops in the
graph, an Exception will be raised.
:type allow_input_downcast: Boolean or None
:param allow_input_downcast: True means that the values passed as
inputs when calling the function can be silently downcasted to fit
the dtype of the corresponding Variable, which may lose precision.
False means that it will only be cast to a more general, or
precise, type. None (default) is almost like False, but allows
downcasting of Python float scalars to floatX.
inputs when calling the function can be silently downcasted to
fit the dtype of the corresponding Variable, which may lose
precision. False means that it will only be cast to a more
general, or precise, type. None (default) is almost like
False, but allows downcasting of Python float scalars to
floatX.
:type profile: None, True, or ProfileStats instance
:param profile: accumulate profiling information into a given ProfileStats
instance. If argument is `True` then a new ProfileStats instance will be
used. This profiling object will be available via self.profile.
:param profile: accumulate profiling information into a given
ProfileStats instance. If argument is `True` then a new
ProfileStats instance will be used. This profiling object
will be available via self.profile.
:param on_unused_input: What to do if a variable in the 'inputs' list is
not used in the graph. Possible values are 'raise', 'warn', 'ignore' and None.
:param on_unused_input: What to do if a variable in the 'inputs'
list is not used in the graph. Possible values are 'raise',
'warn', 'ignore' and None.
:rtype: Function instance
:returns: a callable object that will compute the outputs (given the inputs)
and update the implicit function arguments according to the `updates`.
:returns: a callable object that will compute the outputs (given
the inputs) and update the implicit function arguments
according to the `updates`.
:note: Regarding givens: Be careful to make sure that these substitutions are
independent--behaviour when Var1 of one pair appears in the graph leading to Var2 in
another expression is undefined. Replacements specified with givens are different from
optimizations in that Var2 is not expected to be equivalent to Var1.
:note: Regarding givens: Be careful to make sure that these
substitutions are independent--behaviour when Var1 of one pair
appears in the graph leading to Var2 in another expression is
undefined. Replacements specified with givens are different
from optimizations in that Var2 is not expected to be
equivalent to Var1.
Internal documentation:
......@@ -195,26 +207,21 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
was easier to develop the VM in Python then translate it to C instead
of just writing it in C from scratch.
CVM stands for C Virtual Machine.
"""
if isinstance(outputs, dict):
output_items = outputs.items()
for item_pair in output_items:
for item_pair in output_items:
assert isinstance(item_pair[0], basestring)
output_items_sorted = sorted(output_items)
output_keys = []
outputs = []
for pair in output_items_sorted:
for pair in output_items_sorted:
output_keys.append(pair[0])
outputs.append(pair[1])
else:
output_keys = None
......@@ -256,12 +263,13 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
if givens is None:
givens = []
if not isinstance(inputs, (list, tuple)):
raise Exception("Input variables of a Theano function should be"
" contained in a list, even when there is a single input.")
raise Exception("Input variables of a Theano function should be "
"contained in a list, even when there is a single "
"input.")
# compute some features of the arguments:
uses_In = any([isinstance(i, In) for i in inputs]) # N.B. the square brackets are necessary
uses_tuple = any([isinstance(i, (list, tuple)) for i in inputs]) # N.B. the square brackets are necessary
uses_In = any([isinstance(i, In) for i in inputs])
uses_tuple = any([isinstance(i, (list, tuple)) for i in inputs])
uses_updates = bool(updates)
uses_givens = bool(givens)
......@@ -275,29 +283,30 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
if uses_In or uses_tuple:
# we must use old semantics in this case.
if profile:
raise NotImplementedError('profiling not supported in old-style function')
raise NotImplementedError("profiling not supported in old-style "
"function")
if uses_updates or uses_givens:
raise NotImplementedError(
"In() instances and tuple inputs trigger the old "
"semantics, which disallow using updates and givens")
"In() instances and tuple inputs trigger the old "
"semantics, which disallow using updates and givens")
fn = orig_function(inputs, outputs,
mode=mode,
accept_inplace=accept_inplace, name=name)
else:
# note: pfunc will also call orig_function-- orig_function is a choke point
# that all compilation must pass through
# note: pfunc will also call orig_function-- orig_function is
# a choke point that all compilation must pass through
fn = pfunc(params=inputs,
outputs=outputs,
mode=mode,
updates=updates,
givens=givens,
no_default_updates=no_default_updates,
accept_inplace=accept_inplace, name=name,
rebuild_strict=rebuild_strict,
allow_input_downcast=allow_input_downcast,
on_unused_input=on_unused_input,
profile=profile,
output_keys=output_keys)
outputs=outputs,
mode=mode,
updates=updates,
givens=givens,
no_default_updates=no_default_updates,
accept_inplace=accept_inplace, name=name,
rebuild_strict=rebuild_strict,
allow_input_downcast=allow_input_downcast,
on_unused_input=on_unused_input,
profile=profile,
output_keys=output_keys)
# We need to add the flag check_for_aliased_inputs if we have any mutable or
# borrowed user-defined inputs
fn._check_for_aliased_inputs = check_for_aliased_inputs
......
"""Driver of graph construction, optimization, and linking.
"""
from __future__ import print_function
__docformat__ = "restructuredtext en"
import copy
import copy_reg
import cPickle
......@@ -26,6 +23,8 @@ from theano.gof.op import ops_with_inner_function
import logging
_logger = logging.getLogger('theano.compile.function_module')
__docformat__ = "restructuredtext en"
class UnusedInputError(Exception):
"""
......@@ -35,7 +34,7 @@ class UnusedInputError(Exception):
def alias_root(v):
"""Return the variable to which v is aliased by view_maps and destroy_maps"""
"Return the variable to which v is aliased by view_maps and destroy_maps"
if v.owner is None:
return v
vmap = getattr(v.owner.op, 'view_map', {})
......@@ -54,7 +53,8 @@ def alias_root(v):
def view_tree_set(v, treeset):
"""Add to `treeset` all variables that are views of v, given that v is not a view"""
"""Add to `treeset` all variables that are views of v, given that v is
not a view"""
treeset.add(v)
for cl, v_input_pos_to_cl in v.clients:
if cl == 'output':
......@@ -69,11 +69,13 @@ def view_tree_set(v, treeset):
def infer_reuse_pattern(fgraph, outputs_to_disown):
"""
Given an fgraph and a list of variables, returns the list or set of all variables which may
share the same underlying data storage as any of the specified variables. Used internally
by function, FunctionMaker.
Given an fgraph and a list of variables, returns the list or set
of all variables which may share the same underlying data storage
as any of the specified variables. Used internally by function,
FunctionMaker.
This list (or set) is also referred to as no_recycling sometimes, especially by linker code.
This list (or set) is also referred to as no_recycling sometimes,
especially by linker code.
"""
rval = set()
for o in outputs_to_disown:
......@@ -103,10 +105,10 @@ def fgraph_updated_vars(fgraph, expanded_inputs):
class Supervisor:
"""
Listener for FunctionGraph events which makes sure that no operation overwrites the
contents of protected Variables. The outputs of the FunctionGraph are protected by default.
Listener for FunctionGraph events which makes sure that no
operation overwrites the contents of protected Variables. The
outputs of the FunctionGraph are protected by default.
"""
def __init__(self, protected):
self.protected = list(protected)
......@@ -176,33 +178,38 @@ class AliasedMemoryError(Exception):
# Function
###
DUPLICATE = ['DUPLICATE'] # unique id object used as a placeholder for duplicate entries
# unique id object used as a placeholder for duplicate entries
DUPLICATE = ['DUPLICATE']
class Function(object):
"""
Type of the functions returned by theano.function or theano.FunctionMaker.create.
`Function` is the callable object that does computation. It has the storage of inputs and
outputs, performs the packing and unpacking of inputs and return values. It implements the
square-bracket indexing so that you can look up the value of a symbolic node.
Type of the functions returned by theano.function or
theano.FunctionMaker.create.
Functions are copyable via {{{fn.copy()}}} and {{{copy.copy(fn)}}}.
When a function is copied, this instance is duplicated. Contrast with self.maker
(instance of `FunctionMaker`) that is shared between copies.
The meaning of copying a function is that the containers and their current values will all be duplicated.
This requires that mutable inputs be copied, whereas immutable inputs may be shared between copies.
`Function` is the callable object that does computation. It has
the storage of inputs and outputs, performs the packing and
unpacking of inputs and return values. It implements the
square-bracket indexing so that you can look up the value of a
symbolic node.
Functions are copyable via {{{fn.copy()}}} and
{{{copy.copy(fn)}}}. When a function is copied, this instance is
duplicated. Contrast with self.maker (instance of
`FunctionMaker`) that is shared between copies. The meaning of
copying a function is that the containers and their current values
will all be duplicated. This requires that mutable inputs be
copied, whereas immutable inputs may be shared between copies.
A Function instance is hashable, on the basis of its memory address (its id).
A Function instance is hashable, on the basis of its memory
address (its id).
A Function instance is only equal to itself.
A Function instance may be serialized using the `pickle` or `cPickle` modules.
This will save all default inputs, the graph, and *** to the pickle file (WRITEME).
A Function instance may be serialized using the `pickle` or
`cPickle` modules. This will save all default inputs, the graph,
and *** to the pickle file (WRITEME).
A Function instance have a ``trust_input`` field that default to
False. When True, we don't do extra check of the input to give
......@@ -210,7 +217,6 @@ class Function(object):
the good results if you pass a python or numpy scalar instead of a
numpy tensor. C code should raise an error if you pass an object
of the wrong type.
"""
pickle_aliased_memory_strategy = 'warn'
......@@ -218,12 +224,11 @@ class Function(object):
Meaningful settings are: 'ignore', 'warn', 'raise'
If the value is 'warn', then a message will be printed to stderr if aliased storage is
detected during pickle.dump.
If the value is 'raise', then an AliasedMemoryError will be raised if aliased storage is
detected during pickle.dump.
If the value is 'warn', then a message will be printed to stderr
if aliased storage is detected during pickle.dump.
If the value is 'raise', then an AliasedMemoryError will be raised
if aliased storage is detected during pickle.dump.
"""
input_storage = None
......@@ -233,24 +238,28 @@ class Function(object):
"""list of Container instances"""
indices = None
"""list of (SymbolicInput|SymbolicInputKit, indices, [SymbolicInput,...]), one tuple for
each input
"""list of (SymbolicInput|SymbolicInputKit, indices,
[SymbolicInput,...]), one tuple for each input
The first tuple element is the SymbolicInput object for the corresponding function input.
The first tuple element is the SymbolicInput object for the
corresponding function input.
The second and third tuple elements are used only by Kits, which are deprecated.
The second and third tuple elements are used only by Kits, which
are deprecated.
"""
defaults = None
""" list of 3-tuples, one 3-tuple for each input.
Tuple element 0: Bool: Is this input required at each function call?
Tuple element 1: Bool: Should this inputs value be reverted after each call?
Tuple element 1: Bool: Should this inputs value be reverted after
each call?
Tuple element 2: Any: The value associated with this input.
"""
unpack_single = None
"""Bool: for outputs lists of length 1, should the 0'th element be returned directly?"""
"""Bool: for outputs lists of length 1, should the 0'th element be
returned directly?"""
return_none = None
"""Bool: whether the function should return None or not"""
......@@ -259,8 +268,8 @@ class Function(object):
"""FunctionMaker instance"""
fn = None
"""a function that evaluates the graph. Typically a linker's make_thunk method created this
function."""
"""a function that evaluates the graph. Typically a linker's
make_thunk method created this function."""
finder = None
"""Dictionary mapping several kinds of things to containers.
......@@ -273,7 +282,8 @@ class Function(object):
- the name of the input
All entries map to the container or to DUPLICATE if an ambiguity is detected
All entries map to the container or to DUPLICATE if an ambiguity
is detected
"""
inv_finder = None
......@@ -312,20 +322,22 @@ class Function(object):
input.distribute(value, indices, cs)
for c in cs:
c.provided += 1
# def assign(c, v):
#c.data = v
# Store the list of names of named inputs.
named_inputs = []
# Count the number of un-named inputs.
n_unnamed_inputs = 0
#setters = []
# Initialize the storage
# this loop works by modifying the elements (as variable c) of self.input_storage inplace.
for i, ((input, indices, sinputs), (required, refeed, value)) in enumerate(zip(self.indices, defaults)):
if indices is None: # this is true iff input is not a SymbolicInputKit
c = containers[0] #containers is being used as a stack. Here we pop off the next one.
# this loop works by modifying the elements (as variable c) of
# self.input_storage inplace.
for i, ((input, indices, sinputs), (required, refeed, value)) in \
enumerate(zip(self.indices, defaults)):
# this is true iff input is not a SymbolicInputKit
if indices is None:
# containers is being used as a stack. Here we pop off
# the next one.
c = containers[0]
c.strict = getattr(input, 'strict', False)
c.allow_downcast = getattr(input, 'allow_downcast', None)
......@@ -342,7 +354,9 @@ class Function(object):
c.value = value
c.required = required
c.implicit = input.implicit
c.provided = 0 # this is a count of how many times the input has been provided (reinitialized to 0 on __call__)
# this is a count of how many times the input has been
# provided (reinitialized to 0 on __call__)
c.provided = 0
finder[i] = c
finder[input.variable] = c
if input.name not in finder:
......@@ -353,17 +367,14 @@ class Function(object):
n_unnamed_inputs += 1
else:
named_inputs.append(input.name)
# backport
#finder[input.name] = c if input.name not in finder else DUPLICATE
# inv_finder maps the container to the input (useful for one error message)
inv_finder[c] = input
#setters.append(partial(assign, c))
containers[:1] = []
else:
# TODO The following code may need to do something to handle
# implicit inputs.
# The input is a SymbolicInputKit, so we take as many containers as the Kit provides inputs
# The input is a SymbolicInputKit, so we take as many
# containers as the Kit provides inputs
cs = containers[:len(indices)]
# distribute does the initialization of the containers
input.distribute(value, indices, cs)
......@@ -377,12 +388,11 @@ class Function(object):
finder[input.name] = f
else:
finder[input.name] = DUPLICATE
# backport
#finder[input.name] = f if input.name not in finder else DUPLICATE
# setters.append(f)
# For each input in the kit and its corresponding container, we put an entry in finder.
# This allows the user to micro-manage elements of the kit if need be.
# All containers inherit the required field and have their own "provided" counter
# For each input in the kit and its corresponding
# container, we put an entry in finder. This allows
# the user to micro-manage elements of the kit if need
# be. All containers inherit the required field and
# have their own "provided" counter
for c, sin in zip(cs, sinputs):
finder[sin.variable] = c
finder[sin.name] = c
......@@ -390,8 +400,6 @@ class Function(object):
finder[sin.name] = c
else:
finder[sin.name] = DUPLICATE
# backport
#finder[sin.name] = c if sin.name not in finder else DUPLICATE
inv_finder[c] = input
c.required = required
c.provided = 0
......@@ -410,12 +418,14 @@ class Function(object):
except KeyError:
raise TypeError("Unknown input or state: %s" % str(item))
if s is DUPLICATE:
raise TypeError("Ambiguous name: %s - please check the names "\
"of the inputs of your function for duplicates." % str(item))
raise TypeError("Ambiguous name: %s - please check the "
"names of the inputs of your function "
"for duplicates." % str(item))
if isinstance(s, gof.Container):
return s.value
else:
raise NotImplementedError
def __setitem__(self, item, value):
try:
s = finder[item]
......@@ -425,13 +435,15 @@ class Function(object):
raise TypeError("Unknown input or state: %s. %s" %
(str(item), msg))
if s is DUPLICATE:
raise TypeError("Ambiguous name: %s - please check the names "\
"of the inputs of your function for duplicates." % str(item))
raise TypeError("Ambiguous name: %s - please check the "
"names of the inputs of your function "
"for duplicates." % str(item))
if isinstance(s, gof.Container):
s.value = value
s.provided += 1
else:
s(value)
def __contains__(self, item):
return finder.__contains__(item)
......@@ -441,6 +453,7 @@ class Function(object):
class ContainerAttribute(object):
def __getitem__(self, item):
return finder[item]
def __contains__(self, item):
return finder.__contains__(item)
# You cannot set the container
......@@ -513,20 +526,17 @@ class Function(object):
s.storage[0] = arg
else:
try:
s.storage[0] = s.type.filter(arg, strict=s.strict,
allow_downcast=s.allow_downcast)
s.storage[0] = s.type.filter(
arg, strict=s.strict,
allow_downcast=s.allow_downcast)
except Exception as e:
function_name = "theano function"
if self.name:
function_name += ' with name "' + self.name + '" '
# end if
e.args = tuple(["Bad input argument to " + function_name +
" at index %d(0-based)" % i] +
list(e.args))
e.args = ("Bad input argument to " + function_name +
" at index %d(0-based)" % i,) + e.args
raise
# end except
# end if
s.provided += 1
i += 1
......@@ -535,9 +545,8 @@ class Function(object):
for k, arg in kwargs.iteritems():
self[k] = arg
if not self.trust_input and (
not hasattr(self, '_check_for_aliased_inputs') or
self._check_for_aliased_inputs):
if (not self.trust_input and
getattr(self, '_check_for_aliased_inputs', True)):
# Collect aliased inputs among the storage space
args_share_memory = []
for i in xrange(len(self.input_storage)):
......@@ -553,8 +562,8 @@ class Function(object):
[self.input_storage[k].storage[0] for k
in args_share_memory[j]])
if numpy.any([(var.type is i_var.type and
var.type.may_share_memory(val, i_val))
for (var, val) in group_j]):
var.type.may_share_memory(val, i_val))
for (var, val) in group_j]):
is_aliased = True
args_share_memory[j].append(i)
......@@ -566,10 +575,6 @@ class Function(object):
# Check for groups of more than one argument that share memory
for group in args_share_memory:
if len(group) > 1:
# see if any of these arguments are mutable
mutable = numpy.any([(self.maker.inputs[idx].mutable or
self.maker.inputs[idx].borrow)
for idx in group])
# copy all but the first
for idx in group[1:]:
self.input_storage[i].storage[0] = copy.copy(
......@@ -696,13 +701,15 @@ class Function(object):
container = property(
lambda self: self._container,
None, # this property itself is not settable
doc="""dictionary-like access to the containers associated with Variables""")
doc=("dictionary-like access to the containers associated with "
"Variables"))
def free(self):
"""
When allow_gc = False, clear the Variables in storage_map
"""
# 1.no allow_gc return False 2.has allow_gc, if allow_gc is False, return True
# 1.no allow_gc return False
# 2.has allow_gc, if allow_gc is False, return True
if not getattr(self.fn, 'allow_gc', True):
for key in self.fn.storage_map.keys():
if not isinstance(key, theano.gof.Constant):
......@@ -719,7 +726,8 @@ def _pickle_Function(f):
ins = list(f.input_storage)
input_storage = []
for (input, indices, inputs), (required, refeed, default) in zip(f.indices, f.defaults):
for (input, indices, inputs), (required, refeed, default) in \
zip(f.indices, f.defaults):
if isinstance(input, SymbolicInputKit):
li = len(indices)
if not default:
......@@ -734,18 +742,21 @@ def _pickle_Function(f):
inputs_data = [x.data for x in f.input_storage]
# HACK to detect aliased storage.
# This is here because aliased relationships are not [currently] preserved across the pickle operation
# This is here because aliased relationships are not [currently]
# preserved across the pickle operation
if not (f.pickle_aliased_memory_strategy == 'ignore'):
all_data = input_storage + inputs_data # addition here means list append
all_data = input_storage + inputs_data
for i, d_i in enumerate(all_data):
for j, d_j in enumerate(all_data):
if (i < j) and isinstance(d_i, numpy.ndarray) and isinstance(d_j, numpy.ndarray):
if ((i < j) and isinstance(d_i, numpy.ndarray) and
isinstance(d_j, numpy.ndarray)):
if numpy.may_share_memory(d_i, d_j):
if f.pickle_aliased_memory_strategy == 'warn':
_logger.warning(('aliased relationship between'
' Function arguments %s, %s'
' will not be preserved by un-pickling'
' operation') % (str(d_i), str(d_j)))
_logger.warning('aliased relationship between '
'Function arguments %s, %s '
'will not be preserved by '
'un-pickling operation' %
(str(d_i), str(d_j)))
else:
raise AliasedMemoryError(d_i, d_j)
rval = (_constructor_Function, (f.maker, input_storage, inputs_data))
......@@ -774,20 +785,25 @@ def insert_deepcopy(fgraph, wrapped_inputs, wrapped_outputs):
"""
Insert deepcopy in the fgraph to break aliasing of outputs
"""
# This loop was inserted to remove aliasing between outputs when they all
# evaluate to the same value. Originally it was OK for outputs to be aliased,
# but some of the outputs can be shared variables, and it is not good for shared
# variables to be aliased. It might be possible to optimize this by making sure
# This loop was inserted to remove aliasing between outputs when
# they all evaluate to the same value. Originally it was OK for
# outputs to be aliased, but some of the outputs can be shared
# variables, and it is not good for shared variables to be
# aliased. It might be possible to optimize this by making sure
# there is no aliasing only between shared variables.
# If some outputs are constant, we add deep copy to respect the memory contract
# If some outputs are constant, we add deep copy to respect the
# memory contract
# We don't insert deep copy when the output.borrow is True for all concerned outputs.
# We don't insert deep copy when the output.borrow is True for all
# concerned outputs.
assert len(wrapped_inputs) == len(fgraph.inputs)
assert len(wrapped_outputs) == len(fgraph.outputs)
reason = "insert_deepcopy"
updated_fgraph_inputs = [fgraph_i for i, fgraph_i in zip(wrapped_inputs, fgraph.inputs) if getattr(i, 'update', False)]
updated_fgraph_inputs = [fgraph_i for i, fgraph_i in
zip(wrapped_inputs, fgraph.inputs)
if getattr(i, 'update', False)]
# We can't use fgraph.inputs as this don't include Constant Value.
all_graph_inputs = gof.graph.inputs(fgraph.outputs)
......@@ -802,43 +818,54 @@ def insert_deepcopy(fgraph, wrapped_inputs, wrapped_outputs):
# and not(wrapped_outputs[i].borrow and wrapped_outputs[j].borrow):
if fgraph.outputs[j] in views_of_output_i:
if wrapped_outputs[i].borrow and wrapped_outputs[j].borrow:
fgraph.change_input('output', i, view_op(fgraph.outputs[i]),
reason=reason)
fgraph.change_input('output', i,
view_op(fgraph.outputs[i]),
reason=reason)
else:
fgraph.change_input('output', i, deep_copy_op(fgraph.outputs[i]),
reason=reason)
fgraph.change_input('output', i,
deep_copy_op(fgraph.outputs[i]),
reason=reason)
copied = True
break
if not copied:
for input_j in all_graph_inputs:
# do not allow outputs to be aliased to an inputs (j), unless
# a) that j'th input has been 'destroyed' by e.g. in-place computations
# b) that j'th input is a shared variable that is also being updated
# a) that j'th input has been 'destroyed' by
# e.g. in-place computations
# b) that j'th input is a shared variable that is also
# being updated
if (hasattr(fgraph, 'get_destroyers_of') and
fgraph.get_destroyers_of(input_j)):
fgraph.get_destroyers_of(input_j)):
continue
if input_j in updated_fgraph_inputs:
continue
if input_j in views_of_output_i:
# We don't put deep_copy_op if the input and the output have borrow==True
# We don't put deep_copy_op if the input and the
# output have borrow==True
if input_j in fgraph.inputs:
j = fgraph.inputs.index(input_j)
if wrapped_outputs[i].borrow and wrapped_inputs[j].borrow:
fgraph.change_input('output', i, view_op(fgraph.outputs[i]),
reason="insert_deepcopy")
if (wrapped_outputs[i].borrow and
wrapped_inputs[j].borrow):
fgraph.change_input('output', i,
view_op(fgraph.outputs[i]),
reason="insert_deepcopy")
break
else:
fgraph.change_input('output', i, deep_copy_op(fgraph.outputs[i]),
reason="insert_deepcopy")
fgraph.change_input(
'output', i,
deep_copy_op(fgraph.outputs[i]),
reason="insert_deepcopy")
break
elif wrapped_outputs[i].borrow:
fgraph.change_input('output', i, view_op(fgraph.outputs[i]),
reason="insert_deepcopy")
fgraph.change_input('output', i,
view_op(fgraph.outputs[i]),
reason="insert_deepcopy")
break
else:
fgraph.change_input('output', i, deep_copy_op(fgraph.outputs[i]),
reason="insert_deepcopy")
fgraph.change_input('output', i,
deep_copy_op(fgraph.outputs[i]),
reason="insert_deepcopy")
break
NODEFAULT = ['NODEFAULT']
......@@ -866,17 +893,20 @@ class FunctionMaker(object):
if len(input) == 2:
return SymbolicInput(input[0], update=input[1])
else:
raise TypeError("Expected two elements in the list or tuple.", input)
raise TypeError("Expected two elements in the list or tuple.",
input)
else:
raise TypeError("Unknown input type: %s (%s), expected Variable instance", type(input), input)
raise TypeError("Unknown input type: %s (%s), expected Variable "
"instance", type(input), input)
@staticmethod
def expand_in(sinput, rinputs):
# For SymbolicInputKits, this extracts a list of SymbolicInput instances
# and corresponding indices such that these SymbolicInputs are representative
# of some of the Variable instances in inputs.
# For SymbolicInput, this returns None as the list of indices and a list with
# just the SymbolicInput.
# For SymbolicInputKits, this extracts a list of SymbolicInput
# instances and corresponding indices such that these
# SymbolicInputs are representative of some of the Variable
# instances in inputs. For SymbolicInput, this returns None
# as the list of indices and a list with just the
# SymbolicInput.
if isinstance(sinput, SymbolicInputKit):
return sinput.complete(rinputs)
elif isinstance(sinput, SymbolicInput):
......@@ -889,24 +919,25 @@ class FunctionMaker(object):
elif isinstance(output, gof.Variable):
return SymbolicOutput(output)
else:
raise TypeError("Unknown output type: %s (%s)", type(output), output)
raise TypeError("Unknown output type: %s (%s)", type(output),
output)
def optimize_graph_with_cache(self, optimizer, inputs, outputs):
# This function is not finished
from theano.gof.compilelock import get_lock, release_lock
import os.path
graph_db_file = os.path.join(theano.config.compiledir, 'optimized_graphs.pkl')
graph_db_file = os.path.join(theano.config.compiledir,
'optimized_graphs.pkl')
# the inputs, outputs, and size of the graph to be optimized
inputs_new = [inp.variable for inp in inputs]
outputs_new = [out.variable for out in outputs]
size_new = len(self.fgraph.apply_nodes)
need_optimize = False
get_lock()
key = None
# Beginning of cache optimizations.
# Could be refactored in different functions.
def load_graph_db():
if os.path.isfile(graph_db_file):
print('graph_db already exists')
......@@ -919,8 +950,9 @@ class FunctionMaker(object):
# load the graph_db dictionary
try:
f = open(graph_db_file, 'rb')
# Temporary hack to allow theano.scan_module.tests.test_scan.T_Scan
# to finish. Should be changed in definitive version.
# Temporary hack to allow
# theano.scan_module.tests.test_scan.T_Scan to
# finish. Should be changed in definitive version.
tmp = theano.config.unpickle_function
theano.config.unpickle_function = False
graph_db = cPickle.load(f)
......@@ -961,16 +993,21 @@ class FunctionMaker(object):
# two graphs are for sure different
print('need to optimize, because output size is different')
continue
elif not all(input_new.type == input_old.type for
input_new, input_old in zip(inputs_new, inputs_old)):
print('need to optimize, because inputs are of different types')
elif not all(input_new.type == input_old.type
for input_new, input_old in
zip(inputs_new, inputs_old)):
print('need to optimize, because inputs are of different '
'types')
continue
elif not all(output_new.type == output_old.type for
output_new, output_old in zip(outputs_new, outputs_old)):
print('need to optimize, because outputs are of different types')
elif not all(output_new.type == output_old.type
for output_new, output_old in
zip(outputs_new, outputs_old)):
print('need to optimize, because outputs are of different '
'types')
continue
elif not size_old == size_new:
print('need to optimize, because numbers of nodes in graph are different')
print('need to optimize, because numbers of nodes in graph'
' are different')
continue
else:
flags = []
......@@ -1007,10 +1044,10 @@ class FunctionMaker(object):
t2 = removeAllFgraph(t2)
givens = dict(zip(gof.graph.inputs([t1]),
gof.graph.inputs([t2])))
gof.graph.inputs([t2])))
temp = dict(zip(gof.graph.inputs([t1]),
gof.graph.inputs([t2])))
gof.graph.inputs([t2])))
# hack to remove an inconsistent entry in givens
# seems to work, but the source of the inconsistency
......@@ -1032,7 +1069,8 @@ class FunctionMaker(object):
return found_graph_in_db
graph_db = load_graph_db()
print('loaded graph_db from %s, size=%d' % (graph_db_file, len(graph_db)))
print('loaded graph_db from %s, size=%d' % (graph_db_file,
len(graph_db)))
found_graph = find_same_graph_in_db(graph_db)
if found_graph:
self.fgraph = found_graph
......@@ -1043,7 +1081,7 @@ class FunctionMaker(object):
self.fgraph.variables = set(gof.graph.variables(
self.fgraph.inputs, self.fgraph.outputs))
# check_integrity parameter was added to ignore
#"excess cached variables" errors. Works that way
# "excess cached variables" errors. Works that way
# but once again the error could be worth
# investigating.
before_opt = self.fgraph.clone(check_integrity=False)
......@@ -1057,22 +1095,24 @@ class FunctionMaker(object):
return optimizer_profile
def __init__(self, inputs, outputs,
mode=None, accept_inplace=False, function_builder=Function,
profile=None, on_unused_input=None, fgraph=None,
output_keys=None):
mode=None, accept_inplace=False, function_builder=Function,
profile=None, on_unused_input=None, fgraph=None,
output_keys=None):
"""
:type inputs: a list of SymbolicInput instances
:type outputs: a list of SymbolicOutput instances
outputs may also be a single Variable (not a list), in which
case the functions produced by FunctionMaker will return
their output value directly
:param mode: a Mode instance telling FunctionMaker how to optimize and link. None
means to use the `config.mode`.
:type outputs: a list of SymbolicOutput instances outputs may
also be a single Variable (not a list), in which case the
functions produced by FunctionMaker will return their
output value directly
:param accept_inplace: True iff it is acceptable to have inplace operations
in the graph from the inputs to the outputs
:param mode: a Mode instance telling FunctionMaker how to
optimize and link. None means to use the `config.mode`.
:param accept_inplace: True iff it is acceptable to have
inplace operations in the graph from the inputs to the
outputs
:param on_unused_input: What to do if a variable in the 'inputs' list
is not used in the graph. Possible values are:
......@@ -1089,18 +1129,19 @@ class FunctionMaker(object):
# using this somewhat awkward mechanism.
mode_profile = getattr(mode, 'profile', None)
if (profile is not None and
profile is not False and
mode_profile is not None):
profile is not False and
mode_profile is not None):
raise TypeError(
'profile passed via both "mode" and "profile" arguments')
'profile passed via both "mode" and "profile" arguments')
self.profile = profile = profile or mode_profile
if profile:
# This is very important:
# 1) We preload the cache here so we don't have its timing
# included in the optimization that compiles the function.
# 2) Do not refresh the cache here by default. It causes too much
# execution time during testing as we compile many more functions
# than the number of compiled C modules.
# 2) Do not refresh the cache here by default. It causes
# too much execution time during testing as we compile
# many more functions than the number of compiled C
# modules.
theano.gof.cc.get_module_cache().refresh()
# Handle the case where inputs and/or outputs is a single
# Variable (not in a list)
......@@ -1117,21 +1158,27 @@ class FunctionMaker(object):
inputs = [inputs]
# Wrap them in In or Out instances if needed.
inputs, outputs = map(self.wrap_in, inputs), map(self.wrap_out, outputs)
_inputs = gof.graph.inputs([o.variable for o in outputs] + [i.update
for i in inputs if getattr(i, 'update', False)])
inputs = map(self.wrap_in, inputs)
outputs = map(self.wrap_out, outputs)
_inputs = gof.graph.inputs([o.variable for o in outputs] +
[i.update for i in inputs
if getattr(i, 'update', False)])
# Check if some input variables are unused
self._check_unused_inputs(inputs, outputs, on_unused_input)
# Make a list of (SymbolicInput|SymblicInputKits, indices, [SymbolicInput,...]), one
# tuple for each input. (See Function.indices for more details)
indices = [[input] + self.expand_in(input, _inputs) for input in inputs]
# Make a list of (SymbolicInput|SymblicInputKits, indices,
# [SymbolicInput,...]), one tuple for each input. (See
# Function.indices for more details)
indices = [[input] + self.expand_in(input, _inputs)
for input in inputs]
if fgraph is None:
need_opt = True
# make the fgraph (copies the graph, creates NEW INPUT AND OUTPUT VARIABLES)
fgraph, additional_outputs = std_fgraph(inputs, outputs, accept_inplace)
# make the fgraph (copies the graph, creates NEW INPUT AND
# OUTPUT VARIABLES)
fgraph, additional_outputs = std_fgraph(inputs, outputs,
accept_inplace)
fgraph.profile = profile
else:
# fgraph is already an optimized one
......@@ -1149,7 +1196,8 @@ class FunctionMaker(object):
# Why do we add the stack on the node when it is done in the output var?
try:
# optimize the fgraph
theano.config.compute_test_value = theano.config.compute_test_value_opt
theano.config.compute_test_value = \
theano.config.compute_test_value_opt
theano.config.traceback.limit = 0
start_optimizer = time.time()
......@@ -1165,7 +1213,8 @@ class FunctionMaker(object):
if profile:
profile.optimizer_time += opt_time
if theano.config.profile_optimizer:
profile.optimizer_profile = (optimizer, optimizer_profile)
profile.optimizer_profile = (optimizer,
optimizer_profile)
_logger.debug('Optimizing took %f seconds', opt_time)
# Add deep copy to respect the memory interface
......@@ -1176,21 +1225,26 @@ class FunctionMaker(object):
# initialize the linker
if not hasattr(linker, 'accept'):
raise ValueError("'linker' parameter of FunctionMaker should be a Linker with an accept method " \
"or one of %s" % theano.compile.mode.predefined_linkers.keys())
raise ValueError("'linker' parameter of FunctionMaker should be "
"a Linker with an accept method or one of %s" %
theano.compile.mode.predefined_linkers.keys())
# the 'no_borrow' outputs are the ones for which that we can't return the internal storage pointer.
# the 'no_borrow' outputs are the ones for which that we can't
# return the internal storage pointer.
assert len(fgraph.outputs) == len(outputs + additional_outputs)
no_borrow = [output for output, spec in zip(fgraph.outputs, outputs + additional_outputs) if not spec.borrow]
no_borrow = [output for output, spec in
zip(fgraph.outputs, outputs + additional_outputs)
if not spec.borrow]
if no_borrow:
self.linker = linker.accept(fgraph, no_recycling=infer_reuse_pattern(fgraph, no_borrow))
self.linker = linker.accept(
fgraph, no_recycling=infer_reuse_pattern(fgraph, no_borrow))
else:
self.linker = linker.accept(fgraph)
if hasattr(linker, 'accept_var_updates'):
# hacky thing so VMLinker knows about updates
self.linker.accept_var_updates(
fgraph_updated_vars(fgraph, inputs))
fgraph_updated_vars(fgraph, inputs))
self.indices = indices
self.inputs = inputs
......@@ -1206,11 +1260,10 @@ class FunctionMaker(object):
self.required = [(i.value is None) for i in self.inputs]
self.refeed = [
(i.value is not None and
not isinstance(i.value, gof.Container) and
i.update is None)
for i in self.inputs
]
(i.value is not None and
not isinstance(i.value, gof.Container) and
i.update is None)
for i in self.inputs]
def _check_unused_inputs(self, inputs, outputs, on_unused_input):
if on_unused_input is None:
......@@ -1223,57 +1276,64 @@ class FunctionMaker(object):
# - variables that have to be provided (used_inputs)
# - shared variables that will be updated
used_inputs = gof.graph.ancestors(
([o.variable for o in outputs]
+ [i.update for i in inputs if getattr(i, 'update', False)]),
blockers=[i.variable for i in inputs])
([o.variable for o in outputs] +
[i.update for i in inputs if getattr(i, 'update', False)]),
blockers=[i.variable for i in inputs])
msg = ("theano.function was asked to create a function computing "
"outputs given certain inputs, but the provided input "
"variable at index %i is not part of the computational graph "
"needed to compute the outputs: %s.\n%s")
"outputs given certain inputs, but the provided input "
"variable at index %i is not part of the computational graph "
"needed to compute the outputs: %s.\n%s")
warn_msg = ("To make this warning into an error, you can pass the "
"parameter on_unused_input='raise' to theano.function. "
"To disable it completely, use on_unused_input='ignore'.")
"parameter on_unused_input='raise' to theano.function. "
"To disable it completely, use on_unused_input='ignore'.")
err_msg = ("To make this error into a warning, you can pass the "
"parameter on_unused_input='warn' to theano.function. "
"To disable it completely, use on_unused_input='ignore'.")
"parameter on_unused_input='warn' to theano.function. "
"To disable it completely, use on_unused_input='ignore'.")
for i in inputs:
if ((i.variable not in used_inputs) and (i.update is None)):
if on_unused_input == 'warn':
warnings.warn(msg % (inputs.index(i), i.variable, warn_msg), stacklevel=6)
warnings.warn(msg % (inputs.index(i), i.variable,
warn_msg), stacklevel=6)
elif on_unused_input == 'raise':
raise UnusedInputError(msg % (inputs.index(i), i.variable, err_msg))
raise UnusedInputError(msg % (inputs.index(i),
i.variable, err_msg))
else:
raise ValueError(("Invalid value for keyword "
"on_unused_input of theano.function: '%s'. "
"valid values are 'raise', 'warn', and 'ignore'."
% on_unused_input))
raise ValueError("Invalid value for keyword "
"on_unused_input of theano.function: "
"'%s'.\nValid values are 'raise', "
"'warn', and 'ignore'." % on_unused_input)
def create(self, input_storage=None, trustme=False):
"""
Create a function.
input_storage -> a list matching the inputs list and providing default values
if the default for an input is None, then that input is a
required input. For an input with an update, the default
acts as initialization.
input_storage -> a list matching the inputs list and providing
default values if the default for an input is
None, then that input is a required input. For an
input with an update, the default acts as
initialization.
trustme -> disables some exceptions, used internally
"""
if input_storage is None:
input_storage = [None] * len(self.inputs)
input_storage_lists = [] # list of independent one-element lists, will be passed to the linker
# list of independent one-element lists, will be passed to the linker
input_storage_lists = []
defaults = []
# The following loop is to fill in the input_storage_lists and defaults lists.
# The following loop is to fill in the input_storage_lists and
# defaults lists.
assert len(self.indices) == len(input_storage)
for i, ((input, indices, subinputs), input_storage_i) in enumerate(zip(self.indices, input_storage)):
# Replace any default value given as a variable by its container.
# Note that this makes sense only in the context of shared variables,
# but for now we avoid dealing directly with them to avoid dependency
# on the shared variables work-in-progress repository.
for i, ((input, indices, subinputs), input_storage_i) in \
enumerate(zip(self.indices, input_storage)):
# Replace any default value given as a variable by its
# container. Note that this makes sense only in the
# context of shared variables, but for now we avoid
# dealing directly with them to avoid dependency on the
# shared variables work-in-progress repository.
if isinstance(input_storage_i, gof.Variable):
input_storage_i = input_storage_i.container
......@@ -1282,7 +1342,8 @@ class FunctionMaker(object):
# share the same storage. This is done by appending
# input_storage_i.storage to input_storage_lists.
if indices is not None:
raise TypeError("Cannot take a Container instance as default for a SymbolicInputKit.")
raise TypeError("Cannot take a Container instance as "
"default for a SymbolicInputKit.")
input_storage_lists.append(input_storage_i.storage)
storage = input_storage[i].storage[0]
......@@ -1295,7 +1356,8 @@ class FunctionMaker(object):
required = self.required[i]
refeed = self.refeed[i]
# sanity check-- if an input is required it should not need to be refed
# sanity check-- if an input is required it should not
# need to be refed
assert not (required and refeed)
# shared variables need neither be input by the user nor refed
......@@ -1312,9 +1374,7 @@ class FunctionMaker(object):
if storage is not None:
assert refeed or not required
defaults.append((required,
refeed,
storage))
defaults.append((required, refeed, storage))
# Get a function instance
start_linker = time.time()
......@@ -1338,22 +1398,23 @@ class FunctionMaker(object):
self.profile.import_time += import_time
fn = self.function_builder(_fn, _i, _o, self.indices, self.outputs,
defaults, self.unpack_single, self.return_none, self.output_keys, self)
defaults, self.unpack_single,
self.return_none, self.output_keys, self)
fn.profile = self.profile
return fn
def _pickle_FunctionMaker(self):
kwargs = dict(
inputs=self.inputs,
outputs=self.orig_outputs,
fgraph=self.fgraph,
mode=self.mode,
accept_inplace=self.accept_inplace,
function_builder=self.function_builder,
profile=self.profile,
on_unused_input=self.on_unused_input,
)
inputs=self.inputs,
outputs=self.orig_outputs,
fgraph=self.fgraph,
mode=self.mode,
accept_inplace=self.accept_inplace,
function_builder=self.function_builder,
profile=self.profile,
on_unused_input=self.on_unused_input,
)
return (_constructor_FunctionMaker, (kwargs,))
......@@ -1367,19 +1428,6 @@ def _constructor_FunctionMaker(kwargs):
copy_reg.pickle(FunctionMaker, _pickle_FunctionMaker)
try:
# Pickle of slice is implemented on python 2.6. To enabled be
# compatible with python 2.4, we implement pickling of slice
# ourself.
cPickle.dumps(slice(0, 10, 100))
except TypeError:
# This slice pickle implementation seam backward and forward compatible.
def _pickle_slice(s):
return (slice, (s.start, s.stop, s.step))
copy_reg.pickle(slice, _pickle_slice)
__checkers = []
......@@ -1390,7 +1438,6 @@ def check_equal(x, y):
except Exception:
continue
return x == y
#raise Exception('No checker for equality between %s and %s' % (x, y))
def register_checker(checker):
......@@ -1405,10 +1452,10 @@ def orig_function(inputs, outputs, mode=None, accept_inplace=False,
:param inputs: list of `SymbolicInput` or `In` instances
:param outputs: a SymbolicOutput or a list of `SymbolicOutput` or `Out`
instances. The return value of the returned function will match the
format of this argument (either the value itself or a list of one or more
return values)
:param outputs: a SymbolicOutput or a list of `SymbolicOutput` or
`Out` instances. The return value of the returned function
will match the format of this argument (either the value
itself or a list of one or more return values)
:param mode: a descriptive string or a Mode instance. (Default of None
means to use `config.mode` (See below for descriptive string list).
......@@ -1422,7 +1469,8 @@ def orig_function(inputs, outputs, mode=None, accept_inplace=False,
- FAST_COMPILE (minimal optimization)
- ProfileMode(deprecated): allow to print a profile mode with mode.print_summary
- ProfileMode(deprecated): allow to print a profile mode with
mode.print_summary
- DebugMode: verify many internal conditions that are normally assumed
(slow)
......@@ -1471,8 +1519,8 @@ def orig_function(inputs, outputs, mode=None, accept_inplace=False,
accept_inplace=accept_inplace,
profile=profile,
on_unused_input=on_unused_input,
output_keys = output_keys).create(
defaults)
output_keys=output_keys).create(
defaults)
t2 = time.time()
if profile:
......@@ -1552,7 +1600,7 @@ def convert_function_input(input):
raise TypeError("Unknown update type: %s, expected Variable "
"instance" % type(update), update)
if (value is not None and
isinstance(value, (gof.Variable, SymbolicInput))):
isinstance(value, (gof.Variable, SymbolicInput))):
raise TypeError("The value for input %s should not be a Variable "
"or SymbolicInput instance (got: %s)" %
(variable, value))
......@@ -1579,26 +1627,26 @@ def get_info_on_inputs(named_inputs, n_unnamed_inputs):
else:
if n_unnamed_inputs == 1:
msg = ("The function has a single input variable which has no "
"name, and thus cannot be assigned through a keyword"
" argument (use 'name=...' in a Variable's "
"constructor to give it a name).")
"name, and thus cannot be assigned through a keyword"
" argument (use 'name=...' in a Variable's "
"constructor to give it a name).")
else:
# Use plural.
msg = ("The function has %s inputs, but none of them is named,"
" and thus they cannot be assigned through keyword "
"arguments (use 'name=...' in a Variable's "
"constructor to give it a name)." % n_unnamed_inputs)
" and thus they cannot be assigned through keyword "
"arguments (use 'name=...' in a Variable's "
"constructor to give it a name)." % n_unnamed_inputs)
else:
if n_unnamed_inputs == 0:
msg = ("The function has %s named input%s (%s)." % (
n_named_inputs, get_plural(n_named_inputs),
', '.join(named_inputs)))
msg = ("The function has %s named input%s (%s)." %
(n_named_inputs, get_plural(n_named_inputs),
', '.join(named_inputs)))
else:
msg = ("The function has %s named input%s (%s), and %s unnamed "
"input%s which thus cannot be accessed through keyword "
"argument%s (use 'name=...' in a variable's constructor "
"to give it a name)." % (
n_named_inputs, get_plural(n_named_inputs),
"input%s which thus cannot be accessed through keyword "
"argument%s (use 'name=...' in a variable's constructor "
"to give it a name)." %
(n_named_inputs, get_plural(n_named_inputs),
', '.join(named_inputs), n_unnamed_inputs,
get_plural(n_unnamed_inputs),
get_plural(n_unnamed_inputs)))
......
"""Define `SymbolicInput`, `SymbolicOutput`, `In`, `Out` """
__docformat__ = 'restructuredtext en'
from theano import gof
from sharedvalue import SharedVariable
......@@ -7,6 +6,8 @@ from sharedvalue import SharedVariable
import logging
_logger = logging.getLogger("theano.compile.io")
__docformat__ = 'restructuredtext en'
class SymbolicInput(object):
"""
......@@ -17,42 +18,55 @@ class SymbolicInput(object):
not computed from its owner.
name: Any type. (If autoname=True, defaults to variable.name).
If name is a valid Python identifier, this input can be set by kwarg, and its value
can be accessed by self.<name>.
If name is a valid Python identifier, this input can be set by
kwarg, and its value can be accessed by self.<name>.
update: Variable instance (default: None)
value (see previous) will be replaced with this expression variable after each function call.
If update is None, the update will be the default value of the input.
value (see previous) will be replaced with this expression
variable after each function call. If update is None, the
update will be the default value of the input.
mutable: Bool (default: False if update is None, True if update is
not None)
True: permit the compiled function to modify the python object
being passed as the input
mutable: Bool (default: False if update is None, True if update is not None)
True: permit the compiled function to modify the python object being passed as the input
False: do not permit the compiled function to modify the python object being passed as the input.
False: do not permit the compiled function to modify the
python object being passed as the input.
strict: Bool (default: False)
True: means that the value you pass for this input must have exactly the right type
False: the value you pass for this input may be cast automatically to the proper type
True: means that the value you pass for this input must have
exactly the right type
False: the value you pass for this input may be cast
automatically to the proper type
allow_downcast: Bool or None (default: None)
Only applies when `strict` is False.
True: the value you pass for this input can be silently
downcasted to fit the right type, which may lose precision.
False: the value will only be cast to a more general, or precise, type.
None: Almost like False, but allows downcast of Python floats to floatX.
False: the value will only be cast to a more general, or
precise, type. None: Almost like False, but allows downcast
of Python floats to floatX.
autoname: Bool (default: True)
See the name option.
implicit: Bool (default: False)
See help(In). Note that 'None' is not allowed here, since we are in the
symbolic case.
See help(In). Note that 'None' is not allowed here, since we
are in the symbolic case.
"""
def __init__(self, variable, name=None, update=None, mutable=None,
strict=False, allow_downcast=None, autoname=True,
implicit=False):
strict=False, allow_downcast=None, autoname=True,
implicit=False):
assert implicit is not None # Safety check.
self.variable = variable
if (autoname and name is None):
if (autoname and name is None):
self.name = variable.name
else:
self.name = name
......@@ -146,36 +160,54 @@ class In(SymbolicInput):
not computed from its owner.
name: Any type. (If autoname=True, defaults to variable.name).
If name is a valid Python identifier, this input can be set by kwarg, and its value
can be accessed by self.<name>.
If name is a valid Python identifier, this input can be set by
kwarg, and its value can be accessed by self.<name>.
value: Any type.
The initial/default value for this input. If update is None, this input acts just like
an argument with a default value in Python. If update is not None, changes to this
value will "stick around", whether due to an update or a user's explicit action.
The initial/default value for this input. If update is None,
this input acts just like an argument with a default value in
Python. If update is not None, changes to this value will
"stick around", whether due to an update or a user's explicit
action.
update: Variable instance (default: None)
value (see previous) will be replaced with this expression variable after each function call.
If update is None, the update will be the default value of the input.
value (see previous) will be replaced with this expression
variable after each function call. If update is None, the
update will be the default value of the input.
mutable: Bool (default: False if update is None, True if update is not None)
True: permit the compiled function to modify the python object being passed as the input
False: do not permit the compiled function to modify the python object being passed as the input.
mutable: Bool (default: False if update is None, True if update is
not None)
True: permit the compiled function to modify the python object
being passed as the input
False: do not permit the compiled function to modify the
python object being passed as the input.
borrow: Bool (default: take the same value as mutable)
True: permit the output of the compiled function to be aliased to the input
True: permit the output of the compiled function to be aliased
to the input
False: do not permit any output to be aliased to the input
strict: Bool (default: False)
True: means that the value you pass for this input must have exactly the right type
False: the value you pass for this input may be cast automatically to the proper type
True: means that the value you pass for this input must have
exactly the right type
False: the value you pass for this input may be cast
automatically to the proper type
allow_downcast: Bool or None (default: None)
Only applies when `strict` is False.
True: the value you pass for this input can be silently
downcasted to fit the right type, which may lose precision.
False: the value will only be cast to a more general, or precise, type.
None: Almost like False, but allows downcast of Python floats to floatX.
False: the value will only be cast to a more general, or
precise, type. None: Almost like False, but allows downcast
of Python floats to floatX.
autoname: Bool (default: True)
See the name option.
......@@ -194,11 +226,11 @@ class In(SymbolicInput):
# Note: the documentation above is duplicated in doc/topics/function.txt,
# try to keep it synchronized.
def __init__(self, variable, name=None, value=None, update=None,
mutable=None, strict=False, allow_downcast=None, autoname=True,
implicit=None, borrow=None, shared=False):
# if shared, an input's value comes from its persistent storage, not from a default stored
# in the function or from the caller
mutable=None, strict=False, allow_downcast=None,
autoname=True, implicit=None, borrow=None, shared=False):
# if shared, an input's value comes from its persistent
# storage, not from a default stored in the function or from
# the caller
self.shared = shared
if borrow is None:
......@@ -211,25 +243,25 @@ class In(SymbolicInput):
# aliased to the input. Thus mutable=True should require borrow=True.
if mutable and not self.borrow:
raise AssertionError(
"Symbolic input for variable %s (name=%s) has "
"flags mutable=True, borrow=False. This combination is "
"incompatible since mutable=True implies that the "
"input variable may be both aliased (borrow=True) and "
"overwritten.",
variable, name)
"Symbolic input for variable %s (name=%s) has "
"flags mutable=True, borrow=False. This combination is "
"incompatible since mutable=True implies that the "
"input variable may be both aliased (borrow=True) and "
"overwritten.",
variable, name)
if implicit is None:
implicit = (isinstance(value, gof.Container) or
isinstance(value, SharedVariable))
isinstance(value, SharedVariable))
super(In, self).__init__(
variable=variable,
name=name,
update=update,
mutable=mutable,
strict=strict,
allow_downcast=allow_downcast,
autoname=autoname,
implicit=implicit)
variable=variable,
name=name,
update=update,
mutable=mutable,
strict=strict,
allow_downcast=allow_downcast,
autoname=autoname,
implicit=implicit)
self.value = value
if self.implicit and value is None:
raise TypeError('An implicit input must be given a default value')
......
......@@ -2,35 +2,33 @@
"""
from __future__ import print_function
import logging
import warnings
from textwrap import dedent
import numpy
import theano
import theano
from theano import gof
import theano.gof.vm
from theano.configparser import config, AddConfigVar, StrParam
from theano.compile.ops import register_view_op_c_code, _output_guard
from theano.compile.ops import _output_guard
_logger = logging.getLogger('theano.compile.mode')
AddConfigVar('optimizer_excluding',
("When using the default mode, we will remove optimizer with these "
"tags. Separate tags with ':'."),
StrParam("", allow_override=False),
in_c_key=False)
("When using the default mode, we will remove optimizer with "
"these tags. Separate tags with ':'."),
StrParam("", allow_override=False),
in_c_key=False)
AddConfigVar('optimizer_including',
("When using the default mode, we will add optimizer with these tags. "
"Separate tags with ':'."),
StrParam("", allow_override=False),
in_c_key=False)
("When using the default mode, we will add optimizer with "
"these tags. Separate tags with ':'."),
StrParam("", allow_override=False),
in_c_key=False)
AddConfigVar('optimizer_requiring',
("When using the default mode, we will require optimizer with these "
"tags. Separate tags with ':'."),
StrParam("", allow_override=False),
in_c_key=False)
("When using the default mode, we will require optimizer with "
"these tags. Separate tags with ':'."),
StrParam("", allow_override=False),
in_c_key=False)
def check_equal(x, y):
......@@ -50,15 +48,15 @@ def check_equal(x, y):
y = y.todense()
if isinstance(x, numpy.ndarray) and isinstance(y, numpy.ndarray):
if (x.dtype != y.dtype
or x.shape != y.shape
or numpy.any(abs(x - y) > 1e-10)):
if (x.dtype != y.dtype or
x.shape != y.shape or
numpy.any(abs(x - y) > 1e-10)):
raise Exception("Output mismatch.",
{'performlinker': x, 'clinker': y})
{'performlinker': x, 'clinker': y})
else:
if x != y:
raise Exception("Output mismatch.",
{'performlinker': x, 'clinker': y})
{'performlinker': x, 'clinker': y})
# If a string is passed as the linker argument in the constructor for
......@@ -144,11 +142,11 @@ class AddDestroyHandler(gof.Optimizer):
for o in fgraph.outputs:
try:
fgraph.replace_validate(o, _output_guard(o),
reason='output_guard')
reason='output_guard')
_logger.info("Output variable %s required output_guard, "
"how was this output left unprotected against "
"destructive operations?"
% o)
"how was this output left unprotected against "
"destructive operations?"
% o)
except gof.InconsistencyError:
# This output is already impossible to destroy.
# No guard necessary
......@@ -188,50 +186,50 @@ class PrintCurrentFunctionGraph(gof.Optimizer):
optdb = gof.SequenceDB()
optdb.register('merge1', gof.MergeOptimizer(),
0, 'fast_run', 'fast_compile', 'merge')
0, 'fast_run', 'fast_compile', 'merge')
# rearranges elemwise expressions
optdb.register('canonicalize', gof.EquilibriumDB(),
1, 'fast_run', 'fast_compile')
1, 'fast_run', 'fast_compile')
optdb.register('merge1.2', gof.MergeOptimizer(),
1.2, 'fast_run', 'fast_compile', 'merge')
1.2, 'fast_run', 'fast_compile', 'merge')
optdb.register('Print1.21', PrintCurrentFunctionGraph('Post-canonicalize'),
1.21,) # 'fast_run', 'fast_compile')
1.21,) # 'fast_run', 'fast_compile')
# replace unstable subgraphs
optdb.register('stabilize', gof.EquilibriumDB(),
1.5, 'fast_run')
1.5, 'fast_run')
optdb.register('Print1.51', PrintCurrentFunctionGraph('Post-stabilize'),
1.51,) # 'fast_run', 'fast_compile')
1.51,) # 'fast_run', 'fast_compile')
# misc special cases for speed
optdb.register('specialize', gof.EquilibriumDB(),
2, 'fast_run', 'fast_compile_gpu')
2, 'fast_run', 'fast_compile_gpu')
# misc special cases for speed that break canonicalization
optdb.register('uncanonicalize', gof.EquilibriumDB(),
3, 'fast_run')
3, 'fast_run')
# misc special cases for speed that are dependent on the device.
optdb.register('specialize_device', gof.EquilibriumDB(),
48.6, 'fast_run') # must be after gpu stuff at 48.5
48.6, 'fast_run') # must be after gpu stuff at 48.5
# especially constant merge
optdb.register('merge2', gof.MergeOptimizer(),
49, 'fast_run', 'merge')
49, 'fast_run', 'merge')
optdb.register('add_no_output_from_inplace', AddNoOutputFromInplace(),
49.4)
49.4)
optdb.register('add_destroy_handler', AddDestroyHandler(),
49.5, 'fast_run', 'inplace')
49.5, 'fast_run', 'inplace')
# final pass just to make sure
optdb.register('merge3', gof.MergeOptimizer(),
100, 'fast_run', 'merge')
100, 'fast_run', 'merge')
class Mode(object):
......@@ -287,7 +285,8 @@ class Mode(object):
def __str__(self):
return "%s(linker = %s, optimizer = %s)" % (self.__class__.__name__,
self.provided_linker, self.provided_optimizer)
self.provided_linker,
self.provided_optimizer)
def __get_optimizer(self):
if isinstance(self._optimizer, gof.Query):
......@@ -306,19 +305,19 @@ class Mode(object):
def including(self, *tags):
link, opt = self.get_linker_optimizer(self.provided_linker,
self.provided_optimizer)
self.provided_optimizer)
# N.B. opt might be a Query instance, not sure what else it might be...
# string? Optimizer? OptDB? who knows???
return self.__class__(linker=link, optimizer=opt.including(*tags))
def excluding(self, *tags):
link, opt = self.get_linker_optimizer(self.provided_linker,
self.provided_optimizer)
self.provided_optimizer)
return self.__class__(linker=link, optimizer=opt.excluding(*tags))
def requiring(self, *tags):
link, opt = self.get_linker_optimizer(self.provided_linker,
self.provided_optimizer)
self.provided_optimizer)
return self.__class__(linker=link, optimizer=opt.requiring(*tags))
# If a string is passed as the mode argument in function or
......@@ -364,10 +363,11 @@ def get_mode(orig_string):
# DebugMode use its own linker.
ret = DebugMode(optimizer=config.optimizer)
else:
# The import is needed in case string is ProfileMode
from profilemode import ProfileMode, prof_mode_instance_to_print
ret = eval(string
+ '(linker=config.linker, optimizer=config.optimizer)')
# This might be required if the string is 'ProfileMode'
from profilemode import ProfileMode # noqa
from profilemode import prof_mode_instance_to_print
ret = eval(string +
'(linker=config.linker, optimizer=config.optimizer)')
elif string in predefined_modes:
ret = predefined_modes[string]
else:
......
from __future__ import print_function
# Note: this code was initially copied from the 'pyutools' package by its
# original author, and re-licensed under Theano's license.
import numpy
import theano
from theano.compile.mode import Mode
......@@ -48,7 +48,7 @@ class MonitorMode(Mode):
if optimizer == 'default':
optimizer = theano.config.optimizer
if (linker is not None and
not isinstance(linker.mode, MonitorMode)):
not isinstance(linker.mode, MonitorMode)):
raise Exception("MonitorMode can only use its own linker! You "
"should not provide one.", linker)
......@@ -86,7 +86,7 @@ class MonitorMode(Mode):
def detect_nan(i, node, fn):
for output in fn.outputs:
if (not isinstance(numpy.random.RandomState, output[0]) and
numpy.isnan(output[0]).any()):
numpy.isnan(output[0]).any()):
print('*** NaN detected ***')
theano.printing.debugprint(node)
print('Inputs : %s' % [input[0] for input in fn.inputs])
......
......@@ -71,12 +71,13 @@ class ViewOp(gof.Op):
version = []
# If any of the c code is unversionned, we have to return ()
# Else, we will return a list of (type name, version) pairs.
for t, (c, v) in sorted(self.c_code_and_version.items(), key=lambda pair: str(pair[0])):
for t, (c, v) in sorted(self.c_code_and_version.items(),
key=lambda pair: str(pair[0])):
if not v:
warnings.warn("Type %s has C code for ViewOp, but it has "
"no version. You should add a 'version' keyword arg "
"when calling register_view_op_c_code." % t,
stacklevel=2)
warnings.warn("Type %s has C code for ViewOp, but it has no "
"version. You should add a 'version' keyword "
"arg when calling register_view_op_c_code." % t,
stacklevel=2)
return ()
version.append((str(t), v))
......@@ -165,12 +166,14 @@ class DeepCopyOp(gof.Op):
version = []
# If any of the c code is unversionned, we have to return ()
# Else, we will return a list of (type name, version) pairs.
for t, (c, v) in sorted(self.c_code_and_version.items(), key=lambda pair: str(pair[0])):
for t, (c, v) in sorted(self.c_code_and_version.items(),
key=lambda pair: str(pair[0])):
if not v:
warnings.warn("Type %s has C code for DeepCopyOp, but it has "
"no version. You should add a 'version' keyword arg "
"when calling register_deep_copy_op_c_code." % t,
stacklevel=2)
"no version. You should add a 'version' keyword"
" arg when calling "
"register_deep_copy_op_c_code." % t,
stacklevel=2)
return ()
version.append((str(t), v))
......@@ -284,12 +287,13 @@ class Shape(gof.Op):
version = []
# If any of the c code is unversionned, we have to return ()
# Else, we will return a list of (type name, version) pairs.
for t, (c, v) in sorted(self.c_code_and_version.items(), key=lambda pair: str(pair[0])):
for t, (c, v) in sorted(self.c_code_and_version.items(),
key=lambda pair: str(pair[0])):
if not v:
warnings.warn("Type %s has C code for Shape, but it has "
"no version. You should add a 'version' keyword arg "
"when calling register_shape_c_code." % t,
stacklevel=2)
warnings.warn("Type %s has C code for Shape, but it has no "
"version. You should add a 'version' keyword "
"arg when calling register_shape_c_code." % t,
stacklevel=2)
return ()
version.append((str(t), v))
......@@ -301,7 +305,6 @@ class Shape(gof.Op):
shape = Shape()
_shape = shape # was used in the past, now use shape directly.
#pprint.assign(_shape, printing.MemberPrinter('shape'))
class Shape_i(gof.Op):
......@@ -389,8 +392,11 @@ class Shape_i(gof.Op):
return [()]
def grad(self, inp, grads):
return [theano.gradient.grad_not_implemented(op=self, x_pos=0, x=inp[0],
comment="No gradient for the shape of a matrix is implemented.")]
return [theano.gradient.grad_not_implemented(
op=self, x_pos=0, x=inp[0],
comment=("No gradient for the shape of a matrix "
"is implemented."))]
def shape_i(var, i, fgraph=None):
"""Equivalent of var.shape[i], but apply if possible the shape
......@@ -421,7 +427,7 @@ def shape_i(var, i, fgraph=None):
# If the output var isn't marked as being in the graph,
# we need to att it in the ShapeFeature.
shape_feature.on_import(fgraph, node,
'gof.ops.shape_i')
'gof.ops.shape_i')
if var not in shape_of:
recur(var.owner)
return shape_of[var][i]
......@@ -435,9 +441,10 @@ def shape_i(var, i, fgraph=None):
def register_shape_i_c_code(typ, code, check_input, version=()):
""" Tell Shape_i how to generate C code for a Theano Type
:param typ: A Theano type. It must be the Theano class itself and not an
instance of the class.
:param code: C code that gets the shape of dimensions %(i)s for the Theano type 'typ'.
:param typ: A Theano type. It must be the Theano class itself and not
an instance of the class.
:param code: C code that gets the shape of dimensions %(i)s for the
Theano type 'typ'.
Use %(iname)s and %(oname)s for the input and output C
variable names respectively.
:param version: A number indicating the version of the code, for cache.
......@@ -620,7 +627,8 @@ class Rebroadcast(gof.Op):
return type(self) == type(other) and self.axis == other.axis
def __hash__(self):
items = sorted(self.axis.iteritems()) # no ambiguity because each item key is unique
# no ambiguity because each item key is unique
items = sorted(self.axis.iteritems())
return hash((type(self), tuple(items)))
def __str__(self):
......@@ -637,9 +645,9 @@ class Rebroadcast(gof.Op):
def make_node(self, x):
if self.axis.keys() and (x.ndim <= numpy.max(self.axis.keys())):
raise ValueError('Trying to rebroadcast non-existent dimension')
t = x.type.clone(broadcastable=[self.axis.get(i, b)
for i, b in enumerate(
x.type.broadcastable)])
t = x.type.clone(
broadcastable=[self.axis.get(i, b)
for i, b in enumerate(x.type.broadcastable)])
return gof.Apply(self, [x], [t()])
def perform(self, node, inp, out_):
......@@ -702,10 +710,11 @@ class Rebroadcast(gof.Op):
for t, (c, v) in sorted(self.c_code_and_version.items(),
key=lambda pair: str(pair[0])):
if not v:
warnings.warn("Type %s has C code for Rebroadcast, but it has "
"no version. You should add a 'version' keyword arg "
"when calling register_rebroadcast_c_code." % t,
stacklevel=2)
warnings.warn("Type %s has C code for Rebroadcast, but it "
"has no version. You should add a 'version' "
"keyword arg when calling "
"register_rebroadcast_c_code." % t,
stacklevel=2)
return ()
version.append((str(t), v))
......@@ -718,17 +727,18 @@ def register_specify_shape_c_code(typ, code, version=(),
c_support_code_apply=None):
""" Tell SpecifyShape how to generate C code for a Theano Type
:param typ: A Theano type. It must be the Theano class itself and not an
instance of the class.
:param code: C code that checks the shape and returns a view for the Theano type 'typ'.
Use %(iname)s and %(oname)s for the input and output C
variable names respectively.
%(shape)s is the vector of shape of %(iname)s.
Check that its length is good.
:param typ: A Theano type. It must be the Theano class itself and
not an instance of the class.
:param code: C code that checks the shape and returns a view for
the Theano type 'typ'. Use %(iname)s and %(oname)s
for the input and output C variable names
respectively. %(shape)s is the vector of shape of
%(iname)s. Check that its length is good.
:param version: A number indicating the version of the code, for cache.
:param c_support_code_apply: extra code.
"""
SpecifyShape.c_code_and_version[typ] = (code, version, c_support_code_apply)
SpecifyShape.c_code_and_version[typ] = (code, version,
c_support_code_apply)
class SpecifyShape(gof.Op):
......@@ -784,7 +794,8 @@ class SpecifyShape(gof.Op):
new_shape = []
for dim in xrange(node.inputs[0].ndim):
try:
s = theano.tensor.get_scalar_constant_value(node.inputs[1][dim])
s = theano.tensor.get_scalar_constant_value(
node.inputs[1][dim])
s = theano.tensor.as_tensor_variable(s)
new_shape.append(s)
except theano.tensor.NotScalarConstantError:
......@@ -832,19 +843,21 @@ class SpecifyShape(gof.Op):
code, version, _ = self.c_code_and_version[itype]
return code % locals()
return super(SpecifyShape, self).c_code(node, node, inames, onames, sub)
return super(SpecifyShape, self).c_code(node, node, inames,
onames, sub)
def c_code_cache_version(self):
version = []
# If any of the c code is unversionned, we have to return ()
# Else, we will return a list of (type name, version) pairs.
for t, (c, v, _) in sorted(self.c_code_and_version.items(),
key=lambda pair: str(pair[0])):
key=lambda pair: str(pair[0])):
if not v:
warnings.warn("Type %s has C code for SpecifyShape, but it has "
"no version. You should add a 'version' keyword arg "
"when calling register_specify_shape_c_code." % t,
stacklevel=2)
warnings.warn("Type %s has C code for SpecifyShape, but it "
"has no version. You should add a 'version' "
"keyword arg when calling "
"register_specify_shape_c_code." % t,
stacklevel=2)
return ()
version.append((str(t), v))
......
"""Provide a simple user friendly API """
__docformat__ = 'restructuredtext en'
from theano import config
from theano.compile import orig_function, In, Out
from theano.compile import UnusedInputError
......@@ -13,6 +9,8 @@ from theano.gof import Variable, Constant
import logging
_logger = logging.getLogger("theano.compile.pfunc")
__docformat__ = 'restructuredtext en'
def rebuild_collect_shared(outputs,
inputs=None,
......@@ -21,7 +19,7 @@ def rebuild_collect_shared(outputs,
rebuild_strict=True,
copy_inputs_over=True,
no_default_updates=False,
):
):
"""
Function that allows replacing subgraphs of a computational
graph.
......@@ -152,12 +150,12 @@ def rebuild_collect_shared(outputs,
if v_orig in clone_d:
raise AssertionError(
"When using 'givens' or 'replace' with several "
"(old_v, new_v) replacement pairs, you can not have a "
"new_v variable depend on an old_v one. For instance, "
"givens = {a:b, b:(a+1)} is not allowed. Here, the old_v "
"%s is used to compute other new_v's, but it is scheduled "
"to be replaced by %s." % (v_orig, v_repl))
"When using 'givens' or 'replace' with several "
"(old_v, new_v) replacement pairs, you can not have a "
"new_v variable depend on an old_v one. For instance, "
"givens = {a:b, b:(a+1)} is not allowed. Here, the old_v "
"%s is used to compute other new_v's, but it is scheduled "
"to be replaced by %s." % (v_orig, v_repl))
clone_d[v_orig] = clone_v_get_shared_updates(v_repl,
copy_inputs_over)
......@@ -199,7 +197,7 @@ def rebuild_collect_shared(outputs,
# filter_variable ensure smooth conversion of cpu/gpu Types
try:
update_val = store_into.type.filter_variable(update_val)
except TypeError as e:
except TypeError:
err_msg = ('An update must have the same type as the'
' original shared variable (shared_var=%s,'
' shared_var.type=%s,'
......@@ -232,8 +230,8 @@ def rebuild_collect_shared(outputs,
cloned_outputs.append(Out(cloned_v, borrow=v.borrow))
else:
raise TypeError('Outputs must be theano Variable or '
'Out instances. Received ' + str(v)
+ ' of type ' + str(type(v)))
'Out instances. Received ' + str(v) +
' of type ' + str(type(v)))
# computed_list.append(cloned_v)
else:
if isinstance(outputs, Variable):
......@@ -275,35 +273,38 @@ def rebuild_collect_shared(outputs,
class Param(object):
def __init__(self, variable, default=None, name=None, mutable=False,
strict=False, allow_downcast=None, implicit=None, borrow=None):
strict=False, allow_downcast=None, implicit=None,
borrow=None):
"""
:param variable: A variable in an expression graph to use as a
compiled-function parameter
:param default: The default value to use at call-time (can also be a Container where
the function will find a value at call-time.)
:param default: The default value to use at call-time (can
also be a Container where the function will find a value
at call-time.)
:param name: A string to identify this parameter from function kwargs.
:param mutable: True -> function is allowed to modify this argument.
:param borrow: Whether the function is allowed to alias some output to
this input. Using None (default) means we re-use the same value as the
`mutable` flag.
:param borrow: Whether the function is allowed to alias some
output to this input. Using None (default) means we re-use
the same value as the `mutable` flag.
False: do not permit any output to be aliased to the input
:param strict: False -> function arguments may be copied or cast to match the
type required by the parameter `variable`.
:param strict: False -> function arguments may be copied or
cast to match the type required by the parameter
`variable`.
True -> function arguments must exactly match the type
required by `variable`.
:param allow_downcast: Only applies if `strict` is False.
True -> allow assigned value to lose precision when cast during assignment.
True -> allow assigned value to lose precision when cast
during assignment.
False -> never allow precision loss.
None -> only allow downcasting of a Python float to a scalar floatX.
:param implicit: see help(theano.io.In)
"""
self.variable = variable
self.default = default
......@@ -320,12 +321,12 @@ class Param(object):
# aliased to the input. Thus mutable=True should require borrow=True.
if self.mutable and not self.borrow:
raise AssertionError(
"Symbolic input for variable %s (name=%s) has "
"flags mutable=True, borrow=False. This combination is "
"incompatible since mutable=True implies that the "
"input variable may be both aliased (borrow=True) and "
"overwritten.",
variable, name)
"Symbolic input for variable %s (name=%s) has "
"flags mutable=True, borrow=False. This combination is "
"incompatible since mutable=True implies that the "
"input variable may be both aliased (borrow=True) and "
"overwritten.",
variable, name)
self.strict = strict
self.allow_downcast = allow_downcast
......@@ -333,9 +334,9 @@ class Param(object):
def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
no_default_updates=False, accept_inplace=False, name=None,
rebuild_strict=True, allow_input_downcast=None,
profile=None, on_unused_input=None,output_keys=None):
no_default_updates=False, accept_inplace=False, name=None,
rebuild_strict=True, allow_input_downcast=None,
profile=None, on_unused_input=None, output_keys=None):
"""Function-constructor for graphs with shared variables.
:type params: list of either Variable or Param instances.
......@@ -348,30 +349,35 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
:type mode: string or `theano.compile.Mode` instance.
:param mode: compilation mode
:type updates: iterable over pairs (shared_variable, new_expression). List, tuple or dict.
:param updates: update the values for SharedVariable inputs according to these expressions
:type updates: iterable over pairs (shared_variable,
new_expression). List, tuple or dict.
:param updates: update the values for SharedVariable inputs
according to these expressions
:type givens: iterable over pairs (Var1, Var2) of Variables. List, tuple or dict. The Var1
and Var2 in each pair must have the same Type.
:type givens: iterable over pairs (Var1, Var2) of Variables. List,
tuple or dict. The Var1 and Var2 in each pair must have the
same Type.
:param givens: specific substitutions to make in the computation graph (Var2 replaces
Var1).
:param givens: specific substitutions to make in the computation
graph (Var2 replaces Var1).
:type no_default_updates: either bool or list of Variables
:param no_default_updates: if True, do not perform any automatic update on Variables.
If False (default), perform them all. Else, perform automatic updates on all Variables
that are neither in "updates" nor in "no_default_updates".
:param no_default_updates: if True, do not perform any automatic
update on Variables. If False (default), perform them
all. Else, perform automatic updates on all Variables that are
neither in "updates" nor in "no_default_updates".
:type name: None or string
:param name: attaches a name to the profiling result of this function.
:type allow_input_downcast: Boolean
:param allow_input_downcast: True means that the values passed as
inputs when calling the function can be silently downcasted to fit
the dtype of the corresponding Variable, which may lose precision.
False means that it will only be cast to a more general, or
precise, type. None (default) is almost like False, but allows
downcasting of Python float scalars to floatX.
inputs when calling the function can be silently downcasted to
fit the dtype of the corresponding Variable, which may lose
precision. False means that it will only be cast to a more
general, or precise, type. None (default) is almost like
False, but allows downcasting of Python float scalars to
floatX.
:type profile: None, True, str, or ProfileStats instance
:param profile: accumulate profiling information into a given ProfileStats
......@@ -389,30 +395,32 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
:rtype: theano.compile.Function
:returns: a callable object that will compute the outputs (given the inputs)
and update the implicit function arguments according to the `updates`.
:returns: a callable object that will compute the outputs (given
the inputs) and update the implicit function arguments
according to the `updates`.
:note: Regarding givens: Be careful to make sure that these substitutions are
independent--behaviour when Var1 of one pair appears in the graph leading to Var2 in
another expression is undefined. Replacements specified with givens are different from
optimizations in that Var2 is not expected to be equivalent to Var1.
:note: Regarding givens: Be careful to make sure that these
substitutions are independent--behaviour when Var1 of one pair
appears in the graph leading to Var2 in another expression is
undefined. Replacements specified with givens are different
from optimizations in that Var2 is not expected to be
equivalent to Var1.
"""
#
# This function works by cloning the graph (except for the inputs), and then shipping it
# off to compile.function
# (There it will be cloned again, unnecessarily, because it doesn't know that we already
# cloned it.)
# This function works by cloning the graph (except for the
# inputs), and then shipping it off to compile.function (There it
# will be cloned again, unnecessarily, because it doesn't know
# that we already cloned it.)
#
# First, it clones the replacements named in the givens argument, and points each Var1 to
# the clone of Var2.
# Then it sets the inputs in the clone dictionary.
# After these steps, we are assuming that the clone dictionary contains all the inputs to
# First, it clones the replacements named in the givens argument,
# and points each Var1 to the clone of Var2. Then it sets the
# inputs in the clone dictionary. After these steps, we are
# assuming that the clone dictionary contains all the inputs to
# the computation graph.
#
# Then it clones the outputs and the update expressions. This rebuilds a computation graph
# from the inputs and the givens.
# Then it clones the outputs and the update expressions. This
# rebuilds a computation graph from the inputs and the givens.
#
if updates is None:
updates = []
......@@ -431,11 +439,13 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
# useful.
if not isinstance(params, (list, tuple)):
raise Exception("in pfunc() the first argument must be a list or a tuple")
raise Exception("in pfunc() the first argument must be a list or "
"a tuple")
if not isinstance(no_default_updates, bool)\
and not isinstance(no_default_updates, list):
raise TypeError("no_default_update should be either a boolean or a list")
raise TypeError("no_default_update should be either a boolean or "
"a list")
if len(updates) > 0 and any(isinstance(v, Variable)
for v in iter_over_pairs(updates)):
......@@ -453,10 +463,10 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
if v in in_variables[(i + 1):]:
dup_v_i = in_variables.index(v, (i + 1))
raise UnusedInputError(
("Variable %s is used twice in inputs to theano.function, "
"at indices %i and %i. This would result in values "
"provided for it being ignored. Please do not duplicate "
"variables in the inputs list." % (v, i, dup_v_i)))
("Variable %s is used twice in inputs to theano.function, "
"at indices %i and %i. This would result in values "
"provided for it being ignored. Please do not duplicate "
"variables in the inputs list." % (v, i, dup_v_i)))
# Check that we are not using `givens` to replace input variables, because
# this typically does nothing, contrary to what one may expect.
......@@ -494,9 +504,10 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
i.variable = iv
for sv in shared_inputs:
# pass value of None here
# value will be stored in the resulting functions' defaults list
# but since the value of shared variables never needs to be refed, it is not needed
# pass value of None
# value will be stored in the resulting functions' defaults
# list but since the value of shared variables never needs to
# be refed, it is not needed
if sv in update_d:
si = In(variable=sv, value=sv.container, mutable=True,
borrow=True, update=update_d[sv], shared=True)
......@@ -506,8 +517,9 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
inputs.append(si)
return orig_function(inputs, cloned_outputs, mode,
accept_inplace=accept_inplace, name=name, profile=profile,
on_unused_input=on_unused_input, output_keys=output_keys)
accept_inplace=accept_inplace, name=name,
profile=profile, on_unused_input=on_unused_input,
output_keys=output_keys)
def _pfunc_param_to_in(param, strict=False, allow_downcast=None):
......@@ -517,14 +529,14 @@ def _pfunc_param_to_in(param, strict=False, allow_downcast=None):
return In(variable=param, strict=strict, allow_downcast=allow_downcast)
elif isinstance(param, Param):
return In(
variable=param.variable,
name=param.name,
value=param.default,
mutable=param.mutable,
strict=param.strict,
borrow=param.borrow,
allow_downcast=param.allow_downcast,
implicit=param.implicit)
variable=param.variable,
name=param.name,
value=param.default,
mutable=param.mutable,
strict=param.strict,
borrow=param.borrow,
allow_downcast=param.allow_downcast,
implicit=param.implicit)
raise TypeError('Unknown parameter type: %s' % type(param))
......
......@@ -10,35 +10,35 @@ from theano.gof.link import WrapLinker
from theano.compile.mode import (Mode, register_mode,
predefined_modes, predefined_linkers,
predefined_optimizers)
from theano import gof
from theano.configparser import config, AddConfigVar, IntParam, BoolParam
from theano.compile.function_module import FunctionMaker
run_cthunk = None # Will be imported only when needed.
from profiling import ProfileStats
run_cthunk = None # Will be imported only when needed.
import_time = time.time()
AddConfigVar('ProfileMode.n_apply_to_print',
"Number of apply instances to print by default",
IntParam(15, lambda i: i > 0),
in_c_key=False)
"Number of apply instances to print by default",
IntParam(15, lambda i: i > 0),
in_c_key=False)
AddConfigVar('ProfileMode.n_ops_to_print',
"Number of ops to print by default",
IntParam(20, lambda i: i > 0),
in_c_key=False)
"Number of ops to print by default",
IntParam(20, lambda i: i > 0),
in_c_key=False)
AddConfigVar('ProfileMode.min_memory_size',
"""For the memory profile, do not print apply nodes if the size
of their outputs (in bytes) is lower then this threshold""",
IntParam(1024, lambda i: i >= 0),
in_c_key=False)
"For the memory profile, do not print apply nodes if the size "
"of their outputs (in bytes) is lower then this threshold",
IntParam(1024, lambda i: i >= 0),
in_c_key=False)
AddConfigVar('ProfileMode.profile_memory',
"""Enable profiling of memory used by Theano functions""",
BoolParam(False),
in_c_key=False)
BoolParam(False),
in_c_key=False)
class Profile_Maker(FunctionMaker):
......@@ -46,8 +46,8 @@ class Profile_Maker(FunctionMaker):
ret = super(Profile_Maker, self).create(input_storage, trustme)
if (hasattr(theano, 'sandbox') and
hasattr(theano.sandbox, 'cuda') and
theano.sandbox.cuda.cuda_enabled):
hasattr(theano.sandbox, 'cuda') and
theano.sandbox.cuda.cuda_enabled):
if os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1':
raise Exception(
"You are running the Theano profiler with CUDA enabled."
......@@ -70,8 +70,8 @@ class Profile_Maker(FunctionMaker):
# corresponding to the i'th position in the toposort.
assert len(ret.fn.thunk_groups[i]) == 1
profile.apply_cimpl[node] = hasattr(
ret.fn.thunk_groups[i][0],
'cthunk')
ret.fn.thunk_groups[i][0],
'cthunk')
# Here we replace the linker function.
# This ugliness makes WrapLinker (an object that *generates*
......@@ -84,7 +84,8 @@ class Profile_Maker(FunctionMaker):
def new_fn():
self.mode.apply_time = self.mode.profile_stats[ret].apply_time
self.mode.variable_shape = self.mode.profile_stats[ret].variable_shape
self.mode.variable_shape = \
self.mode.profile_stats[ret].variable_shape
ret_fn()
# delete the old apply_time variable
# because it doesn't mean the same thing anymore.
......@@ -97,12 +98,12 @@ class Profile_Maker(FunctionMaker):
global run_cthunk
if run_cthunk is None and any(profile.apply_cimpl.values()):
# Lazy import to avoid compilation when importing theano.
from theano.gof.cutils import run_cthunk
from theano.gof.cutils import run_cthunk # noqa
warnings.warn(
"DEPRECATION WARNING: The ProfileMode is deprecated. Use the Theano"
" flags/parameter to theano.function 'profile=True' instead"
" of 'mode=ProfileMode'")
"DEPRECATION WARNING: The ProfileMode is deprecated. "
"Use the Theano flags/parameter to theano.function "
"'profile=True' instead of 'mode=ProfileMode'")
return ret
......@@ -115,9 +116,9 @@ class ProfileMode(Mode):
message = ""
profile_stats = {}
self.__setstate__((linker,
optimizer,
message,
profile_stats))
optimizer,
message,
profile_stats))
def function_maker(self, i, o, m, *args, **kwargs):
"""Return an instance of `Profiler_Maker` which init the count"""
......@@ -156,7 +157,7 @@ class ProfileMode(Mode):
raise RuntimeError(
('A C Op raised an exception. ProfileMode cannot'
' tell you what it was though. Use a standard mode'
' such as FAST_RUN to correct the problem.'))
' such as FAST_RUN to correct the problem.'))
else:
t0 = time.time()
th()
......@@ -209,26 +210,43 @@ class ProfileMode(Mode):
self.fn_time = 0
def print_summary(self, **kwargs):
""" Print 3 summary that show where the time is spend. The first show an Apply-wise summary, the second show an Op-wise summary, the third show an type-Op-wise summary.
The Apply-wise summary print the timing information for the worst offending Apply nodes. This corresponds to individual Op applications within your graph which take the longest to execute (so if you use dot twice, you will see two entries there).
The Op-wise summary print the execution time of all Apply nodes executing the same Op are grouped together and the total execution time per Op is shown (so if you use dot twice, you will see only one entry there corresponding to the sum of the time spent in each of them). If two Op have different hash value, they will be separate.
The type-Op-wise summary group the result by type of op. So event if two Op have different hash value, they will be merged.
Their is an hack with the Op-wise summary. Go see it if you want to know more.
""" Print 3 summaries that show where time is spent. The first shows
an Apply-wise summary, the second an Op-wise summary and the
third a type-Op-wise summary.
The Apply-wise summary prints the timing information for the
worst offending Apply nodes. This corresponds to individual Op
applications within your graph which take the longest to
execute (so if you use dot twice, you will see two entries
there).
The Op-wise summary prints the execution time of all Apply
nodes executing the same Op grouped together and the total
execution time per Op is shown (so if you use dot twice, you
will see only one entry there corresponding to the sum of the
time spent in each of them). If two Ops have different hash
value, they will be separate.
The type-Op-wise summary group the result by type of op. So
event if two Op have different hash value, they will be
merged.
There is an hack with the Op-wise summary. Go see it if you
want to know more.
:param kwargs: They are passed to print_summary_ expanded.
Currently there is n_apply_to_print, n_ops_to_print and min_memory_size
that are accepted.
Currently there is n_apply_to_print,
n_ops_to_print and min_memory_size that are
accepted.
"""
compile_time = sum([ps.compile_time for ps
in self.profile_stats.values()])
fct_call = dict([(fn, ps.fct_callcount)
for (fn, ps) in self.profile_stats.items()])
for (fn, ps) in self.profile_stats.items()])
fct_call_time = dict([(fn, ps.fct_call_time)
for (fn, ps) in self.profile_stats.items()])
for (fn, ps) in self.profile_stats.items()])
apply_time = {}
for fn, ps in self.profile_stats.items():
......@@ -249,10 +267,10 @@ class ProfileMode(Mode):
variable_shape.update(ps.variable_shape)
other_time = dict(
linker_time=sum(
[ps.linker_time for ps in self.profile_stats.values()]),
optimizer_time=sum(
[ps.optimizer_time for ps in self.profile_stats.values()]))
linker_time=sum(
[ps.linker_time for ps in self.profile_stats.values()]),
optimizer_time=sum(
[ps.optimizer_time for ps in self.profile_stats.values()]))
self.print_summary_("print_summary",
compile_time, fct_call_time, fct_call,
......@@ -261,14 +279,18 @@ class ProfileMode(Mode):
**kwargs)
def print_diff_summary(self, other, **kwargs):
""" As print_summary, but print the difference on two different profile mode.
TODO: Also we don't print the Apply-wise summary as it don't work for now.
""" As print_summary, but print the difference on two different
profile mode.
TODO: Also we don't print the Apply-wise summary as it don't
work for now.
TODO: make comparaison with gpu code.
:param other: the other instance of ProfileMode that we want to be compared to.
:param other: the other instance of ProfileMode that we want
to be compared to.
:param kwargs: They are passed to print_summary_ expanded.
Currently there is n_apply_to_print, n_ops_to_print and min_memory_size
that are accepted.
Currently there is n_apply_to_print, n_ops_to_print and
min_memory_size that are accepted.
"""
def diff_dict(a_time, b_time_):
......@@ -293,17 +315,17 @@ class ProfileMode(Mode):
message = self.message
variable_shape = diff_dict(self.variable_shape, other.variable_shape)
self_linker_time = sum([ps.linker_time for ps
in self.profile_stats.values()])
in self.profile_stats.values()])
other_linker_time = sum([ps.linker_time for ps
in other.profile_stats.values()])
self_optimizer_time = sum([ps.optimizer_time for ps
in self.profile_stats.values()])
in self.profile_stats.values()])
other_optimizer_time = sum([ps.optimizer_time for ps
in other.profile_stats.values()])
in other.profile_stats.values()])
other_time = {'linker_time': self_linker_time - other_linker_time,
'optimizer_time': self_optimizer_time -
other_optimizer_time}
other_optimizer_time}
self.print_summary_("print_diff_summary", compile_time,
fct_call_time, fct_call,
apply_time, apply_cimpl, message, variable_shape,
......@@ -318,7 +340,7 @@ class ProfileMode(Mode):
n_ops_to_print=config.ProfileMode.n_ops_to_print,
print_apply=True,
min_memory_size=config.ProfileMode.min_memory_size,
):
):
"""
do the actual printing of print_summary and print_diff_summary.
......@@ -331,7 +353,8 @@ class ProfileMode(Mode):
print("ProfileMode is deprecated! Use the new profiler.")
print(" The Theano flags to enable it ise: profile=True")
print(" The Theano flags for the memory profile to it is: profile_memory=True")
print(" The Theano flags for the memory profile to it is: "
"profile_memory=True")
total_time = time.time() - import_time
total_fct_time = sum(fct_call_time.values())
......@@ -352,25 +375,37 @@ class ProfileMode(Mode):
print('ProfileMode.%s(%s)' % (fct_name, message))
print('---------------------------')
print()
print('Time since import %.3fs'%(total_time))
print('Theano compile time: %.3fs (%.1f%% since import)'%(compile_time, compile_time/total_time*100))
print(' Optimization time: %.3fs'%(other_time['optimizer_time']))
print(' Linker time: %.3fs'%(other_time['linker_time']))
print('Theano fct call %.3fs (%.1f%% since import)'%(total_fct_time, total_fct_time/total_time*100))
print(' Theano Op time %.3fs %.1f%%(since import) %.1f%%(of fct call)' % (
local_time, local_time/total_time*100, time_pr_in_fct))
print(' Theano function overhead in ProfileMode %.3fs %.1f%%(since import) %.1f%%(of fct call)' % (
overhead_time, overhead_time/total_time*100, overhead_time_pourcent_fct_time))
print('%i Theano fct call, %.3fs per call'%(total_fct_call, time_per_call))
print('Rest of the time since import %.3fs %.1f%%'%(unknown_time, unknown_time/total_time*100))
print('Time since import %.3fs' % (total_time))
print('Theano compile time: %.3fs (%.1f%% since import)' %
(compile_time, compile_time/total_time*100))
print(' Optimization time: %.3fs' % (other_time['optimizer_time']))
print(' Linker time: %.3fs' % (other_time['linker_time']))
print('Theano fct call %.3fs (%.1f%% since import)' %
(total_fct_time, total_fct_time/total_time*100))
print(' Theano Op time %.3fs %.1f%%(since import) %.1f%%'
'(of fct call)' % (local_time, local_time/total_time*100,
time_pr_in_fct))
print(' Theano function overhead in ProfileMode %.3fs %.1f%%'
'(since import) %.1f%%(of fct call)' % (
overhead_time, overhead_time/total_time*100,
overhead_time_pourcent_fct_time))
print('%i Theano fct call, %.3fs per call' %
(total_fct_call, time_per_call))
print('Rest of the time since import %.3fs %.1f%%' %
(unknown_time, unknown_time/total_time*100))
print()
print('Theano fct summary:')
print('<% total fct time> <total time> <time per call> <nb call> <fct name>')
print('<% total fct time> <total time> <time per call> <nb call> '
'<fct name>')
for key in fct_call.keys():
if fct_call[key] > 0:
print(' %4.1f%% %.3fs %.2es %d %s'%(fct_call_time[key]/total_fct_time*100 , fct_call_time[key],
fct_call_time[key]/fct_call[key], fct_call[key], key.name))
print(' %4.1f%% %.3fs %.2es %d %s' %
(fct_call_time[key]/total_fct_time*100,
fct_call_time[key],
fct_call_time[key]/fct_call[key],
fct_call[key],
key.name))
else:
print(' NOT CALLED', key.name)
......@@ -387,7 +422,8 @@ class ProfileMode(Mode):
op_apply.setdefault(op, 0)
sop_apply.setdefault(type(a.op), 0)
op_time[op] += t
nb_call = [v for k, v in fct_call.items() if k.maker.fgraph is a.fgraph][0]
nb_call = [v for k, v in fct_call.items()
if k.maker.fgraph is a.fgraph][0]
op_cimpl.setdefault(a.op, True)
op_cimpl[a.op] = op_cimpl[a.op] and apply_cimpl.get(a, False)
if t == 0:
......@@ -401,7 +437,8 @@ class ProfileMode(Mode):
sop_time = {}
sop_call = {}
sop_op = {}
sop_cimpl = {} # map each op class to Bool. True iff all applies were done in c.
# map each op class to Bool. True iff all applies were done in c.
sop_cimpl = {}
for a, t in op_time.items():
typ = type(a)
sop_time.setdefault(typ, 0)
......@@ -415,8 +452,11 @@ class ProfileMode(Mode):
# Print the summary per op class.
print()
print('Single Op-wise summary:')
print('<% of local_time spent on this kind of Op> <cumulative %> <self seconds> <cumulative seconds> <time per call> [*] <nb_call> <nb_op> <nb_apply> <Op name>')
sotimes = [(t*100/local_time, t, a, sop_cimpl[a], sop_call[a], sop_op[a], sop_apply[a]) for a, t in sop_time.items()]
print('<% of local_time spent on this kind of Op> <cumulative %> '
'<self seconds> <cumulative seconds> <time per call> [*] '
'<nb_call> <nb_op> <nb_apply> <Op name>')
sotimes = [(t*100/local_time, t, a, sop_cimpl[a], sop_call[a],
sop_op[a], sop_apply[a]) for a, t in sop_time.items()]
sotimes.sort()
sotimes.reverse()
tot = 0
......@@ -430,11 +470,14 @@ class ProfileMode(Mode):
msg = '*'
else:
msg = ' '
print(' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %5d %2d %2d %s' % (f, ftot, t, tot, t/nb_call, msg, nb_call, nb_op, nb_apply, a))
print(' ... (remaining %i single Op account for %.2f%%(%.2fs) of the runtime)'\
% (max(0, len(sotimes)-n_ops_to_print),
sum(soinfo[0] for soinfo in sotimes[n_ops_to_print:]),
sum(soinfo[1] for soinfo in sotimes[n_ops_to_print:])))
print(' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %5d %2d '
'%2d %s' % (f, ftot, t, tot, t/nb_call, msg, nb_call,
nb_op, nb_apply, a))
print(' ... (remaining %i single Op account for %.2f%%(%.2fs) of '
'the runtime)' %
(max(0, len(sotimes)-n_ops_to_print),
sum(soinfo[0] for soinfo in sotimes[n_ops_to_print:]),
sum(soinfo[1] for soinfo in sotimes[n_ops_to_print:])))
print('(*) Op is running a c implementation')
......@@ -446,13 +489,19 @@ class ProfileMode(Mode):
flops_msg = ''
if op_flops:
flops_msg = ' <MFlops/s>'
print('\nHACK WARNING: we print the flops for some OP, but the logic don\'t always work. You need to know the internal of Theano to make it work correctly. Otherwise don\'t use!')
print("\nHACK WARNING: we print the flops for some OP, but the "
"logic doesn't always work. You need to know the "
"internals of Theano to make it work correctly. "
"Otherwise don't use it!")
print()
print('Op-wise summary:')
print('<%% of local_time spent on this kind of Op> <cumulative %%> <self seconds> <cumulative seconds> <time per call> [*] %s <nb_call> <nb apply> <Op name>'%(flops_msg))
print('<%% of local_time spent on this kind of Op> <cumulative %%> '
'<self seconds> <cumulative seconds> <time per call> [*] %s '
'<nb_call> <nb apply> <Op name>' % (flops_msg))
otimes = [(t*100/local_time, t, a, op_cimpl.get(a, 0), op_call.get(a, 0), op_apply.get(a, 0))
for a, t in op_time.items()]
otimes = [(t*100/local_time, t, a, op_cimpl.get(a, 0),
op_call.get(a, 0), op_apply.get(a, 0))
for a, t in op_time.items()]
otimes.sort()
otimes.reverse()
tot = 0
......@@ -467,20 +516,33 @@ class ProfileMode(Mode):
else:
msg = ' '
if op_flops:
print(' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %7.1f %5d %2d %s' % (f, ftot, t, tot, t/nb_call, msg, op_flops.get(a, -1), nb_call, nb_apply, a))
print(' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %7.1f '
'%5d %2d %s' % (f, ftot, t, tot, t/nb_call, msg,
op_flops.get(a, -1), nb_call, nb_apply,
a))
else:
print(' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %5d %2d %s' % (f, ftot, t, tot, t/nb_call, msg, nb_call, nb_apply, a))
print(' ... (remaining %i Op account for %6.2f%%(%.2fs) of the runtime)'\
% (max(0, len(otimes)-n_ops_to_print),
sum(f for f, t, a, ci, nb_call, nb_op in otimes[n_ops_to_print:]),
sum(t for f, t, a, ci, nb_call, nb_op in otimes[n_ops_to_print:])))
print(' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %5d %2d '
'%s' % (f, ftot, t, tot, t/nb_call, msg, nb_call,
nb_apply, a))
print(' ... (remaining %i Op account for %6.2f%%(%.2fs) of the '
'runtime)' %
(max(0, len(otimes)-n_ops_to_print),
sum(f for f, t, a, ci, nb_call, nb_op in
otimes[n_ops_to_print:]),
sum(t for f, t, a, ci, nb_call, nb_op in
otimes[n_ops_to_print:])))
print('(*) Op is running a c implementation')
if print_apply:
print()
print('Apply-wise summary:')
print('<% of local_time spent at this position> <cumulative %%> <apply time> <cumulative seconds> <time per call> [*] <nb_call> <Apply position> <Apply Op name>')
atimes = [(t*100/local_time, t, a, [v for k, v in fct_call.items() if k.maker.fgraph is a[1].fgraph][0]) for a, t in apply_time.items()]
print('<% of local_time spent at this position> <cumulative %%> '
'<apply time> <cumulative seconds> <time per call> [*] '
'<nb_call> <Apply position> <Apply Op name>')
atimes = [(t*100/local_time, t, a,
[v for k, v in fct_call.items()
if k.maker.fgraph is a[1].fgraph][0])
for a, t in apply_time.items()]
atimes.sort()
atimes.reverse()
tot = 0
......@@ -493,12 +555,15 @@ class ProfileMode(Mode):
msg = '*'
else:
msg = ' '
print(' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %i %2i %s' % (
f, ftot, t, tot, t/nb_call, msg, nb_call, a[0], str(a[1])))
print(' ... (remaining %i Apply instances account for %.2f%%(%.2fs) of the runtime)'\
% (max(0, len(atimes)-n_apply_to_print),
sum(f for f, t, a, nb_call in atimes[n_apply_to_print:]),
sum(t for f, t, a, nb_call in atimes[n_apply_to_print:])))
print(' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %i '
'%2i %s' %
(f, ftot, t, tot, t/nb_call, msg, nb_call, a[0],
str(a[1])))
print(' ... (remaining %i Apply instances account for '
'%.2f%%(%.2fs) of the runtime)' %
(max(0, len(atimes)-n_apply_to_print),
sum(f for f, t, a, nb_call in atimes[n_apply_to_print:]),
sum(t for f, t, a, nb_call in atimes[n_apply_to_print:])))
print('(*) Op is running a c implementation')
for printer in profiler_printers:
printer(fct_name, compile_time, fct_call_time, fct_call,
......@@ -506,8 +571,9 @@ class ProfileMode(Mode):
other_time)
if not variable_shape:
print("""\nProfile of Theano intermediate memory disabled.
To enabled, put the Theano flag ProfileMode.profile_memory to True.""")
print("\nProfile of Theano intermediate memory disabled. "
"To enable, set the Theano flag ProfileMode.profile_memory "
"to True.""")
else:
print("""
The memory profile in ProfileMode is removed!
......@@ -540,7 +606,6 @@ Test them first, as they are not guaranteed to always provide a speedup.""")
scal.Cosh, scal.Sinh,
T.nnet.sigm.ScalarSigmoid,
T.nnet.sigm.ScalarSoftplus]
# Abs, Mod in float{32,64} only
def get_scalar_ops(s):
if isinstance(s, theano.scalar.Composite):
......@@ -566,7 +631,8 @@ Test them first, as they are not guaranteed to always provide a speedup.""")
if s_op.__class__ in scalar_op_amdlibm_speed_up:
return True
elif s_op.__class__ not in scalar_op_amdlibm_no_speed_up:
print("We don't know if amdlibm will accelerate this scalar op.", s_op)
print("We don't know if amdlibm will accelerate "
"this scalar op.", s_op)
return False
def exp_float32_op(op):
......@@ -585,7 +651,9 @@ Test them first, as they are not guaranteed to always provide a speedup.""")
# tip 2
if not config.lib.amdlibm and any([amdlibm_speed_up(a.op) for i, a
in apply_time]):
print(" - Try installing amdlibm and set the Theano flag lib.amdlibm=True. This speeds up only some Elemwise operation.")
print(" - Try installing amdlibm and set the Theano flag "
"lib.amdlibm=True. This speeds up only some Elemwise "
"operation.")
printed_tip = True
# tip 3
......@@ -601,7 +669,8 @@ Test them first, as they are not guaranteed to always provide a speedup.""")
for a, t in apply_time.iteritems():
node = a[1]
if (isinstance(node.op, T.Dot) and
all([len(i.type.broadcastable) == 2 for i in node.inputs])):
all([len(i.type.broadcastable) == 2
for i in node.inputs])):
print((" - You have a dot operation that was not optimized to"
" dot22 (which is faster). Make sure the inputs are "
"float32 or float64, and are the same for both inputs. "
......
......@@ -240,7 +240,6 @@ class ProfileStats(object):
else:
self.flag_time_thunks = flag_time_thunks
self.__dict__.update(kwargs)
#print >> sys.stderr, "self.message", self.message
if atexit_print:
global _atexit_print_list
_atexit_print_list.append(self)
......@@ -358,7 +357,7 @@ class ProfileStats(object):
local_time = 0
if local_time == 0:
print(('ProfileMode.summary_class: total time 0'
' (did you forget to enable counters?)'), file=file)
' (did you forget to enable counters?)'), file=file)
return
class_time = self.class_time()
class_call = self.class_callcount()
......@@ -377,9 +376,6 @@ class ProfileStats(object):
tot = 0
print('Class', file=file)
print('---', file=file)
#print >> file, '<% time> <cumulative %%> <apply time>,'
#print >>file, '<cumulative seconds> <time per call> <nb_call>'
#print >>file, '<Class name>'
hs = []
# formatting string
es = []
......@@ -421,18 +417,21 @@ class ProfileStats(object):
tot += t
ftot = tot * 100 / local_time
# Remove the useless start and end of the class name:
# "<class 'theano.sandbox.cuda.blas.GpuDot22'>" -> "theano.sandbox.cuda.blas.GpuDot22"
# "<class 'theano.sandbox.cuda.blas.GpuDot22'>" ->
# "theano.sandbox.cuda.blas.GpuDot22"
class_name = str(a)[8:-2][:maxlen]
print(format_str % (f, ftot, t, t / nb_call,
impl, nb_call,
nb_apply, class_name), file=file)
impl, nb_call,
nb_apply, class_name), file=file)
# While this carries over less information, it is arranged such
# that it way more readeable that the previous output of the
# profiler
print(' ... (remaining %i Classes account for %6.2f%%(%.2fs) of the runtime)'\
% (max(0, len(otimes) - N),
print(' ... (remaining %i Classes account for %6.2f%%(%.2fs) of '
'the runtime)' %
(max(0, len(otimes) - N),
sum(f for f, t, a, ci, nb_call, nb_op in otimes[N:]),
sum(t for f, t, a, ci, nb_call, nb_op in otimes[N:])), file=file)
sum(t for f, t, a, ci, nb_call, nb_op in otimes[N:])),
file=file)
print('', file=file)
def summary_ops(self, file=sys.stderr, N=None):
......@@ -442,7 +441,7 @@ class ProfileStats(object):
local_time = 0
if local_time == 0:
print(('ProfileMode.summary_ops: total time 0'
' (did you forget to enable counters?)'), file=file)
' (did you forget to enable counters?)'), file=file)
return
op_time = self.op_time()
op_call = self.op_callcount()
......@@ -459,9 +458,6 @@ class ProfileStats(object):
tot = 0
print('Ops', file=file)
print('---', file=file)
#print >> file, '<% time> <cumulative %%> <apply time>,'
#print >>file, '<cumulative seconds> <time per call> <nb_call>'
#print >>file, '<Op name>'
hs = []
# formatting string
es = []
......@@ -503,15 +499,17 @@ class ProfileStats(object):
tot += t
ftot = tot * 100 / local_time
print(format_str % (f, ftot, t, t / nb_call,
impl, nb_call,
nb_apply, str(a)[:maxlen]), file=file)
impl, nb_call,
nb_apply, str(a)[:maxlen]), file=file)
# While this carries over less information, it is arranged such
# that it way more readeable that the previous output of the
# profiler
print(' ... (remaining %i Ops account for %6.2f%%(%.2fs) of the runtime)'\
% (max(0, len(otimes) - N),
print(' ... (remaining %i Ops account for %6.2f%%(%.2fs) of '
'the runtime)' %
(max(0, len(otimes) - N),
sum(f for f, t, a, ci, nb_call, nb_op in otimes[N:]),
sum(t for f, t, a, ci, nb_call, nb_op in otimes[N:])), file=file)
sum(t for f, t, a, ci, nb_call, nb_op in otimes[N:])),
file=file)
print('', file=file)
def summary_nodes(self, file=sys.stderr, N=None):
......@@ -521,12 +519,11 @@ class ProfileStats(object):
local_time = 0
if local_time == 0:
print(('ProfileMode.summary_nodes: total time 0'
' (did you forget to enable counters?)'), file=file)
' (did you forget to enable counters?)'), file=file)
return
print('Apply', file=file)
print('------', file=file)
#print >> file, '<% time> <cumulative %%> <apply time> <cumulative seconds> <time per call> <nb_call> <Apply Op name>'
# headers
hs = []
# formatting string
......@@ -601,9 +598,9 @@ class ProfileStats(object):
flops = " "
flops_s = " "
print(format_str % (f, ftot, t, t / nb_call, nb_call,
nd_id,
flops, flops_s,
str(a)[:maxlen]), file=file)
nd_id,
flops, flops_s,
str(a)[:maxlen]), file=file)
if not config.profile_memory:
continue
for idx, var in enumerate(a.inputs):
......@@ -620,10 +617,9 @@ class ProfileStats(object):
idx, dtype, sh, st), file=file)
# Same as before, this I've sacrificied some information making
# the output more readable
# print >> file, ' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %i %s'%(
# f, ftot, t, tot, t/nb_call,nb_call, str(a))
print(' ... (remaining %i Apply instances account for %.2f%%(%.2fs) of the runtime)'\
% (max(0, len(atimes) - N),
print(' ... (remaining %i Apply instances account for '
'%.2f%%(%.2fs) of the runtime)' %
(max(0, len(atimes) - N),
sum(f for f, t, a, nd_id, nb_call in atimes[N:]),
sum(t for f, t, a, nd_id, nb_call in atimes[N:])), file=file)
print('', file=file)
......@@ -640,15 +636,17 @@ class ProfileStats(object):
100 * self.vm_call_time / self.fct_call_time), file=file)
local_time = sum(self.apply_time.values())
if local_time > 0:
print(' Time in thunks: %es (%.3f%%)' % (
local_time, 100 * local_time / self.fct_call_time), file=file)
print(' Time in thunks: %es (%.3f%%)' %
(local_time, 100 * local_time / self.fct_call_time),
file=file)
print(' Total compile time: %es' % self.compile_time, file=file)
print(' Number of Apply nodes: %d' % self.nb_nodes, file=file)
print(' Theano Optimizer time: %es' % self.optimizer_time, file=file)
print(' Theano validate time: %es' % self.validate_time, file=file)
print((' Theano Linker time (includes C,'
' CUDA code generation/compiling): %es' %
self.linker_time), file=file)
print(' Theano Optimizer time: %es' % self.optimizer_time,
file=file)
print(' Theano validate time: %es' % self.validate_time,
file=file)
print(' Theano Linker time (includes C, CUDA code '
'generation/compiling): %es' % self.linker_time, file=file)
print(' Import time %es' % self.import_time, file=file)
print('', file=file)
......@@ -656,7 +654,8 @@ class ProfileStats(object):
assert self.validate_time < self.optimizer_time
def summary_globals(self, file):
print('Time in all call to theano.grad() %es' % theano.gradient.grad_time, file=file)
print('Time in all call to theano.grad() %es' %
theano.gradient.grad_time, file=file)
def summary_memory(self, file, N=None):
fct_memory = {} # fgraph->dict(node->[outputs size])
......@@ -742,7 +741,8 @@ class ProfileStats(object):
# two data structure used to mimic Python gc
viewed_by = {} # {var1: [vars that view var1]}
# The len of the list is the value of python ref count. But we use a list, not just the ref count value.
# The len of the list is the value of python ref
# count. But we use a list, not just the ref count value.
# This is more safe to help detect potential bug in the algo
for var in fgraph.variables:
viewed_by[var] = []
......@@ -778,14 +778,16 @@ class ProfileStats(object):
ins = None
if dmap and idx2 in dmap:
vidx = dmap[idx2]
assert len(
vidx) == 1, "Here we only support the possibility to destroy one input"
assert len(vidx) == 1, ("Here we only support the "
"possibility to destroy one "
"input")
ins = node.inputs[vidx[0]]
if vmap and idx2 in vmap:
assert ins is None
vidx = vmap[idx2]
assert len(
vidx) == 1, "Here we only support the possibility to view one input"
assert len(vidx) == 1, ("Here we only support the "
"possibility to view one "
"input")
ins = node.inputs[vidx[0]]
if ins is not None:
# This is needed for destroy_map in case it
......@@ -818,7 +820,8 @@ class ProfileStats(object):
if (dependencies[ins] and
ins not in fgraph.outputs and
ins.owner and
all([compute_map[v][0] for v in dependencies[ins]])):
all([compute_map[v][0]
for v in dependencies[ins]])):
if ins not in view_of and not viewed_by.get(ins, []):
running_memory_size[cg] -= var_mem[ins]
elif ins in view_of:
......@@ -907,22 +910,24 @@ class ProfileStats(object):
ins = None
if dmap and idx in dmap:
vidx = dmap[idx]
assert len(
vidx) == 1, "Here we only support the possibility to destroy one input"
assert len(vidx) == 1, ("Here we only support "
"the possibility to "
"destroy one input")
ins = node.inputs[vidx[0]]
if vmap and idx in vmap:
assert ins is None
vidx = vmap[idx]
assert len(
vidx) == 1, "Here we only support the possibility to destroy one input"
assert len(vidx) == 1, ("Here we only support "
"the possibility to "
"view one input")
ins = node.inputs[vidx[0]]
if ins is not None:
# This is needed for destroy_map in case it
# return a partial view that is destroyed. So
# return a partial view that is destroyed. So
# the output could be different then the
# input.
assert isinstance(ins, theano.Variable)
# We keep trac of view only again the original
# We keep track of view only again the original
origin = view_of.get(ins, ins)
view_of[out] = origin
viewof_change.append(out)
......@@ -944,8 +949,10 @@ class ProfileStats(object):
if (dependencies[ins] and
ins not in fgraph.outputs and
ins.owner and
all([compute_map[v][0] for v in dependencies[ins]])):
if ins not in view_of and not viewed_by.get(ins, []):
all([compute_map[v][0]
for v in dependencies[ins]])):
if (ins not in view_of and
not viewed_by.get(ins, [])):
mem_freed += var_mem[ins]
elif ins in view_of:
origin = view_of[ins]
......@@ -953,7 +960,8 @@ class ProfileStats(object):
viewedby_remove[origin].append(ins)
if (not viewed_by[origin] and
origin not in fgraph.inputs and
not isinstance(origin, theano.Constant)):
not isinstance(origin,
theano.Constant)):
mem_freed += var_mem[origin]
else:
# ins is viewed_by something else, so its
......@@ -964,7 +972,8 @@ class ProfileStats(object):
done_set.add(node)
frozen_set = frozenset(done_set)
if done_dict.get(frozen_set, max_mem_count + 1) > max_mem_count:
if (done_dict.get(frozen_set, max_mem_count + 1) >
max_mem_count):
# check if frozen_set is in done_set
# no, add it to done_set
# yes, then compare the past mem and current mem
......@@ -1008,7 +1017,8 @@ class ProfileStats(object):
# two data structure used to mimic Python gc
viewed_by = {} # {var1: [vars that view var1]}
# The len of the list is the value of python ref count. But we use a list, not just the ref count value.
# The len of the list is the value of python ref
# count. But we use a list, not just the ref count value.
# This is more safe to help detect potential bug in the algo
for var in fgraph.variables:
viewed_by[var] = []
......@@ -1043,44 +1053,48 @@ class ProfileStats(object):
max_sum_size = max(max_sum_size, sum_size)
max_node_memory_size[0] = max(max_node_memory_size[0],
sum(old_running_memory[0]))
max_running_max_memory_size[0] = max(max_running_max_memory_size[0],
sum(old_running_memory[2]))
max_running_max_memory_size[0] = \
max(max_running_max_memory_size[0], sum(old_running_memory[2]))
# Separate CPU and GPU
max_node_memory_size[1] = max(max_node_memory_size[1],
old_running_memory[0][0])
max_node_memory_size[2] = max(max_node_memory_size[2],
old_running_memory[0][1])
max_running_max_memory_size[1] = max(max_running_max_memory_size[1],
old_running_memory[2][0])
max_running_max_memory_size[2] = max(max_running_max_memory_size[2],
old_running_memory[2][1])
max_running_max_memory_size[1] = \
max(max_running_max_memory_size[1], old_running_memory[2][0])
max_running_max_memory_size[2] = \
max(max_running_max_memory_size[2], old_running_memory[2][1])
max_node_memory_saved_by_inplace = max(
max_node_memory_saved_by_inplace, old_running_memory[3])
max_node_memory_saved_by_inplace = \
max(max_node_memory_saved_by_inplace, old_running_memory[3])
max_node_memory_saved_by_view = max(max_node_memory_saved_by_view,
old_running_memory[4])
# Store max of some stats with new order
new_max_node_memory_size[0] = max(new_max_node_memory_size[0],
sum(new_running_memory[0]))
new_max_running_max_memory_size[0] = max(new_max_running_max_memory_size[0],
sum(new_running_memory[2]))
new_max_running_max_memory_size[0] = \
max(new_max_running_max_memory_size[0],
sum(new_running_memory[2]))
# Separate CPU and GPU
new_max_node_memory_size[1] = max(new_max_node_memory_size[1],
new_running_memory[0][0])
new_max_node_memory_size[2] = max(new_max_node_memory_size[2],
new_running_memory[0][1])
new_max_running_max_memory_size[1] = max(new_max_running_max_memory_size[1],
new_running_memory[2][0])
new_max_running_max_memory_size[2] = max(new_max_running_max_memory_size[2],
new_running_memory[2][1])
new_max_node_memory_saved_by_inplace = max(
new_max_node_memory_saved_by_inplace, new_running_memory[3])
new_max_node_memory_saved_by_view = max(new_max_node_memory_saved_by_view,
new_running_memory[4])
new_max_running_max_memory_size[1] = \
max(new_max_running_max_memory_size[1],
new_running_memory[2][0])
new_max_running_max_memory_size[2] = \
max(new_max_running_max_memory_size[2],
new_running_memory[2][1])
new_max_node_memory_saved_by_inplace = \
max(new_max_node_memory_saved_by_inplace,
new_running_memory[3])
new_max_node_memory_saved_by_view = \
max(new_max_node_memory_saved_by_view, new_running_memory[4])
# Config: whether print min memory peak
if config.profiling.min_peak_memory:
......@@ -1093,8 +1107,8 @@ class ProfileStats(object):
del fgraph, nodes_mem
if len(fct_memory) > 1:
print(("Memory Profile "
"(the max between all functions in that profile)"), file=file)
print("Memory Profile (the max between all functions in "
"that profile)", file=file)
else:
print("Memory Profile", file=file)
......@@ -1129,17 +1143,21 @@ class ProfileStats(object):
print("---", file=file)
if min_max_peak:
print(" Minimum peak from all valid apply node order is %dKB(took %.3fs to compute)" % (int(round(
min_max_peak / 1024.)), min_peak_time), file=file)
print(" Memory saved if views are used: %dKB (%dKB)" % (int(
round(new_max_node_memory_saved_by_view / 1024.)), int(
round(max_node_memory_saved_by_view / 1024.))), file=file)
print(" Memory saved if inplace ops are used: %dKB (%dKB)" % \
(int(round(new_max_node_memory_saved_by_inplace / 1024.)),
int(round(max_node_memory_saved_by_inplace / 1024.))), file=file)
print(" Memory saved if gc is enabled: %dKB (%dKB)" % (int(
round(new_max_node_memory_size[0] - new_max_running_max_memory_size[0]) / 1024.), int(
round(max_node_memory_size[0] - max_running_max_memory_size[0]) / 1024.)), file=file)
print(" Minimum peak from all valid apply node order is "
"%dKB(took %.3fs to compute)" %
(int(round(min_max_peak / 1024.)), min_peak_time), file=file)
print(" Memory saved if views are used: %dKB (%dKB)" %
(int(round(new_max_node_memory_saved_by_view / 1024.)),
int(round(max_node_memory_saved_by_view / 1024.))), file=file)
print(" Memory saved if inplace ops are used: %dKB (%dKB)" %
(int(round(new_max_node_memory_saved_by_inplace / 1024.)),
int(round(max_node_memory_saved_by_inplace / 1024.))),
file=file)
print(" Memory saved if gc is enabled: %dKB (%dKB)" %
(int(round(new_max_node_memory_size[0] -
new_max_running_max_memory_size[0]) / 1024.),
int(round(max_node_memory_size[0] -
max_running_max_memory_size[0]) / 1024.)), file=file)
print("---", file=file)
......@@ -1148,19 +1166,19 @@ class ProfileStats(object):
hasattr(theano.sandbox.cuda, 'cuda_ndarray') and
hasattr(theano.sandbox.cuda.cuda_ndarray.cuda_ndarray,
'theano_allocated')):
_, gpu_max = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.theano_allocated()
print((" Max Memory allocated on the GPU "
"(for all functions): %dKB" %
int(round(gpu_max / 1024.))), file=file)
cuda_ndarray = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray
_, gpu_max = cuda_ndarray.theano_allocated()
print(" Max Memory allocated on the GPU (for all functions): "
"%dKB" % int(round(gpu_max / 1024.)), file=file)
print("", file=file)
if len(fct_memory) > 1:
print((
" This list is based on all functions in the profile"), file=file)
print((" <Sum apply outputs (bytes)>"
" <Apply outputs shape>"
" <created/inplace/view>"
" <Apply node>"), file=file)
print(" This list is based on all functions in the profile",
file=file)
print(" <Sum apply outputs (bytes)>"
" <Apply outputs shape>"
" <created/inplace/view>"
" <Apply node>", file=file)
print("", file=file)
items = node_mem.items()
items.sort(key=lambda a: a[1], reverse=True)
......@@ -1181,9 +1199,8 @@ class ProfileStats(object):
else:
size = "%10s" % "Unknown"
print(' %s %s %s %s' % (size,
shapes,
' '.join(code), node), file=file)
print(' %s %s %s %s' % (size, shapes, ' '.join(code), node),
file=file)
sum_remaining = sum(size for _, size in items[N:])
size_sum_dense = sum(node_mem.values())
......@@ -1191,23 +1208,21 @@ class ProfileStats(object):
p = "0%"
else:
p = "(%.2f%%)" % (float(sum_remaining) / size_sum_dense * 100)
print((
' ... (remaining %i Apply account for %4dB/%dB (%s) of the'
' Apply with dense outputs sizes)') % (max(0, len(node_mem) - N),
sum_remaining,
size_sum_dense, p
), file=file)
print(' ... (remaining %i Apply account for %4dB/%dB (%s) of the'
' Apply with dense outputs sizes)' % (max(0, len(node_mem) - N),
sum_remaining,
size_sum_dense, p),
file=file)
print('', file=file)
if N == 0:
print((' All Apply nodes have output sizes that take'
' less than %dB.' %
config.profiling.min_memory_size), file=file)
print((
" <created/inplace/view> is taken from the Op's declaration."), file=file)
print((" Apply nodes marked 'inplace' or 'view' may"
" actually allocate memory, this is not reported"
" here. If you use DebugMode, warnings will be"
" emitted in those cases."), file=file)
print(' All Apply nodes have output sizes that take less '
'than %dB.' % config.profiling.min_memory_size, file=file)
print(" <created/inplace/view> is taken from the Op's declaration.",
file=file)
print(" Apply nodes marked 'inplace' or 'view' may"
" actually allocate memory, this is not reported"
" here. If you use DebugMode, warnings will be"
" emitted in those cases.", file=file)
print('', file=file)
def summary(self, file=sys.stderr, n_ops_to_print=20,
......@@ -1220,8 +1235,8 @@ class ProfileStats(object):
self.summary_ops(file, n_ops_to_print)
self.summary_nodes(file, n_apply_to_print)
elif self.fct_callcount > 0:
print((" No execution time accumulated "
"(hint: try config profiling.time_thunks=1)"), file=file)
print(" No execution time accumulated "
"(hint: try config profiling.time_thunks=1)", file=file)
if self.variable_shape or self.variable_strides:
self.summary_memory(file, n_apply_to_print)
if self.optimizer_profile:
......@@ -1231,7 +1246,7 @@ class ProfileStats(object):
self.optimizer_profile[1])
if 0: # old code still to be ported from ProfileMode
if False: # old code still to be ported from ProfileMode
def long_print(self, file=sys.stderr, fct_name=None, message=None,
n_apply_to_print=15, n_ops_to_print=20, print_apply=False):
"""
......@@ -1565,8 +1580,8 @@ class ScanProfileStats(ProfileStats):
print(' Message: %s' % self.message, file=file)
print((' Time in %i calls of the op (for a total of %i '
'steps) %es' %
(self.callcount, self.nbsteps, self.call_time)), file=file)
'steps) %es' %
(self.callcount, self.nbsteps, self.call_time)), file=file)
print('', file=file)
val = 0
if self.call_time > 0:
......
"""Provide a simple user friendly API to Theano-managed memory"""
__docformat__ = 'restructuredtext en'
# Standard imports
import copy
import logging
......@@ -12,6 +10,7 @@ import numpy
from theano.gof import Container, Variable, generic, utils
_logger = logging.getLogger('theano.compile.sharedvalue')
__docformat__ = 'restructuredtext en'
class SharedVariable(Variable):
......@@ -49,7 +48,8 @@ class SharedVariable(Variable):
or copied, so they must have the correct type.
:param allow_downcast: Only applies if `strict` is False.
True -> allow assigned value to lose precision when cast during assignment.
True -> allow assigned value to lose precision when cast
during assignment.
False -> never allow precision loss.
None -> only allow downcasting of a Python float to a scalar floatX.
......@@ -65,17 +65,18 @@ class SharedVariable(Variable):
if container is not None:
self.container = container
if (value is not None) or (strict is not None):
raise TypeError(
'value and strict are ignored if you pass a container here')
raise TypeError('value and strict are ignored if you pass '
'a container here')
else:
if container is not None:
raise TypeError('Error to specify both value and container')
self.container = Container(self,
storage=[type.filter(value, strict=strict,
allow_downcast=allow_downcast)],
readonly=False,
strict=strict,
allow_downcast=allow_downcast)
self.container = Container(
self,
storage=[type.filter(value, strict=strict,
allow_downcast=allow_downcast)],
readonly=False,
strict=strict,
allow_downcast=allow_downcast)
def get_value(self, borrow=False, return_internal_type=False):
"""Get the non-symbolic value associated with this SharedVariable.
......@@ -114,11 +115,11 @@ class SharedVariable(Variable):
def clone(self):
cp = self.__class__(
name=self.name,
type=self.type,
value=None,
strict=None,
container=self.container)
name=self.name,
type=self.type,
value=None,
strict=None,
container=self.container)
cp.tag = copy.copy(self.tag)
return cp
......@@ -140,8 +141,8 @@ class SharedVariable(Variable):
type(value))
raise TypeError(
"The generic 'SharedVariable' object is not subscriptable. "
"This shared variable contains %s" % msg)
"The generic 'SharedVariable' object is not subscriptable. "
"This shared variable contains %s" % msg)
def _value_get(self):
raise Exception("sharedvar.value does not exist anymore. Use "
......@@ -183,7 +184,8 @@ def shared(value, name=None, strict=False, allow_downcast=None, **kwargs):
potential constructors to those that can accept those kwargs.
:note: Some shared variable have ``borrow`` as extra kwargs.
`See <http://deeplearning.net/software/theano/tutorial/aliasing.html#borrowing-when-creating-shared-variables>`_ for detail.
`See <http://deeplearning.net/software/theano/tutorial/aliasing.\
html#borrowing-when-creating-shared-variables>`_ for detail.
:note: Some shared variable have ``broadcastable`` as extra kwargs.
As shared variable shapes can change, all dimensions default
......@@ -200,7 +202,8 @@ def shared(value, name=None, strict=False, allow_downcast=None, **kwargs):
try:
if isinstance(value, Variable):
raise TypeError(" Shared variable constructor needs numeric values and not symbolic variables.")
raise TypeError("Shared variable constructor needs numeric "
"values and not symbolic variables.")
for ctor in reversed(shared.constructors):
try:
......@@ -234,4 +237,4 @@ shared.constructors = []
def generic_constructor(value, name=None, strict=False, allow_downcast=None):
"""SharedVariable Constructor"""
return SharedVariable(type=generic, value=value, name=name, strict=strict,
allow_downcast=allow_downcast)
allow_downcast=allow_downcast)
import cPickle, logging
import cPickle
import logging
_logger = logging.getLogger("theano.gof.callcache")
......@@ -18,9 +19,6 @@ class CallCache(object):
def persist(self, filename=None):
if filename is None:
filename = self.filename
# backport
#filename = self.filename if filename is None else filename
f = open(filename, 'w')
cPickle.dump(self.cache, f)
f.close()
......@@ -28,9 +26,6 @@ class CallCache(object):
def call(self, fn, args=(), key=None):
if key is None:
key = (fn, tuple(args))
# backport
#key = (fn, tuple(args)) if key is None else key
if key not in self.cache:
_logger.debug('cache miss %i', len(self.cache))
self.cache[key] = fn(*args)
......
......@@ -8,7 +8,6 @@ import re
import shutil
import struct
import socket
import subprocess
import sys
import textwrap
......@@ -55,16 +54,16 @@ def python_int_bitwidth():
compiledir_format_dict = {
"platform": platform.platform(),
"processor": platform.processor(),
"python_version": platform.python_version(),
"python_bitwidth": local_bitwidth(),
"python_int_bitwidth": python_int_bitwidth(),
"theano_version": theano.__version__,
"numpy_version": numpy.__version__,
"gxx_version": gcc_version_str.replace(" ", "_"),
"hostname": socket.gethostname(),
}
"platform": platform.platform(),
"processor": platform.processor(),
"python_version": platform.python_version(),
"python_bitwidth": local_bitwidth(),
"python_int_bitwidth": python_int_bitwidth(),
"theano_version": theano.__version__,
"numpy_version": numpy.__version__,
"gxx_version": gcc_version_str.replace(" ", "_"),
"hostname": socket.gethostname(),
}
def short_platform(r=None, p=None):
......@@ -182,8 +181,8 @@ def filter_compiledir(path):
if not os.access(path, os.R_OK | os.W_OK | os.X_OK):
# If it exist we need read, write and listing access
raise ValueError(
"compiledir '%s' exists but you don't have read, write"
" or listing permissions." % path)
"compiledir '%s' exists but you don't have read, write"
" or listing permissions." % path)
else:
try:
os.makedirs(path, 0770) # read-write-execute for user and group
......@@ -295,7 +294,8 @@ def cleanup():
have_npy_abi_version = True
elif obj.startswith('c_compiler_str='):
have_c_compiler = True
elif (isinstance(obj, (theano.gof.Op, theano.gof.Type)) and
elif (isinstance(obj, (theano.gof.Op,
theano.gof.Type)) and
hasattr(obj, 'c_code_cache_version')):
v = obj.c_code_cache_version()
if v not in [(), None] and v not in key[0]:
......@@ -310,7 +310,7 @@ def cleanup():
if keydata.key_pkl != filename:
keydata.key_pkl = filename
keydata.remove_key(key)
except IOError as e:
except IOError:
_logger.error(
"Could not remove file '%s'. To complete "
"the clean-up, please remove manually "
......@@ -395,7 +395,7 @@ def print_compiledir_content():
if big_key_files:
big_key_files = sorted(big_key_files, key=lambda t: str(t[1]))
big_total_size = sum([size for dir, size, ops in big_key_files])
big_total_size = sum([sz for _, sz, _ in big_key_files])
print(("There are directories with key files bigger than %d bytes "
"(they probably contain big tensor constants)" %
max_key_file_size))
......
......@@ -102,8 +102,8 @@ def get_lock(lock_dir=None, **kw):
# the lock state and raise an error.
while get_lock.n_lock > 0:
release_lock()
raise Exception("For some unknow reason, the lock was already taken,"
" but no start time was registered.")
raise Exception("For some unknow reason, the lock was already "
"taken, but no start time was registered.")
now = time.time()
if now - get_lock.start_time > config.compile.timeout/2:
lockpath = os.path.join(get_lock.lock_dir, 'lock')
......@@ -242,7 +242,7 @@ def lock(tmp_dir, timeout=notset, min_wait=None, max_wait=None, verbosity=1):
continue
if last_owner == read_owner:
if (timeout is not None and
time.time() - time_start >= timeout):
time.time() - time_start >= timeout):
# Timeout exceeded or locking process dead.
if not no_display:
if read_owner == 'failure':
......
......@@ -14,18 +14,21 @@ if os.path.exists(os.path.join(config.compiledir, 'cutils_ext.so')):
def compile_cutils_code():
types = ['npy_' + t for t in ['int8', 'int16', 'int32', 'int64', 'int128',
'int256', 'uint8', 'uint16', 'uint32', 'uint64', 'uint128', 'uint256',
'float16', 'float32', 'float64', 'float80', 'float96', 'float128',
'float256']]
'int256', 'uint8', 'uint16', 'uint32',
'uint64', 'uint128', 'uint256',
'float16', 'float32', 'float64',
'float80', 'float96', 'float128',
'float256']]
complex_types = ['npy_' + t for t in ['complex32', 'complex64',
'complex128', 'complex160', 'complex192', 'complex512']]
'complex128', 'complex160',
'complex192', 'complex512']]
inplace_map_template = """
#if defined(%(typen)s)
static void %(type)s_inplace_add(PyArrayMapIterObject *mit, PyArrayIterObject *it, int inc_or_set)
static void %(type)s_inplace_add(PyArrayMapIterObject *mit,
PyArrayIterObject *it, int inc_or_set)
{
int index = mit->size;
while (index--) {
......@@ -38,46 +41,52 @@ def compile_cutils_code():
#endif
"""
floatadd = "((%(type)s*)mit->dataptr)[0] = inc_or_set * ((%(type)s*)mit->dataptr)[0] + ((%(type)s*)it->dataptr)[0];"
floatadd = ("((%(type)s*)mit->dataptr)[0] = inc_or_set * "
"((%(type)s*)mit->dataptr)[0] + ((%(type)s*)it->dataptr)[0];")
complexadd = """
((%(type)s*)mit->dataptr)[0].real = inc_or_set * ((%(type)s*)mit->dataptr)[0].real + ((%(type)s*)it->dataptr)[0].real;
((%(type)s*)mit->dataptr)[0].imag = inc_or_set * ((%(type)s*)mit->dataptr)[0].imag + ((%(type)s*)it->dataptr)[0].imag;
((%(type)s*)mit->dataptr)[0].real = inc_or_set *
((%(type)s*)mit->dataptr)[0].real + ((%(type)s*)it->dataptr)[0].real;
((%(type)s*)mit->dataptr)[0].imag = inc_or_set *
((%(type)s*)mit->dataptr)[0].imag + ((%(type)s*)it->dataptr)[0].imag;
"""
fns = ''.join([inplace_map_template % {'type': t, 'typen': t.upper(),
'op': floatadd % {'type': t}}
for t in types] +
for t in types] +
[inplace_map_template % {'type': t, 'typen': t.upper(),
'op': complexadd % {'type': t}}
for t in complex_types])
for t in complex_types])
def gen_binop(type, typen):
return """
#if defined(%(typen)s)
%(type)s_inplace_add,
#endif
""" % dict(type=type, typen=typen)
fn_array = ("static inplace_map_binop addition_funcs[] = {" +
''.join(["""
#if defined(%(typen)s)
%(type)s_inplace_add,
#endif
""" % {'type': t, 'typen': t.upper()}
for t in types + complex_types]) +
"""NULL};
""")
''.join([gen_binop(type=t, typen=t.upper())
for t in types + complex_types]) + "NULL};\n")
def gen_num(typen):
return """
#if defined(%(typen)s)
%(typen)s,
#endif
""" % dict(type=type, typen=typen)
type_number_array = ("static int type_numbers[] = {" +
''.join(["""
#if defined(%(typen)s)
%(typen)s,
#endif
""" % {'type': t, 'typen': t.upper()}
for t in types + complex_types]) +
"-1000};")
''.join([gen_num(typen=t.upper())
for t in types + complex_types]) + "-1000};")
code = ("""
#if NPY_API_VERSION >= 0x00000008
typedef void (*inplace_map_binop)(PyArrayMapIterObject *, PyArrayIterObject *, int inc_or_set);
""" + fns + fn_array + type_number_array +
"""
typedef void (*inplace_map_binop)(PyArrayMapIterObject *,
PyArrayIterObject *, int inc_or_set);
""" + fns + fn_array + type_number_array + """
static int
map_increment(PyArrayMapIterObject *mit, PyObject *op, inplace_map_binop add_inplace, int inc_or_set)
map_increment(PyArrayMapIterObject *mit, PyObject *op,
inplace_map_binop add_inplace, int inc_or_set)
{
PyArrayObject *arr = NULL;
PyArrayIterObject *it;
......@@ -129,7 +138,8 @@ inplace_increment(PyObject *dummy, PyObject *args)
return NULL;
}
if (!PyArray_Check(arg_a)) {
PyErr_SetString(PyExc_ValueError, "needs an ndarray as first argument");
PyErr_SetString(PyExc_ValueError,
"needs an ndarray as first argument");
return NULL;
}
......@@ -285,7 +295,7 @@ try:
open(os.path.join(location, '__init__.py'), 'w').close()
try:
from cutils_ext.cutils_ext import *
from cutils_ext.cutils_ext import * # noqa
except ImportError:
get_lock()
# Ensure no-one else is currently modifying the content of the compilation
......@@ -296,11 +306,11 @@ try:
# We must retry to import it as some other process could
# have been compiling it between the first failed import
# and when we receive the lock
from cutils_ext.cutils_ext import *
from cutils_ext.cutils_ext import * # noqa
except ImportError:
compile_cutils()
from cutils_ext.cutils_ext import *
from cutils_ext.cutils_ext import * # noqa
finally:
# Release lock on compilation directory.
......
......@@ -15,12 +15,13 @@ _logger = logging.getLogger('theano.gof.lazylinker_c')
force_compile = False
version = 0.21 # must match constant returned in function get_version()
lazylinker_ext = None
def try_import():
    """Import the compiled ``lazylinker_ext`` module into this module's
    ``lazylinker_ext`` global.

    Temporarily prepends ``config.compiledir`` to ``sys.path`` so the
    freshly compiled extension package can be found, then removes the
    entry again so later imports are not affected.
    """
    global lazylinker_ext
    # Prepend (not append) so the compiled dir wins over any stale copy.
    sys.path[0:0] = [config.compiledir]
    # NOTE(review): the two identical import lines below appear to be the
    # pre-/post-change versions of one line in this diff view — confirm
    # against the real file before treating both as intentional.
    import lazylinker_ext
    import lazylinker_ext  # noqa
    # Restore sys.path to its previous state.
    del sys.path[0]
......@@ -43,11 +44,11 @@ try:
# Try to make the location
os.mkdir(location)
except OSError as e:
# If we get an error, verify that the error was # 17, the path already exists,
# and that it is a directory
# Note: we can't check if it exists before making it, because we are not holding
# the lock right now, so we could race another process and get error 17 if we lose
# the race
# If we get an error, verify that the error was # 17, the
# path already exists, and that it is a directory. Note: we
# can't check if it exists before making it, because we
# are not holding the lock right now, so we could race
# another process and get error 17 if we lose the race
assert e.errno == errno.EEXIST
assert os.path.isdir(location)
......@@ -142,5 +143,5 @@ except ImportError:
# Release lock on compilation directory.
release_lock()
from lazylinker_ext.lazylinker_ext import *
from lazylinker_ext.lazylinker_ext import * # noqa
assert force_compile or (version == get_version())
......@@ -11,14 +11,14 @@ from theano.configparser import AddConfigVar, FloatParam
from theano import config
AddConfigVar('optdb.position_cutoff',
'Where to stop eariler during optimization. It represent the'
'Where to stop eariler during optimization. It represent the'
' position of the optimizer where to stop.',
FloatParam(numpy.inf),
in_c_key=False)
FloatParam(numpy.inf),
in_c_key=False)
AddConfigVar('optdb.max_use_ratio',
'A ratio that prevent infinite loop in EquilibriumOptimizer.',
FloatParam(5),
in_c_key=False)
'A ratio that prevent infinite loop in EquilibriumOptimizer.',
FloatParam(5),
in_c_key=False)
class DB(object):
......@@ -32,7 +32,7 @@ class DB(object):
self.__db__ = DefaultOrderedDict(OrderedSet)
self._names = set()
self.name = None # will be reset by register
#(via obj.name by the thing doing the registering)
# (via obj.name by the thing doing the registering)
def register(self, name, obj, *tags, **kwargs):
"""
......@@ -175,8 +175,10 @@ class Query(object):
self.exclude = OrderedSet(self.exclude)
def __str__(self):
return "Query{inc=%s,ex=%s,require=%s,subquery=%s,position_cutoff=%d}" % (
self.include, self.exclude, self.require, self.subquery, self.position_cutoff)
return ("Query{inc=%s,ex=%s,require=%s,subquery=%s,"
"position_cutoff=%d}" %
(self.include, self.exclude, self.require, self.subquery,
self.position_cutoff))
# add all opt with this tag
def including(self, *tags):
......@@ -268,7 +270,7 @@ class SequenceDB(DB):
position_cutoff = kwtags.pop('position_cutoff',
config.optdb.position_cutoff)
if len(tags) >= 1 and isinstance(tags[0], Query):
# the call to super should have raise an error with a good message
# the call to super should have raise an error with a good message
assert len(tags) == 1
if getattr(tags[0], 'position_cutoff', None):
position_cutoff = tags[0].position_cutoff
......
......@@ -39,9 +39,9 @@ def make_depends():
def depends(pair):
""" Returns True if a depends on b """
a, b = pair
return (any(bout in a.inputs for bout in b.outputs)
or any(depends((ainp.owner, b)) for ainp in a.inputs
if ainp.owner))
return (any(bout in a.inputs for bout in b.outputs) or
any(depends((ainp.owner, b)) for ainp in a.inputs
if ainp.owner))
return depends
......@@ -160,12 +160,12 @@ def posort(l, *cmps):
for b in l:
assert not(b in comes_after[a] and a in comes_after[b])
for cmp in cmps:
for cmp_fn in cmps:
for a in l:
for b in l:
if cmp(a, b) < 0: # a wants to come before b
if cmp_fn(a, b) < 0: # a wants to come before b
# if this wouldn't cause a cycle and isn't already known
if not b in comes_before[a] and not b in comes_after[a]:
if b not in comes_before[a] and b not in comes_after[a]:
add_links(a, b)
# check() # debug code
......
......@@ -36,8 +36,11 @@ def test_give_variables_names_small():
def test_remove():
even = lambda x: x % 2 == 0
odd = lambda x: x % 2 == 1
# The list are neede as with python 3, remove and filter return generators
def even(number):
    """Return True when ``number`` is even."""
    return number % 2 == 0


def odd(number):
    """Return True when ``number`` is odd."""
    return number % 2 == 1
# The lists are needed because, with Python 3, remove and filter return generators
# and we can't compare generators.
assert list(remove(even, range(5))) == list(filter(odd, range(5)))
......@@ -126,7 +126,7 @@ class LambdExtract:
def __call__(self):
return self.fgraph.change_input(self.node, self.i, self.r,
reason=("Revert", self.reason))
reason=("Revert", self.reason))
class History(Feature):
......@@ -214,9 +214,9 @@ class Validator(Feature):
class ReplaceValidate(History, Validator):
pickle_rm_attr = ["replace_validate", "replace_all_validate",
"replace_all_validate_remove"] + \
History.pickle_rm_attr + Validator.pickle_rm_attr
pickle_rm_attr = (["replace_validate", "replace_all_validate",
"replace_all_validate_remove"] +
History.pickle_rm_attr + Validator.pickle_rm_attr)
def on_attach(self, fgraph):
for attr in ('replace_validate', 'replace_all_validate',
......@@ -256,11 +256,13 @@ class ReplaceValidate(History, Validator):
try:
fgraph.replace(r, new_r, reason=reason, verbose=False)
except Exception as e:
if ('The type of the replacement must be the same' not in
str(e) and 'does not belong to this FunctionGraph' not in str(e)):
msg = str(e)
s1 = 'The type of the replacement must be the same'
s2 = 'does not belong to this FunctionGraph'
if (s1 not in msg and s2 not in msg):
out = sys.stderr
print("<<!! BUG IN FGRAPH.REPLACE OR A LISTENER !!>>", end=' ', file=out)
print(type(e), e, reason, file=out)
print("<<!! BUG IN FGRAPH.REPLACE OR A LISTENER !!>>",
type(e), e, reason, file=out)
# this might fail if the error is in a listener:
# (fgraph.replace kinda needs better internal error handling)
fgraph.revert(chk)
......@@ -286,13 +288,14 @@ class ReplaceValidate(History, Validator):
fgraph.revert(chk)
if warn:
out = sys.stderr
print((
print(
"WARNING: An optimization wanted to replace a Variable"
" in the graph, but the replacement for it doesn't"
" remove it. We disabled the optimization."
" Your function runs correctly, but it would be"
" appreciated if you submit this problem to the"
" mailing list theano-users so that we can fix it."), file=out)
" mailing list theano-users so that we can fix it.",
file=out)
print(reason, replacements, file=out)
raise ReplacementDidntRemovedError()
......@@ -311,7 +314,8 @@ class NodeFinder(Bookkeeper):
def on_attach(self, fgraph):
if self.fgraph is not None:
raise Exception("A NodeFinder instance can only serve one FunctionGraph.")
raise Exception("A NodeFinder instance can only serve one "
"FunctionGraph.")
if hasattr(fgraph, 'get_nodes'):
raise AlreadyThere("NodeFinder is already present or in conflict"
" with another plugin.")
......
"""WRITEME Defines the `Type` class."""
__docformat__ = "restructuredtext en"
from theano.compat import PY3
from theano.gof import utils
......@@ -13,6 +10,8 @@ from theano.gof import graph
########
from theano.gof.op import CLinkerObject
__docformat__ = "restructuredtext en"
class CLinkerType(CLinkerObject):
"""Interface specification for Types that can be arguments to a `CLinkerOp`.
......@@ -45,7 +44,8 @@ class CLinkerType(CLinkerObject):
- `MethodNotDefined`: Subclass does not implement this method
"""
raise MethodNotDefined("c_literal", type(self), self.__class__.__name__)
raise MethodNotDefined("c_literal", type(self),
self.__class__.__name__)
def c_declare(self, name, sub, check_input=True):
"""Required: Return c code to declare variables that will be
......@@ -56,7 +56,8 @@ class CLinkerType(CLinkerObject):
return "PyObject ** addr_of_%(name)s;"
:param name: the name of the ``PyObject *`` pointer that will the value for this Type
:param name: the name of the ``PyObject *`` pointer that will
hold the value for this Type
:type name: string
......@@ -138,7 +139,8 @@ class CLinkerType(CLinkerObject):
- `MethodNotDefined`: Subclass does not implement this method
"""
raise MethodNotDefined("c_extract", type(self), self.__class__.__name__)
raise MethodNotDefined("c_extract", type(self),
self.__class__.__name__)
def c_extract_out(self, name, sub, check_input=True):
"""Optional: C code to extract a PyObject * instance.
......@@ -156,9 +158,9 @@ class CLinkerType(CLinkerObject):
%(c_extract_code)s
}
""" % dict(
name=name,
c_init_code=self.c_init(name, sub),
c_extract_code=self.c_extract(name, sub, check_input))
name=name,
c_init_code=self.c_init(name, sub),
c_extract_code=self.c_extract(name, sub, check_input))
def c_cleanup(self, name, sub):
"""Return c code to clean up after `c_extract`.
......@@ -184,11 +186,12 @@ class CLinkerType(CLinkerObject):
def c_sync(self, name, sub):
"""Required: Return c code to pack C types back into a PyObject.
The code returned from this function must be templated using "%(name)s",
representing the name that the caller wants to call this Variable. The
returned code may set "py_%(name)s" to a PyObject* and that PyObject*
will be accessible from Python via variable.data. Do not forget to adjust
reference counts if "py_%(name)s" is changed from its original value.
The code returned from this function must be templated using
"%(name)s", representing the name that the caller wants to
call this Variable. The returned code may set "py_%(name)s"
to a PyObject* and that PyObject* will be accessible from
Python via variable.data. Do not forget to adjust reference
counts if "py_%(name)s" is changed from its original value.
:Parameters:
- `name`: WRITEME
......@@ -205,10 +208,11 @@ class CLinkerType(CLinkerObject):
def c_code_cache_version(self):
"""Return a tuple of integers indicating the version of this Type.
An empty tuple indicates an 'unversioned' Type that will not be cached between processes.
An empty tuple indicates an 'unversioned' Type that will not
be cached between processes.
The cache mechanism may erase cached modules that have been superceded by newer
versions. See `ModuleCache` for details.
The cache mechanism may erase cached modules that have been
superseded by newer versions. See `ModuleCache` for details.
"""
return ()
......@@ -221,19 +225,21 @@ class PureType(object):
- creating `Variable` instances (conventionally, `__call__` does this), and
- filtering a value assigned to a `Variable` so that the value conforms to restrictions
imposed by the type (also known as casting, this is done by `filter`),
- filtering a value assigned to a `Variable` so that the value
conforms to restrictions imposed by the type (also known as
casting, this is done by `filter`),
"""
# the type that will be created by call to make_variable.
Variable = graph.Variable
Variable = graph.Variable # the type that will be created by call to make_variable.
Constant = graph.Constant # the type that will be created by call to make_constant
# the type that will be created by call to make_constant
Constant = graph.Constant
def filter(self, data, strict=False, allow_downcast=None):
"""Required: Return data or an appropriately wrapped/converted data.
Subclass implementation should raise a TypeError exception if the data is not of an
acceptable type.
Subclass implementation should raise a TypeError exception if
the data is not of an acceptable type.
If strict is True, the data returned must be the same as the
data passed as an argument. If it is False, and allow_downcast
......@@ -272,18 +278,19 @@ class PureType(object):
if other.type != self:
raise TypeError(
'Cannot convert Type %(othertype)s '
'(of Variable %(other)s) into Type %(self)s. '
'You can try to manually convert %(other)s into a %(self)s.'
% dict(
othertype=other.type,
other=other,
self=self)
)
'Cannot convert Type %(othertype)s '
'(of Variable %(other)s) into Type %(self)s. '
'You can try to manually convert %(other)s into a %(self)s.'
% dict(
othertype=other.type,
other=other,
self=self)
)
return other
def is_valid_value(self, a):
"""Required: Return True for any python object `a` that would be a legal value for a Variable of this Type"""
"""Required: Return True for any python object `a` that would be a
legal value for a Variable of this Type"""
try:
self.filter(a, strict=True)
return True
......@@ -291,7 +298,8 @@ class PureType(object):
return False
def value_validity_msg(self, a):
"""Optional: return a message explaining the output of is_valid_value"""
"""Optional: return a message explaining the output of
is_valid_value"""
return "none"
def make_variable(self, name=None):
......@@ -371,7 +379,8 @@ class Type(object2, PureType, CLinkerType):
But you are encouraged to write your own, as described in WRITEME.
The following following code illustrates the use of a Type instance, here tensor.fvector:
The following code illustrates the use of a Type
instance, here tensor.fvector:
.. code-block:: python
......@@ -381,17 +390,21 @@ class Type(object2, PureType, CLinkerType):
# Create a second Variable with the same Type instance
c = tensor.fvector()
Whenever you create a symbolic variable in theano (technically, `Variable`) it will contain a
reference to a Type instance. That reference is typically constant during the lifetime of
the Variable. Many variables can refer to a single Type instance, as do b and c above. The
Type instance defines the kind of value which might end up in that variable when executing
a `Function`. In this sense, theano is like a strongly-typed language because the types
are included in the graph before the values. In our example above, b is a Variable which is
guaranteed to correspond to a numpy.ndarray of rank 1 when we try to do some computations
Whenever you create a symbolic variable in theano (technically,
`Variable`) it will contain a reference to a Type instance. That
reference is typically constant during the lifetime of the
Variable. Many variables can refer to a single Type instance, as
do b and c above. The Type instance defines the kind of value
which might end up in that variable when executing a `Function`.
In this sense, theano is like a strongly-typed language because
the types are included in the graph before the values. In our
example above, b is a Variable which is guaranteed to correspond
to a numpy.ndarray of rank 1 when we try to do some computations
with it.
Many `Op` instances will raise an exception if they are applied to inputs with incorrect
types. Type references are also useful to do type-checking in pattern-based optimizations.
Many `Op` instances will raise an exception if they are applied to
inputs with incorrect types. Type references are also useful to
do type-checking in pattern-based optimizations.
"""
def convert_variable(self, var):
......@@ -451,8 +464,8 @@ class Generic(SingletonType):
"""
Represents a generic Python object.
This class implements the `PureType` and `CLinkerType` interfaces for generic PyObject
instances.
This class implements the `PureType` and `CLinkerType` interfaces
for generic PyObject instances.
EXAMPLE of what this means, or when you would use this type.
......
from __future__ import print_function
import linecache
import traceback
import re
import sys
from theano import config
......@@ -15,7 +14,6 @@ def simple_extract_stack(f=None, limit=None):
This is because this update causes a call to os.stat to get the
line content. This causes too much slowdown on clusters.
"""
if f is None:
try:
......@@ -48,7 +46,7 @@ if sys.version_info[:2] > (3, 4):
# I enable my implementation only for some python version just to
# be sure the Python internal do not change. If this work with
# other python version, you can enable it.
simple_extract_stack = traceback.extract_stack
simple_extract_stack = traceback.extract_stack # noqa
def add_tag_trace(thing, user_line=1):
......@@ -81,7 +79,7 @@ def add_tag_trace(thing, user_line=1):
"theano/scan_module/",
"theano/sparse/",
"theano/typed_list/",
]:
]:
if p in file_path:
tr = tr[:-1]
rm = True
......@@ -190,8 +188,8 @@ def deprecated(filename, msg=''):
def g(*args, **kwargs):
if printme[0]:
print('WARNING: %s.%s deprecated. %s'\
% (filename, f.__name__, msg))
print('WARNING: %s.%s deprecated. %s' %
(filename, f.__name__, msg))
printme[0] = False
return f(*args, **kwargs)
return g
......@@ -220,7 +218,7 @@ def difference(seq1, seq2):
raise Exception('not worth it')
set2 = set(seq2)
return [x for x in seq1 if x not in set2]
except Exception as e:
except Exception:
# maybe a seq2 element is not hashable
# maybe seq2 is too short
# -> use O(len(seq1) * len(seq2)) algo
......@@ -311,11 +309,11 @@ def comm_guard(type1, type2):
old_f = f.func_globals[f.__name__]
def new_f(arg1, arg2, *rest):
if (type1 is ANY_TYPE or isinstance(arg1, type1)) \
and (type2 is ANY_TYPE or isinstance(arg2, type2)):
if ((type1 is ANY_TYPE or isinstance(arg1, type1)) and
(type2 is ANY_TYPE or isinstance(arg2, type2))):
pass
elif (type1 is ANY_TYPE or isinstance(arg2, type1)) \
and (type2 is ANY_TYPE or isinstance(arg1, type2)):
elif ((type1 is ANY_TYPE or isinstance(arg2, type1)) and
(type2 is ANY_TYPE or isinstance(arg1, type2))):
arg1, arg2 = arg2, arg1
else:
return old_f(arg1, arg2, *rest)
......@@ -337,8 +335,9 @@ def comm_guard(type1, type2):
return type.__name__
new_f.__doc__ = (str(old_f.__doc__) + "\n" +
", ".join([typename(type) for type in (type1, type2)]) +
"\n" + str(f.__doc__ or ""))
", ".join([typename(type)
for type in (type1, type2)]) +
"\n" + str(f.__doc__ or ""))
return new_f
return wrap
......@@ -369,8 +368,8 @@ def type_guard(type1):
return type.__name__
new_f.__doc__ = (str(old_f.__doc__) + "\n" +
", ".join([typename(type) for type in (type1,)]) +
"\n" + str(f.__doc__ or ""))
", ".join([typename(type) for type in (type1,)]) +
"\n" + str(f.__doc__ or ""))
return new_f
return wrap
......@@ -406,15 +405,16 @@ def give_variables_names(variables):
This function is idempotent."""
names = map(lambda var: var.name, variables)
h = hist(names)
bad_var = lambda var: not var.name or h[var.name] > 1
def bad_var(var):
return not var.name or h[var.name] > 1
for i, var in enumerate(filter(bad_var, variables)):
var.name = (var.name or "") + "_%d" % i
if not unique(map(str, variables)):
raise ValueError("Not all variables have unique names."
"Maybe you've named some of the variables identically")
raise ValueError("Not all variables have unique names. Maybe you've "
"named some of the variables identically")
return variables
......
......@@ -53,7 +53,8 @@ AddConfigVar('vm.lazy',
in_c_key=False)
def calculate_reallocate_info(order, fgraph, storage_map, compute_map_re, dependencies):
def calculate_reallocate_info(order, fgraph, storage_map, compute_map_re,
dependencies):
reallocated_info = {}
viewed_by = {}
for var in fgraph.variables:
......@@ -74,14 +75,14 @@ def calculate_reallocate_info(order, fgraph, storage_map, compute_map_re, depend
ins = None
if dmap and idx_o in dmap:
idx_v = dmap[idx_o]
assert len(
idx_v) == 1, "Here we only support the possibility to destroy one input"
assert len(idx_v) == 1, ("Here we only support the possibility"
" to destroy one input")
ins = node.inputs[idx_v[0]]
if vmap and idx_o in vmap:
assert ins is None
idx_v = vmap[idx_o]
assert len(
idx_v) == 1, "Here we only support the possibility to view one input"
assert len(idx_v) == 1, ("Here we only support the possibility"
" to view one input")
ins = node.inputs[idx_v[0]]
if ins is not None:
assert isinstance(ins, theano.Variable)
......@@ -92,10 +93,11 @@ def calculate_reallocate_info(order, fgraph, storage_map, compute_map_re, depend
for ins in node.inputs:
assert not (ins in view_of and viewed_by[ins])
if (getattr(ins, 'ndim', None) == 0 and not storage_map[ins][0]
and ins not in fgraph.outputs and ins.owner
and all([compute_map_re[v][0] for v in dependencies.get(ins, [])])
and ins not in allocated):
if (getattr(ins, 'ndim', None) == 0 and not storage_map[ins][0] and
ins not in fgraph.outputs and ins.owner and
all([compute_map_re[v][0]
for v in dependencies.get(ins, [])]) and
ins not in allocated):
# Constant Memory cannot be changed
# Constant and shared variables' storage_map value is not empty
reuse_out = None
......@@ -105,8 +107,9 @@ def calculate_reallocate_info(order, fgraph, storage_map, compute_map_re, depend
if reuse_out:
break
for out in order[i].outputs:
if (getattr(out, 'ndim', None) == 0 and out not in pre_allocated
and ins.type == out.type):
if (getattr(out, 'ndim', None) == 0 and
out not in pre_allocated and
ins.type == out.type):
reuse_out = out
pre_allocated.add(out)
allocated.add(ins)
......@@ -122,8 +125,9 @@ def calculate_reallocate_info(order, fgraph, storage_map, compute_map_re, depend
if reuse_out:
break
for out in order[i].outputs:
if (getattr(out, 'ndim', None) == 0 and out not in pre_allocated
and ins.type == out.type):
if (getattr(out, 'ndim', None) == 0 and
out not in pre_allocated and
ins.type == out.type):
reuse_out = out
pre_allocated.add(out)
allocated.add(ins)
......@@ -508,9 +512,10 @@ class Stack(VM):
st = "c"
self.variable_strides[var] = st
except Exception:
link.raise_with_op(current_apply,
self.thunks[self.node_idx[current_apply]],
storage_map=storage_map)
link.raise_with_op(
current_apply,
self.thunks[self.node_idx[current_apply]],
storage_map=storage_map)
for o in current_apply.outputs:
compute_map[o][0] = 1
......@@ -521,9 +526,9 @@ class Stack(VM):
for i in current_apply.inputs:
# Garbage Collection -> check if anybody else uses
# this input
if (dependencies[i]
and i.owner
and i not in self.outputs):
if (dependencies[i] and
i.owner and
i not in self.outputs):
if all(compute_map[v][0]
for v in dependencies[i]):
storage_map[i][0] = None
......@@ -544,10 +549,13 @@ class Stack(VM):
'destroy_map',
False)):
warnings.warn(
"There was a bug that existed in the default Theano configuration,"
" only in the development version between July 5th 2012"
" and July 30th 2012. This was not in a released version."
" The bug was affecting this script.",
"There was a bug that existed in "
"the default Theano configuration,"
" only in the development version "
"between July 5th 2012 and "
"July 30th 2012. This was not in "
"a released version. The bug was "
"affecting this script.",
# The stack level is not good when
# inside a Scan.
stacklevel=3
......@@ -578,9 +586,10 @@ class Stack(VM):
self.call_times[current_idx] += dt
except Exception:
link.raise_with_op(current_apply,
self.thunks[self.node_idx[current_apply]],
storage_map=storage_map)
link.raise_with_op(
current_apply,
self.thunks[self.node_idx[current_apply]],
storage_map=storage_map)
if requires:
for r in requires:
......@@ -639,7 +648,7 @@ class Stack(VM):
if self.allow_gc:
for v in storage_map:
if v.owner and not v in self.outputs:
if v.owner and v not in self.outputs:
if compute_map[v][0] == 2:
continue
else:
......@@ -840,7 +849,6 @@ class VM_Linker(link.LocalLinker):
vars_idx_inv[i] = var
# put storage_map and compute_map into a int-based scheme
n_applies = len(nodes)
storage_map_list = [storage_map[vars_idx_inv[i]]
for i in xrange(len(vars_idx_inv))]
compute_map_list = [compute_map[vars_idx_inv[i]]
......@@ -988,7 +996,8 @@ class VM_Linker(link.LocalLinker):
else:
dependencies = self.compute_gc_dependencies(storage_map)
reallocated_info = calculate_reallocate_info(order, fgraph, storage_map, compute_map_re,dependencies)
reallocated_info = calculate_reallocate_info(
order, fgraph, storage_map, compute_map_re, dependencies)
for node in order:
try:
......@@ -1014,7 +1023,8 @@ class VM_Linker(link.LocalLinker):
lazy = config.vm.lazy
if lazy is None:
lazy = not all([(not th.lazy) for th in thunks])
if not (lazy or (config.profile and config.profile_memory) or self.use_cloop or self.callback):
if not (lazy or (config.profile and config.profile_memory) or
self.use_cloop or self.callback):
for pair in reallocated_info.values():
storage_map[pair[1]] = storage_map[pair[0]]
......@@ -1024,10 +1034,10 @@ class VM_Linker(link.LocalLinker):
for node in order:
clear_after_this_thunk = []
for input in node.inputs:
if ((input in computed)
and (input not in fgraph.outputs)
and (node == last_user[input])
and input not in reallocated_info.keys()):
if (input in computed and
input not in fgraph.outputs and
node == last_user[input] and
input not in reallocated_info.keys()):
clear_after_this_thunk.append(storage_map[input])
post_thunk_clear.append(clear_after_this_thunk)
else:
......
......@@ -2,7 +2,6 @@ from functools import wraps
import numpy
import theano
from theano import scalar as scal, Constant
from theano.gof import local_optimizer
from theano.tensor import (DimShuffle, get_scalar_constant_value,
......@@ -18,7 +17,7 @@ def grab_cpu_scalar(v, nd):
if v.owner is not None:
n = v.owner
if (isinstance(n.op, GpuDimShuffle) and
n.op.new_order == ('x',) * nd):
n.op.new_order == ('x',) * nd):
return host_from_gpu(n.inputs[0])
elif (isinstance(n.op, DimShuffle) and
n.op.new_order == ('x',) * nd):
......@@ -29,7 +28,7 @@ def grab_cpu_scalar(v, nd):
return None
else:
if (isinstance(v, Constant) and
v.broadcastable == (True,) * nd):
v.broadcastable == (True,) * nd):
return v.dimshuffle(())
......@@ -63,8 +62,8 @@ def alpha_merge(cls, alpha_in, beta_in, nd):
@wraps(maker)
def opt(node):
if (isinstance(node.op, GpuElemwise) and
node.op.scalar_op == scal.mul and
node.nin == 2):
node.op.scalar_op == scal.mul and
node.nin == 2):
targ = find_node(node.inputs[0], cls)
if targ is None:
targ = find_node(node.inputs[1], cls)
......@@ -87,8 +86,8 @@ def output_merge(cls, alpha_in, beta_in, out_in, nd):
@wraps(maker)
def opt(node):
if (isinstance(node.op, GpuElemwise) and
node.op.scalar_op == scal.add and
node.nin == 2):
node.op.scalar_op == scal.add and
node.nin == 2):
targ = find_node(node.inputs[0], cls)
W = node.inputs[1]
if targ is None:
......
......@@ -7,13 +7,13 @@ from theano.tests import unittest_tools as utt
# Skip tests if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest
import theano.sandbox.cuda as cuda_ndarray
if not cuda_ndarray.cuda_available:
if not cuda_ndarray.cuda_available: # noqa
raise SkipTest('Optional package cuda not available')
from theano.misc.pycuda_init import pycuda_available
if not pycuda_available:
if not pycuda_available: # noqa
raise SkipTest('Optional package pycuda not available')
from theano.sandbox.cuda.fftconv import scikits_cuda_available
if not scikits_cuda_available:
if not scikits_cuda_available: # noqa
raise SkipTest('Optional package scikits.cuda not available')
from theano.sandbox.cuda import float32_shared_constructor as shared
......
......@@ -2,13 +2,14 @@
# mpiexec -np 2 python _test_mpi_roundtrip.py
from mpi4py import MPI
comm = MPI.COMM_WORLD
import theano
from theano.tensor.io import send, recv, mpi_cmps
from theano.gof.sched import sort_schedule_fn
import numpy as np
from sys import stdout, stderr, exit
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
......
from datetime import datetime
__authors__ = "Ian Goodfellow"
__credits__ = ["Ian Goodfellow"]
__license__ = "3-clause BSD"
__maintainer__ = "Ian Goodfellow"
__email__ = "goodfeli@iro"
from datetime import datetime
def disturb_mem():
# Allocate a time-dependent amount of objects to increase
......
from __future__ import print_function
import os, unittest, sys
import nose.plugins.builtin
import os
import unittest
import sys
from nose.config import Config
from nose.plugins.manager import PluginManager
from numpy.testing.nosetester import import_nose, NoseTester
import nose.plugins.builtin
from numpy.testing.nosetester import NoseTester
from numpy.testing.noseclasses import KnownFailure, NumpyTestProgram
......@@ -31,7 +34,7 @@ class TheanoNoseTester(NoseTester):
:type extra_argv: list
:param extra_argv: List with any extra arguments to pass to nosetests.
"""
#self.package_path = os.path.abspath(self.package_path)
# self.package_path = os.path.abspath(self.package_path)
argv = [__file__, self.package_path]
argv += ['--verbosity', str(verbose)]
if extra_argv:
......@@ -39,8 +42,6 @@ class TheanoNoseTester(NoseTester):
return argv
def _show_system_info(self):
nose = import_nose()
import theano
print("Theano version %s" % theano.__version__)
theano_dir = os.path.dirname(theano.__file__)
......@@ -49,22 +50,20 @@ class TheanoNoseTester(NoseTester):
super(TheanoNoseTester, self)._show_system_info()
def prepare_test_args(self, verbose=1, extra_argv=None, coverage=False,
capture=True, knownfailure=True):
capture=True, knownfailure=True):
"""
Prepare arguments for the `test` method.
Takes the same arguments as `test`.
"""
# fail with nice error message if nose is not present
nose = import_nose()
# compile argv
argv = self._test_argv(verbose, extra_argv)
# numpy way of doing coverage
if coverage:
argv += ['--cover-package=%s' % self.package_name, '--with-coverage',
'--cover-tests', '--cover-inclusive', '--cover-erase']
argv += ['--cover-package=%s' % self.package_name,
'--with-coverage', '--cover-tests',
'--cover-inclusive', '--cover-erase']
# Capture output only if needed
if not capture:
......@@ -79,7 +78,7 @@ class TheanoNoseTester(NoseTester):
return argv, plugins
def test(self, verbose=1, extra_argv=None, coverage=False, capture=True,
knownfailure=True):
knownfailure=True):
"""
Run tests for module using nose.
......@@ -91,7 +90,8 @@ class TheanoNoseTester(NoseTester):
:param extra_argv: List with any extra arguments to pass to nosetests.
:type coverage: bool
:param coverage: If True, report coverage of Theano code. Default is False.
:param coverage: If True, report coverage of Theano
code. Default is False.
:type capture: bool
:param capture: If True, capture the standard output of the tests, like
......@@ -123,7 +123,7 @@ class TheanoNoseTester(NoseTester):
"launch theano.test()."))
argv, plugins = self.prepare_test_args(verbose, extra_argv, coverage,
capture, knownfailure)
capture, knownfailure)
# The "plugins" keyword of NumpyTestProgram gets ignored if config is
# specified. Moreover, using "addplugins" instead can lead to strange
......@@ -134,8 +134,6 @@ class TheanoNoseTester(NoseTester):
def main(modulename):
debug = False
if 0:
unittest.main()
elif len(sys.argv) == 2 and sys.argv[1] == "--debug":
......
......@@ -20,7 +20,6 @@ __contact__ = "Saizheng Zhang <saizhenglisa..at..gmail.com>"
whitelist_flake8 = [
"__init__.py",
"version.py",
"tests/test_gradient.py",
"tests/test_config.py",
"tests/diverse_tests.py",
......@@ -31,37 +30,20 @@ whitelist_flake8 = [
"tests/test_record.py",
"tests/__init__.py",
"tests/test_updates.py",
"tests/main.py",
"tests/test_pickle_unpickle_theano_fn.py",
"tests/test_determinism.py",
"tests/record.py",
"tests/test_printing.py",
"tests/test_tutorial.py",
"tests/disturb_mem.py",
"tests/unittest_tools.py",
"compile/ops.py",
"compile/debugmode.py",
"compile/function.py",
"compile/pfunc.py",
"compile/mode.py",
"compile/profilemode.py",
"compile/builders.py",
"compile/__init__.py",
"compile/profiling.py",
"compile/function_module.py",
"compile/sharedvalue.py",
"compile/monitormode.py",
"compile/io.py",
"compile/module.py",
"compile/tests/test_builders.py",
"compile/tests/test_misc.py",
"compile/tests/test_monitormode.py",
"compile/tests/test_function_module.py",
"compile/tests/test_inplace_opt_for_value.py",
"compile/tests/test_shared.py",
"compile/tests/test_ops.py",
"compile/tests/test_pfunc.py",
"compile/tests/test_module.py",
"compile/tests/test_debugmode.py",
"compile/tests/test_profiling.py",
"typed_list/type.py",
......@@ -94,16 +76,13 @@ whitelist_flake8 = [
"tensor/io.py",
"tensor/elemwise_cgen.py",
"tensor/raw_random.py",
"tensor/randomstreams.py",
"tensor/blas_scipy.py",
"tensor/basic.py",
"tensor/tests/test_subtensor.py",
"tensor/tests/test_utils.py",
"tensor/tests/test_nlinalg.py",
"tensor/tests/test_randomstreams.py",
"tensor/tests/test_shared_randomstreams.py",
"tensor/tests/test_misc.py",
"tensor/tests/test_naacl09.py",
"tensor/tests/mlp_test.py",
"tensor/tests/test_opt_uncanonicalize.py",
"tensor/tests/test_opt.py",
......@@ -155,7 +134,6 @@ whitelist_flake8 = [
"sandbox/test_theano_object.py",
"sandbox/test_scan.py",
"sandbox/rng_mrg.py",
"sandbox/downsample.py",
"sandbox/solve.py",
"sandbox/theano_object.py",
"sandbox/scan.py",
......@@ -190,7 +168,6 @@ whitelist_flake8 = [
"sandbox/cuda/nvcc_compiler.py",
"sandbox/cuda/neighbours.py",
"sandbox/cuda/tests/walltime.py",
"sandbox/cuda/tests/test_fftconv.py",
"sandbox/cuda/tests/test_gradient.py",
"sandbox/cuda/tests/test_neighbours.py",
"sandbox/cuda/tests/test_conv_cuda_ndarray.py",
......@@ -218,7 +195,6 @@ whitelist_flake8 = [
"sandbox/scan_module/tests/test_utils.py",
"sandbox/scan_module/tests/test_scan.py",
"sandbox/linalg/ops.py",
"sandbox/linalg/kron.py",
"sandbox/linalg/__init__.py",
"sandbox/linalg/tests/test_linalg.py",
"sandbox/gpuarray/comp.py",
......@@ -288,24 +264,12 @@ whitelist_flake8 = [
"sparse/sandbox/truedot.py",
"sparse/sandbox/sp.py",
"gof/destroyhandler.py",
"gof/vm.py",
"gof/cutils.py",
"gof/compiledir.py",
"gof/unify.py",
"gof/lazylinker_c.py",
"gof/optdb.py",
"gof/utils.py",
"gof/graph.py",
"gof/callcache.py",
"gof/python25.py",
"gof/type.py",
"gof/__init__.py",
"gof/cc.py",
"gof/opt.py",
"gof/compilelock.py",
"gof/link.py",
"gof/sched.py",
"gof/toolbox.py",
"gof/fg.py",
"gof/op.py",
"gof/cmodule.py",
......@@ -322,9 +286,6 @@ whitelist_flake8 = [
"gof/tests/test_cc.py",
"gof/tests/test_compute_test_value.py",
"gof/sandbox/equilibrium.py",
"sandbox/cuda/opt_util.py",
"gof/tests/test_utils.py",
"tensor/tests/_test_mpi_roundtrip.py",
]
......
try:
from theano.generated_version import *
from theano.generated_version import * # noqa
except ImportError:
short_version = 'unknown'
version = 'unknown'
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论