提交 ca79f02e authored 作者: Olivier Delalleau's avatar Olivier Delalleau

Merged -- no conflict

...@@ -35,3 +35,5 @@ theano/version.py ...@@ -35,3 +35,5 @@ theano/version.py
theano/version.py.out theano/version.py.out
distribute-*.egg distribute-*.egg
distribute-*.tar.gz distribute-*.tar.gz
out1
out2
...@@ -11,8 +11,6 @@ How should you write your algorithm to make the most of what Theano can do? ...@@ -11,8 +11,6 @@ How should you write your algorithm to make the most of what Theano can do?
Limitations Limitations
----------- -----------
- Conditional control flow is possible but currently not efficient. The current implementation will evaluate both sides of an ``if`` construct (see :func:`tensor.switch`).
- While- or for-Loops within an expression graph are supported, but only via - While- or for-Loops within an expression graph are supported, but only via
the :func:`theano.scan` op (which puts restrictions on how the loop body can the :func:`theano.scan` op (which puts restrictions on how the loop body can
interact with the rest of the graph). interact with the rest of the graph).
......
"""Provides `DebugMode`, an evaluation mode for debugging theano internals.""" """Provides `DebugMode`, an evaluation mode for debugging theano internals.
:TODO: add support for IfElse Op, LazyLinker, PureOp, etc.
"""
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
import time, copy, sys, copy_reg, gc, os import time, copy, sys, copy_reg, gc, os
...@@ -1552,7 +1556,8 @@ class _Maker(FunctionMaker): #inheritance buys a few helper functions ...@@ -1552,7 +1556,8 @@ class _Maker(FunctionMaker): #inheritance buys a few helper functions
def __init__(self, inputs, outputs, optimizer, mode, def __init__(self, inputs, outputs, optimizer, mode,
accept_inplace = False, accept_inplace = False,
function_builder = Function): function_builder = Function,
profile=None):
""" """
:type inputs: a list of SymbolicInput instances :type inputs: a list of SymbolicInput instances
...@@ -1567,7 +1572,7 @@ class _Maker(FunctionMaker): #inheritance buys a few helper functions ...@@ -1567,7 +1572,7 @@ class _Maker(FunctionMaker): #inheritance buys a few helper functions
:note: this function sets TensorType.filter_checks_isfinite when `mode.check_isfinite` is True :note: this function sets TensorType.filter_checks_isfinite when `mode.check_isfinite` is True
""" """
self.profile = profile
# Handle the case where inputs and/or outputs is a single Variable (not in a list) # Handle the case where inputs and/or outputs is a single Variable (not in a list)
unpack_single = False unpack_single = False
return_none = False return_none = False
......
...@@ -7,12 +7,13 @@ _logger = logging.getLogger('theano.compile.function') ...@@ -7,12 +7,13 @@ _logger = logging.getLogger('theano.compile.function')
from io import In from io import In
from function_module import orig_function from function_module import orig_function
from profiling import ProfileStats
from pfunc import pfunc from pfunc import pfunc
from numpy import any #for to work in python 2.4 from numpy import any #for to work in python 2.4
def function(inputs, outputs=None, mode=None, updates=[], givens=[], def function(inputs, outputs=None, mode=None, updates=[], givens=[],
no_default_updates=False, accept_inplace=False, name=None, no_default_updates=False, accept_inplace=False, name=None,
rebuild_strict=True, allow_input_downcast=None): rebuild_strict=True, allow_input_downcast=None, profile=None):
""" """
Return a callable object that will calculate `outputs` from `inputs`. Return a callable object that will calculate `outputs` from `inputs`.
...@@ -62,6 +63,11 @@ def function(inputs, outputs=None, mode=None, updates=[], givens=[], ...@@ -62,6 +63,11 @@ def function(inputs, outputs=None, mode=None, updates=[], givens=[],
precise, type. None (default) is almost like False, but allows precise, type. None (default) is almost like False, but allows
downcasting of Python float scalars to floatX. downcasting of Python float scalars to floatX.
:type profile: None, True, or ProfileStats instance
:param profile: accumulate profiling information into a given ProfileStats
instance. If argument is `True` then a new ProfileStats instance will be
used. This profiling object will be available via self.profile.
:note: Regarding givens: Be careful to make sure that these substitutions are :note: Regarding givens: Be careful to make sure that these substitutions are
independent--behaviour when Var1 of one pair appears in the graph leading to Var2 in independent--behaviour when Var1 of one pair appears in the graph leading to Var2 in
another expression is undefined. Replacements specified with givens are different from another expression is undefined. Replacements specified with givens are different from
...@@ -88,6 +94,8 @@ def function(inputs, outputs=None, mode=None, updates=[], givens=[], ...@@ -88,6 +94,8 @@ def function(inputs, outputs=None, mode=None, updates=[], givens=[],
if uses_In or uses_tuple: if uses_In or uses_tuple:
# we must use old semantics in this case. # we must use old semantics in this case.
if profile:
raise NotImplementedError('profiling not supported in old-style function')
if uses_updates or uses_givens: if uses_updates or uses_givens:
raise NotImplementedError("In() instances and tuple inputs triggers the old semantics, which disallow using updates and givens") raise NotImplementedError("In() instances and tuple inputs triggers the old semantics, which disallow using updates and givens")
fn = orig_function(inputs, outputs, fn = orig_function(inputs, outputs,
...@@ -102,7 +110,8 @@ def function(inputs, outputs=None, mode=None, updates=[], givens=[], ...@@ -102,7 +110,8 @@ def function(inputs, outputs=None, mode=None, updates=[], givens=[],
no_default_updates=no_default_updates, no_default_updates=no_default_updates,
accept_inplace=accept_inplace,name=name, accept_inplace=accept_inplace,name=name,
rebuild_strict=rebuild_strict, rebuild_strict=rebuild_strict,
allow_input_downcast=allow_input_downcast) allow_input_downcast=allow_input_downcast,
profile=profile)
# We need to add the flag check_aliased inputs if we have any mutable or # We need to add the flag check_aliased inputs if we have any mutable or
# borrowed used defined inputs # borrowed used defined inputs
fn._check_for_aliased_inputs = check_for_aliased_inputs fn._check_for_aliased_inputs = check_for_aliased_inputs
......
...@@ -4,7 +4,9 @@ import os, logging ...@@ -4,7 +4,9 @@ import os, logging
import numpy, theano import numpy, theano
from theano import gof from theano import gof
from theano.configparser import config, AddConfigVar, StrParam import theano.gof.vm
from theano.configparser import config, AddConfigVar, StrParam, EnumStr
_logger = logging.getLogger('theano.compile.mode') _logger = logging.getLogger('theano.compile.mode')
...@@ -55,7 +57,11 @@ predefined_linkers = { ...@@ -55,7 +57,11 @@ predefined_linkers = {
'c' : gof.CLinker(), 'c' : gof.CLinker(),
'c|py' : gof.OpWiseCLinker(allow_gc=True), 'c|py' : gof.OpWiseCLinker(allow_gc=True),
'c|py_nogc' : gof.OpWiseCLinker(allow_gc=False), 'c|py_nogc' : gof.OpWiseCLinker(allow_gc=False),
'c&py' : gof.DualLinker(checker = check_equal) 'c&py' : gof.DualLinker(checker = check_equal),
'vm' : gof.vm.VM_Linker(allow_gc=True, use_cloop=False),
'cvm' : gof.vm.VM_Linker(allow_gc=True, use_cloop=True),
'vm_nogc' : gof.vm.VM_Linker(allow_gc=False, use_cloop=False),
'cvm_nogc': gof.vm.VM_Linker(allow_gc=False, use_cloop=True),
} }
...@@ -249,6 +255,7 @@ class Mode(object): ...@@ -249,6 +255,7 @@ class Mode(object):
self._optimizer = optimizer self._optimizer = optimizer
self.call_time = 0 self.call_time = 0
self.fn_time = 0 self.fn_time = 0
linker.mode = self #TODO: WHY IS THIS HERE?
self.optimizer_time = 0 self.optimizer_time = 0
self.linker_time = 0 self.linker_time = 0
...@@ -290,15 +297,27 @@ class Mode(object): ...@@ -290,15 +297,27 @@ class Mode(object):
FAST_COMPILE = Mode('py', 'fast_compile') FAST_COMPILE = Mode('py', 'fast_compile')
FAST_RUN = Mode('c|py', 'fast_run') FAST_RUN = Mode('c|py', 'fast_run')
FAST_RUN_NOGC = Mode("c|py_nogc", 'fast_run') FAST_RUN_NOGC = Mode("c|py_nogc", 'fast_run')
SANITY_CHECK = [Mode('c|py', None),
Mode('c|py', 'fast_run')]
STABILIZE = Mode("c|py", OPT_STABILIZE) STABILIZE = Mode("c|py", OPT_STABILIZE)
predefined_modes = {'FAST_COMPILE': FAST_COMPILE, predefined_modes = {'FAST_COMPILE': FAST_COMPILE,
'FAST_RUN': FAST_RUN, 'FAST_RUN': FAST_RUN,
'FAST_RUN_NOGC':FAST_RUN_NOGC, 'FAST_RUN_NOGC':FAST_RUN_NOGC,
'SANITY_CHECK': SANITY_CHECK, 'STABILIZE': STABILIZE,
'STABILIZE': STABILIZE} 'VM':Mode('vm', 'fast_run'),
'VM_NOGC':Mode('vm_nogc', 'fast_run'),
'CVM':Mode('cvm', 'fast_run'),
'CVM_NOGC':Mode('cvm_nogc', 'fast_run'),
}
#Don't add FAST_RUN_NOGC to this list(as well as other ALL CAPS short cut)
#The way to get FAST_RUN_NOGC is with the flag 'linker=c|py_nogc'
#The old all capital letter way of working is deprecated as it is not scalable.
AddConfigVar('mode',
"Default compilation mode",
EnumStr(*(predefined_modes.keys() + [
'Mode','DEBUG_MODE', 'PROFILE_MODE'])),
in_c_key=False)
instanciated_default_mode=None instanciated_default_mode=None
def get_mode(orig_string): def get_mode(orig_string):
...@@ -329,7 +348,7 @@ def get_mode(orig_string): ...@@ -329,7 +348,7 @@ def get_mode(orig_string):
ret = DebugMode(optimizer=config.optimizer) ret = DebugMode(optimizer=config.optimizer)
else: else:
# The import is needed in case string is ProfileMode # The import is needed in case string is ProfileMode
from profilemode import ProfileMode from profilemode import ProfileMode,prof_mode_instance_to_print
ret = eval(string+'(linker=config.linker, optimizer=config.optimizer)') ret = eval(string+'(linker=config.linker, optimizer=config.optimizer)')
elif predefined_modes.has_key(string): elif predefined_modes.has_key(string):
ret = predefined_modes[string] ret = predefined_modes[string]
...@@ -349,7 +368,6 @@ def get_mode(orig_string): ...@@ -349,7 +368,6 @@ def get_mode(orig_string):
#must tell python to print the summary at the end. #must tell python to print the summary at the end.
if string == 'ProfileMode': if string == 'ProfileMode':
#need to import later to break circular dependency. #need to import later to break circular dependency.
from profilemode import prof_mode_instance_to_print
prof_mode_instance_to_print.append(ret) prof_mode_instance_to_print.append(ret)
return ret return ret
...@@ -365,3 +383,4 @@ def register_mode(name, mode): ...@@ -365,3 +383,4 @@ def register_mode(name, mode):
if name in predefined_modes: if name in predefined_modes:
raise ValueError('Mode name already taken: %s' % name) raise ValueError('Mode name already taken: %s' % name)
predefined_modes[name] = mode predefined_modes[name] = mode
"""Provide a simple user friendly API """ """Provide a simple user friendly API """
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import numpy # for backport to 2.4, to get any().
from profiling import ProfileStats
from theano.gof import Container, Variable, generic, graph, Constant, Value from theano.gof import Container, Variable, generic, graph, Constant, Value
from theano.compile import orig_function, In, Out from theano.compile import orig_function, In, Out
from theano.compile.sharedvalue import SharedVariable, shared from theano.compile.sharedvalue import SharedVariable, shared
import numpy # for backport to 2.4, to get any(). from theano import config
def rebuild_collect_shared( outputs def rebuild_collect_shared( outputs
, inputs = None , inputs = None
...@@ -292,7 +295,8 @@ class Param(object): ...@@ -292,7 +295,8 @@ class Param(object):
def pfunc(params, outputs=None, mode=None, updates=[], givens=[], def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
no_default_updates=False, accept_inplace=False, name=None, no_default_updates=False, accept_inplace=False, name=None,
rebuild_strict=True, allow_input_downcast=None): rebuild_strict=True, allow_input_downcast=None,
profile=None):
"""Function-constructor for graphs with shared variables. """Function-constructor for graphs with shared variables.
:type params: list of either Variable or Param instances. :type params: list of either Variable or Param instances.
...@@ -319,11 +323,9 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[], ...@@ -319,11 +323,9 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
If False (default), perform them all. Else, perform automatic updates on all Variables If False (default), perform them all. Else, perform automatic updates on all Variables
that are neither in "updates" nor in "no_default_updates". that are neither in "updates" nor in "no_default_updates".
:param name: an optional name for this fct. If used, the profile mode will print the time spent in this fct. :type name: None or string
:param name: attaches a name to the Profiling result of this function when
:rtype: theano.compile.Function using ProfileMode (will be deprecated).
:returns: a callable object that will compute the outputs (given the inputs)
and update the implicit function arguments according to the `updates`.
:type allow_input_downcast: Boolean :type allow_input_downcast: Boolean
:param allow_input_downcast: True means that the values passed as :param allow_input_downcast: True means that the values passed as
...@@ -333,6 +335,21 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[], ...@@ -333,6 +335,21 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
precise, type. None (default) is almost like False, but allows precise, type. None (default) is almost like False, but allows
downcasting of Python float scalars to floatX. downcasting of Python float scalars to floatX.
:type profile: None, True, str, or ProfileStats instance
:param profile: accumulate profiling information into a given ProfileStats
instance. None is the default, and means to use the value of
config.profile.
If argument is `True` then a new ProfileStats instance will be
used. If argument is a string, a new ProfileStats instance will be created
with that string as its `message` attribute. This profiling object will be
available via self.profile.
:rtype: theano.compile.Function
:returns: a callable object that will compute the outputs (given the inputs)
and update the implicit function arguments according to the `updates`.
:note: Regarding givens: Be careful to make sure that these substitutions are :note: Regarding givens: Be careful to make sure that these substitutions are
independent--behaviour when Var1 of one pair appears in the graph leading to Var2 in independent--behaviour when Var1 of one pair appears in the graph leading to Var2 in
another expression is undefined. Replacements specified with givens are different from another expression is undefined. Replacements specified with givens are different from
...@@ -354,6 +371,17 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[], ...@@ -354,6 +371,17 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
# Then it clones the outputs and the update expressions. This rebuilds a computation graph # Then it clones the outputs and the update expressions. This rebuilds a computation graph
# from the inputs and the givens. # from the inputs and the givens.
# #
if profile is None:
profile = config.profile
# profile -> True or False
if profile == True:
profile = ProfileStats(message=name)
# profile -> object
if type(profile) == str:
profile = ProfileStats(message=profile)
# profile is typically either False or an object at this point.
# No need to block other objects being passed through though. It might be
# useful.
if not isinstance(params,(list,tuple)): if not isinstance(params,(list,tuple)):
raise Exception("in pfunc() the first argument must be a list or a tuple") raise Exception("in pfunc() the first argument must be a list or a tuple")
...@@ -393,7 +421,7 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[], ...@@ -393,7 +421,7 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
inputs.append(si) inputs.append(si)
return orig_function(inputs, cloned_outputs, mode, return orig_function(inputs, cloned_outputs, mode,
accept_inplace=accept_inplace, name=name) accept_inplace=accept_inplace, name=name, profile=profile)
def _pfunc_param_to_in(param, strict=False, allow_downcast=None): def _pfunc_param_to_in(param, strict=False, allow_downcast=None):
......
...@@ -8,6 +8,8 @@ from theano.configparser import config, AddConfigVar, IntParam, BoolParam ...@@ -8,6 +8,8 @@ from theano.configparser import config, AddConfigVar, IntParam, BoolParam
from theano.compile.function_module import FunctionMaker from theano.compile.function_module import FunctionMaker
run_cthunk = None # Will be imported only when needed. run_cthunk = None # Will be imported only when needed.
from profiling import ProfileStats
import_time = time.time() import_time = time.time()
AddConfigVar('ProfileMode.n_apply_to_print', AddConfigVar('ProfileMode.n_apply_to_print',
...@@ -34,24 +36,53 @@ AddConfigVar('ProfileMode.profile_memory', ...@@ -34,24 +36,53 @@ AddConfigVar('ProfileMode.profile_memory',
class Profile_Maker(FunctionMaker): class Profile_Maker(FunctionMaker):
def create(self, input_storage=None, trustme=False): def create(self, input_storage=None, trustme=False):
ret = super(Profile_Maker,self).create(input_storage, trustme) ret = super(Profile_Maker,self).create(input_storage, trustme)
# create a function-specific storage container for profiling info
profile = ProfileStats(atexit_print=False)
self.mode.profile_stats[ret] = profile
ret.profile = profile
#initialize the timers
for i, node in enumerate(ret.maker.env.toposort()): for i, node in enumerate(ret.maker.env.toposort()):
self.mode.apply_time[(i,node)]=0.0 profile.apply_time[node]=0.0
assert len(ret.fn.thunk_groups[i])==1 profile.outputs_size[node]=[0.0] * len(node.outputs)
self.mode.op_cimpl[node.op] = hasattr(ret.fn.thunk_groups[i][0],'cthunk')
# a thunk_group is a list of the thunks from each linker
# corresponding to the i'th position in the toposort.
assert len(ret.fn.thunk_groups[i])==1
profile.apply_cimpl[node] = hasattr(
ret.fn.thunk_groups[i][0],
'cthunk')
# Here we replace the linker function.
# This ugliness makes WrapLinker (an object that *generates*
# functions and is not function-specific) work with ProfileStats
# objects which are function-specific.
#capture old fn in closure. This is important since new_fn is about to
#take its place as ret.fn.
ret_fn = ret.fn
def new_fn():
self.mode.apply_time = self.mode.profile_stats[ret].apply_time
self.mode.outputs_size = self.mode.profile_stats[ret].outputs_size
ret_fn()
# delete the old apply_time variable
# because it doesn't mean the same thing anymore.
# This prevents old code from looking like it still works.
del self.mode.apply_time
del self.mode.outputs_size
ret.fn = new_fn
return ret return ret
class ProfileMode(Mode): class ProfileMode(Mode):
def __init__(self, linker=config.linker, optimizer=config.optimizer): def __init__(self, linker=config.linker, optimizer=config.optimizer):
apply_time = {}
op_cimpl = {}
compile_time = 0 #time passed in theano.function()
fct_call_time = {}#time passed inside theano fct call including op time.
fct_call = {}
message="" message=""
outputs_size={} profile_stats={}
self.__setstate__((linker, optimizer, apply_time, op_cimpl, self.__setstate__((linker,
compile_time, fct_call_time, fct_call, message, outputs_size)) optimizer,
message,
profile_stats))
def function_maker(self, i,o,m, *args, **kwargs): def function_maker(self, i,o,m, *args, **kwargs):
"""Return an instance of `Profiler_Maker` which init the count""" """Return an instance of `Profiler_Maker` which init the count"""
...@@ -59,28 +90,24 @@ class ProfileMode(Mode): ...@@ -59,28 +90,24 @@ class ProfileMode(Mode):
assert m is self assert m is self
return Profile_Maker(i, o, self, *args, **kwargs) return Profile_Maker(i, o, self, *args, **kwargs)
local_time = property(lambda self: [sum(self.apply_time.values())]) def __get_local_time(self):
rval = 0
for ps in self.profile_stats.values():
rval += sum(ps.apply_time.values())
return rval
local_time = property(__get_local_time)
def __getstate__(self): def __getstate__(self):
#print "__getstate__",self.provided_linker,self.provided_optimizer #print "__getstate__",self.provided_linker,self.provided_optimizer
return (self.provided_linker, self.provided_optimizer, self.apply_time, return (self.provided_linker,
self.op_cimpl, self.compile_time, self.fct_call_time, self.provided_optimizer,
self.fct_call, self.message, self.outputs_size) self.message,
self.profile_stats)
def __setstate__(self, state): def __setstate__(self, state):
linker, optimizer, apply_time, op_cimpl, compile_time, \ linker, optimizer, message, profile_stats = state
fct_call_time, fct_call, message, outputs_size = state self.message = message
self.apply_time = apply_time self.profile_stats = profile_stats
self.op_cimpl = op_cimpl
self.compile_time = compile_time
self.fct_call_time = fct_call_time
self.fct_call = fct_call
self.call_time = 0
self.fn_time = 0
self.optimizer_time = 0
self.linker_time = 0
self.message = ""
self.outputs_size = outputs_size
def profile_thunk(i, node, th): def profile_thunk(i, node, th):
""" Profile only the execution time """ Profile only the execution time
...@@ -102,7 +129,7 @@ class ProfileMode(Mode): ...@@ -102,7 +129,7 @@ class ProfileMode(Mode):
th() th()
dt = time.time() - t0 dt = time.time() - t0
apply_time[(i,node)] += dt self.apply_time[node] += max(dt, 1e-14)
def profile_thunk2(i, node, th): def profile_thunk2(i, node, th):
...@@ -149,8 +176,8 @@ class ProfileMode(Mode): ...@@ -149,8 +176,8 @@ class ProfileMode(Mode):
else: else:
raise Exception("Can't determine the memory size of dtype",o[0].dtype) raise Exception("Can't determine the memory size of dtype",o[0].dtype)
size.append(s) size.append(s)
outputs_size[node]=size self.outputs_size[node]=size
apply_time[(i,node)] += dt self.apply_time[node] += max(dt, 1e-14)
self.provided_linker = linker self.provided_linker = linker
...@@ -182,22 +209,44 @@ class ProfileMode(Mode): ...@@ -182,22 +209,44 @@ class ProfileMode(Mode):
Currently there is n_apply_to_print, n_ops_to_print and min_memory_size Currently there is n_apply_to_print, n_ops_to_print and min_memory_size
that are accepted. that are accepted.
""" """
compile_time = sum([ps.compile_time for ps in self.profile_stats.values()])
fct_call = dict([(fn, ps.fct_callcount)
for (fn, ps) in self.profile_stats.items()])
fct_call_time = dict([(fn, ps.fct_call_time)
for (fn, ps) in self.profile_stats.items()])
apply_time = {}
for fn, ps in self.profile_stats.items():
for (i, node) in enumerate(fn.maker.env.toposort()):
apply_time[(i, node)] = ps.apply_time[node]
for (i,n),t in apply_time.items():
if t == 0:
print i, n
op_cimpl = {}
outputs_size = {}
for fn, ps in self.profile_stats.items():
op_cimpl.update(ps.apply_cimpl)
compile_time = self.compile_time
fct_call_time = self.fct_call_time
fct_call = self.fct_call
apply_time = self.apply_time
op_cimpl = self.op_cimpl
message = self.message message = self.message
outputs_size = self.outputs_size
other_time = {'linker_time':self.linker_time, outputs_size = {}
'optimizer_time':self.optimizer_time} for fn, ps in self.profile_stats.items():
outputs_size.update(ps.outputs_size)
other_time = dict(
linker_time = sum(
[ps.linker_time for ps in self.profile_stats.values()]),
optimizer_time = sum(
[ps.optimizer_time for ps in self.profile_stats.values()]))
self.print_summary_("print_summary", compile_time, fct_call_time, fct_call, self.print_summary_("print_summary", compile_time, fct_call_time, fct_call,
apply_time, op_cimpl, message, outputs_size, other_time, apply_time, op_cimpl, message, outputs_size,
self.local_time, other_time,
**kwargs) **kwargs)
def print_diff_summary(self, other, **kwargs): def print_diff_summary(self, other, **kwargs):
""" As print_summary, but print the difference on two different profile mode. """ As print_summary, but print the difference on two different profile mode.
TODO: Also we don't print the Apply-wise summary as it don't work for now. TODO: Also we don't print the Apply-wise summary as it don't work for now.
...@@ -240,7 +289,7 @@ class ProfileMode(Mode): ...@@ -240,7 +289,7 @@ class ProfileMode(Mode):
@staticmethod @staticmethod
def print_summary_(fct_name, compile_time, fct_call_time, fct_call, def print_summary_(fct_name, compile_time, fct_call_time, fct_call,
apply_time, op_cimpl, message, outputs_size, apply_time, op_cimpl, message, outputs_size,
other_time, local_time, other_time,
n_apply_to_print=config.ProfileMode.n_apply_to_print, n_apply_to_print=config.ProfileMode.n_apply_to_print,
n_ops_to_print=config.ProfileMode.n_ops_to_print, n_ops_to_print=config.ProfileMode.n_ops_to_print,
print_apply=True, print_apply=True,
...@@ -256,7 +305,6 @@ class ProfileMode(Mode): ...@@ -256,7 +305,6 @@ class ProfileMode(Mode):
whose outputs memory size is lower then that. whose outputs memory size is lower then that.
""" """
local_time = sum(apply_time.values())
total_time = time.time() - import_time total_time = time.time() - import_time
total_fct_time = sum(fct_call_time.values()) total_fct_time = sum(fct_call_time.values())
total_fct_call = sum(fct_call.values()) total_fct_call = sum(fct_call.values())
...@@ -312,7 +360,7 @@ class ProfileMode(Mode): ...@@ -312,7 +360,7 @@ class ProfileMode(Mode):
op_time[op]+=t op_time[op]+=t
nb_call = [v for k,v in fct_call.items() if k.maker.env is a.env][0] nb_call = [v for k,v in fct_call.items() if k.maker.env is a.env][0]
if t==0: if t==0:
assert nb_call == 0 assert nb_call == 0, nb_call
else: else:
op_call[op] += nb_call op_call[op] += nb_call
op_apply[op] += 1 op_apply[op] += 1
...@@ -429,8 +477,8 @@ class ProfileMode(Mode): ...@@ -429,8 +477,8 @@ class ProfileMode(Mode):
else: else:
fct_memory={}#env->dict(node->(outputs size)) fct_memory={}#env->dict(node->(outputs size))
var_mem = {} var_mem = {}
for node,val in outputs_size.items(): for node, val in outputs_size.items():
fct_memory.setdefault(node.env,{}) fct_memory.setdefault(node.env, {})
fct_memory[node.env][node]=val fct_memory[node.env][node]=val
for out,v in zip(node.outputs,val): for out,v in zip(node.outputs,val):
var_mem[out]=v var_mem[out]=v
...@@ -600,7 +648,7 @@ def atexit_print_default_profile_mode(): ...@@ -600,7 +648,7 @@ def atexit_print_default_profile_mode():
config.mode=PROFILE_MODE config.mode=PROFILE_MODE
""" """
for prof_mode in prof_mode_instance_to_print: for prof_mode in prof_mode_instance_to_print:
if sum(prof_mode.apply_time.values())>0: if prof_mode.local_time>0:
prof_mode.print_summary() prof_mode.print_summary()
#Register atexit_print_default_profile_mode to have the summary of the #Register atexit_print_default_profile_mode to have the summary of the
......
差异被折叠。
"""
Test compilation modes
"""
from nose.plugins.skip import SkipTest
import unittest
import theano
import numpy
import random
import numpy.random
from theano.tests import unittest_tools as utt
import theano.tensor as T
class T_bunch_of_modes(unittest.TestCase):
def test1(self):
# this is a quick test after the LazyLinker branch merge
# to check that all the current modes can still be used.
linker_classes_involved = []
for modename in theano.config.__class__.__dict__['mode'].all:
x = T.matrix()
y = T.vector()
f = theano.function([x,y], x+y, mode=modename)
# test that it runs something
f([[1,2],[3,4]], [5, 6])
linker_classes_involved.append(f.maker.mode.linker.__class__)
print 'MODE:', modename, f.maker.mode.linker, 'stop'
# regression check:
# there should be
# - VM_Linker
# - OpWiseCLinker (FAST_RUN)
# - WrapLinker (PROFILE_MODE)
# - PerformLinker (FAST_COMPILE)
# - DebugMode's Linker (DEBUG_MODE)
assert 5 == len(set(linker_classes_involved))
if __name__ == '__main__':
unittest.main()
...@@ -65,15 +65,6 @@ AddConfigVar('force_device', ...@@ -65,15 +65,6 @@ AddConfigVar('force_device',
BoolParam(False, allow_override=False), BoolParam(False, allow_override=False),
in_c_key=False) in_c_key=False)
#Don't add FAST_RUN_NOGC to this list(as well as other ALL CAPS short cut)
#The way to get FAST_RUN_NOGC is with the flag 'linker=c|py_nogc'
#The old all capital letter way of working is deprecated as it is not scalable.
AddConfigVar('mode',
"Default compilation mode",
EnumStr('Mode', 'ProfileMode', 'DebugMode', 'FAST_RUN',
'FAST_COMPILE', 'PROFILE_MODE', 'DEBUG_MODE'),
in_c_key=False)
# Test whether or not gcc is present: disable C code if it is not. # Test whether or not gcc is present: disable C code if it is not.
# Using the dummy file descriptor below is a workaround for a crash experienced # Using the dummy file descriptor below is a workaround for a crash experienced
# in an unusual Python 2.4.4 Windows environment with the default stdin=None. # in an unusual Python 2.4.4 Windows environment with the default stdin=None.
...@@ -84,13 +75,15 @@ try: ...@@ -84,13 +75,15 @@ try:
# Keep the default linker the same as the one for the mode FAST_RUN # Keep the default linker the same as the one for the mode FAST_RUN
AddConfigVar('linker', AddConfigVar('linker',
"Default linker used if the theano flags mode is Mode or ProfileMode", "Default linker used if the theano flags mode is Mode or ProfileMode",
EnumStr('c|py', 'py', 'c', 'c|py_nogc', 'c&py'), EnumStr('c|py', 'py', 'c', 'c|py_nogc', 'c&py',
'vm', 'cvm', 'vm_nogc', 'cvm_nogc'),
in_c_key=False) in_c_key=False)
except OSError: except OSError:
# gcc is not present, linker should default to python only # gcc is not present, linker should default to python only
AddConfigVar('linker', AddConfigVar('linker',
"Default linker used if the theano flags mode is Mode or ProfileMode", "Default linker used if the theano flags mode is Mode or ProfileMode",
EnumStr('py', 'c|py', 'c', 'c|py_nogc', 'c&py'), EnumStr('c|py', 'py', 'c', 'c|py_nogc', 'c&py',
'vm', 'cvm', 'vm_nogc', 'cvm_nogc'),
in_c_key=False) in_c_key=False)
warning('GCC not detected ! Theano will be unable to execute optimized '+ warning('GCC not detected ! Theano will be unable to execute optimized '+
'C-implementations (for both CPU and GPU) and will default to '+ 'C-implementations (for both CPU and GPU) and will default to '+
...@@ -145,10 +138,6 @@ AddConfigVar('op.set_flops', ...@@ -145,10 +138,6 @@ AddConfigVar('op.set_flops',
BoolParam(False), BoolParam(False),
in_c_key=False) in_c_key=False)
AddConfigVar('nvcc.fastmath',
"",
BoolParam(False))
AddConfigVar('gpuelemwise.sync', AddConfigVar('gpuelemwise.sync',
"when true, wait that the gpu fct finished and check it error code.", "when true, wait that the gpu fct finished and check it error code.",
BoolParam(True)) BoolParam(True))
......
...@@ -146,7 +146,7 @@ from link import \ ...@@ -146,7 +146,7 @@ from link import \
Container, Linker, LocalLinker, PerformLinker, WrapLinker, WrapLinkerMany Container, Linker, LocalLinker, PerformLinker, WrapLinker, WrapLinkerMany
from op import \ from op import \
Op Op, PureOp
from opt import (Optimizer, optimizer, SeqOptimizer, from opt import (Optimizer, optimizer, SeqOptimizer,
MergeOptimizer, MergeOptMerge, MergeOptimizer, MergeOptMerge,
......
...@@ -1312,6 +1312,7 @@ def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[] ...@@ -1312,6 +1312,7 @@ def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[]
#DSE Patch 1 for supporting OSX frameworks; add -framework Python #DSE Patch 1 for supporting OSX frameworks; add -framework Python
if sys.platform=='darwin' : if sys.platform=='darwin' :
preargs.extend(['-undefined','dynamic_lookup']) preargs.extend(['-undefined','dynamic_lookup'])
python_inc = distutils.sysconfig.get_python_inc()
# link with the framework library *if specifically requested* # link with the framework library *if specifically requested*
# config.mac_framework_link is by default False, since on some mac # config.mac_framework_link is by default False, since on some mac
# installs linking with -framework causes a Bus Error # installs linking with -framework causes a Bus Error
......
...@@ -311,6 +311,9 @@ class Env(utils.object2): ...@@ -311,6 +311,9 @@ class Env(utils.object2):
self.__import_r__([new_r]) self.__import_r__([new_r])
self.__add_clients__(new_r, [(node, i)]) self.__add_clients__(new_r, [(node, i)])
prune = self.__remove_clients__(r, [(node, i)], False) prune = self.__remove_clients__(r, [(node, i)], False)
# Precondition: the substitution is semantically valid
# However it may introduce cycles to the graph, in which case the
# transaction will be reverted later.
self.execute_callbacks('on_change_input', node, i, r, new_r, reason=reason) self.execute_callbacks('on_change_input', node, i, r, new_r, reason=reason)
if prune: if prune:
...@@ -438,16 +441,32 @@ class Env(utils.object2): ...@@ -438,16 +441,32 @@ class Env(utils.object2):
if len(self.nodes) < 2: if len(self.nodes) < 2:
# optimization # optimization
# when there are 0 or 1 nodes, no sorting is necessary # when there are 0 or 1 nodes, no sorting is necessary
# This special case happens a lot because the OpWiseCLinker produces
# 1-element graphs.
return list(self.nodes) return list(self.nodes)
env = self env = self
ords = {} ords = self.orderings()
for feature in env._features:
if hasattr(feature, 'orderings'):
for op, prereqs in feature.orderings(env).items():
ords.setdefault(op, []).extend(prereqs)
order = graph.io_toposort(env.inputs, env.outputs, ords) order = graph.io_toposort(env.inputs, env.outputs, ords)
return order return order
    def orderings(self):
        """
        Return dict d s.t. d[node] is a list of nodes that must be evaluated
        before node itself can be evaluated.

        This is used primarily by the destroy_handler feature to ensure that
        all clients of any destroyed inputs have already computed their
        outputs.
        """
        ords = {}
        # Merge the ordering constraints contributed by every feature that
        # implements an orderings() hook (e.g. the destroy handler).
        for feature in self._features:
            if hasattr(feature, 'orderings'):
                for node, prereqs in feature.orderings(self).items():
                    ords.setdefault(node, []).extend(prereqs)
        # eliminate duplicate prereqs
        for (node,prereqs) in ords.items():
            ords[node] = list(set(prereqs))
        return ords
def nclients(self, r): def nclients(self, r):
"""WRITEME Same as len(self.clients(r)).""" """WRITEME Same as len(self.clients(r))."""
return len(self.clients(r)) return len(self.clients(r))
......
差异被折叠。
# Build-and-load script for the lazy linker's C extension ('lazylinker_ext').
# Compiles theano/gof/lazylinker_c.c into the per-user compiledir, then
# re-exports the resulting module's symbols into this namespace.
import os
import theano
from theano import config
from theano.gof.compilelock import get_lock, release_lock
from theano.gof import cmodule

# Hold the compilation lock so that concurrent theano processes do not
# compile into the shared compiledir at the same time.
get_lock()
try:
    dirname = 'lazylinker_ext'
    # The C source ships inside the installed theano package.
    cfile = os.path.join(theano.__path__[0], 'gof', 'lazylinker_c.c')
    code = open(cfile).read()
    loc = os.path.join(config.compiledir, dirname)
    if not os.path.exists(loc):
        os.mkdir(loc)
    cmodule.gcc_module_compile_str(dirname, code, location=loc)
    # Import everything from the freshly compiled module.
    from lazylinker_ext.lazylinker_ext import *
finally:
    # Release lock on compilation directory.
    release_lock()
...@@ -3,18 +3,21 @@ ...@@ -3,18 +3,21 @@
The `Op` class is the base interface for all operations The `Op` class is the base interface for all operations
compatible with `gof`'s :doc:`graph` routines. compatible with `gof`'s :doc:`graph` routines.
""" """
__authors__ = "theano-dev"
__copyright__ = "(c) 2010, Universite de Montreal"
__license__ = "3-clause BSD License"
__contact__ = "theano-dev <theano-dev@googlegroups.com>"
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
import logging
from theano import config from theano import config
import graph import graph
import numpy import numpy
import utils import utils
import warnings import warnings
import logging
from theano import config
from env import Env from env import Env
import graph
import cc import cc
......
from copy import deepcopy
import numpy
from theano.gof.op import PureOp
from theano.gof import Apply, generic, Container
from theano.gof.link import LocalLinker, map_storage, add_clear_storage
from theano import function, Mode
from theano.lazycond import ifelse
import theano.tensor as T
class IfElseIfElseIf(PureOp):
    """Lazy four-way conditional: if c1: t1 elif c2: t2 elif c3: t3 else: f3.

    Test Op exercising the lazy-thunk protocol of the VM linkers: the thunk
    returns the indices of the inputs it still needs, so only the branch that
    is actually selected ever gets computed.
    """

    def __init__(self, inplace=False):
        # check destroyhandler and others to ensure that a view_map with
        # multiple inputs can work
        self.inplace=inplace
        # Only the copying (non-inplace) path is implemented.
        assert not self.inplace

    def make_node(self, c1, t1, c2,t2,c3,t3,f3):
        # All branch values must share one type; it becomes the output type.
        assert t1.type == f3.type
        assert t2.type == t3.type
        assert t3.type == f3.type
        return Apply(self, [c1,t1,c2,t2,c3,t3,f3], [t1.type()])

    def make_thunk(self, node, storage_map, compute_map, no_recycling):
        # One shared [flag] / [value] cell per input and output; the VM
        # mutates these same cells.
        input_computed = [compute_map[v] for v in node.inputs]
        output_computed = [compute_map[v] for v in node.outputs]
        input_registers = [storage_map[v] for v in node.inputs]
        output_registers = [storage_map[v] for v in node.outputs]
        outtype = node.outputs[0].type

        def thunk():
            # Lazy protocol: returning [i] asks the VM to compute input i and
            # call this thunk again; returning [] means the output is ready.
            if not input_computed[0][0]:
                return [0]
            else:
                truthval = input_registers[0][0]
                if truthval:
                    if not input_computed[1][0]:
                        return [1]
                    else:
                        output_computed[0][0]=1
                        # deepcopy: output must not alias the branch storage
                        output_registers[0][0]=outtype.filter(deepcopy(input_registers[1][0]))
                        return []
                else:
                    if not input_computed[2][0]:
                        return [2]
                    else:
                        truthval = input_registers[2][0]
                        if truthval:
                            if not input_computed[3][0]:
                                return [3]
                            else:
                                output_computed[0][0] = 1
                                output_registers[0][0] = outtype.filter(deepcopy(input_registers[3][0]))
                                return []
                        else:
                            if not input_computed[4][0]:
                                return [4]
                            else:
                                truthval = input_registers[4][0]
                                if truthval:
                                    if not input_computed[5][0]:
                                        return [5]
                                    else:
                                        output_computed[0][0] = 1
                                        output_registers[0][0] = outtype.filter(deepcopy(input_registers[5][0]))
                                        return []
                                else:
                                    if not input_computed[6][0]:
                                        return [6]
                                    else:
                                        output_computed[0][0] = 1
                                        output_registers[0][0] = outtype.filter(deepcopy(input_registers[6][0]))
                                        return []
        thunk.lazy = True
        return thunk
class NotImplementedOp(PureOp):
    """Identity-typed Op whose thunk always raises ``NotImplementedOp.E``.

    Used in tests to mark graph branches that must never be evaluated:
    if lazy evaluation works, the exception is never raised.
    """

    class E(Exception):
        """Raised whenever the thunk of this Op is actually executed."""
        pass

    def make_node(self, x):
        # The single output has exactly the type of the single input.
        out = x.type()
        return Apply(self, [x], [out])

    def make_thunk(self, node, storage_map, compute_map, no_recycling):
        def raising_thunk():
            raise self.E()
        # Not lazy: the VM runs it eagerly once its inputs are ready.
        raising_thunk.lazy = False
        return raising_thunk
def test_ifelse():
    """ifelse must evaluate only the selected branch: the NotImplementedOp
    branch raises if it is ever computed."""
    a = generic()
    b = generic()
    c = generic()
    notimpl = NotImplementedOp()
    # The 'vm' linker is required: ifelse only works with lazy-capable linkers.
    f = function([a,b,c], ifelse(a, notimpl(b), c),
            mode=Mode(linker='vm', optimizer='fast_run'))
    try:
        # case 1: condition True selects the NotImplementedOp branch -> raise
        print "case 1"
        f( True, 'a', 'b')
        assert False
    except NotImplementedOp.E:
        pass
    print "... passed"
    # case 2: condition False selects the plain branch; the NotImplementedOp
    # branch must be skipped entirely.
    print "case 2"
    print f( False, 'a', 'b')
    assert f( False, 'a', 'b') == 'b'
    print "... passed"
def more_complex_test():
    """Nested lazy conditionals (ifelse fed into IfElseIfElseIf) must compute
    only the selected branches; NotImplementedOp guards a dead branch."""
    notimpl = NotImplementedOp()
    ifelseifelseif = IfElseIfElseIf()
    x1 = T.scalar('x1')
    x2 = T.scalar('x2')
    c1 = generic('c1')
    c2 = generic('c2')
    t1 = ifelse(c1,x1,notimpl(x2))
    t1.name = 't1'
    t2 = t1*10
    t2.name = 't2'
    t3 = ifelse(c2,t2, x1+t1)
    t3.name = 't3'
    t4 = ifelseifelseif(T.eq(x1,x2), x1, T.eq(x1,5), x2, c2, t3, t3+0.5)
    t4.name = 't4'
    # 'vm' linker required for lazy evaluation of the conditionals.
    f = function([c1,c2,x1,x2], t4, mode=Mode(linker='vm', optimizer='fast_run'))
    # c1=1, c2=0, x1=10, x2=0: hand-checked expected value is 20.5.
    print f(1, 0, numpy.array(10,dtype=x1.dtype),0)
    assert f(1,0,numpy.array(10,dtype=x1.dtype),0) == 20.5
    print '... passed'


if __name__ == '__main__':
    more_complex_test()
import gc
import sys
import time
try:
import line_profiler
except ImportError:
pass
import numpy
from theano import function
from theano.gof import vm,link, OpWiseCLinker
from theano.compile import Mode
from theano import tensor
from theano.lazycond import ifelse
import theano
def test_speed():
    """Benchmark (not a correctness test): time a chain of additions under
    several linkers and against a plain numpy loop, printing s/Kop figures."""
    def build_graph(x, depth=5):
        # Symbolic chain of `depth` additions.
        z = x
        for d in range(depth):
            z = (z + z)
        return z

    def numpy_version(x, depth):
        # The same computation done eagerly in numpy, as a baseline.
        z = x
        for d in xrange(depth):
            z = (z+z)
        return z

    def time_numpy():
        steps_a = 5
        steps_b = 100
        x = numpy.asarray([2.0, 3.0], dtype=theano.config.floatX)
        numpy_version(x, steps_a)   # warm-up call, excluded from the timing
        t0 = time.time()
        print numpy_version(x, steps_a)
        t1 = time.time()
        t2 = time.time()
        print numpy_version(x, steps_b)
        t3 = time.time()
        t_a = t1 - t0
        t_b = t3 - t2
        # Per-op cost estimated from the size difference of the two runs.
        print "%s takes %f s/Kop" % (
                'numpy',
                (1000*(t_b-t_a) / (steps_b - steps_a)))

    def time_linker(name, linker):
        steps_a = 5
        steps_b = 100
        x = tensor.vector()
        a = build_graph(x,steps_a)
        b = build_graph(x,steps_b)
        f_a = function([x], a,
                mode=Mode(optimizer=None, linker=linker()),
                #profile='f_a speed test %s'%name,
                )
        f_b = function([x], b,
                mode=Mode(optimizer=None, linker=linker()),
                #profile='f_b speed test %s'%name,
                )
        # First calls are warm-up; only the second call of each fn is timed.
        print f_a([2.0, 3.0])
        t0 = time.time()
        print f_a([2.0, 3.0])
        t1 = time.time()
        print f_b([2.0, 3.0])
        t2 = time.time()
        print f_b([2.0, 3.0])
        t3 = time.time()
        t_a = t1 - t0
        t_b = t3 - t2
        print "%s takes %f s/Kop" % (
                name,
                (1000*(t_b-t_a) / (steps_b - steps_a)))

    time_linker('c|py', OpWiseCLinker)
    time_linker('vmLinker', vm.VM_Linker)
    time_linker('vmLinker_nogc', lambda : vm.VM_Linker(allow_gc=False))
    time_linker('vmLinker_CLOOP', lambda : vm.VM_Linker(allow_gc=False,
        use_cloop=True))
    time_numpy()
def test_speed_lazy():
    """Benchmark (not a correctness test): time a chain of lazy ifelse ops
    under the different VM linkers, printing s/Kop figures."""
    def build_graph(x, depth=5):
        # Chain of `depth` lazy conditionals; only one branch per level runs.
        z = x
        for d in range(depth):
            z = ifelse(z> 0, -z, z)
        return z

    def time_linker(name, linker):
        steps_a = 10
        steps_b = 100
        x = tensor.vector()
        a = build_graph(x, steps_a)
        b = build_graph(x, steps_b)
        f_a = function([x], a,
                mode=Mode(optimizer=None,
                    linker=linker()),
                #profile='f_a lazy ifelse %s'%name,
                )
        f_b = function([x], b,
                mode=Mode(optimizer=None,
                    linker=linker()),
                #profile='f_b lazy ifelse %s'%name,
                )
        # First calls are warm-up; only the second call of each fn is timed.
        print f_a([2.0])
        t0 = time.time()
        print f_a([2.0])
        t1 = time.time()
        print f_b([2.0])
        t2 = time.time()
        print f_b([2.0])
        t3 = time.time()
        t_a = t1 - t0
        t_b = t3 - t2
        # Per-op cost estimated from the size difference of the two graphs.
        print "%s takes %f s/Kop" % (
                name,
                (1000*(t_b-t_a) / (steps_b - steps_a)))

    time_linker('vmLinker', vm.VM_Linker)
    time_linker('vmLinker_nogc', lambda : vm.VM_Linker(allow_gc=False))
    time_linker('vmLinker_C', lambda : vm.VM_Linker(allow_gc=False,
        use_cloop=True))
# Manual memory-stress tests, disabled by default; flip this flag and watch
# the process RSS (e.g. in `top`) to look for leaks.
run_memory_usage_tests = False
if run_memory_usage_tests:
    # these are not normal unit tests, do not run them as part of standard
    # suite. I ran them while looking at top, and stopped when memory usage
    # was stable.
    def test_leak2():
        # Repeatedly wrap a numpy array in a CudaNdarray and assert that the
        # wrapper leaves the source array's refcount unchanged.
        import theano.sandbox.cuda as cuda
        for i in xrange(1000000):
            n = numpy.asarray([2.3, 4.5], dtype='f')
            c = sys.getrefcount(n)
            a = cuda.CudaNdarray(n)
            assert c == sys.getrefcount(n)
            if not i % 1000:
                # Progress dot plus collected-object counts.
                print '.',
                print gc.collect(),
                print gc.collect()
            sys.stdout.flush()

    def test_no_leak_many_graphs():
        # Verify no memory leaks when creating and deleting a lot of
        # functions.  This isn't really a unit test: you have to run it and
        # look at top to see if there's a leak.
        for i in xrange(10000):
            x = tensor.vector()
            z = x
            for d in range(10):
                z = tensor.sin(-z+ 1)
            f = function([x], z, mode=Mode(optimizer=None, linker='cvm'))
            if not i % 100:
                print gc.collect()
                sys.stdout.flush()
            gc.collect()
            if 1:
                # Also exercise each compiled function a few times.
                f([2.0])
                f([3.0])
                f([4.0])
                f([5.0])

    def test_no_leak_many_call_lazy():
        # Verify no memory leaks when calling a function a lot of times.
        # This isn't really a unit test: you have to run it and look at top
        # to see if there's a leak.
        def build_graph(x, depth=5):
            z = x
            for d in range(depth):
                z = ifelse(z> 0, -z, z)
            return z

        def time_linker(name, linker):
            steps_a = 10
            x = tensor.vector()
            a = build_graph(x, steps_a)
            f_a = function([x], a,
                    mode=Mode(optimizer=None,
                        linker=linker()))
            for i in xrange(100000):
                f_a([2.0])
            if 0: # this doesn't seem to work, prints 0 for everything
                import resource
                pre = resource.getrusage(resource.RUSAGE_SELF)
                post = resource.getrusage(resource.RUSAGE_SELF)
                print pre.ru_ixrss, post.ru_ixrss
                print pre.ru_idrss, post.ru_idrss
                print pre.ru_maxrss, post.ru_maxrss
        time_linker('vmLinker_C', lambda : vm.VM_Linker(allow_gc=False, use_cloop=True))

    def test_no_leak_many_call_nonlazy():
        # Verify no memory leaks when calling a function a lot of times.
        # This isn't really a unit test: you have to run it and look at top
        # to see if there's a leak.
        def build_graph(x, depth=5):
            z = x
            for d in range(depth):
                z = tensor.sin(-z+1)
            return z

        def time_linker(name, linker):
            steps_a = 10
            x = tensor.vector()
            a = build_graph(x,steps_a)
            f_a = function([x], a,
                    mode=Mode(optimizer=None,
                        linker=linker()))
            for i in xrange(500000):
                f_a([2.0])
        time_linker('vmLinker_C', lambda : vm.VM_Linker(allow_gc=False, use_cloop=True))
差异被折叠。
"""
IfElse is an Op that works with the LazyLinker to support conditional graph evaluation.
:TODO: Add text to library documentation describing the IfElse Op.
"""
from copy import deepcopy
from theano.gof import PureOp, Apply, generic, Container
import theano.tensor
import gof
from compile import optdb
from tensor import opt
@gof.local_optimizer([None])
def ifelse_make_inplace(node):
    """Graph optimization: replace a copying IfElse with its view (as_view)
    variant, which aliases the true-branch storage instead of deep-copying.

    Returns the replacement outputs, or False when the node is not a
    copying IfElse.
    """
    op = node.op
    if isinstance(op, IfElse) and not op.as_view:
        # Removed stray debug print that polluted stdout each time this
        # optimization fired.
        return IfElse(as_view=True,
                gpu=op.gpu, name=op.name).make_node(*node.inputs).outputs
    return False

# Run late (position 95) so it only fires after the fast_run rewrites,
# like other inplace optimizations.
optdb.register('ifelse_make_inplace', opt.in2out(ifelse_make_inplace,
    ignore_newtrees=True), 95, 'fast_run', 'inplace')
class IfElse(PureOp):
    """
    Op that works with LazyLinker to support conditional graph evaluation.

    Example usage:

        ``rval = ifelse(tf, rval_if_true, rval_if_false)``

    :param as_view: when True, the output is a view of the true branch
        (``view_map = {0: [1]}``); the false branch is always deep-copied.
    :param gpu: marker consulted by the GPU optimizations (which build
        ``IfElse(gpu=True)`` replacements); not used by the host thunk here.
    :param name: optional name, used e.g. by printing/highlighting tools.

    :note:
        Other Linkers (ALL other linkers right now) are INCOMPATIBLE with this
        Op, they will produce functions that FAIL TO EXECUTE.
    """
    def __init__(self, as_view=False, gpu = False, name = None):
        if as_view:
            # check destroyhandler and others to ensure that a view_map with
            # multiple inputs can work
            view_map = {}
            view_map[0] = [1]   # output 0 may alias input 1 (true branch)
            self.view_map = view_map
            #raise NotImplementedError('IfElse must copy for now')
        self.as_view=as_view
        self.gpu = gpu
        self.name = name

    def make_node(self, c, t, f):
        # Both branches must have the identical type; it becomes the output
        # type.
        if t.type != f.type:
            raise TypeError(
                    'IfElse requires same types for true and false args',
                    (t.type, f.type))
        return Apply(self, [c,t,f], [t.type()])

    def make_thunk(self, node, storage_map, compute_map, no_recycling):
        outtype = node.outputs[0].type
        c,t,f = node.inputs
        output = node.outputs[0]

        def thunk():
            # Lazy protocol: return the indices of inputs that must still be
            # computed before this thunk can finish; [] means "done".
            if not compute_map[c][0]:
                return [0]
            else:
                truthval = storage_map[c][0]
                if truthval:
                    if not compute_map[t][0]:
                        return [1]
                    else:
                        compute_map[output][0]=1
                        if self.as_view:
                            # view: alias the true branch's storage directly
                            oval = outtype.filter(storage_map[t][0])
                        else:
                            oval = outtype.filter(
                                    deepcopy(storage_map[t][0]))
                        storage_map[output][0] = oval
                        return []
                else:
                    if not compute_map[f][0]:
                        return [2]
                    else:
                        # can't view both outputs unless destroyhandler
                        # improves
                        compute_map[output][0]=1
                        oval = outtype.filter(
                                deepcopy(storage_map[f][0]))
                        storage_map[output][0]=oval
                        return []
        thunk.lazy = True
        thunk.inputs = [storage_map[v] for v in node.inputs]
        thunk.outputs = [storage_map[v] for v in node.outputs]
        return thunk

# Default, copying (non-view) instance used to build conditionals.
ifelse = IfElse()
...@@ -391,7 +391,7 @@ default_colorCodes = {'GpuFromHost' : 'red', ...@@ -391,7 +391,7 @@ default_colorCodes = {'GpuFromHost' : 'red',
'HostFromGpu' : 'red', 'HostFromGpu' : 'red',
'Scan' : 'yellow', 'Scan' : 'yellow',
'Shape' : 'cyan', 'Shape' : 'cyan',
'Cond' : 'magenta', 'IfElse' : 'magenta',
'Elemwise': '#FFAABB', 'Elemwise': '#FFAABB',
'Subtensor': '#FFAAFF'} 'Subtensor': '#FFAAFF'}
...@@ -473,10 +473,10 @@ def pydotprint(fct, outfile=None, ...@@ -473,10 +473,10 @@ def pydotprint(fct, outfile=None,
c3 = pd.Cluster('Middle') c3 = pd.Cluster('Middle')
cond = None cond = None
for node in fct_env.toposort(): for node in fct_env.toposort():
if node.op.__class__.__name__=='Cond' and node.op.name == cond_highlight: if node.op.__class__.__name__=='IfElse' and node.op.name == cond_highlight:
cond = node cond = node
if cond is None: if cond is None:
_warn("pydotprint: cond_highlight is set but there is no Cond node in the graph") _warn("pydotprint: cond_highlight is set but there is no IfElse node in the graph")
cond_highlight = None cond_highlight = None
if cond_highlight is not None: if cond_highlight is not None:
......
import atexit, logging, os, stat, sys import atexit, logging, os, stat, sys
from theano.compile import optdb from theano.compile import optdb
from theano import config
from theano.gof.cmodule import get_lib_extension from theano.gof.cmodule import get_lib_extension
from theano.configparser import config, AddConfigVar, StrParam
import nvcc_compiler import nvcc_compiler
_logger_name = 'theano.sandbox.cuda' _logger_name = 'theano.sandbox.cuda'
...@@ -20,6 +20,22 @@ def debug(*msg): ...@@ -20,6 +20,22 @@ def debug(*msg):
_logger.debug('DEBUG (%s): %s'% ( _logger_name, _logger.debug('DEBUG (%s): %s'% ( _logger_name,
' '.join(str(m) for m in msg))) ' '.join(str(m) for m in msg)))
# Configuration and one-time setup for locating the CUDA toolkit.
AddConfigVar('cuda.root',
        """directory with bin/, lib/, include/ for cuda utilities.
        This directory is included via -L and -rpath when linking dynamically
        compiled modules. If AUTO, if nvcc is in the path, it will use one of
        this parent directory. Otherwise /usr/local/cuda. Leave empty to
        prevent extra linker directives.
        Default: environment variable "CUDA_ROOT" or else "AUTO".
        """,
        StrParam(os.getenv('CUDA_ROOT', "AUTO")))

if config.cuda.root == "AUTO":
    # set nvcc_path correctly and get the version
    nvcc_compiler.set_cuda_root()

# is_nvcc_available called here to initialize global vars in nvcc_compiler
# module
nvcc_compiler.is_nvcc_available()
# Compile cuda_ndarray.cu # Compile cuda_ndarray.cu
# This need that nvcc (part of cuda) is installed. If it is not, a warning is # This need that nvcc (part of cuda) is installed. If it is not, a warning is
......
...@@ -7,20 +7,7 @@ import commands ...@@ -7,20 +7,7 @@ import commands
_logger=logging.getLogger("theano.sandbox.cuda.nvcc_compiler") _logger=logging.getLogger("theano.sandbox.cuda.nvcc_compiler")
_logger.setLevel(logging.WARN) _logger.setLevel(logging.WARN)
from theano.configparser import config, AddConfigVar, StrParam from theano.configparser import config, AddConfigVar, StrParam, BoolParam
AddConfigVar('nvcc.compiler_bindir',
"If defined, nvcc compiler driver will seek g++ and gcc in this directory",
StrParam(""))
AddConfigVar('cuda.nvccflags',
"Extra compiler flags for nvcc",
StrParam(""))
AddConfigVar('cuda.root',
"The directory with bin/, lib/, include/ for cuda utilities. Used to put this directory of nvidia lib in the compiled libraire. Usefull when people forget to update there LD_LIBRARY_PATH and LIBRARY_PATH environment variable. If AUTO, if nvcc is in the path, it will use one of this parent directory. Otherwise /usr/local/cuda. If empty, won't appen the directory in the compiled library",
StrParam(os.getenv('CUDA_ROOT', "AUTO")))
def error(*args): def error(*args):
#sys.stderr.write('ERROR:'+ ' '.join(str(a) for a in args)+'\n') #sys.stderr.write('ERROR:'+ ' '.join(str(a) for a in args)+'\n')
...@@ -35,6 +22,18 @@ def debug(*args): ...@@ -35,6 +22,18 @@ def debug(*args):
#sys.stderr.write('DEBUG:'+ ' '.join(str(a) for a in args)+'\n') #sys.stderr.write('DEBUG:'+ ' '.join(str(a) for a in args)+'\n')
_logger.debug("DEBUG: "+' '.join(str(a) for a in args)) _logger.debug("DEBUG: "+' '.join(str(a) for a in args))
# nvcc-specific configuration flags.
AddConfigVar('nvcc.compiler_bindir',
        "If defined, nvcc compiler driver will seek g++ and gcc in this directory",
        StrParam(""))

# Extra flags are split on spaces and appended to the nvcc command line.
AddConfigVar('nvcc.flags',
        "Extra compiler flags for nvcc",
        StrParam(""))

# NOTE(review): description left empty upstream; presumably toggles nvcc
# fast-math code generation -- confirm before documenting the help string.
AddConfigVar('nvcc.fastmath',
        "",
        BoolParam(False))
nvcc_path = 'nvcc' nvcc_path = 'nvcc'
nvcc_version = None nvcc_version = None
def is_nvcc_available(): def is_nvcc_available():
...@@ -66,11 +65,6 @@ def set_cuda_root(): ...@@ -66,11 +65,6 @@ def set_cuda_root():
config.cuda.root = os.path.split(dir)[0] config.cuda.root = os.path.split(dir)[0]
return return
if config.cuda.root == "AUTO":
set_cuda_root()
is_nvcc_available()#to set nvcc_path correctly and get the version
rpath_defaults = [] rpath_defaults = []
def add_standard_rpath(rpath): def add_standard_rpath(rpath):
rpath_defaults.append(rpath) rpath_defaults.append(rpath)
...@@ -183,11 +177,9 @@ def nvcc_module_compile_str( ...@@ -183,11 +177,9 @@ def nvcc_module_compile_str(
if sys.platform != 'darwin': if sys.platform != 'darwin':
# the 64bit CUDA libs are in the same files as are named by the function above # the 64bit CUDA libs are in the same files as are named by the function above
rpaths.append(os.path.join(config.cuda.root,'lib64')) rpaths.append(os.path.join(config.cuda.root,'lib64'))
for rpath in rpaths: for rpath in rpaths:
cmd.extend(['-Xlinker',','.join(['-rpath',rpath])]) cmd.extend(['-Xlinker',','.join(['-rpath',rpath])])
nvccflags = [flag for flag in config.cuda.nvccflags.split(' ') if flag] cmd.extend([flag for flag in config.nvcc.flags.split(' ') if flag])
cmd.extend(nvccflags)
cmd.extend('-I%s'%idir for idir in include_dirs) cmd.extend('-I%s'%idir for idir in include_dirs)
cmd.extend(['-o',lib_filename]) cmd.extend(['-o',lib_filename])
cmd.append(os.path.split(cppfilename)[-1]) cmd.append(os.path.split(cppfilename)[-1])
......
...@@ -270,6 +270,48 @@ def local_gpu_dot_to_dot22(node): ...@@ -270,6 +270,48 @@ def local_gpu_dot_to_dot22(node):
shape_out))] shape_out))]
return False return False
@register_opt()
@local_optimizer([])
def local_gpu_lazy_ifelse(node):
    """
    Move a lazy ifelse to the GPU when it touches GPU transfers:

        gpu_from_host(ifelse(c, t, f)) -> gpu_ifelse(c, gpu(t), gpu(f))
        ifelse(..., host_from_gpu(x), ...) -> host_from_gpu(gpu_ifelse(...))
    """
    import theano
    # Guard: theano.lazycond may be absent from this build.
    if hasattr(theano,"lazycond"):
        gpu_ifelse = theano.lazycond.IfElse(gpu = True)

        if node.op == gpu_from_host:
            host_input = node.inputs[0]
            if (host_input.owner
                    and host_input.owner.op == theano.lazycond.ifelse):
                c, t, f = host_input.owner.inputs
                # Move both branch values to the GPU...
                if not isinstance(f.type,CudaNdarrayType):
                    f = gpu_from_host(f)
                if not isinstance(t.type,CudaNdarrayType):
                    t = gpu_from_host(t)
                # ...but keep the condition on the host.
                if isinstance(c.type,CudaNdarrayType):
                    c = host_from_gpu(c)
                return [gpu_ifelse(c, t, f)]

        if node.op == theano.lazycond.ifelse:
            # If any input already comes back from the GPU, lift the whole
            # ifelse to the GPU and transfer only the result.
            if numpy.any([(i.owner and i.owner.op == host_from_gpu) for i in node.inputs]):
                c, t, f = node.inputs
                if not isinstance(f.type,CudaNdarrayType):
                    f = gpu_from_host(f)
                if not isinstance(t.type,CudaNdarrayType):
                    t = gpu_from_host(t)
                if isinstance(c.type,CudaNdarrayType):
                    c = host_from_gpu(c)
                return [host_from_gpu(gpu_ifelse(c, t, f))]
    return False
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([])
......
...@@ -567,7 +567,7 @@ class ScanMerge(gof.Optimizer): ...@@ -567,7 +567,7 @@ class ScanMerge(gof.Optimizer):
def apply(self, env): def apply(self, env):
nodelist = list(env.toposort()) nodelist = list(env.toposort())
cond_nodes = [ x for x in nodelist if x.op.__class__.__name__=='Cond'] cond_nodes = [ x for x in nodelist if x.op.__class__.__name__=='IfElse']
scan_nodes = [ x for x in nodelist if x.op.__class__.__name__=='Scan'] scan_nodes = [ x for x in nodelist if x.op.__class__.__name__=='Scan']
# Having lazy ifs in the graph complicates a bit things, and for # Having lazy ifs in the graph complicates a bit things, and for
......
...@@ -133,6 +133,79 @@ def sp_ones_like(x): ...@@ -133,6 +133,79 @@ def sp_ones_like(x):
data, indices, indptr, shape = csm_properties(x) #TODO: don't restrict to CSM formats data, indices, indptr, shape = csm_properties(x) #TODO: don't restrict to CSM formats
return CSM(format=x.format)(tensor.ones_like(data), indices, indptr, shape) return CSM(format=x.format)(tensor.ones_like(data), indices, indptr, shape)
class _sparse_py_operators:
    # Mixin giving sparse Variables/Constants the usual Python operator
    # syntax by delegating to the module-level sparse Ops.
    T = property(lambda self: transpose(self), doc = "Return aliased transpose of self (read-only)")
    def __neg__(self): return neg(self)
    def __add__(left, right): return add(left, right)
    def __radd__(right, left): return add(left, right)
    def __sub__(left, right): return sub(left, right)
    def __rsub__(right, left): return sub(left, right)
    def __mul__(left, right): return mul(left, right)
    def __rmul__(left, right): return mul(left, right)

    #extra pseudo-operator symbols
    def __dot__(left, right): return structured_dot(left, right)
    def __rdot__(right, left): return structured_dot(left, right)

    #N.B. THIS IS COMMENTED OUT ON PURPOSE!!!
    # Discussion with Fred & James (at least, and maybe others before)
    # we decided that casting from a sparse to dense should be explicit
    # because it's usually something you want to be pretty careful about,
    # and not to do by accident.
    #def _as_TensorVariable(self):
    #    return dense_from_sparse(self)

    shape = property(lambda self: tensor.shape(dense_from_sparse(self))) # don't worry!
    # ... the plan is that the ShapeFeature in tensor.opt will do shape propagation
    # ... and remove the dense_from_sparse from the graph. This will *NOT* actually expand
    # ... your sparse matrix just to get the shape.
    ndim = property(lambda self: self.type.ndim)
    dtype = property(lambda self: self.type.dtype)
class SparseVariable(gof.Variable, _sparse_py_operators):
    """Symbolic sparse variable; renders as ``ClassName{format,dtype}``."""

    dtype = property(lambda self: self.type.dtype)
    format = property(lambda self: self.type.format)

    def __str__(self):
        # e.g. "SparseVariable{csc,float64}"
        return '%s{%s,%s}' % (self.__class__.__name__, self.format, self.dtype)

    def __repr__(self):
        return self.__str__()
class SparseConstantSignature(tuple):
    """Hashable (type, data) pair used as the merge signature of a sparse
    constant.

    Two signatures compare equal when the types match, the data objects
    agree on class/dtype/shape, and the element-wise difference is
    negligible relative to the number of stored elements.
    """

    def __eq__(self, other):
        this_type, this_data = self
        other_type, other_data = other
        return (this_type == other_type
                and this_data.dtype == other_data.dtype
                and type(this_data) == type(other_data)
                and this_data.shape == other_data.shape
                # tolerance scales with nnz, the stored-element count
                and abs(this_data - other_data).sum() < 1e-6 * this_data.nnz)

    def __hash__(self):
        sig_type, sig_data = self
        # Hash only type information: data contents are checked by __eq__.
        return hash(type(self)) ^ hash(sig_type) ^ hash(type(sig_data))
class SparseConstant(gof.Constant, _sparse_py_operators):
    # Graph constant whose ``self.data`` holds the actual sparse matrix
    # (presumably a scipy.sparse matrix -- it must expose .shape and .nnz).
    dtype = property(lambda self: self.type.dtype)
    format = property(lambda self: self.type.format)

    def signature(self):
        # Hashable signature used to decide whether two constants can be
        # merged; see SparseConstantSignature.
        assert self.data is not None
        return SparseConstantSignature((self.type, self.data))

    def __str__(self):
        return '%s{%s,%s,shape=%s,nnz=%s}'%(
                self.__class__.__name__,
                self.format,
                self.dtype,
                self.data.shape,
                self.data.nnz)

    def __repr__(self):
        return str(self)
class SparseValue(gof.Value, _sparse_py_operators):
    # Value counterpart of SparseVariable, with the same convenience
    # properties delegating to the type.
    dtype = property(lambda self: self.type.dtype)
    format = property(lambda self: self.type.format)
class SparseType(gof.Type): class SparseType(gof.Type):
""" """
...@@ -149,6 +222,9 @@ class SparseType(gof.Type): ...@@ -149,6 +222,9 @@ class SparseType(gof.Type):
dtype_set = set(['int', 'int8', 'int16','int32', 'int64', 'float32', 'float64', 'complex64','complex128']) dtype_set = set(['int', 'int8', 'int16','int32', 'int64', 'float32', 'float64', 'complex64','complex128'])
ndim = 2 ndim = 2
Variable = SparseVariable
Constant = SparseConstant
def __init__(self, format, dtype): def __init__(self, format, dtype):
""" """
Fundamental way to create a sparse node. Fundamental way to create a sparse node.
...@@ -248,65 +324,6 @@ csr_dmatrix = SparseType(format='csr', dtype='float64') ...@@ -248,65 +324,6 @@ csr_dmatrix = SparseType(format='csr', dtype='float64')
csc_fmatrix = SparseType(format='csc', dtype='float32') csc_fmatrix = SparseType(format='csc', dtype='float32')
csr_fmatrix = SparseType(format='csr', dtype='float32') csr_fmatrix = SparseType(format='csr', dtype='float32')
class _sparse_py_operators:
T = property(lambda self: transpose(self), doc = "Return aliased transpose of self (read-only)")
def __neg__(self): return neg(self)
def __add__(left, right): return add(left, right)
def __radd__(right, left): return add(left, right)
def __sub__(left, right): return sub(left, right)
def __rsub__(right, left): return sub(left, right)
def __mul__(left, right): return mul(left, right)
def __rmul__(left, right): return mul(left, right)
#extra pseudo-operator symbols
def __dot__(left, right): return structured_dot(left, right)
def __rdot__(right, left): return structured_dot(left, right)
#N.B. THIS IS COMMENTED OUT ON PURPOSE!!!
# Discussion with Fred & James (at least, and maybe others before)
# we decided that casting from a sparse to dense should be explicit
# because it's usually something you want to be pretty careful about,
# and not to do by accident.
#def _as_TensorVariable(self):
# return dense_from_sparse(self)
shape = property(lambda self: tensor.shape(dense_from_sparse(self))) # don't worry!
# ... the plan is that the ShapeFeature in tensor.opt will do shape propagation
# ... and remove the dense_from_sparse from the graph. This will *NOT* actually expand
# ... your sparse matrix just to get the shape.
ndim = property(lambda self: self.type.ndim)
dtype = property(lambda self: self.type.dtype)
class SparseVariable(gof.Variable, _sparse_py_operators):
dtype = property(lambda self: self.type.dtype)
format = property(lambda self: self.type.format)
class SparseConstantSignature(tuple):
def __eq__(self, other):
(a, b), (x,y) = self, other
return a == x \
and (b.dtype == y.dtype)\
and (type(b) == type(y))\
and (b.shape == y.shape)\
and (abs(b-y).sum() < 1e-6 * b.nnz)
def __hash__(self):
(a,b) = self
return hash(type(self)) ^ hash(a) ^ hash(type(b))
class SparseConstant(gof.Constant, _sparse_py_operators):
dtype = property(lambda self: self.type.dtype)
format = property(lambda self: self.type.format)
def signature(self):
assert self.data is not None
return SparseConstantSignature((self.type, self.data))
class SparseValue(gof.Value, _sparse_py_operators):
dtype = property(lambda self: self.type.dtype)
format = property(lambda self: self.type.format)
# CONSTRUCTION # CONSTRUCTION
class CSMProperties(gof.Op): class CSMProperties(gof.Op):
"""Extract all of .data .indices and .indptr""" """Extract all of .data .indices and .indptr"""
......
...@@ -937,6 +937,9 @@ def _gemm_from_node2(node): ...@@ -937,6 +937,9 @@ def _gemm_from_node2(node):
lst = _factor_canonicalized(lst) lst = _factor_canonicalized(lst)
rval = _gemm_from_factored_list(lst) rval = _gemm_from_factored_list(lst)
#print "RVAL", rval #print "RVAL", rval
# THIS GOT COMMENTED OUT AT SOME POINT - ASK P.Lamblin maybe why?
#if rval:
# assert rval[0].type == node.outputs[0].type, (rval[0].type, node.outputs[0].type)
if rval and (rval[0].type == node.outputs[0].type): if rval and (rval[0].type == node.outputs[0].type):
return rval return rval
......
...@@ -3057,30 +3057,33 @@ def constant_folding(node): ...@@ -3057,30 +3057,33 @@ def constant_folding(node):
for input in node.inputs: for input in node.inputs:
if not isinstance(input, Constant): if not isinstance(input, Constant):
return False return False
try: #condition: all inputs are constant
storage = [[None] for output in node.outputs]
node.op.perform(node, [x.data for x in node.inputs], storage) storage_map=dict([(i,[i.data]) for i in node.inputs])
except MethodNotDefined: compute_map=dict([(i,[True]) for i in node.inputs])
tmp_inputs = [x.type() for x in node.inputs] for o in node.outputs:
f = compile.function( storage_map[o] = [None]
inputs=tmp_inputs, compute_map[o] = [False]
outputs=node.op.make_node(*tmp_inputs).outputs,
mode=compile.Mode(linker='c|py',optimizer=None)) thunk = node.op.make_thunk(node, storage_map, compute_map,
xvals = f(*[x.data for x in node.inputs]) no_recycling=[])
storage = [[xv] for xv in xvals]
required = thunk()
msg = [] assert not required # a node whose inputs are all provided should always
assert len(storage) == len(node.outputs) # return successfully
for s, output in zip(storage, node.outputs):
rval = []
for output in node.outputs:
assert compute_map[output][0], (output, storage_map[output][0])
try: try:
constant = output.type.Constant constant = output.type.Constant
except: except AttributeError:
constant = Constant constant = Constant
msg += [constant(output.type, s[0])] rval.append(constant(output.type, storage_map[output][0]))
return msg return rval
register_canonicalize(constant_folding, 'fast_compile') register_canonicalize(constant_folding, 'fast_compile')
register_stabilize(constant_folding) # because register_stabilize(constant_folding)
register_specialize(constant_folding) register_specialize(constant_folding)
def _is_1(expr): def _is_1(expr):
......
...@@ -20,7 +20,7 @@ def test_no_reuse(): ...@@ -20,7 +20,7 @@ def test_no_reuse():
return return
assert not 'should not get here' assert not 'should not get here'
def test_gc(): def test_gc_never_pickles_temporaries():
x = T.dvector() x = T.dvector()
#print >> sys.stderr, 'BUILDING GRAPH' #print >> sys.stderr, 'BUILDING GRAPH'
...@@ -32,32 +32,63 @@ def test_gc(): ...@@ -32,32 +32,63 @@ def test_gc():
optimizer=None optimizer=None
optimizer='fast_run' optimizer='fast_run'
for f_linker, g_linker in [ for f_linker, g_linker in [
(theano.PerformLinker(allow_gc = True), theano.PerformLinker(allow_gc=False)), (theano.PerformLinker(allow_gc = True), theano.PerformLinker(allow_gc=False)),
(theano.OpWiseCLinker(allow_gc = True), theano.OpWiseCLinker(allow_gc=False))]: (theano.OpWiseCLinker(allow_gc = True), theano.OpWiseCLinker(allow_gc=False))]:
#f_linker has garbage collection
#g_linker has no garbage collection
#print >> sys.stderr, 'COMPILING' #print >> sys.stderr, 'COMPILING'
f = theano.function([x], r,mode=theano.Mode(optimizer=optimizer, linker=f_linker)) f = theano.function([x], r,mode=theano.Mode(optimizer=optimizer, linker=f_linker))
g = theano.function([x], r,mode=theano.Mode(optimizer=optimizer, linker=g_linker))
len_pre_f = len(cPickle.dumps(f))
len_pre_g = len(cPickle.dumps(g))
# should be no difference at first
# In future, FunctionMaker might pickle linker-dependent stuff and make
# this assertion fail.
assert len_pre_f == len_pre_g
def a(fn):
return len(cPickle.dumps(fn.maker))
assert a(f) == a(f) # some sanity checks on the pickling mechanism
assert a(g) == a(g) # some sanity checks on the pickling mechanism
g = theano.function([x], r,mode=theano.Mode(optimizer=optimizer, linker=f_linker)) def b(fn):
return len(
cPickle.dumps(
theano.compile.function_module._pickle_Function(
fn)))
assert b(f) == b(f) # some sanity checks on the pickling mechanism
pre_f = cPickle.dumps(f) def c(fn):
pre_g = cPickle.dumps(g) return len(cPickle.dumps(fn))
assert c(f) == c(f) # some sanity checks on the pickling mechanism
assert c(g) == c(g) # some sanity checks on the pickling mechanism
#print >> sys.stderr, 'RUNNING'
# now run the function once to create temporaries within the no-gc
# linker
f(numpy.ones(100, dtype='float64')) f(numpy.ones(100, dtype='float64'))
g(numpy.ones(100, dtype='float64')) g(numpy.ones(100, dtype='float64'))
# serialize the functions again
post_f = cPickle.dumps(f) post_f = cPickle.dumps(f)
post_g = cPickle.dumps(g) post_g = cPickle.dumps(g)
#because allow_gc should leave the function un-changed by calling
assert len(pre_f) == len(post_f)
#because temporaries that weren't collected shouldn't be pickled anyway
len_post_f = len(post_f) len_post_f = len(post_f)
len_post_g = len(post_g) len_post_g = len(post_g)
assert len_post_f == len_post_g
#assert that f() didn't cause the function to grow
# allow_gc should leave the function un-changed by calling
assert len_pre_f == len_post_f
#assert that g() didn't cause g to grow
# because temporaries that weren't collected shouldn't be pickled anyway
assert len_post_f == len_post_g, (f_linker, len_post_f, len_post_g)
def test_merge_opt_runtime(): def test_merge_opt_runtime():
......
...@@ -49,11 +49,14 @@ class T_random_function(unittest.TestCase): ...@@ -49,11 +49,14 @@ class T_random_function(unittest.TestCase):
rng_R = random_state_type() rng_R = random_state_type()
# use make_node to override some of the self.args # use make_node to override some of the self.args
post_r2, out2 = rf2(rng_R, (4,), -2, 2) post_r2, out2 = rf2(rng_R, (4,), -2, 2) # NOT INPLACE
post_r2_4, out2_4 = rf2(rng_R, (4,), -4.0, 2) post_r4, out4 = rf4(rng_R, (4,), -4, 4) # INPLACE
post_r2_4_4, out2_4_4 = rf2(rng_R, (4,), -4.0, 4.0) post_r2_4, out2_4 = rf2(rng_R, (4,), -4.0, 2) # NOT INPLACE
post_r4, out4 = rf4(rng_R, (4,), -4, 4) post_r2_4_4, out2_4_4 = rf2(rng_R, (4,), -4.0, 4.0) # NOT INPLACE
# configure out4 to be computed inplace
# The update expression means that the random state rng_R will
# be maintained by post_r4
f = compile.function( f = compile.function(
[compile.In(rng_R, [compile.In(rng_R,
value=numpy.random.RandomState(utt.fetch_seed()), value=numpy.random.RandomState(utt.fetch_seed()),
...@@ -65,9 +68,25 @@ class T_random_function(unittest.TestCase): ...@@ -65,9 +68,25 @@ class T_random_function(unittest.TestCase):
f2, f4, f2_4, f2_4_4 = f() f2, f4, f2_4, f2_4_4 = f()
f2b, f4b, f2_4b, f2_4_4b = f() f2b, f4b, f2_4b, f2_4_4b = f()
assert numpy.allclose(f2*2, f4) print f2
assert numpy.allclose(f2_4_4, f4) print f4
assert not numpy.allclose(f4, f4b) print f2_4
print f2_4_4
#print f2b
#print f4b
#print f2_4b
#print f2_4_4b
# setting bounds is same as multiplying by 2
assert numpy.allclose(f2*2, f4), (f2, f4)
# retrieving from non-inplace generator
# is same as inplace one for first call
assert numpy.allclose(f2_4_4, f4), (f2_4_4, f4)
# f4 changes from call to call, that the update has worked
assert not numpy.allclose(f4, f4b), (f4, f4b)
def test_inplace_optimization(self): def test_inplace_optimization(self):
"""Test that FAST_RUN includes the random_make_inplace optimization""" """Test that FAST_RUN includes the random_make_inplace optimization"""
......
...@@ -13,19 +13,32 @@ from theano.tests import unittest_tools as utt ...@@ -13,19 +13,32 @@ from theano.tests import unittest_tools as utt
should ensure that it will remain operational should ensure that it will remain operational
''' '''
class T_diverse(unittest.TestCase): class T_scipy(unittest.TestCase):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
self.orig_floatX = theano.config.floatX
def tearDown(self):
theano.config.floatX = self.orig_floatX
def scipy_paper_example1(self): def test_scipy_paper_example1(self):
a = theano.tensor.vector('a') # declare variable a = theano.tensor.vector('a') # declare variable
b = a + a**10 # build expression b = a + a**10 # build expression
f = theano.function([a], b) # compile function f = theano.function([a], b) # compile function
assert numpy.all(f([0,1,2]) == numpy.array([0,2,1026])) assert numpy.all(f([0,1,2]) == numpy.array([0,2,1026]))
def scipy_papaer_example2(self): def test_scipy_paper_example2(self):
''' This just sees if things compile well and if they run ''' ''' This just sees if things compile well and if they run '''
# PREAMPBLE
T = theano.tensor
shared = theano.shared
function = theano.function
rng = numpy.random
theano.config.floatX='float64'
#
# ACTUAL SCRIPT FROM PAPER
x = T.matrix() x = T.matrix()
y = T.vector() y = T.vector()
w = shared(rng.randn(100)) w = shared(rng.randn(100))
...@@ -52,6 +65,7 @@ class T_diverse(unittest.TestCase): ...@@ -52,6 +65,7 @@ class T_diverse(unittest.TestCase):
for i in range(training_steps): for i in range(training_steps):
pred, err = train(D[0], D[1]) pred, err = train(D[0], D[1])
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论