提交 e9f194f5 authored 作者: Olivier Delalleau's avatar Olivier Delalleau

Merged

...@@ -38,11 +38,13 @@ There are less methods to define for an Op than for a Type: ...@@ -38,11 +38,13 @@ There are less methods to define for an Op than for a Type:
*Default:* The default behavior is to do nothing. *Default:* The default behavior is to do nothing.
.. function:: c_compile_args() .. function:: c_compile_args()
c_no_compile_args()
c_headers() c_headers()
c_libraries() c_libraries()
c_support_code() c_support_code()
Allows you to specify headers, libraries, special g++ arguments or Allows you to specify headers, libraries,
special g++ arguments to add/exclude or
helper functions/structs that the type needs. See :ref:`op`. helper functions/structs that the type needs. See :ref:`op`.
......
...@@ -75,11 +75,13 @@ the most important ones: ...@@ -75,11 +75,13 @@ the most important ones:
decrease the appropriate reference counts. decrease the appropriate reference counts.
.. function:: c_compile_args() .. function:: c_compile_args()
c_no_compile_args()
c_headers() c_headers()
c_libraries() c_libraries()
c_support_code() c_support_code()
Allows you to specify headers, libraries, special g++ arguments or Allows you to specify headers, libraries,
special g++ arguments to add/exclude or
helper functions/structs that the type needs. See :ref:`type`. helper functions/structs that the type needs. See :ref:`type`.
......
...@@ -260,7 +260,7 @@ Glossary of terminology ...@@ -260,7 +260,7 @@ Glossary of terminology
* making :term:`Apply` instances, which mean "apply this TOI to some particular inputs" (via the ``make_node``), * making :term:`Apply` instances, which mean "apply this TOI to some particular inputs" (via the ``make_node``),
* performing the calculation of outputs from given inputs (via the ``perform``), * performing the calculation of outputs from given inputs (via the ``perform``),
* producing c code to perform calculation of outputs from inputs (via ``c_code, c_code_cleanup, c_support_code, c_headers, c_libraries, c_compile_args``) * producing c code to perform calculation of outputs from inputs (via ``c_code, c_code_cleanup, c_support_code, c_headers, c_libraries, c_compile_args, c_no_compile_args``)
* [optionally] building gradient-calculating graphs (via ``grad``). * [optionally] building gradient-calculating graphs (via ``grad``).
See :ref:`intro_to_ops`. See :ref:`intro_to_ops`.
......
...@@ -135,12 +135,10 @@ Now, using ``Module``: ...@@ -135,12 +135,10 @@ Now, using ``Module``:
m = M.Module() m = M.Module()
n = T.scalar('n') n = T.scalar('n')
m.c = T.scalar() # state variables m.c = T.scalar() # state variables
m.inc = M.Method(n, [], c = m.c + n) # m.c <= m.c + n m.inc = M.Method(n, [], updates = {m.c: m.c + n}) # m.c <= m.c + n
m.dec = M.Method(n, [], c = m.c - n) # m.c <= m.c - n m.dec = M.Method(n, [], updates = {m.c: m.c - n}) # m.c <= m.c - n
m.dec = M.Method(n, [], updates = {m.c: m.c - n})#alternative syntax
#m.dec = M.Method(n, [], updates = {c: m.c - n})#global c does not exist #m.dec = M.Method(n, [], updates = {c: m.c - n})#global c does not exist
#m.dec = M.Method(n, [], m.c = m.c - n) #python don't suppor this syntax #m.plus10 does not update the state
#m.plus10 don't update the state
m.plus10 = M.Method([], m.c + 10) # m.c is always accessible since it is a member of this module m.plus10 = M.Method([], m.c + 10) # m.c is always accessible since it is a member of this module
inst = m.make(c = 0) # here, we make an "instance" of the module with c initialized to 0 inst = m.make(c = 0) # here, we make an "instance" of the module with c initialized to 0
...@@ -192,8 +190,8 @@ Using Module: ...@@ -192,8 +190,8 @@ Using Module:
m = M.Module() m = M.Module()
n = T.scalar('n') n = T.scalar('n')
m.c = T.scalar() # state variables m.c = T.scalar() # state variables
m.inc = M.Method(n, [], c = m.c + n) # m.c <= m.c + n m.inc = M.Method(n, [], updates = {m.c: m.c + n}) # m.c <= m.c + n
m.dec = M.Method(n, [], c = m.c - n) # m.c <= m.c - n m.dec = M.Method(n, [], updates = {m.c: m.c - n}) # m.c <= m.c - n
return m return m
m = M.Module() m = M.Module()
......
...@@ -36,7 +36,7 @@ The ``inputs`` argument to ``theano.function`` is a list, containing the ``Varia ...@@ -36,7 +36,7 @@ The ``inputs`` argument to ``theano.function`` is a list, containing the ``Varia
.. class:: In .. class:: In
.. method:: __init__(variable, name=None, value=None, update=None, mutable=False) .. method:: __init__(variable, name=None, value=None, update=None, mutable=False, strict=False, autoname=True, implicit=None)
``variable``: a Variable instance. This will be assigned a value ``variable``: a Variable instance. This will be assigned a value
before running the function, not computed from its owner. before running the function, not computed from its owner.
...@@ -46,8 +46,12 @@ The ``inputs`` argument to ``theano.function`` is a list, containing the ``Varia ...@@ -46,8 +46,12 @@ The ``inputs`` argument to ``theano.function`` is a list, containing the ``Varia
can be set by ``kwarg``, and its value can be accessed by can be set by ``kwarg``, and its value can be accessed by
``self.<name>``. The default value is ``None``. ``self.<name>``. The default value is ``None``.
``value``: literal or Container. This is the default value of ``value``: literal or ``Container``. The initial/default value for this
the Input. The default value of this parameter is ``None`` input. If update is ``None``, this input acts just like
an argument with a default value in Python. If update is not ``None``,
changes to this
value will "stick around", whether due to an update or a user's
explicit action.
``update``: Variable instance. This expression Variable will ``update``: Variable instance. This expression Variable will
replace ``value`` after each function call. The default value is replace ``value`` after each function call. The default value is
...@@ -57,11 +61,28 @@ The ``inputs`` argument to ``theano.function`` is a list, containing the ``Varia ...@@ -57,11 +61,28 @@ The ``inputs`` argument to ``theano.function`` is a list, containing the ``Varia
compiled function to modify the Python object being used as the compiled function to modify the Python object being used as the
default value. The default value is ``False``. default value. The default value is ``False``.
``strict``: Bool (default: ``False`` ). ``True`` means that the value
you pass for this input must have exactly the right type. Otherwise, it
may be cast automatically to the proper type.
``autoname``: Bool. If set to ``True``, if ``name`` is ``None`` and ``autoname``: Bool. If set to ``True``, if ``name`` is ``None`` and
the Variable has a name, it will be taken as the input's the Variable has a name, it will be taken as the input's
name. If autoname is set to ``False``, the name is the exact name. If autoname is set to ``False``, the name is the exact
value passed as the name parameter (possibly ``None``). value passed as the name parameter (possibly ``None``).
``implicit``: Bool or ``None`` (default: ``None``)
``True``: This input is implicit in the sense that the user is not allowed
to provide a value for it. Requires ``value`` to be set.
``False``: The user can provide a value for this input. Be careful
when ``value`` is a container, because providing an input value will
overwrite the content of this container.
``None``: Automatically choose between ``True`` or ``False`` depending on the
situation. It will be set to ``False`` in all cases except if
``value`` is a container (so that there is less risk of accidentally
overwriting its content without being aware of it).
Value: initial and default values Value: initial and default values
--------------------------------- ---------------------------------
...@@ -136,6 +157,31 @@ Theano's Module system uses this mechanism to share storage between Methods. ...@@ -136,6 +157,31 @@ Theano's Module system uses this mechanism to share storage between Methods.
The container being shared doesn't have to correspond to the same Variable in both functions, The container being shared doesn't have to correspond to the same Variable in both functions,
but that's usually how this mechanism is used. but that's usually how this mechanism is used.
Note that when an input's ``value`` parameter is a shared container, this
input is considered as implicit by default. This means it cannot be set by the
user.
If ``implicit`` is manually set to ``False``, then it can be set by the user,
but then it will overwrite the container's content, so one should be careful
when allowing this.
This is illustrated in the following example.
>>> dec(1, 0) # Try to manually set an implicit input
<type 'exceptions.TypeError'>: Tried to provide value for implicit input: s
>>> dec = function([x, In(s, update=(s-x), value=inc.container[s], implicit=False)], [])
>>> inc[s] = 2
>>> print dec[s] # Containers are shared
2.0
>>> dec(1)
[]
>>> print inc[s] # Calling dec decreased the value in inc's container
1.0
>>> dec(1, 0) # Update inc[s] with 0 - 1 = -1
[]
>>> print inc[s]
-1.0
>>> print dec[s] # Still shared
-1.0
Input Argument Restrictions Input Argument Restrictions
--------------------------- ---------------------------
...@@ -168,8 +214,8 @@ instance explicitly with the ``autoname`` flag set to False. ...@@ -168,8 +214,8 @@ instance explicitly with the ``autoname`` flag set to False.
Access to function values and containers Access to function values and containers
---------------------------------------- ----------------------------------------
For each input, ``theano.function`` will create a ``Container`` if the For each input, ``theano.function`` will create a ``Container`` if
value was not already a ``Container``. At the time of a function call, ``value`` was not already a ``Container`` (or if ``implicit`` was ``False``). At the time of a function call,
each of these containers must be filled with a value. Each input (but each of these containers must be filled with a value. Each input (but
especially ones with a default value or an update expression) may have a especially ones with a default value or an update expression) may have a
value between calls. The function interface defines a way to get at value between calls. The function interface defines a way to get at
......
...@@ -215,7 +215,7 @@ class Function(object): ...@@ -215,7 +215,7 @@ class Function(object):
self.return_none = return_none self.return_none = return_none
self.maker = maker self.maker = maker
# we'll be popping stuff off this `containers` object. It's a copy # We will be popping stuff off this `containers` object. It is a copy.
containers = list(self.input_storage) containers = list(self.input_storage)
finder = {} finder = {}
inv_finder = {} inv_finder = {}
...@@ -229,15 +229,26 @@ class Function(object): ...@@ -229,15 +229,26 @@ class Function(object):
#setters = [] #setters = []
# Initialize the storage # Initialize the storage
# this loop works by modifying the elements (as variable c) of self.input_storage inplace.
for i, ((input, indices, sinputs), (required, refeed, value)) in enumerate(zip(self.indices, defaults)): for i, ((input, indices, sinputs), (required, refeed, value)) in enumerate(zip(self.indices, defaults)):
if indices is None: # this is true iff input is not a SymbolicInputKit if indices is None: # this is true iff input is not a SymbolicInputKit
c = containers[0] #containers is being used as a stack. Here we pop off the next one. c = containers[0] #containers is being used as a stack. Here we pop off the next one.
if input.strict: if input.strict:
c.strict = True c.strict = True
if value is not None: if value is not None:
# always initialize the storage # Always initialize the storage.
c.data = value if isinstance(value, gof.Container):
# There is no point in obtaining the current value
# stored in the container, since the container is
# shared.
# For safety, we make sure 'refeed' is False, since
# there is no need to refeed the default value.
assert not refeed
else:
c.value = value
c.required = required c.required = required
c.implicit = input.implicit
c.provided = 0 # this is a count of how many times the input has been provided (reinitialized to 0 on __call__) c.provided = 0 # this is a count of how many times the input has been provided (reinitialized to 0 on __call__)
finder[i] = c finder[i] = c
finder[input.variable] = c finder[input.variable] = c
...@@ -247,6 +258,9 @@ class Function(object): ...@@ -247,6 +258,9 @@ class Function(object):
#setters.append(partial(assign, c)) #setters.append(partial(assign, c))
containers[:1] = [] containers[:1] = []
else: else:
# TODO The following code may need to do something to handle
# implicit inputs.
# The input is a SymbolicInputKit, so we take as many containers as the Kit provides inputs # The input is a SymbolicInputKit, so we take as many containers as the Kit provides inputs
cs = containers[:len(indices)] cs = containers[:len(indices)]
# distribute does the initialization of the containers # distribute does the initialization of the containers
...@@ -347,20 +361,27 @@ class Function(object): ...@@ -347,20 +361,27 @@ class Function(object):
# Set keyword arguments # Set keyword arguments
for k, arg in kwargs.iteritems(): for k, arg in kwargs.iteritems():
self[k] = arg self[k] = arg
# Check if inputs are missing or if inputs were set more than once
# Check if inputs are missing, or if inputs were set more than once, or
# if we tried to provide inputs that are supposed to be implicit.
for c in self.input_storage: for c in self.input_storage:
if c.required and not c.provided: if c.required and not c.provided:
raise TypeError("Missing required input: %s" % getattr(self.inv_finder[c], 'variable', self.inv_finder[c])) raise TypeError("Missing required input: %s" % getattr(self.inv_finder[c], 'variable', self.inv_finder[c]))
if c.provided > 1: if c.provided > 1:
raise TypeError("Multiple values for input: %s" % getattr(self.inv_finder[c], 'variable', self.inv_finder[c])) raise TypeError("Multiple values for input: %s" % getattr(self.inv_finder[c], 'variable', self.inv_finder[c]))
if c.implicit and c.provided > 0:
raise TypeError('Tried to provide value for implicit input: %s'
% getattr(self.inv_finder[c], 'variable',
self.inv_finder[c]))
# Do the actual work # Do the actual work
self.fn() self.fn()
# Retrieve the values that were computed # Retrieve the values that were computed
outputs = [x.data for x in self.output_storage] outputs = [x.data for x in self.output_storage]
#remove internal references to required inputs # Remove internal references to required inputs.
#these can't be re-used anyway # These cannot be re-used anyway.
for x in self.input_storage: for x in self.input_storage:
if c.required: if c.required:
c.storage[0] = None c.storage[0] = None
...@@ -377,12 +398,16 @@ class Function(object): ...@@ -377,12 +398,16 @@ class Function(object):
# Update the inputs that have an update function # Update the inputs that have an update function
for input, storage in reversed(zip(self.maker.expanded_inputs, self.input_storage)): for input, storage in reversed(zip(self.maker.expanded_inputs, self.input_storage)):
if input.update: if input.update is not None:
storage.data = outputs.pop() storage.data = outputs.pop()
# Put default values back in the storage # Put default values back in the storage
for i, (required, refeed, value) in enumerate(self.defaults): for i, (required, refeed, value) in enumerate(self.defaults):
if refeed: if refeed:
if isinstance(value, gof.Container):
value = value.storage[0]
self[i] = value self[i] = value
if self.return_none: if self.return_none:
return None return None
elif self.unpack_single and len(outputs) == 1: elif self.unpack_single and len(outputs) == 1:
...@@ -404,26 +429,26 @@ class Function(object): ...@@ -404,26 +429,26 @@ class Function(object):
def _pickle_Function(f): def _pickle_Function(f):
#copy of the input storage list #copy of the input storage list
ins = list(f.input_storage) ins = list(f.input_storage)
defaults = [] input_storage = []
for (input, indices, inputs), (required, refeed, default) in zip(f.indices, f.defaults): for (input, indices, inputs), (required, refeed, default) in zip(f.indices, f.defaults):
if isinstance(input, SymbolicInputKit): if isinstance(input, SymbolicInputKit):
li = len(indices) li = len(indices)
if not default: if not default:
defaults.append(ins[:li]) input_storage.append(ins[:li])
else: else:
defaults.append(default) input_storage.append(default)
ins[:li] = [] ins[:li] = []
else: else:
defaults.append(ins[0]) input_storage.append(ins[0])
del ins[0] del ins[0]
inputs_data = [x.data for x in f.input_storage] inputs_data = [x.data for x in f.input_storage]
# HACK to detect aliased storage. # HACK to detect aliased storage.
# aliased relationships will not be preserved across the pickle operation # This is here because aliased relationships are not [currently] preserved across the pickle operation
if not (f.pickle_aliased_memory_strategy == 'ignore'): if not (f.pickle_aliased_memory_strategy == 'ignore'):
all_data = defaults + inputs_data all_data = input_storage + inputs_data # addition here means list append
for i, d_i in enumerate(all_data): for i, d_i in enumerate(all_data):
for j, d_j in enumerate(all_data): for j, d_j in enumerate(all_data):
if (i < j) and isinstance(d_i, numpy.ndarray) and isinstance(d_j, numpy.ndarray): if (i < j) and isinstance(d_i, numpy.ndarray) and isinstance(d_j, numpy.ndarray):
...@@ -436,14 +461,14 @@ def _pickle_Function(f): ...@@ -436,14 +461,14 @@ def _pickle_Function(f):
else: else:
raise AliasedMemoryError(d_i, d_j) raise AliasedMemoryError(d_i, d_j)
rval = (_constructor_Function, (f.maker, defaults, inputs_data)) rval = (_constructor_Function, (f.maker, input_storage, inputs_data))
return rval return rval
def _constructor_Function(maker, defaults, data): def _constructor_Function(maker, input_storage, inputs_data):
f = maker.create(defaults, trustme = True) f = maker.create(input_storage, trustme = True)
assert len(f.input_storage) == len(data) assert len(f.input_storage) == len(inputs_data)
for container, x in zip(f.input_storage, data): for container, x in zip(f.input_storage, inputs_data):
container.data = x assert (container.data is x) or (container.data == x)
return f return f
copy_reg.pickle(Function, _pickle_Function) copy_reg.pickle(Function, _pickle_Function)
...@@ -626,97 +651,53 @@ class FunctionMaker(object): ...@@ -626,97 +651,53 @@ class FunctionMaker(object):
self.accept_inplace = accept_inplace self.accept_inplace = accept_inplace
self.function_builder = function_builder self.function_builder = function_builder
def create(self, defaults = None, trustme = False): self.required = [(i.value == None) for i in self.inputs]
self.refeed = [
(i.value != None and not isinstance(i.value, gof.Container) and i.update == None)
for i in self.inputs]
def create(self, input_storage=None, trustme=False):
""" """
Create a function. Create a function.
defaults -> a list matching the inputs list and providing default values input_storage -> a list matching the inputs list and providing default values
if the default for an input is None, then that input is a if the default for an input is None, then that input is a
required input. For an input with an update, the default required input. For an input with an update, the default
acts as initialization. acts as initialization.
trustme -> disables some exceptions, used internally trustme -> disables some exceptions, used internally
""" """
if defaults is None: if input_storage is None:
defaults = [None]*len(self.inputs) input_storage = [None]*len(self.inputs)
input_storage = [] # list of independent one-element lists, will be passed to the linker input_storage_lists = [] # list of independent one-element lists, will be passed to the linker
_defaults = [] defaults = []
# The following loop is to fill in the input_storage and _defaults lists. # The following loop is to fill in the input_storage_lists and defaults lists.
for (input, indices, subinputs), default in zip(self.indices, defaults): assert len(self.indices) == len(input_storage)
__default = default for i, ((input, indices, subinputs), input_storage_i) in enumerate(zip(self.indices, input_storage)):
# Replace any default value given as a variable by its container.
if isinstance(default, gof.Container): # Note that this makes sense only in the context of shared variables,
# If the default is a gof.Container, this means we want to share # but for now we avoid dealing directly with them to avoid dependency
# the same storage. This is done by appending default.storage # on the shared variables work-in-progress repository.
# to input_storage if isinstance(input_storage_i, gof.Variable):
input_storage_i = input_storage_i.container
if isinstance(input_storage_i, gof.Container):
# If the default is a gof.Container, this means we want to
# share the same storage. This is done by appending
# input_storage_i.storage to input_storage_lists.
if indices is not None: if indices is not None:
raise TypeError("Cannot take a Container instance as default for a SymbolicInputKit.") raise TypeError("Cannot take a Container instance as default for a SymbolicInputKit.")
input_storage.append(default.storage) input_storage_lists.append(input_storage_i.storage)
default = None defaults.append((self.required[i],
required = False self.refeed[i],
elif isinstance(input, SymbolicInputKit): input_storage_i.storage[0]))
# If the input is a SymbolicInputKit, it represents more than
# one storage unit. The indices and subinputs lists represent which
# of the kit's inputs are active in this graph, so we make as many
# storage units as needed
if isinstance(default, (list, tuple)) \
and all(isinstance(x, gof.Container) for x in default):
if len(default) == len(indices):
input_storage += [x.storage for x in default]
elif len(default) > len(indices):
input_storage += [default[i].storage for i in indices]
else:
raise ValueError('Not enough storage for SymbolicInputKit', input, indices, default)
default = NODEFAULT
else:
input_storage += [[None] for i in indices]
else: else:
# Normal case: one new, independent storage unit # Normal case: one new, independent storage unit
input_storage.append([None]) input_storage_lists.append([input_storage_i])
defaults.append((self.required[i], self.refeed[i], input_storage_i))
# Filling _defaults. Each entry is a tuple of three elements:
# (required, refeed, value)
# - required means that the user must provide a value when calling the function
# - refeed means that we want to put the default back in the storage after each function call
# - value is the value that will be put in the storage initially
# Even though a SymbolicInputKit represents more than one input,
# we still only have one entry for the defaults list.
if isinstance(input, SymbolicInputKit):
if default is NODEFAULT:
_defaults.append((False, False, None))
elif default is None:
_defaults.append((True, True, None))
else:
_defaults.append((False, False, default))
elif input.update is not None:
# If the input has an update, then (logically) it is not required since
# it is just a parameter and of course we don't want to refeed the default
# back into the storage as it would defeat the point of updating it. We
# always do this policy.
if default is None:
if trustme or isinstance(__default, gof.Container):
_defaults.append((False, False, None))
else:
# This might catch some bugs early
raise ValueError("A default (initial) value is required for an input which can update itself.", input)
else:
_defaults.append((False, False, default))
else:
if default is None:
if trustme or isinstance(__default, gof.Container):
_defaults.append((False, False, None))
else:
# No default, so this is a required input. Nothing to feed back, initial value is None.
_defaults.append((True, False, None))
else:
# Default value. It is not required, but we want to put it back into the storage
# everytime so it behaves like most programming languages' default values
_defaults.append((False, True, default))
defaults = _defaults
# Get a function instance # Get a function instance
_fn, _i, _o = self.linker.make_thunk(input_storage = input_storage) _fn, _i, _o = self.linker.make_thunk(input_storage = input_storage_lists)
fn = self.function_builder(_fn, _i, _o, self.indices, self.outputs, defaults, self.unpack_single, self.return_none, self) fn = self.function_builder(_fn, _i, _o, self.indices, self.outputs, defaults, self.unpack_single, self.return_none, self)
return fn return fn
...@@ -805,6 +786,7 @@ def function(inputs, outputs, mode=None, accept_inplace = False): ...@@ -805,6 +786,7 @@ def function(inputs, outputs, mode=None, accept_inplace = False):
""" """
mode = mode if mode is not None else mode_module.default_mode mode = mode if mode is not None else mode_module.default_mode
inputs = map(convert_function_input, inputs) inputs = map(convert_function_input, inputs)
if outputs is not None: if outputs is not None:
outputs = map(FunctionMaker.wrap_out, outputs) if isinstance(outputs, (list, tuple)) else FunctionMaker.wrap_out(outputs) outputs = map(FunctionMaker.wrap_out, outputs) if isinstance(outputs, (list, tuple)) else FunctionMaker.wrap_out(outputs)
...@@ -820,6 +802,7 @@ def function(inputs, outputs, mode=None, accept_inplace = False): ...@@ -820,6 +802,7 @@ def function(inputs, outputs, mode=None, accept_inplace = False):
else: else:
#return a different kind of function #return a different kind of function
def dup_defaults(): def dup_defaults():
# TODO This may need to be changed to use containers as defaults.
return [copy.copy(default.value) if isinstance(default, gof.Container) else return [copy.copy(default.value) if isinstance(default, gof.Container) else
copy.copy(default) copy.copy(default)
for default in defaults] for default in defaults]
......
"""Define `SymbolicInput`, `SymbolicOutput`, `In`, `Out` """ """Define `SymbolicInput`, `SymbolicOutput`, `In`, `Out` """
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from theano import gof
class SymbolicInput(object): class SymbolicInput(object):
""" """
Represents a symbolic input for use with function or FunctionMaker. Represents a symbolic input for use with function or FunctionMaker.
...@@ -27,9 +29,15 @@ class SymbolicInput(object): ...@@ -27,9 +29,15 @@ class SymbolicInput(object):
autoname: Bool (default: True) autoname: Bool (default: True)
See the name option. See the name option.
implicit: Bool (default: False)
See help(In). Note that 'None' is not allowed here, since we are in the
symbolic case.
""" """
def __init__(self, variable, name=None, update=None, mutable=None, strict=False, autoname=True): def __init__(self, variable, name=None, update=None, mutable=None, strict=False, autoname=True,
implicit=False):
assert implicit is not None # Safety check.
self.variable = variable self.variable = variable
self.name = variable.name if (autoname and name is None) else name self.name = variable.name if (autoname and name is None) else name
if self.name is not None and not isinstance(self.name, str): if self.name is not None and not isinstance(self.name, str):
...@@ -37,6 +45,7 @@ class SymbolicInput(object): ...@@ -37,6 +45,7 @@ class SymbolicInput(object):
self.update = update self.update = update
self.mutable = mutable if (mutable is not None) else (update is not None) self.mutable = mutable if (mutable is not None) else (update is not None)
self.strict = strict self.strict = strict
self.implicit = implicit
def __str__(self): def __str__(self):
if self.update: if self.update:
...@@ -132,14 +141,39 @@ class In(SymbolicInput): ...@@ -132,14 +141,39 @@ class In(SymbolicInput):
strict: Bool (default: False) strict: Bool (default: False)
True: means that the value you pass for this input must have exactly the right type True: means that the value you pass for this input must have exactly the right type
False: the value you pass for this input may be casted automatically to the proper type False: the value you pass for this input may be cast automatically to the proper type
autoname: Bool (default: True) autoname: Bool (default: True)
See the name option. See the name option.
implicit: Bool or None (default: None)
True: This input is implicit in the sense that the user is not allowed
to provide a value for it. Requires 'value' to be set.
False: The user can provide a value for this input. Be careful when
'value' is a container, because providing an input value will
overwrite the content of this container.
None: Automatically choose between True or False depending on the
situation. It will be set to False in all cases except if 'value'
is a container (so that there is less risk of accidentally
overwriting its content without being aware of it).
""" """
def __init__(self, variable, name=None, value=None, update=None, mutable=None, strict=False, autoname=True): # Note: the documentation above is duplicated in doc/topics/function.txt,
super(In, self).__init__(variable, name, update, mutable, strict, autoname) # try to keep it synchronized.
def __init__(self, variable, name=None, value=None, update=None,
mutable=None, strict=False, autoname=True,
implicit=None):
if implicit is None:
# TODO Having a default value being a Variable only makes sense
# if this is a SharedVariable. This should be changed once shared
# variables are part of Theano instead of living in a separate
# repository.
implicit = (isinstance(value, gof.Container) or
isinstance(value, gof.Variable))
super(In, self).__init__(variable, name, update, mutable, strict,
autoname, implicit = implicit)
self.value = value self.value = value
if self.implicit and value is None:
raise TypeError('An implicit input must be given a default value')
class SymbolicOutput(object): class SymbolicOutput(object):
......
...@@ -37,6 +37,7 @@ predefined_linkers = { ...@@ -37,6 +37,7 @@ predefined_linkers = {
'c&py' : gof.DualLinker(checker = check_equal) 'c&py' : gof.DualLinker(checker = check_equal)
} }
#Keep default_linker the same as the one for default_mode
default_linker = 'c|py' default_linker = 'c|py'
def register_linker(name, linker): def register_linker(name, linker):
...@@ -63,7 +64,8 @@ predefined_optimizers = { ...@@ -63,7 +64,8 @@ predefined_optimizers = {
'fast_run_stable' : OPT_FAST_RUN_STABLE, 'fast_run_stable' : OPT_FAST_RUN_STABLE,
'fast_compile' : OPT_FAST_COMPILE 'fast_compile' : OPT_FAST_COMPILE
} }
default_optimizer = 'merge' #Keep default_optimizer the same as the one for default_mode
default_optimizer = 'fast_run'
def register_optimizer(name, opt): def register_optimizer(name, opt):
"""Add a `Optimizer` which can be referred to by `name` in `Mode`.""" """Add a `Optimizer` which can be referred to by `name` in `Mode`."""
...@@ -157,6 +159,7 @@ predefined_modes = {'FAST_COMPILE': FAST_COMPILE, ...@@ -157,6 +159,7 @@ predefined_modes = {'FAST_COMPILE': FAST_COMPILE,
# The default mode used by functions and modules is read from the environment # The default mode used by functions and modules is read from the environment
# variable THEANO_DEFAULT_MODE. Unit tests will run using this value. If the env. var. # variable THEANO_DEFAULT_MODE. Unit tests will run using this value. If the env. var.
# is not set, it will default to 'FAST_RUN' # is not set, it will default to 'FAST_RUN'
# keep default_mode.optimizer==default_optimizer and default_mode.linker==default_linker!
## ##
default_mode = os.getenv('THEANO_DEFAULT_MODE','FAST_RUN') default_mode = os.getenv('THEANO_DEFAULT_MODE','FAST_RUN')
......
...@@ -354,7 +354,7 @@ class Method(Component): ...@@ -354,7 +354,7 @@ class Method(Component):
return memo[self] return memo[self]
self.resolve_all() # resolve all so we don't have to mess with strings self.resolve_all() # resolve all so we don't have to mess with strings
def get_storage(r, require = False): def get_storage(r, require=False):
# If require is True, we can only get storage from the memo. # If require is True, we can only get storage from the memo.
try: try:
return memo[r] return memo[r]
...@@ -405,7 +405,8 @@ class Method(Component): ...@@ -405,7 +405,8 @@ class Method(Component):
variable=k, variable=k,
update=v, update=v,
value=get_storage(k, not allocate_all).value, value=get_storage(k, not allocate_all).value,
mutable=True) mutable=True,
implicit = True)
inputs.append(input_k) inputs.append(input_k)
else: else:
raise ValueError(('Variable listed in both inputs and updates.' raise ValueError(('Variable listed in both inputs and updates.'
...@@ -437,6 +438,13 @@ class Method(Component): ...@@ -437,6 +438,13 @@ class Method(Component):
assert storage.mutable == False assert storage.mutable == False
else: else:
storage = get_storage(input, not allocate_all) storage = get_storage(input, not allocate_all)
# Declare as an implicit input.
# TODO Note from OD: is this dangerous? (in case this storage
# is shared, and would sometimes need to be implicit, sometimes
# not).
storage.implicit = True
assert type(storage) is io.In assert type(storage) is io.In
inputs.append(storage) inputs.append(storage)
......
...@@ -2,10 +2,11 @@ import time ...@@ -2,10 +2,11 @@ import time
from ..gof.link import WrapLinkerMany from ..gof.link import WrapLinkerMany
from ..gof.cutils import run_cthunk from ..gof.cutils import run_cthunk
from ..compile.mode import Mode from ..compile.mode import Mode, predefined_linkers
from ..gof.cc import OpWiseCLinker
class ProfileMode(Mode): class ProfileMode(Mode):
def __init__(self, linker, optimizer=None): def __init__(self, linker=OpWiseCLinker(), optimizer=None):
local_time = [0.0] local_time = [0.0]
apply_time = {} apply_time = {}
op_time = {} op_time = {}
...@@ -31,6 +32,9 @@ class ProfileMode(Mode): ...@@ -31,6 +32,9 @@ class ProfileMode(Mode):
self.op_time = op_time self.op_time = op_time
self.op_cimpl = op_cimpl self.op_cimpl = op_cimpl
if isinstance(linker, str):
linker = predefined_linkers[linker]
wrap_linker = WrapLinkerMany([linker], [blah]) wrap_linker = WrapLinkerMany([linker], [blah])
if optimizer: if optimizer:
super(ProfileMode, self).__init__(wrap_linker, optimizer) super(ProfileMode, self).__init__(wrap_linker, optimizer)
......
...@@ -7,7 +7,7 @@ from theano.compile.function_module import * ...@@ -7,7 +7,7 @@ from theano.compile.function_module import *
from theano import tensor from theano import tensor
from theano import tensor as T from theano import tensor as T
import random import random, theano
import numpy as N import numpy as N
...@@ -250,9 +250,30 @@ class T_function(unittest.TestCase): ...@@ -250,9 +250,30 @@ class T_function(unittest.TestCase):
self.failUnless(f[s] == 2) self.failUnless(f[s] == 2)
self.failUnless(g[s] == 2) self.failUnless(g[s] == 2)
f(1, 2) f(1, 2)
g(1, 2)
self.failUnless(f[s] == 4) self.failUnless(f[s] == 4)
self.failUnless(g[s] == 4) self.failUnless(g[s] == 4)
g(1, 2) # has no effect on state
self.failUnless(f[s] == 4)
self.failUnless(g[s] == 4)
def test_shared_state_not_implicit(self):
    """Check state sharing when the sharing input is declared non-implicit.

    `dec` reuses the container that `inc` holds for `s`, but declares it
    with ``implicit = False``; the test then calls `dec` both without and
    with an explicit value for `s`.
    """
    # This test is taken from the documentation in
    # doc/topics/function.txt. If it does not pass anymore and yet the
    # behavior is still intended the doc and the test should both be
    # updated accordingly.
    x, s = T.scalars('xs')
    inc = function([x, In(s, update=(s+x), value=10.0)], [])
    # Same container as `inc`, but `s` is not an implicit input here.
    shared_s = In(s, update=(s-x), value=inc.container[s], implicit=False)
    dec = function([x, shared_s], [])

    # Both functions must expose the very same state object.
    self.failUnless(dec[s] is inc[s])
    inc[s] = 2
    self.failUnless(dec[s] == 2)

    # Only x is given: the shared state is decremented in place.
    dec(1)
    self.failUnless(inc[s] == 1)

    # s is given explicitly as 0: the update is computed from that value.
    dec(1, 0)
    self.failUnless(inc[s] == -1)
    self.failUnless(dec[s] == -1)
class T_picklefunction(unittest.TestCase): class T_picklefunction(unittest.TestCase):
...@@ -278,6 +299,13 @@ class T_picklefunction(unittest.TestCase): ...@@ -278,6 +299,13 @@ class T_picklefunction(unittest.TestCase):
self.failIf(g.container[2].storage is f.container[2].storage) self.failIf(g.container[2].storage is f.container[2].storage)
self.failIf(x in g.container) self.failIf(x in g.container)
self.failIf(x in g.value) self.failIf(x in g.value)
self.failUnless(len(f.defaults) == len(g.defaults))
print 'f.defaults = %s' % (f.defaults, )
print 'g.defaults = %s' % (g.defaults, )
self.failUnless(all([f_req == g_req and f_feed == g_feed and
f_val == g_val
for ((f_req, f_feed, f_val), (g_req, g_feed, g_val)) in zip(
f.defaults, g.defaults)]))
self.failIf(g.value[1] is f.value[1]) # should not have been copied self.failIf(g.value[1] is f.value[1]) # should not have been copied
self.failIf(g.value[2] is f.value[2]) # should have been copied because it is mutable. self.failIf(g.value[2] is f.value[2]) # should have been copied because it is mutable.
...@@ -287,6 +315,32 @@ class T_picklefunction(unittest.TestCase): ...@@ -287,6 +315,32 @@ class T_picklefunction(unittest.TestCase):
self.failUnless(f(2, 1) == g(2)) #they should be in sync, default value should be copied. self.failUnless(f(2, 1) == g(2)) #they should be in sync, default value should be copied.
f(1,2) # put them out of sync f(1,2) # put them out of sync
self.failIf(f(1, 2) == g(1, 2)) #they should not be equal anymore. self.failIf(f(1, 2) == g(1, 2)) #they should not be equal anymore.
g(1, 2) # put them back in sync
self.failUnless(f(3) == g(3)) # They should be in sync again.
def test_deepcopy_shared_container(self):
# Ensure that shared containers remain shared after a deep copy.
a, x = T.scalars('ax')
h = function([In(a, value = 0.0)], a)
f = function([x, In(a, value=h.container[a], implicit = True)], x + a)
try:
memo = {}
ac = copy.deepcopy(a)
memo.update({id(a): ac})
hc = copy.deepcopy(h, memo = memo)
memo.update({id(h): hc})
fc = copy.deepcopy(f, memo = memo)
except NotImplementedError, e:
if e[0].startswith('DebugMode is not picklable'):
return
else:
raise
h[a] = 1
hc[ac] = 2
self.failUnless(f[a] == 1)
self.failUnless(fc[ac] == 2)
def test_pickle(self): def test_pickle(self):
a = T.scalar() # the a is for 'anonymous' (un-named). a = T.scalar() # the a is for 'anonymous' (un-named).
...@@ -472,7 +526,7 @@ if __name__ == '__main__': ...@@ -472,7 +526,7 @@ if __name__ == '__main__':
if 1: if 1:
unittest.main() unittest.main()
else: elif 0:
testcases = [] testcases = []
testcases.append(T_function) testcases.append(T_function)
...@@ -483,3 +537,11 @@ if __name__ == '__main__': ...@@ -483,3 +537,11 @@ if __name__ == '__main__':
suite.addTest(testloader.loadTestsFromTestCase(testcase)) suite.addTest(testloader.loadTestsFromTestCase(testcase))
unittest.TextTestRunner(verbosity=2).run(suite) unittest.TextTestRunner(verbosity=2).run(suite)
#</boilerplate> #</boilerplate>
elif 0:
theano.compile.mode.default_mode = 'FAST_COMPILE'
t = T_picklefunction()
def fu(b):
assert b
t.failUnless = fu
t.test_deepcopy_shared_container()
...@@ -678,6 +678,23 @@ def test_method_mode(): ...@@ -678,6 +678,23 @@ def test_method_mode():
assert m.h.maker.mode == m.g.maker.mode assert m.h.maker.mode == m.g.maker.mode
assert numpy.all(m.f([1,2]) == m.g([1,2])) assert numpy.all(m.f([1,2]) == m.g([1,2]))
def test_method_implicit_ticket_384():
"""
Ensure it is not possible to accidentally overwrite module variables
added as implicit inputs.
"""
M = Module()
M.x = T.scalar()
M.f = Method([M.x], M.x * 3)
m = M.make()
m.f(0)
try:
m.f(0, 0)
assert False
except TypeError, e:
if not str(e).startswith('Tried to provide value for implicit input'):
raise
def test_pickle(): def test_pickle():
"""Test that a module can be pickled""" """Test that a module can be pickled"""
M = Module() M = Module()
......
...@@ -526,10 +526,23 @@ class CLinker(link.Linker): ...@@ -526,10 +526,23 @@ class CLinker(link.Linker):
This might contain duplicates. This might contain duplicates.
""" """
ret = [] ret = ["-O3", "-w"]#-w means supress all warnings
# These are the flags that -ffast-math activates. They are listed explicitly because FillMissing must be able to disable "-ffinite-math-only"; if -ffast-math itself were used, disabling it would turn off all the other flags at the same time.
ret += ["-fno-math-errno", "-funsafe-math-optimizations",
"-fno-signaling-nans", "-fcx-limited-range",
"-fno-rounding-math", "-ffinite-math-only"]
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]: for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret += x.c_compile_args() try: ret += x.c_compile_args()
except utils.MethodNotDefined: pass except utils.MethodNotDefined: pass
ret=list(set(ret))#to remove duplicate
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try:
for i in x.c_no_compile_args():
try:
ret.remove(i)
except ValueError:
pass# in case the value is not there
except utils.MethodNotDefined: pass
return ret return ret
def headers(self): def headers(self):
...@@ -703,16 +716,7 @@ class CLinker(link.Linker): ...@@ -703,16 +716,7 @@ class CLinker(link.Linker):
instantiate.customize.add_support_code(support_code) instantiate.customize.add_support_code(support_code)
instantiate.customize.add_support_code(self.struct_code) instantiate.customize.add_support_code(self.struct_code)
instantiate.customize.add_support_code(static) instantiate.customize.add_support_code(static)
for extra_arg in (
"-O2",
"-ffast-math",
#"-fprefetch-loop-arrays",
#"-ftree-vect-loop-version",
#"-ftree-loop-optimize",
#"-ftree-vectorize"):
"-w" #-w means supress all warnings
):
instantiate.customize.add_extra_compile_arg(extra_arg)
for arg in self.compile_args(): for arg in self.compile_args():
instantiate.customize.add_extra_compile_arg(arg) instantiate.customize.add_extra_compile_arg(arg)
for header in self.headers(): for header in self.headers():
......
...@@ -97,6 +97,25 @@ class CLinkerOp(object): ...@@ -97,6 +97,25 @@ class CLinkerOp(object):
raise utils.MethodNotDefined('%s.c_compile_args' \ raise utils.MethodNotDefined('%s.c_compile_args' \
% self.__class__.__name__) % self.__class__.__name__)
def c_no_compile_args(self):
    """Optional: Return a list of gcc arguments this Op is incompatible with.

    The returned arguments are removed from the gcc command line. Hence,
    if another Op of the graph adds a compile argument that this Op cannot
    work with, that argument ends up not being used.

    Useful for instance to remove -ffast-math.

    EXAMPLE

    WRITEME

    :Exceptions:
     - `MethodNotDefined`: the subclass does not override this method

    """
    class_name = self.__class__.__name__
    raise utils.MethodNotDefined('%s.c_no_compile_args' % class_name)
def c_headers(self): def c_headers(self):
"""Optional: Return a list of header files that must be included to compile the C code. """Optional: Return a list of header files that must be included to compile the C code.
......
...@@ -148,6 +148,24 @@ class CLinkerType(object): ...@@ -148,6 +148,24 @@ class CLinkerType(object):
""" """
raise MethodNotDefined("c_compile_args", type(self), self.__class__.__name__) raise MethodNotDefined("c_compile_args", type(self), self.__class__.__name__)
def c_no_compile_args(self):
    """Optional: Return a list of gcc arguments this Type is incompatible with.

    The returned arguments are removed from the gcc command line. Hence,
    if something else in the graph adds a compile argument that this Type
    cannot work with, that argument ends up not being used.

    Useful for instance to remove -ffast-math.

    EXAMPLE

    WRITEME

    :Exceptions:
     - `MethodNotDefined`: the subclass does not override this method

    """
    class_name = self.__class__.__name__
    raise MethodNotDefined("c_no_compile_args", type(self), class_name)
def c_headers(self): def c_headers(self):
"""Optional: Return a list of header files required by code returned by """Optional: Return a list of header files required by code returned by
this class. this class.
......
...@@ -8,7 +8,7 @@ def getFilterOutShp(inshp, kshp, (dx,dy)=(1,1), mode='valid'): ...@@ -8,7 +8,7 @@ def getFilterOutShp(inshp, kshp, (dx,dy)=(1,1), mode='valid'):
s = -1 if mode=='valid' else 1 s = -1 if mode=='valid' else 1
inshp, kshp = N.array(inshp), N.array(kshp) inshp, kshp = N.array(inshp), N.array(kshp)
return N.int64(N.ceil((inshp[1:] + s*kshp - s*1)/\ return N.int64(N.ceil((inshp[1:] + s*kshp - s*1)/\
N.array([dy,dx], dtype='float'))) N.array([dx,dy], dtype='float')))
class ConvOp(Op): class ConvOp(Op):
""" """
...@@ -44,21 +44,19 @@ class ConvOp(Op): ...@@ -44,21 +44,19 @@ class ConvOp(Op):
self.unroll_kern=unroll_kern self.unroll_kern=unroll_kern
if self.unroll_batch>0 and self.bsize % self.unroll_batch!=0: if self.unroll_batch>0 and self.bsize % self.unroll_batch!=0:
if self.bsize<self.unroll_batch: if self.bsize<=self.unroll_batch:
self.unroll_batch = self.bsize self.unroll_batch = self.bsize
else: else:
self.unroll_batch=1
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_batch(%s) must be 0 or a multiple of bsize(%s). We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_batch),str(self.bsize)) print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_batch(%s) must be 0 or a multiple of bsize(%s). We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_batch),str(self.bsize))
self.unroll_batch=1
if self.unroll_kern>0 and self.nkern % unroll_kern!=0: if self.unroll_kern>0 and self.nkern % unroll_kern!=0:
if self.nkern<self.unroll_kern: if self.nkern<=self.unroll_kern:
self.unroll_kern = self.nkern self.unroll_kern = self.nkern
else: else:
self.unroll_kern=1
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a multiple of nkern(%s)We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern)) print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a multiple of nkern(%s)We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern))
if self.dx!=1 or self.dy!=1: self.unroll_kern=1
print "Warning, dx!=1 or dy!=1 only supported in python mode!"
raise NotImplementedError()
self.outshp = getFilterOutShp(self.imshp, kshp, (dx,dy), output_mode) self.outshp = getFilterOutShp(self.imshp, kshp, (dx,dy), output_mode)
self.fulloutshp = getFilterOutShp(self.imshp, kshp, (1,1), output_mode)
self.out_mode = output_mode self.out_mode = output_mode
if not self.out_mode in ["valid", "full"]: if not self.out_mode in ["valid", "full"]:
raise Exception("Mode %s not implemented"%self.out_mode) raise Exception("Mode %s not implemented"%self.out_mode)
...@@ -92,7 +90,7 @@ class ConvOp(Op): ...@@ -92,7 +90,7 @@ class ConvOp(Op):
raise Exception("The image and the kernel must have the same type." raise Exception("The image and the kernel must have the same type."
"inputs(%s), kerns(%s)"%(inputs.dtype, kerns.dtype)) "inputs(%s), kerns(%s)"%(inputs.dtype, kerns.dtype))
output = tensor.tensor(dtype=inputs.type.dtype, output = tensor.tensor(dtype=inputs.type.dtype,
broadcastable=[False]*outdim, broadcastable=[False]*outdim,
name="ConvOp_Output"); name="ConvOp_Output");
return gof.Apply(self, [inputs, kerns], [output]) return gof.Apply(self, [inputs, kerns], [output])
...@@ -105,7 +103,8 @@ class ConvOp(Op): ...@@ -105,7 +103,8 @@ class ConvOp(Op):
from scipy.signal.signaltools import _valfrommode, _bvalfromboundary from scipy.signal.signaltools import _valfrommode, _bvalfromboundary
from scipy.signal.sigtools import _convolve2d from scipy.signal.sigtools import _convolve2d
if z[0] is None: if z[0] is None:
z[0] = N.zeros((self.bsize,)+(self.nkern,)+tuple(self.outshp)) z[0] = N.zeros((self.bsize,)+(self.nkern,)+tuple(self.fulloutshp),
dtype=img2d.dtype)
zz=z[0] zz=z[0]
val = _valfrommode(self.out_mode) val = _valfrommode(self.out_mode)
bval = _bvalfromboundary('fill') bval = _bvalfromboundary('fill')
...@@ -119,7 +118,11 @@ class ConvOp(Op): ...@@ -119,7 +118,11 @@ class ConvOp(Op):
for im0 in range(self.imshp[0]): for im0 in range(self.imshp[0]):
zz[b,n,...] += _convolve2d(\ zz[b,n,...] += _convolve2d(\
img2d[b,im0,...], filtersflipped[n,im0,...],1,val, bval, 0) img2d[b,im0,...], filtersflipped[n,im0,...],1,val, bval, 0)
zz = zz[:,:,0::self.dx,0::self.dy] #We copy it to remove the Stride mismatch warning from DEBUG_MODE.
#The copy makes us return an object with the same strides as the C version.
#The copy did not affect performance in our experiments, since in that case we
#execute the C version, which is much faster.
zz = zz[:,:,0::self.dx,0::self.dy].copy()
z[0]=zz z[0]=zz
...@@ -131,6 +134,13 @@ class ConvOp(Op): ...@@ -131,6 +134,13 @@ class ConvOp(Op):
* inputs needs to be a 4D tensor. Couldn't get 3D to work * inputs needs to be a 4D tensor. Couldn't get 3D to work
* will crash if filter the same size as input image * will crash if filter the same size as input image
""" """
outshp = self.fulloutshp
if self.dx!=1 or self.dy!=1:
upgz = T.as_tensor(N.zeros((self.bsize,self.nkern)+tuple(self.fulloutshp),
dtype=gz.type.dtype))
gz = T.SetSubtensor([slice(self.bsize), slice(self.nkern),
slice(0,outshp[0],self.dy),
slice(0,outshp[1],self.dx)])(upgz,gz)
####### Determine gradient on kernels ######## ####### Determine gradient on kernels ########
if inputs.ndim == 3: if inputs.ndim == 3:
...@@ -144,26 +154,28 @@ class ConvOp(Op): ...@@ -144,26 +154,28 @@ class ConvOp(Op):
(img, filters) = (newin, newgz) (img, filters) = (newin, newgz)
(bsize, nkern) = (self.imshp[0], self.nkern) (bsize, nkern) = (self.imshp[0], self.nkern)
imshp = N.hstack((self.bsize, self.imshp[1:])) imshp = N.hstack((self.bsize, self.imshp[1:]))
kshp = self.outshp kshp = outshp
un_b = self.unroll_batch
un_k = self.unroll_kern
elif self.out_mode == 'full': elif self.out_mode == 'full':
(img, filters) = (newgz, newin) (img, filters) = (newgz, newin)
(bsize, nkern) = (self.nkern, self.imshp[0]) (bsize, nkern) = (self.nkern, self.imshp[0])
imshp = N.hstack((self.bsize, self.outshp)) imshp = N.hstack((self.bsize, outshp))
kshp = self.imshp[1:] kshp = self.imshp[1:]
un_b = self.unroll_kern
un_k = self.unroll_batch
else: else:
raise NotImplementedError('Only [full,valid] modes are currently supported.') raise NotImplementedError('Only [full,valid] modes are currently supported.')
filters = filters[:,:,::-1,::-1] filters = filters[:,:,::-1,::-1]
#find good value for the unroll #find good value for the unroll
un_b = self.unroll_batch
un_k = self.unroll_kern
if un_b!=0 and bsize%un_b!=0: if un_b!=0 and bsize%un_b!=0:
if bsize<un_b: if bsize<un_b:
un_b = bsize un_b = bsize
else: else:
un_b = 1 un_b = 1
print "OPTIMISATION WARNING: in ConvOp.grad() we can't determine a good unroll value for the batch. Maybe you can optimize this!" print "OPTIMISATION WARNING: in ConvOp.grad() we can't determine a good unroll value for the batch. Maybe you can optimize this!", bsize, un_b, self.unroll_batch, self.unroll_kern
if un_k!=0 and nkern%un_k!=0: if un_k!=0 and nkern%un_k!=0:
if nkern<un_k: if nkern<un_k:
un_k = nkern un_k = nkern
...@@ -173,6 +185,7 @@ class ConvOp(Op): ...@@ -173,6 +185,7 @@ class ConvOp(Op):
dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode='valid', dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode='valid',
unroll_batch=un_b, unroll_kern=un_k)(img,filters) unroll_batch=un_b, unroll_kern=un_k)(img,filters)
assert (dw.owner.op.outshp==self.kshp).all()
if self.out_mode == 'valid': if self.out_mode == 'valid':
# before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1] # before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1]
dw = tensor.DimShuffle(dw.broadcastable, (1,0,2,3))(dw) dw = tensor.DimShuffle(dw.broadcastable, (1,0,2,3))(dw)
...@@ -183,11 +196,11 @@ class ConvOp(Op): ...@@ -183,11 +196,11 @@ class ConvOp(Op):
filters = tensor.DimShuffle(gz.broadcastable, (1,0,2,3))(kerns) filters = tensor.DimShuffle(gz.broadcastable, (1,0,2,3))(kerns)
filters = filters[:,:,::-1,::-1] filters = filters[:,:,::-1,::-1]
nkern = self.imshp[0] nkern = self.imshp[0]
imshp = N.hstack((self.nkern,self.outshp)) imshp = N.hstack((self.nkern,outshp))
din = ConvOp(imshp, self.kshp, nkern, self.bsize, din = ConvOp(imshp, self.kshp, nkern, self.bsize,
1,1, output_mode=mode, 1,1, output_mode=mode,
unroll_batch=un_b, unroll_kern=un_k)(gz,filters) unroll_batch=un_b, unroll_kern=un_k)(gz,filters)
assert (din.owner.op.outshp==self.imshp[1:]).all()
return [din, dw] return [din, dw]
#def c(): #def c():
...@@ -238,7 +251,7 @@ using namespace std; ...@@ -238,7 +251,7 @@ using namespace std;
self.unroll_kern) self.unroll_kern)
#TODO: should we choose the unroll size automatically with the bigger divisor under 5? #TODO: should we choose the unroll size automatically with the bigger divisor under 5?
if self.out_mode == 'valid': if self.out_mode == 'valid' and self.dx==0 and self.dy==0:
# print "return gemm version" # print "return gemm version"
return _conv_op_code_valid_gemm % d return _conv_op_code_valid_gemm % d
else: else:
...@@ -388,8 +401,11 @@ if ((!%(z)s) ...@@ -388,8 +401,11 @@ if ((!%(z)s)
} }
int Os[2]; int Os[2];
if (mode == FULL) {Os[0] = dim_im[0]+dim_ker[0]-1; Os[1] = dim_im[1]+dim_ker[1]-1;} Os[0]=%(self_outshp0)s;
else {Os[0] = dim_im[0]-dim_ker[0]+1; Os[1] = dim_im[1]-dim_ker[1]+1;} Os[1]=%(self_outshp1)s;
//I keep the formula to calculte Os in case we need it in the futur.
//if (mode == FULL) {Os[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(%(self_dx)s)); Os[1] = ceil((dim_im[1]+dim_ker[1]-1)/float(%(self_dy)s));}
//else {Os[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(%(self_dx)s)); Os[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(%(self_dy)s));}
for(int b=0;b< %(self_bsize)s;b++){ for(int b=0;b< %(self_bsize)s;b++){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern++){ for(int n_kern=0;n_kern<%(self_nkern)s;n_kern++){
...@@ -410,12 +426,14 @@ for(int b=0;b< %(self_bsize)s;b++){ ...@@ -410,12 +426,14 @@ for(int b=0;b< %(self_bsize)s;b++){
int new_m; int new_m;
for (int m=0; m < Os[0]; m++) { for (int iter_m=0; iter_m < Os[0]; iter_m++) {
// Reposition index into input image based on requested output size // Reposition index into input image based on requested output size
if (mode == FULL) new_m = m ; int pos_m = iter_m*%(self_dx)s;//The position of the patch in the image
else new_m = (m+dim_ker[0]-1); if (mode == FULL) new_m = pos_m ;
else new_m = (pos_m+dim_ker[0]-1);
for (int n=0; n < Os[1]; n++) { // loop over columns for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
int pos_n=iter_n*%(self_dy)s;
%(type)s sum=0; %(type)s sum=0;
// Sum over kernel, if index into image is out of bounds // Sum over kernel, if index into image is out of bounds
...@@ -433,7 +451,7 @@ for(int b=0;b< %(self_bsize)s;b++){ ...@@ -433,7 +451,7 @@ for(int b=0;b< %(self_bsize)s;b++){
}else{ }else{
//do the part where kernel is to the right of the img //do the part where kernel is to the right of the img
int k=0,max_k=max((int)(n-dim_im[1])+1,0); int k=0,max_k=max((int)(pos_n-dim_im[1])+1,0);
if(fill_value!=0){ if(fill_value!=0){
for(k=0;k<max_k;k++){ for(k=0;k<max_k;k++){
...@@ -442,9 +460,9 @@ for(int b=0;b< %(self_bsize)s;b++){ ...@@ -442,9 +460,9 @@ for(int b=0;b< %(self_bsize)s;b++){
}else {k=max_k;} }else {k=max_k;}
//do the part where the kernel is on the img //do the part where the kernel is on the img
max_k=min(n+1,(int)dim_ker[1]); max_k=min(pos_n+1,(int)dim_ker[1]);
const %(type)s * idx_in=&in[ind0*dim_im[1]]; const %(type)s * idx_in=&in[ind0*dim_im[1]];
for (int ind1=n-k; k<max_k; k++,ind1--) { for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+= idx_hvals[k] * idx_in[ind1]; sum+= idx_hvals[k] * idx_in[ind1];
} }
//do the part to the left of the img //do the part to the left of the img
...@@ -454,14 +472,13 @@ for(int b=0;b< %(self_bsize)s;b++){ ...@@ -454,14 +472,13 @@ for(int b=0;b< %(self_bsize)s;b++){
}else{ }else{
const %(type)s* idx_in=&in[ind0*dim_im[1]]; //JB: should be dim_im[1] right? (was dim_im[0]) const %(type)s* idx_in=&in[ind0*dim_im[1]]; //JB: should be dim_im[1] right? (was dim_im[0])
const %(type)s* idx_hvals=&hvals[j*dim_ker[1]]; const %(type)s* idx_hvals=&hvals[j*dim_ker[1]];
int new_n = (n+dim_ker[1]-1); int new_n = (pos_n+dim_ker[1]-1);
for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) { for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) {
sum+=idx_hvals[k]*idx_in[last]; sum+=idx_hvals[k]*idx_in[last];
} }
} }
}//for j }//for j
out[m*dim_zz[1]+n] %(affectation)s sum; out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum;
}//for n }//for n
}//for m }//for m
}//for stack_size }//for stack_size
...@@ -763,7 +780,11 @@ if(%(img2d)s->nd==2){ ...@@ -763,7 +780,11 @@ if(%(img2d)s->nd==2){
img2d_dim[1]=%(img2d)s->dimensions[1]; img2d_dim[1]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0]; img2d_dim[0]=%(img2d)s->dimensions[0];
}else { }else {
PyErr_SetString(PyExc_ValueError, "img don't have a good shape"); std:stringstream temp;
temp << "nddim="<<%(img2d)s->nd;
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
("img don't have a good shape. " + param).c_str());
%(fail)s; %(fail)s;
} }
...@@ -777,11 +798,7 @@ if(%(filtersflipped)s->nd==3){ ...@@ -777,11 +798,7 @@ if(%(filtersflipped)s->nd==3){
kerns_dim[1]=%(filtersflipped)s->dimensions[1]; kerns_dim[1]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0]; kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else{ }else{
std:stringstream temp; PyErr_SetString(PyExc_ValueError, "kernel don't have a good shape");
temp << "nddim="<<%(filtersflipped)s->nd;
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
("kernel don't have a good shape. " + param).c_str());
%(fail)s; %(fail)s;
} }
...@@ -844,8 +861,12 @@ if ((!%(z)s) ...@@ -844,8 +861,12 @@ if ((!%(z)s)
} }
int Os[2]; int Os[2];
if (mode == FULL) {Os[0] = dim_im[0]+dim_ker[0]-1; Os[1] = dim_im[1]+dim_ker[1]-1;} Os[0]=%(self_outshp0)s;
else {Os[0] = dim_im[0]-dim_ker[0]+1; Os[1] = dim_im[1]-dim_ker[1]+1;} Os[1]=%(self_outshp1)s;
//I keep the formula to calculte Os in case we need it in the futur.
//if (mode == FULL) {Os[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(%(self_dx)s)); Os[1] = ceil((dim_im[1]+dim_ker[1]-1)/float(%(self_dy)s));}
//else {Os[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(%(self_dx)s)); Os[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(%(self_dy)s));}
for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern+=%(unroll_ksize)s){ for(int n_kern=0;n_kern<%(self_nkern)s;n_kern+=%(unroll_ksize)s){
...@@ -866,12 +887,14 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){ ...@@ -866,12 +887,14 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
int new_m; int new_m;
for (int m=0; m < Os[0]; m++) { for (int iter_m=0; iter_m < Os[0]; iter_m++) {
// Reposition index into input image based on requested output size // Reposition index into input image based on requested output size
if (mode == FULL) new_m = m ; int pos_m = iter_m*%(self_dx)s;//The position of the patch in the image
else new_m = (m+dim_ker[0]-1); if (mode == FULL) new_m = pos_m ;
else new_m = (pos_m+dim_ker[0]-1);
for (int n=0; n < Os[1]; n++) { // loop over columns for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
int pos_n=iter_n*%(self_dy)s;
"""%d """%d
ret+=my_dup("%(type)s sum%(unroll_iter)s=0;", unroll_bsize*unroll_ksize) ret+=my_dup("%(type)s sum%(unroll_iter)s=0;", unroll_bsize*unroll_ksize)
ret+=""" ret+="""
...@@ -895,7 +918,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){ ...@@ -895,7 +918,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
}else{ }else{
//do the part where kernel is to the right of the img //do the part where kernel is to the right of the img
int k=0,max_k=max((int)(n-dim_im[1])+1,0); int k=0,max_k=max((int)(pos_n-dim_im[1])+1,0);
if(fill_value!=0){ if(fill_value!=0){
for(k=0;k<max_k;k++){ for(k=0;k<max_k;k++){
...@@ -906,11 +929,11 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){ ...@@ -906,11 +929,11 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
}else {k=max_k;} }else {k=max_k;}
//do the part where the kernel is on the img //do the part where the kernel is on the img
max_k=min(n+1,(int)dim_ker[1]); max_k=min(pos_n+1,(int)dim_ker[1]);
"""%d """%d
ret+=my_dup("const %(type)s * idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize) ret+=my_dup("const %(type)s * idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize)
ret+=""" ret+="""
for (int ind1=n-k; k<max_k; k++,ind1--) { for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
"""%d """%d
ret+=my_dup2("sum%(unroll_iter)s+= idx_hvals%(unroll_kiter)s[k] * idx_in%(unroll_biter)s[ind1];") ret+=my_dup2("sum%(unroll_iter)s+= idx_hvals%(unroll_kiter)s[k] * idx_in%(unroll_biter)s[ind1];")
...@@ -929,7 +952,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){ ...@@ -929,7 +952,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
ret+=my_dup("const %(type)s* idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize) ret+=my_dup("const %(type)s* idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize)
ret+=my_dup("const %(type)s* idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker[1]];",unroll_ksize) ret+=my_dup("const %(type)s* idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker[1]];",unroll_ksize)
ret+=""" ret+="""
int new_n = (n+dim_ker[1]-1); int new_n = (pos_n+dim_ker[1]-1);
for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) { for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) {
"""%d """%d
...@@ -940,7 +963,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){ ...@@ -940,7 +963,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
}//for j }//for j
"""%d """%d
ret+=my_dup("out%(unroll_iter)s[m*dim_zz[1]+n] %(affectation)s sum%(unroll_iter)s;", unroll_bsize*unroll_ksize) ret+=my_dup("out%(unroll_iter)s[iter_m*dim_zz[1]+iter_n] %(affectation)s sum%(unroll_iter)s;", unroll_bsize*unroll_ksize)
ret+=""" ret+="""
}//for n }//for n
}//for m }//for m
......
...@@ -90,16 +90,18 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll ...@@ -90,16 +90,18 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
####### test with new sp.convolve2 function ###### ####### test with new sp.convolve2 function ######
time1 = time.time() time1 = time.time()
hid, outshp2 = convolve2(kern, kshp, nkern, img, imshp, hid, outshp2 = convolve2(kern, kshp, nkern, img, imshp,
bsize, (1,1), mode=conv_mode) bsize, (ss[0],ss[1]), mode=conv_mode)
propup = function([kern, img], hid) propup = function([kern, img], hid)
propup1 = function([kern, img], hid,mode=Mode(linker="py")) propup1 = function([kern, img], hid,mode=Mode(linker="py"))
hidval = propup(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1)) hidval = propup(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
hidval = hidval.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]] hidval = hidval.reshape(bsize,nkern,outshp2[-2],outshp2[-1])
# hidval = hidval[:,:,::ss[0],::ss[1]]
hidval = hidval.reshape(bsize, -1) hidval = hidval.reshape(bsize, -1)
for i in range(repeat): for i in range(repeat):
hidval1 = propup1(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1)) hidval1 = propup1(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
hidval1 = hidval1.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]] hidval1 = hidval1.reshape(bsize,nkern,outshp2[-2],outshp2[-1])
# hidval1 = hidval1[:,:,::ss[0],::ss[1]]
hidval1 = hidval1.reshape(bsize, -1) hidval1 = hidval1.reshape(bsize, -1)
assert (N.abs(hidval-hidval1)<1e-5).all() assert (N.abs(hidval-hidval1)<1e-5).all()
...@@ -113,7 +115,7 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll ...@@ -113,7 +115,7 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
hidval1=outval.copy() hidval1=outval.copy()
# ConvOp # ConvOp
conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern)(inputs4, kerns4) conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern)(inputs4, kerns4)
l1shp=N.hstack((nkern, l1shp=N.hstack((nkern,
getFilterOutShp(imshp, kshp, ss, conv_mode))) getFilterOutShp(imshp, kshp, ss, conv_mode)))
propup2 = function([inputs4, kerns4], conv_op) propup2 = function([inputs4, kerns4], conv_op)
...@@ -122,14 +124,14 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll ...@@ -122,14 +124,14 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
time1 = time.time() time1 = time.time()
for i in range(repeat): for i in range(repeat):
hidval2_ = propup2(imgval,w_flip) hidval2_ = propup2(imgval,w_flip)
hidval2 = hidval2_[:,:,0::ss[0],0::ss[1]] hidval2 = hidval2_#[:,:,0::ss[0],0::ss[1]]
tctot += time.time() - time1 tctot += time.time() - time1
if conv_op_py: if conv_op_py:
time1 = time.time() time1 = time.time()
for i in range(repeat): for i in range(repeat):
hidval3_ = propup3(imgval,w_flip) hidval3_ = propup3(imgval,w_flip)
hidval3 = hidval3_[:,:,0::ss[0],0::ss[1]] hidval3 = hidval3_#[:,:,0::ss[0],0::ss[1]]
tpytot += time.time() - time1 tpytot += time.time() - time1
assert (N.abs(hidval2-hidval3)<1e-5).all() assert (N.abs(hidval2-hidval3)<1e-5).all()
else: else:
...@@ -235,7 +237,7 @@ class TestConvOp(unittest.TestCase): ...@@ -235,7 +237,7 @@ class TestConvOp(unittest.TestCase):
# compute with new convolve2 (no timing info) # compute with new convolve2 (no timing info)
output4, outshp4 = convolve2(kerns, kshp, nkern, input,\ output4, outshp4 = convolve2(kerns, kshp, nkern, input,\
imshp, bsize, (1,1), bias=bias, mode=conv_mode) imshp, bsize, (ss[0],ss[1]), bias=bias, mode=conv_mode)
# print 'output4', output4 # print 'output4', output4
ttime1 = time.time() ttime1 = time.time()
...@@ -244,7 +246,7 @@ class TestConvOp(unittest.TestCase): ...@@ -244,7 +246,7 @@ class TestConvOp(unittest.TestCase):
# print 'out4', out4, img1d, filtersflipped # print 'out4', out4, img1d, filtersflipped
tconv2 += [time.time() - ttime1] tconv2 += [time.time() - ttime1]
out4 = out4.reshape(bsize, nkern, outshp4[1], outshp4[2]) out4 = out4.reshape(bsize, nkern, outshp4[1], outshp4[2])
out4 = out4[:,:,0::ss[0],0::ss[1]] out4 = out4#[:,:,0::ss[0],0::ss[1]]
out4 = out4.reshape(bsize, -1) out4 = out4.reshape(bsize, -1)
# compute with ConvOp # compute with ConvOp
...@@ -252,18 +254,18 @@ class TestConvOp(unittest.TestCase): ...@@ -252,18 +254,18 @@ class TestConvOp(unittest.TestCase):
inputs=dmatrix3() inputs=dmatrix3()
kerns3=dmatrix3() kerns3=dmatrix3()
bia=T.dscalar() bia=T.dscalar()
conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode)(inputs, kerns3) conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode)(inputs, kerns3)
f2 = function([inputs, kerns3], conv_op, mode=Mode(linker="c")) f2 = function([inputs, kerns3], conv_op, mode=Mode(linker="c"))
f3 = function([inputs, kerns3], conv_op, mode=Mode(linker="py")) f3 = function([inputs, kerns3], conv_op, mode=Mode(linker="py"))
ttime1 = time.time() ttime1 = time.time()
out2_ = f2(img2d, filtersflipped) out2_ = f2(img2d, filtersflipped)
out2__ = out2_[:,:,0::ss[0],0::ss[1]] out2__ = out2_#[:,:,0::ss[0],0::ss[1]]
tconvop += [time.time() - ttime1] tconvop += [time.time() - ttime1]
out2___ = out2__.copy() out2___ = out2__.copy()
out2 = out2___ + biasvals.reshape(1,nkern,1,1) out2 = out2___ + biasvals.reshape(1,nkern,1,1)
out3_ = f3(img2d, filtersflipped) out3_ = f3(img2d, filtersflipped)
out3__ = out3_[:,:,0::ss[0],0::ss[1]] out3__ = out3_#[:,:,0::ss[0],0::ss[1]]
out3___ = out3__.copy() out3___ = out3__.copy()
out3 = out3___ + biasvals.reshape(1,nkern,1,1) out3 = out3___ + biasvals.reshape(1,nkern,1,1)
assert (N.abs(out2_-out3_)<1e-5).all() assert (N.abs(out2_-out3_)<1e-5).all()
...@@ -302,15 +304,21 @@ class TestConvOp(unittest.TestCase): ...@@ -302,15 +304,21 @@ class TestConvOp(unittest.TestCase):
print 'speed up ConvOp vs convolve2d: %.3f'%d.mean(),d print 'speed up ConvOp vs convolve2d: %.3f'%d.mean(),d
def test_multilayer_conv(self): def test_multilayer_conv(self):
print '\n\n*************************************************'
print ' TEST MULTILAYER CONVOLUTION'
print '*************************************************'
# fixed parameters # fixed parameters
# test multiple configurations at the same time
bsizes = [6,6] # batch size bsizes = [6,6] # batch size
imshp_starts = [(1,28,28),(1,4,4)] imshp_starts = [(1,13,14),(1,4,5)]
kshpss = ([[5,6],[7,4]],[[2,2],[2,2]]) kshpss = ([[5,6],[7,4]],[[2,2],[2,2]])
nkernss = [[20,40],[2,2]] # per output pixel nkernss = [[20,40],[2,2]] # per output pixel
ssizess = [[(1,1),(2,2)],[(1,1),(2,2)]] ssizess = [[(1,1),(1,2)],[(1,1),(2,2)]]
convmodes = ['valid','full'] convmodes = ['valid','full']
do_convolve2=True do_convolve2=True
unroll = [(0,0),(1,1),(2,2),(3,2)]#(batch,kern) unroll = [(0,0),(1,1),(2,2),(3,2)]#(batch,kern)
do_speed_test = False
# TODO: this version show a bug that was fixed # TODO: this version show a bug that was fixed
# the test is included in the upper test. # the test is included in the upper test.
...@@ -319,15 +327,6 @@ class TestConvOp(unittest.TestCase): ...@@ -319,15 +327,6 @@ class TestConvOp(unittest.TestCase):
# nkerns = [2,2] # per output pixel # nkerns = [2,2] # per output pixel
# ssizes = [(1,1),(2,2)]#2,2)] # ssizes = [(1,1),(2,2)]#2,2)]
#test speed
# bsize = 10 # batch size
# imshp_start = (1,50,49)#un square shape to test more corner case.
# kshps = ([11,12],[12,11])#un square shape to test more corner case.
# nkerns = [20,20] # per output pixel
# ssizes = [(1,1),]#(1,1)]#(2,2) bugged
# convmodes = ['valid','full']
# do_convolve2=False
N.set_printoptions(threshold=N.nan) N.set_printoptions(threshold=N.nan)
# symbolic stuff # symbolic stuff
...@@ -338,7 +337,7 @@ class TestConvOp(unittest.TestCase): ...@@ -338,7 +337,7 @@ class TestConvOp(unittest.TestCase):
for i in range(len(kshpss)): for i in range(len(kshpss)):
assert len(kshpss[i])==len(nkernss[i])==len(kerns) assert len(kshpss[i])==len(nkernss[i])==len(kerns)
if False: if do_speed_test:
# calculate the speed up of different combination of unroll # calculate the speed up of different combination of unroll
# put the parameter to the same you will try. # put the parameter to the same you will try.
...@@ -418,19 +417,23 @@ class TestConvOp(unittest.TestCase): ...@@ -418,19 +417,23 @@ class TestConvOp(unittest.TestCase):
d=N.asarray(ntot)/tpytot d=N.asarray(ntot)/tpytot
print 'speed up py theano(ConvOp) vs convolve2d: %.3fx'%d.mean(),d print 'speed up py theano(ConvOp) vs convolve2d: %.3fx'%d.mean(),d
def test_ConvOpGrad(self): def test_ConvOpGrad(self):
""" """
test the gradient in float and double test the gradient in float and double
""" """
print '\n\n*************************************************'
print ' TEST ConvOp.grad'
print '*************************************************'
nkern = 4 nkern = 4
bsize = 3 bsize = 3
types = ["float32", "float64"] types = ["float32", "float64"]
kshps = [(5,5), (6,7)] kshps = [(3,4)]
imshps = [(1,5,5), (2,8,8), (3,8,7)] imshps = [(2,8,7)]
modes = ['valid', 'full'] modes = ['valid', 'full']
unroll_batch=[0,1,3] unroll_batch=[0,1,3]
unroll_kern=[0,1,4] unroll_kern=[0,1,4]
ssizes = [(1,1),(2,2)]
for typ in types: for typ in types:
imgs = T.TensorType(typ, (False, False, False, False),'imgs') imgs = T.TensorType(typ, (False, False, False, False),'imgs')
...@@ -445,42 +448,41 @@ class TestConvOp(unittest.TestCase): ...@@ -445,42 +448,41 @@ class TestConvOp(unittest.TestCase):
continue continue
for un_b in unroll_batch: for un_b in unroll_batch:
for un_k in unroll_kern: for un_k in unroll_kern:
imgvals = N.array(N.random.random(N.hstack((bsize,imshp))),dtype=imgs.dtype) for ss in ssizes:
# print 'imgvals.shape = ', imgvals.shape, imgvals.dtype imgvals = N.array(N.random.random(N.hstack((bsize,imshp))),dtype=imgs.dtype)
# imgvals = imgvals.reshape(bsize,-1)
kernvals = N.array(N.random.rand(nkern,visdim,kshp[0],
kernvals = N.array(N.random.rand(nkern,visdim,kshp[0], kshp[1]),dtype=kerns.dtype)
kshp[1]),dtype=kerns.dtype)
def testf(imgs, kerns):
# print 'kernvals.shape = ', kernvals.shape, kernvals.dtype out, outshp = convolve2(kerns, kshp, nkern,
# kernvals = kernvals.reshape(nkern,-1) imgs, imshp, bsize,
mode=mode, step=ss,
def testf(imgs, kerns): unroll_batch=un_b,
out, outshp = convolve2(kerns, kshp, nkern, unroll_kern=un_k)
imgs, imshp, bsize, return out
mode=mode, #TODO the tolerance needed to pass is very high for float32(0.16). Is this acceptable? Expected?
unroll_batch=un_b, utt.verify_grad(testf, [imgvals, kernvals],
unroll_kern=un_k) cast_to_output_type=True,
return out tol=None if typ!="float32" else 0.16)
#TODO the tolerance needed to pass is very high for float32(0.16). Is this acceptable? Expected?
utt.verify_grad(testf, [imgvals, kernvals],
cast_to_output_type=True,
tol=None if typ!="float32" else 0.16)
if __name__ == '__main__': if __name__ == '__main__':
# t = TestConvOp("test_convolution") t = TestConvOp("test_convolution")
# t.test_convolution() # t.test_convolution()
# t.test_multilayer_conv() t.test_multilayer_conv()
# from theano.tests import main # from theano.tests import main
# main("test_sp") # main("test_sp")
bsize = 20 # batch size if False:
imshp_start = (1,100,100)#un square shape to test more corner case. #used to launch 8 jobs at the same time.
kshps = ([11,12],[12,11])#un square shape to test more corner case. bsize = 20 # batch size
nkerns = [20,20] # per output pixel imshp_start = (1,100,100)#un square shape to test more corner case.
ssizes = [(1,1),]#(1,1)]#(2,2) bugged kshps = ([11,12],[12,11])#un square shape to test more corner case.
convmodes = ['valid','full'] nkerns = [20,20] # per output pixel
unroll_batch = 5 ssizes = [(1,1),]#(1,1)]#(2,2) bugged
unroll_kern = 2 convmodes = ['valid','full']
ctot=0 unroll_batch = 5
tctot, tpytot, ntot = exec_multilayer_conv_nnet(convmodes[1], ssizes[0], bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_batch, unroll_kern=unroll_kern, validate=False, do_print=False,repeat=5) unroll_kern = 2
print "total exec time %.3fs"%tctot ctot=0
tctot, tpytot, ntot = exec_multilayer_conv_nnet(convmodes[1], ssizes[0], bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_batch, unroll_kern=unroll_kern, validate=False, do_print=False,repeat=5)
print "total exec time %.3fs"%tctot
...@@ -2054,8 +2054,10 @@ class Reshape(Op): ...@@ -2054,8 +2054,10 @@ class Reshape(Op):
The number of dimensions to which to reshape to (ndim) must be known at graph The number of dimensions to which to reshape to (ndim) must be known at graph
build time.""" build time."""
view_map = {0: [0]} #output 0 is potentially aliased to inputs [0] view_map = {0: [0]} #output 0 is potentially aliased to inputs [0]
def __init__(self, ndim): def __init__(self, ndim, name = None):
self.ndim = ndim self.ndim = ndim
if name:
self.name = name
def __eq__(self, other): def __eq__(self, other):
return (type(other) is Reshape) and (other.ndim == self.ndim) return (type(other) is Reshape) and (other.ndim == self.ndim)
def __hash__(self): def __hash__(self):
...@@ -2075,10 +2077,10 @@ class Reshape(Op): ...@@ -2075,10 +2077,10 @@ class Reshape(Op):
def grad(self, (x, shp), (g_out,)): def grad(self, (x, shp), (g_out,)):
return [reshape(g_out, shape(x), ndim=x.ndim), None] return [reshape(g_out, shape(x), ndim=x.ndim), None]
def reshape(x, newshape, ndim=None): def reshape(x, newshape, ndim=None, name=None):
if ndim is None: if ndim is None:
ndim = get_vector_length(newshape) ndim = get_vector_length(newshape)
op = Reshape(ndim) op = Reshape(ndim, name)
return op(x, newshape) return op(x, newshape)
......
...@@ -581,6 +581,11 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -581,6 +581,11 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
""", """,
inside_row_loop, inside_row_loop,
""" """
if ((y_i >= %(x)s->dimensions[1]) || (y_i < 0))
{
PyErr_SetString(PyExc_ValueError, "y_i value out of bounds");
%(fail)s;
}
nll_i[0] = - x_i[y_i*Sx] nll_i[0] = - x_i[y_i*Sx]
- b_i[y_i*Sb] - b_i[y_i*Sb]
+ row_max + row_max
......
...@@ -686,7 +686,7 @@ class Canonizer(gof.LocalOptimizer): ...@@ -686,7 +686,7 @@ class Canonizer(gof.LocalOptimizer):
op = node.op op = node.op
if op not in [self.main, self.inverse, self.reciprocal]: if op not in [self.main, self.inverse, self.reciprocal]:
return False return False
inputs = node.inputs inputs = node.inputs
out = node.outputs[0] out = node.outputs[0]
assert len(node.outputs) == 1 assert len(node.outputs) == 1
...@@ -725,8 +725,14 @@ class Canonizer(gof.LocalOptimizer): ...@@ -725,8 +725,14 @@ class Canonizer(gof.LocalOptimizer):
return getattr(self, 'name', 'Canonizer(%s, %s, %s)' % (self.main, self.inverse, self.reciprocal)) return getattr(self, 'name', 'Canonizer(%s, %s, %s)' % (self.main, self.inverse, self.reciprocal))
def mul_calculate(num, denum, aslist = False): def mul_calculate(num, denum, aslist=False):
v = reduce(N.multiply, num, 1.0) / reduce(N.multiply, denum, 1.0) if not num and not denum:
# Smallest 1 possible.
return [] if aslist else N.int8(1)
# Make sure we do not accidentally upcast data types.
first = num[0] if num else denum[0]
one = N.asarray(first).dtype.type(1)
v = reduce(N.multiply, num, one) / reduce(N.multiply, denum, one)
if aslist: if aslist:
if N.all(v == 1): if N.all(v == 1):
return [] return []
......
...@@ -551,7 +551,8 @@ def test_naacl_model(iters_per_unsup=10, iters_per_sup=10, ...@@ -551,7 +551,8 @@ def test_naacl_model(iters_per_unsup=10, iters_per_sup=10,
s0 = str(m.finetuning_update(*(inputs + [targets]))) s0 = str(m.finetuning_update(*(inputs + [targets])))
print iters_per_sup * (i+1), s0 print iters_per_sup * (i+1), s0
if iters_per_sup == 10: if iters_per_sup == 10:
assert s0.startswith('15.6511')#should check for the 8 decimal only. s0f = float(s0)
assert 15.6510 < s0f and s0f < 15.6512
def jtest_main(): def jtest_main():
from theano import gof from theano import gof
......
...@@ -57,9 +57,13 @@ class test_dimshuffle_lift(unittest.TestCase): ...@@ -57,9 +57,13 @@ class test_dimshuffle_lift(unittest.TestCase):
x, y, z = inputs([False]*1, [False]*2, [False]*3) x, y, z = inputs([False]*1, [False]*2, [False]*3)
e = x + y + z e = x + y + z
g = Env([x, y, z], [e]) g = Env([x, y, z], [e])
self.failUnless(str(g) == "[add(InplaceDimShuffle{x,0,1}(add(InplaceDimShuffle{x,0}(x), y)), z)]", str(g)) self.failUnless(str(g) == ("[Elemwise{add,no_inplace}("
"InplaceDimShuffle{x,0,1}(Elemwise{add,no_inplace}"
"(InplaceDimShuffle{x,0}(x), y)), z)]"), str(g))
dimshuffle_lift.optimize(g) dimshuffle_lift.optimize(g)
self.failUnless(str(g) == "[add(add(InplaceDimShuffle{x,x,0}(x), InplaceDimShuffle{x,0,1}(y)), z)]", str(g)) self.failUnless(str(g) == ("[Elemwise{add,no_inplace}(Elemwise"
"{add,no_inplace}(InplaceDimShuffle{x,x,0}(x), InplaceDimShuffle"
"{x,0,1}(y)), z)]"), str(g))
def test_add_canonizer_problem0(): def test_add_canonizer_problem0():
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论