提交 2bc77c39 authored 作者: Razvan Pascanu's avatar Razvan Pascanu

merge

...@@ -44,7 +44,13 @@ precise inspection of what's being computed where, when, and how, see the ...@@ -44,7 +44,13 @@ precise inspection of what's being computed where, when, and how, see the
How do I print a graph before or after compilation? How do I print a graph before or after compilation?
---------------------------------------------------------- ----------------------------------------------------------
Theano provides a function to print a graph before and after compilation: Theano provides two functions to print a graph to the terminal before or after
compilation. It can print graph that only have one output. If you have multiple
output, call once for each output. Their is another one that create a png image
of the function. It support multiple output.
1) The first is ``theano.pp``. It hides some Ops added by the compiler, such as
the *DimShuffle* Op used for broadcasting.
>>> x = T.dscalar('x') >>> x = T.dscalar('x')
>>> y = x**2 >>> y = x**2
...@@ -55,13 +61,63 @@ Theano provides a function to print a graph before and after compilation: ...@@ -55,13 +61,63 @@ Theano provides a function to print a graph before and after compilation:
>>> pp(f.maker.env.outputs[0]) >>> pp(f.maker.env.outputs[0])
'(2.0 * x)' '(2.0 * x)'
The parameter in T.dscalar('x') in the first line is the name of this variable(in the graph, not in python). This name is reused when printing the graph. Otherwise the variable x is printed as its type as: <TensorType(float64, scalar)>. That is not the most comprehensible. The string 'x' can be any string, but to make the code more comprehensible, try to pass the same name or derivative of the name in python. The parameter in T.dscalar('x') in the first line is the name of this variable
in the graph. This name is used when printing the graph to make it more readable.
If no name is provided, the variable x is printed as its type, in this example
<TensorType(float64, scalar)>.
The name parameter can be any string; there is absolutely no restriction.
This means you can have many variables with the same name.
To make the code more comprehensible, try to give the name parameter the same
name as the one you use in the code.
2) The second function to print a graph is ``theano.printing.debugprint``(Variable, depth=-1).
You can use it on graph variables and compiled functions, as with pp. If the depth
parameter is provided, we limit the number of levels that we print.
>>> theano.printing.debugprint(gy)
Elemwise{mul} 46950804894224
Elemwise{mul} 46950804735120
Elemwise{second,no_inplace} 46950804626128
Elemwise{pow,no_inplace} 46950804625040
x 46950658736720
2 46950804039760
1.0 46950804625488
2 46950804039760
Elemwise{pow} 46950804737616
x 46950658736720
Elemwise{sub} 46950804736720
2 46950804039760
InplaceDimShuffle{} 46950804736016
1 46950804735760
<open file '<stdout>', mode 'w' at 0x2ab38d49f198>
>>> theano.printing.debugprint(gy, depth=2)
Elemwise{mul} 46950804894224
Elemwise{mul} 46950804735120
Elemwise{pow} 46950804737616
<open file '<stdout>', mode 'w' at 0x2ab38d49f198>
>>> theano.printing.debugprint(f.maker.env.outputs[0])
Elemwise{mul,no_inplace} 46950805397392
2.0 46950805310800
x 46950804895504
<open file '<stdout>', mode 'w' at 0x2ab38d49f198>
3) The function ``theano.printing.pydotprint(fct, file=SOME_DEFAULT_VALUE)`` will print a compiled theano function to a png file.
In the graph, boxes are Apply nodes (the execution of an op) and ellipses are variables.
If variables have names, those are used as the text (if multiple variables have the same name, they will be merged in the graph).
Otherwise, if the variable is a constant, we print its value; finally, we print the type plus a unique number to avoid having multiple variables merged.
We print the op of each apply in the Apply box, with a number that represents the toposort order of application of those Applies.
Green ellipses are inputs to the graph and blue ellipses are outputs of the graph.
The function I compiled is too slow, what's up? The function I compiled is too slow, what's up?
----------------------------------------------- -----------------------------------------------
First, make sure you're running in FAST_RUN mode, by passing ``mode='FAST_RUN'`` First, make sure you're running in FAST_RUN mode, by passing ``mode='FAST_RUN'``
to ``theano.function`` or ``theano.make`` or by setting to ``FAST_RUN`` to ``theano.function`` or ``theano.make`` or by setting to ``PROFILE_MODE``
the flag :attr:`config.mode`. Some the flags :attr:`config.mode`. Some
operations have excruciatingly slow Python implementations and that operations have excruciatingly slow Python implementations and that
can negatively effect the performance of FAST_COMPILE. can negatively effect the performance of FAST_COMPILE.
...@@ -80,7 +136,7 @@ Check out this one: ...@@ -80,7 +136,7 @@ Check out this one:
.. code-block:: python .. code-block:: python
class PrintEverythingMode(theano.Mode): class PrintEverythingMode(Mode):
def __init__(self): def __init__(self):
def print_eval(i, node, fn): def print_eval(i, node, fn):
print i, node, [input[0] for input in fn.inputs], print i, node, [input[0] for input in fn.inputs],
......
...@@ -961,6 +961,7 @@ class _Linker(gof.link.LocalLinker): ...@@ -961,6 +961,7 @@ class _Linker(gof.link.LocalLinker):
# transfer the initial values from the storage_map to the r_vals # transfer the initial values from the storage_map to the r_vals
debug("DEBUGMODE: transfer initial values") debug("DEBUGMODE: transfer initial values")
r_transfered_from_storage_map = []
for r in storage_map: for r in storage_map:
if (r.owner is None): if (r.owner is None):
if (storage_map[r][0] is None): if (storage_map[r][0] is None):
...@@ -969,6 +970,7 @@ class _Linker(gof.link.LocalLinker): ...@@ -969,6 +970,7 @@ class _Linker(gof.link.LocalLinker):
raise InvalidValueError(r, storage_map[r][0]) raise InvalidValueError(r, storage_map[r][0])
r_vals[r] = storage_map[r][0] r_vals[r] = storage_map[r][0]
storage_map[r][0] = None storage_map[r][0] = None
r_transfered_from_storage_map.append(r)
##### #####
# Precondition: the storage map is empty, transferred completely to r_vals # Precondition: the storage map is empty, transferred completely to r_vals
##### #####
...@@ -1122,7 +1124,7 @@ class _Linker(gof.link.LocalLinker): ...@@ -1122,7 +1124,7 @@ class _Linker(gof.link.LocalLinker):
else: else:
storage_map[r][0] = dr_vals[r][0] storage_map[r][0] = dr_vals[r][0]
except: except:
for r in storage_map: for r in r_transfered_from_storage_map:
if r in original_storage_map_keys: if r in original_storage_map_keys:
if storage_map[r][0] is None: if storage_map[r][0] is None:
storage_map[r][0] = r_vals[r] storage_map[r][0] = r_vals[r]
......
...@@ -283,6 +283,11 @@ class Function(object): ...@@ -283,6 +283,11 @@ class Function(object):
#def assign(c, v): #def assign(c, v):
#c.data = v #c.data = v
# Store the list of names of named inputs.
named_inputs = []
# Count the number of un-named inputs.
n_unnamed_inputs = 0
#setters = [] #setters = []
# Initialize the storage # Initialize the storage
# this loop works by modifying the elements (as variable c) of self.input_storage inplace. # this loop works by modifying the elements (as variable c) of self.input_storage inplace.
...@@ -312,6 +317,10 @@ class Function(object): ...@@ -312,6 +317,10 @@ class Function(object):
finder[input.name] = c finder[input.name] = c
else: else:
finder[input.name] = DUPLICATE finder[input.name] = DUPLICATE
if input.name is None:
n_unnamed_inputs += 1
else:
named_inputs.append(input.name)
#backport #backport
#finder[input.name] = c if input.name not in finder else DUPLICATE #finder[input.name] = c if input.name not in finder else DUPLICATE
# inv_finder maps the container to the input (useful for one error message) # inv_finder maps the container to the input (useful for one error message)
...@@ -378,7 +387,9 @@ class Function(object): ...@@ -378,7 +387,9 @@ class Function(object):
try: try:
s = finder[item] s = finder[item]
except KeyError: except KeyError:
raise TypeError("Unknown input or state: %s" % item) # Print informative error message.
msg = get_info_on_inputs(named_inputs, n_unnamed_inputs)
raise TypeError("Unknown input or state: %s. %s" % (item, msg))
if s is DUPLICATE: if s is DUPLICATE:
raise TypeError("Ambiguous name: %s - please check the names of the inputs of your function for duplicates." % item) raise TypeError("Ambiguous name: %s - please check the names of the inputs of your function for duplicates." % item)
if isinstance(s, gof.Container): if isinstance(s, gof.Container):
...@@ -1014,3 +1025,43 @@ def convert_function_input(input): ...@@ -1014,3 +1025,43 @@ def convert_function_input(input):
else: else:
raise TypeError("Unknown input type: %s, expected Variable instance" % type(input), input) raise TypeError("Unknown input type: %s, expected Variable instance" % type(input), input)
def get_info_on_inputs(named_inputs, n_unnamed_inputs):
    """Build a human-readable description of a function's inputs.

    :param named_inputs: list of names (strings) of the named inputs.
    :param n_unnamed_inputs: count of inputs that have no name.
    :return: a sentence describing how many named/unnamed inputs exist and,
        for unnamed ones, why they cannot be set via keyword arguments.
    """
    n_named_inputs = len(named_inputs)
    # Pluralization suffixes, computed once up front.
    plural_named = 's' if n_named_inputs > 1 else ''
    plural_unnamed = 's' if n_unnamed_inputs > 1 else ''

    if n_named_inputs == 0 and n_unnamed_inputs == 0:
        return 'The function is supposed to have no input.'

    if n_named_inputs == 0:
        if n_unnamed_inputs == 1:
            return ("The function has a single input variable which has no "
                    "name, and thus cannot be assigned through a keyword"
                    " argument (use 'name=...' in a Variable's "
                    "constructor to give it a name).")
        return ("The function has %s inputs, but none of them is named,"
                " and thus they cannot be assigned through keyword "
                "arguments (use 'name=...' in a Variable's "
                "constructor to give it a name)." % n_unnamed_inputs)

    if n_unnamed_inputs == 0:
        return ("The function has %s named input%s (%s)." % (
            n_named_inputs, plural_named, ', '.join(named_inputs)))

    return ("The function has %s named input%s (%s), and %s unnamed "
            "input%s which thus cannot be accessed through keyword "
            "argument%s (use 'name=...' in a variable's constructor "
            "to give it a name)." % (
                n_named_inputs, plural_named, ', '.join(named_inputs),
                n_unnamed_inputs, plural_unnamed, plural_unnamed))
...@@ -579,6 +579,14 @@ def std_libs(): ...@@ -579,6 +579,14 @@ def std_libs():
def std_lib_dirs(): def std_lib_dirs():
return std_lib_dirs_and_libs()[1] return std_lib_dirs_and_libs()[1]
def _detect_gcc_version():
    """Run ``gcc -dumpversion`` once and return the version string.

    Returns None when gcc cannot be executed, instead of letting the
    failure abort the whole module import.
    """
    try:
        p = subprocess.Popen(['gcc', '-dumpversion'],
                             stdout=subprocess.PIPE)
        # communicate() waits for the process AND drains the pipe; the
        # previous wait()-then-readline() order can deadlock if the child
        # fills the pipe buffer before exiting.
        out, _ = p.communicate()
    except OSError:
        # gcc is not installed or not on the PATH.
        return None
    if not isinstance(out, str):
        # A binary pipe yields bytes on Python 3; normalize to str.
        out = out.decode()
    return out.strip()

# Detected once at import time; other modules compare this value against
# known-buggy gcc releases (e.g. '4.3.0' in ConvOp.c_compile_args).
gcc_version_str = _detect_gcc_version()

def gcc_version():
    """Return the gcc version detected at import time (None if no gcc)."""
    return gcc_version_str
def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[], def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[],
preargs=[]): preargs=[]):
""" """
......
"""Pretty-printing graphs, and the 'Print' Op. """Pretty-printing (pprint()), the 'Print' Op, debugprint() and pydotprint().
They all allow different way to print a graph or the result of an Op in a graph(Print Op)
""" """
import gof import gof
from copy import copy from copy import copy
...@@ -7,6 +8,9 @@ from theano import config ...@@ -7,6 +8,9 @@ from theano import config
from gof import Op, Apply from gof import Op, Apply
from theano.gof.python25 import any from theano.gof.python25 import any
#We import the debugprint here to have all printing of graph available from this module
from theano.compile.debugmode import debugprint
class Print(Op): class Print(Op):
"""This identity-like Op has the side effect of printing a message followed by its inputs """This identity-like Op has the side effect of printing a message followed by its inputs
when it runs. Default behaviour is to print the __str__ representation. Optionally, one when it runs. Default behaviour is to print the __str__ representation. Optionally, one
...@@ -307,9 +311,12 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn ...@@ -307,9 +311,12 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn
:param fct: the theano fct returned by theano.function. :param fct: the theano fct returned by theano.function.
:param outfile: the output file where to put the graph. :param outfile: the output file where to put the graph.
In the graph, box are an Apply Node(the execution of an op) and elipse are variable. In the graph, box are an Apply Node(the execution of an op) and ellipse are variable.
If variable have name they are used as the text(if multiple var have the same name, they will be merged in the graph). Otherwise, if a constant, we print the value and finaly we print the type + an uniq number to don't have multiple var merged. If variable have name they are used as the text(if multiple var have the same name, they will be merged in the graph).
Otherwise, if the variable is constant, we print the value and finaly we print the type + an uniq number to don't have multiple var merged.
We print the op of the apply in the Apply box with a number that represent the toposort order of application of those Apply. We print the op of the apply in the Apply box with a number that represent the toposort order of application of those Apply.
green ellipse are input to the graph and blue ellipse are output of the graph.
""" """
import pydot as pd import pydot as pd
......
"""Provides Ops for FFT and DCT.
"""
from theano.gof import Op, Apply, generic
from theano import tensor
import numpy.fft
import numpy
class GradTodo(Op):
    """Stub Op standing in for a gradient that is not implemented yet."""

    def make_node(self, x):
        # Single output with the same type as the single input.
        out = x.type()
        return Apply(self, [x], [out])

    def perform(self, node, inputs, outputs):
        # Evaluating this placeholder is always an error.
        raise NotImplementedError('TODO')


# Shared instance used by grad() implementations below.
grad_todo = GradTodo()
class FFT(Op):
    """Fast Fourier Transform

    Wraps numpy.fft.fft (or numpy.fft.ifft when inverse=True) as a Theano Op
    operating on a matrix, along either rows or columns.

    .. TODO:
        The current implementation just works for matrix inputs, and permits taking a 1D FFT over
        either rows or columns. Add support for N-D FFTs as provided by either numpy or FFTW
        directly.
    .. TODO:
        Give the C code that uses FFTW.
    .. TODO:
        unit tests.
    """

    # Index of the spectrogram in the outputs list:
    # don't return the plan object in the 'buf' output
    default_output = 0

    half = False
    """Only return the first half (positive-valued) of the frequency components"""

    def __init__(self, half=False, inverse=False):
        # half: drop the redundant (second) half of the transform output.
        # inverse: apply the inverse FFT instead of the forward FFT.
        self.half = half
        self.inverse=inverse

    def __eq__(self, other):
        # Two FFT Ops are interchangeable iff both flags match.
        return type(self) == type(other) and (self.half == other.half) and (self.inverse == other.inverse)

    def __hash__(self, ):
        # 9828743 is an arbitrary salt distinguishing this Op's hash;
        # must stay consistent with __eq__ (type, half, inverse).
        return hash(type(self)) ^ hash(self.half) ^ 9828743 ^ (self.inverse)

    def __ne__(self, other):
        return not(self == other)

    def make_node(self, frames, n, axis):
        """ compute an n-point fft of frames along given axis """
        _frames = tensor.as_tensor(frames, ndim=2)
        _n = tensor.as_tensor(n, ndim=0)
        _axis = tensor.as_tensor(axis, ndim=0)
        if self.half and _frames.type.dtype.startswith('complex'):
            # A complex input is rejected in half mode.
            raise TypeError('Argument to HalfFFT must not be complex', frames)
        spectrogram = tensor.zmatrix()
        buf = generic()
        # The `buf` output is present for future work
        # when we call FFTW directly and re-use the 'plan' that FFTW creates.
        # In that case, buf would store a CObject encapsulating the plan.
        rval = Apply(self, [_frames, _n, _axis], [spectrogram, buf])
        return rval

    def perform(self, node, (frames, n, axis), (spectrogram, buf)):
        # NOTE(review): Python 2 tuple-unpacking parameters; this module
        # predates Python 3.
        if self.inverse:
            fft_fn = numpy.fft.ifft
        else:
            fft_fn = numpy.fft.fft
        fft = fft_fn(frames, int(n), int(axis))
        if self.half:
            M, N = fft.shape
            if axis == 0:
                # Only even-length transforms split cleanly in half.
                if (M % 2):
                    raise ValueError('halfFFT on odd-length vectors is undefined')
                spectrogram[0] = fft[0:M/2, :]
            elif axis==1:
                if (N % 2):
                    raise ValueError('halfFFT on odd-length vectors is undefined')
                spectrogram[0] = fft[:,0:N/2]
            else:
                # Matrix input: only axis 0 or 1 makes sense.
                raise NotImplementedError()
        else:
            spectrogram[0] = fft

    def grad(self, (frames, n, axis), (g_spectrogram, g_buf)):
        # Gradient not implemented: grad_todo raises if ever evaluated.
        # n and axis are integer parameters, hence no gradient (None).
        return [grad_todo(frames), None, None]

# Pre-built instances covering the four flag combinations.
fft = FFT(half=False, inverse=False)
half_fft = FFT(half=True, inverse=False)
ifft = FFT(half=False, inverse=True)
half_ifft = FFT(half=True, inverse=True)
def dct_matrix(rows, cols, unitary=True):
    """
    Return a (rows x cols) matrix implementing a discrete cosine transform.

    This algorithm is adapted from Dan Ellis' Rastamat
    spec2cep.m, lines 15 - 20.

    :param rows: number of rows (DCT coefficients) in the result.
    :param cols: number of columns (input samples) in the result.
    :param unitary: when True, rescale the first row (by sqrt(0.5)) so the
        transform matrix is unitary.
    :return: numpy array of shape (rows, cols), dtype float64.
    """
    # Vectorized outer-product form of the original per-row loop (this also
    # avoids the Python-2-only `xrange` builtin):
    #   rval[i, j] = cos(i * (2*j + 1) / (2*cols) * pi) * sqrt(2/cols)
    i = numpy.arange(rows).reshape(rows, 1)
    col_range = numpy.arange(cols)
    scale = numpy.sqrt(2.0 / cols)
    rval = numpy.cos(i * (col_range * 2 + 1) / (2.0 * cols) * numpy.pi) * scale
    if unitary:
        # Rescale the first row for the unitary variant of the transform.
        rval[0] *= numpy.sqrt(0.5)
    return rval
...@@ -29,8 +29,6 @@ ...@@ -29,8 +29,6 @@
""" """
__docformat__ = 'restructedtext en' __docformat__ = 'restructedtext en'
import numpy
import theano import theano
from theano.tensor import opt from theano.tensor import opt
from theano import gof from theano import gof
......
...@@ -3209,8 +3209,8 @@ class AdvancedIncSubtensor(Op): ...@@ -3209,8 +3209,8 @@ class AdvancedIncSubtensor(Op):
broadcastable = x.type.broadcastable)]) broadcastable = x.type.broadcastable)])
raise NotImplementedError('Advanced indexing increment of x (of dimension %i) by y (of dimension %i) with these argument dimensions (%s) not supported yet'\ raise NotImplementedError('Advanced indexing increment of x (of dimension %i) by y (of dimension %i) with these argument dimensions (%s) not supported yet'\
% (x.ndim, y.ndim, ','.join(str(input.ndim) for input in inputs))) % (x.ndim, y.ndim, ','.join(str(input.ndim) for input in inputs)))
raise NotImplementedError('Advanced indexing increment of x by y with arguments (%s) not supported yet'\ raise NotImplementedError('Advanced indexing increment of x (of dim %i) by y (of dim %i) with arguments (%s) not supported yet'\
% ','.join(str(input) for input in inputs)) % (x.ndim, y.ndim, ','.join(str(input) for input in inputs)))
def perform(self, node, inputs, (out,)): def perform(self, node, inputs, (out,)):
# TODO: same thing as in AdvancedSubtensor's perform TODO # TODO: same thing as in AdvancedSubtensor's perform TODO
...@@ -3452,8 +3452,7 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False): ...@@ -3452,8 +3452,7 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False):
:return: symbolic expression of gradient of `cost` with respect to `wrt`. :return: symbolic expression of gradient of `cost` with respect to `wrt`.
If `wrt` is a list, then return a list containing the gradient of `cost` wrt If `wrt` is a list, then return a list containing the gradient of `cost` wrt
each element of the list. If an element of `wrt` is not differentiable each element of the list. If an element of `wrt` is not differentiable
with respect to the output, then a `TensorConstant` with an appropriate with respect to the output, then a zero variable is returned.
kind of zero is returned.
This function is a wrapper around a the more general function This function is a wrapper around a the more general function
`theano.gradient.grad_sources_inputs``. `theano.gradient.grad_sources_inputs``.
...@@ -3473,21 +3472,13 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False): ...@@ -3473,21 +3472,13 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False):
gmap = gradient.grad_sources_inputs([(cost, g_cost)], inputs + consider_constant, gmap = gradient.grad_sources_inputs([(cost, g_cost)], inputs + consider_constant,
warn_type=warn_type) warn_type=warn_type)
def zero(p): # Note that it is important to use `zeros_like` when there is no gradient,
return TensorConstant( # instead of returning a scalar constant equal to zero. Otherwise we lose
TensorType(dtype = p.type.dtype, broadcastable = []), # the guarantee that the gradient has same shape as `wrt`.
theano._asarray(0, dtype=p.type.dtype))
#try:
#it = iter(wrt)
#except:
#it = None
#if it: #hasattr(wrt, '__iter__'): # isinstance(wrt, (list, tuple)):
if isinstance(wrt, (list, tuple)): if isinstance(wrt, (list, tuple)):
return [gmap.get(p, zero(p)) for p in wrt] return [gmap.get(p, zeros_like(p)) for p in wrt]
else: else:
return gmap.get(wrt, zero(wrt)) return gmap.get(wrt, zeros_like(wrt))
class numeric_grad: class numeric_grad:
"""WRITEME""" """WRITEME"""
...@@ -3590,7 +3581,7 @@ class numeric_grad: ...@@ -3590,7 +3581,7 @@ class numeric_grad:
def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast_to_output_type=False): def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast_to_output_type=False):
""" WRITEME """ WRITEME
Raises an Exception if the difference between the analytic gradient and Raises an Exception if the difference between the analytic gradient and
numerical gradient (computed through the Finite Difference Method) exceeds numerical gradient (computed through the Finite Difference Method) exceeds
the given tolerance. the given tolerance.
...@@ -3607,7 +3598,7 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast ...@@ -3607,7 +3598,7 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast
try to make it a SMALL graph. Often verify grad is run in try to make it a SMALL graph. Often verify grad is run in
debug mode, which can be very slow if it has to verify a lot debug mode, which can be very slow if it has to verify a lot
of intermediate computations. of intermediate computations.
""" """
pt = [numpy.array(p) for p in pt] pt = [numpy.array(p) for p in pt]
...@@ -3619,9 +3610,8 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast ...@@ -3619,9 +3610,8 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast
tol = __builtin__.max(_type_tol[str(p.dtype)] for p in pt) tol = __builtin__.max(_type_tol[str(p.dtype)] for p in pt)
if rng is None: if rng is None:
rng = numpy.random raise TypeError('rng should be a valid instance of numpy.random.RandomState.',
from theano import tests as theano_tests # TODO This is an ugly import. Fix? 'You may want to use theano.tests.unittest_tools.verify_grad instead of theano.tensor.verify_grad.')
theano_tests.unittest_tools.seed_rng()
def function(inputs, output): def function(inputs, output):
if mode is None: if mode is None:
...@@ -3633,9 +3623,9 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast ...@@ -3633,9 +3623,9 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast
for test_num in xrange(n_tests): for test_num in xrange(n_tests):
tensor_pt = [value(p.copy(), name='input %i'%i) for i,p in enumerate(pt)] tensor_pt = [value(p.copy(), name='input %i'%i) for i,p in enumerate(pt)]
#op can be either a function or an actual Op instance #op can be either a function or an actual Op instance
o_output = op(*tensor_pt) o_output = op(*tensor_pt)
if isinstance(o_output,list) > 1: if isinstance(o_output,list) > 1:
raise NotImplementedError('cant (yet) autotest gradient of op with multiple outputs') raise NotImplementedError('cant (yet) autotest gradient of op with multiple outputs')
......
...@@ -100,6 +100,21 @@ class ConvOp(Op): ...@@ -100,6 +100,21 @@ class ConvOp(Op):
'imshp_logical', 'kshp_logical', 'kshp_logical_top_aligned'] 'imshp_logical', 'kshp_logical', 'kshp_logical_top_aligned']
"""These attributes uniquely identify the behaviour of this op for given inputs""" """These attributes uniquely identify the behaviour of this op for given inputs"""
def c_compile_args(self):
    """Extra compiler flags for this op's generated C code.

    gcc 4.3.0 segfaults while compiling the generated code at -O3 when the
    kernel shape is (1, 1); -O2 does not trigger the crash, so force it.
    """
    needs_workaround = (theano.gof.cmodule.gcc_version() in ['4.3.0']
                        and self.kshp == (1, 1))
    if needs_workaround:
        return ['-O2']
    return []
def c_no_compile_args(self):
    """Compiler flags to strip from this op's compilation.

    Companion to c_compile_args: when gcc 4.3.0 would segfault at -O3 for
    a (1, 1) kernel shape, remove -O3 from the default flag set.
    """
    needs_workaround = (theano.gof.cmodule.gcc_version() in ['4.3.0']
                        and self.kshp == (1, 1))
    if needs_workaround:
        return ['-O3']
    return []
@staticmethod @staticmethod
def getOutputShape(inshp, kshp, (dx,dy)=(1,1), mode='valid'): def getOutputShape(inshp, kshp, (dx,dy)=(1,1), mode='valid'):
""" """
......
...@@ -708,9 +708,14 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -708,9 +708,14 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
# advanced indexing is not working yet. When it works, do it to avoid # advanced indexing is not working yet. When it works, do it to avoid
# potentially misleading behavior in gradient computations! (although # potentially misleading behavior in gradient computations! (although
# typically we should not need the gradient w.r.t. dy). # typically we should not need the gradient w.r.t. dy).
# y_idx_range = tensor.arange(y_idx.shape[0]) y_idx_range = tensor.arange(y_idx.shape[0])
# return [g_dx * tensor.AdvancedIncSubtensor((y_idx_range, y_idx))(sm, -1, y_idx_range, y_idx), dy.dimshuffle(0, 'x') * g_dx, None] g_dy = tensor.sum(
return [None, dy.dimshuffle(0, 'x') * g_dx, None] g_dx * tensor.AdvancedIncSubtensor((y_idx_range, y_idx))(
sm, tensor.fill(dy, -1), y_idx_range, y_idx),
axis=1)
g_sm = dy.dimshuffle(0, 'x') * g_dx
g_y_idx = None
return [g_dy, g_sm, g_y_idx]
def c_code_cache_version(self): def c_code_cache_version(self):
return (2,) return (2,)
def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub): def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub):
......
...@@ -95,6 +95,16 @@ class T_CrossentropySoftmax1HotWithBiasDx(unittest.TestCase): ...@@ -95,6 +95,16 @@ class T_CrossentropySoftmax1HotWithBiasDx(unittest.TestCase):
softmax_output = numpy.random.rand(10, 5) softmax_output = numpy.random.rand(10, 5)
softmax_output /= softmax_output.sum(axis=1).reshape(10, 1) softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
utt.verify_grad(f, [softmax_output]) utt.verify_grad(f, [softmax_output])
def test1(self):
    """Check the gradient of crossentropy_softmax_1hot_with_bias_dx w.r.t. dy."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    softmax_output = rng.rand(10, 5)
    # Normalize each row so it is a valid softmax output.
    softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)

    def op_wrt_dy(dy):
        # Only dy varies; softmax output is fixed, targets are redrawn
        # from the same rng on each call.
        return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
            dy, softmax_output,
            rng.randint(low=0, high=5, size=10)))

    utt.verify_grad(op_wrt_dy, [rng.rand(10)])
class T_CrossentropySoftmaxArgmax1HotWithBias(unittest.TestCase): class T_CrossentropySoftmaxArgmax1HotWithBias(unittest.TestCase):
def setUp(self): def setUp(self):
......
...@@ -1664,12 +1664,12 @@ class test_grad(unittest.TestCase): ...@@ -1664,12 +1664,12 @@ class test_grad(unittest.TestCase):
self.failUnless(o.gval1 is g1) self.failUnless(o.gval1 is g1)
def test_1None_rval(self): def test_1None_rval(self):
"""grad: Test returning a single None from grad""" """grad: Test returning a single zero value from grad"""
o = test_grad.O() o = test_grad.O()
a1 = o.make_node() a1 = o.make_node()
g = grad(a1.outputs[0], a1.outputs[1]) g = grad(a1.outputs[0], a1.outputs[1])
self.failUnless(isinstance(g, TensorConstant)) self.failUnless(g.owner.op == fill)
self.failUnless(g.data == 0) self.failUnless(g.owner.inputs[1].data == 0)
try: try:
grad(a1.outputs[0], 'wtf') grad(a1.outputs[0], 'wtf')
except AttributeError, e: except AttributeError, e:
...@@ -1677,14 +1677,14 @@ class test_grad(unittest.TestCase): ...@@ -1677,14 +1677,14 @@ class test_grad(unittest.TestCase):
self.fail() self.fail()
def test_NNone_rval(self): def test_NNone_rval(self):
"""grad: Test returning some Nones from grad""" """grad: Test returning some zero value from grad"""
o = test_grad.O() o = test_grad.O()
a1 = o.make_node() a1 = o.make_node()
g0,g1,g2 = grad(a1.outputs[0], a1.inputs + [scalar('z')]) g0,g1,g2 = grad(a1.outputs[0], a1.inputs + [scalar('z')])
self.failUnless(o.gval0 is g0) self.failUnless(o.gval0 is g0)
self.failUnless(o.gval1 is g1) self.failUnless(o.gval1 is g1)
self.failUnless(isinstance(g2, TensorConstant)) self.failUnless(g2.owner.op == fill)
self.failUnless(g2.data == 0) self.failUnless(g2.owner.inputs[1].data == 0)
class T_op_cache(unittest.TestCase): class T_op_cache(unittest.TestCase):
def setUp(self): def setUp(self):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论