提交 d8323502 authored 作者: James Bergstra's avatar James Bergstra

merge + float32 gradient modifications (sorry for combining the two!)

Guillaume and I just found a bug in some experiment code that was
basically caused by confusing semantics of max(). The same sort of
thing applies to min. This is an FYI email to help others on the list
avoid this mistake, which is (I think) easy to make.
Python's max() function takes multiple arguments and returns the
largest one of them. (I won't go into the details of how it deals with
corner cases.)
IN CONTRAST
numpy's max() function takes multiple arguments and returns the
largest element[s] from the *first* argument. The second argument is
used to identify the axis along which to evaluate the [python-style]
max. The third argument is an array into which the result can be
written.
So for example:
.. code-block:: python
>>> max(3, 4)
4
>>> numpy.max(3, 4)
3
>>> a,b,c = [numpy.asarray(i) for i in [0,1,2]]
>>> numpy.max(a,b,c)
0
>>> c
array(0)
Be careful!
Theano defines a max function (called theano.tensor.max) that is
similar to numpy's max.
Theano also defines a function called theano.tensor.largest that is
closer to python's, but not identical since it works elemwise for
tensors. There is a corresponding 'smallest' function that is like
min()
...@@ -24,7 +24,19 @@ We describe the details of the compressed sparse matrix types. ...@@ -24,7 +24,19 @@ We describe the details of the compressed sparse matrix types.
is faster if we are modifying the array. After initial inserts, is faster if we are modifying the array. After initial inserts,
we can then convert to the appropriate sparse matrix format. we can then convert to the appropriate sparse matrix format.
There are four member variables that comprise a compressed matrix ``sp``: There are also these other types:
``dok_matrix``
Dictionary of Keys format. From their doc: This is an efficient structure for constructing sparse matrices incrementally.
``coo_matrix``
Coordinate format. From the scipy lil_matrix documentation: consider using the COO format when constructing large matrices.
These seem to be new formats planned for scipy 0.7.x:
``bsr_matrix``
Block Compressed Row (BSR). From their doc: The Block Compressed Row (BSR) format is very similar to the Compressed Sparse Row (CSR) format. BSR is appropriate for sparse matrices with dense sub matrices like the last example below. Block matrices often arise in vector-valued finite element discretizations. In such cases, BSR is considerably more efficient than CSR and CSC for many sparse arithmetic operations.
``dia_matrix``
Sparse matrix with DIAgonal storage
There are four member variables that comprise a compressed matrix ``sp``(for at least csc, csr and bsr):
``sp.shape`` ``sp.shape``
gives the shape of the matrix. gives the shape of the matrix.
......
...@@ -15,6 +15,7 @@ import module ...@@ -15,6 +15,7 @@ import module
from module import * from module import *
import debugmode # register DEBUG_MODE import debugmode # register DEBUG_MODE
from debugmode import DebugMode
from profilemode import ProfileMode from profilemode import ProfileMode
...@@ -13,7 +13,7 @@ from itertools import chain ...@@ -13,7 +13,7 @@ from itertools import chain
from functools import partial from functools import partial
import io, sys import io, sys
import function_module as F import function_module as F
from mode import default_mode import mode as get_mode
def name_join(*args): def name_join(*args):
...@@ -73,13 +73,15 @@ class Component(object): ...@@ -73,13 +73,15 @@ class Component(object):
""" """
raise NotImplementedError() raise NotImplementedError()
def make_no_init(self, mode=default_mode): def make_no_init(self, mode=None):
""" """
Allocates the necessary containers using allocate() and uses Allocates the necessary containers using allocate() and uses
build() with the provided mode to make an instance which will build() with the provided mode to make an instance which will
be returned. The initialize() method of the instance will not be returned. The initialize() method of the instance will not
be called. be called.
""" """
if mode is None:
mode = get_mode.default_mode
memo = {} memo = {}
self.allocate(memo) self.allocate(memo)
rval = self.build(mode, memo) rval = self.build(mode, memo)
...@@ -93,7 +95,7 @@ class Component(object): ...@@ -93,7 +95,7 @@ class Component(object):
arguments and the keyword arguments. If 'mode' is in the arguments and the keyword arguments. If 'mode' is in the
keyword arguments it will be passed to build(). keyword arguments it will be passed to build().
""" """
mode = kwargs.pop('mode', default_mode) mode = kwargs.pop('mode', get_mode.default_mode)
rval = self.make_no_init(mode) rval = self.make_no_init(mode)
if hasattr(rval, 'initialize'): if hasattr(rval, 'initialize'):
rval.initialize(*args, **kwargs) rval.initialize(*args, **kwargs)
...@@ -919,8 +921,8 @@ class Module(ComponentDict): ...@@ -919,8 +921,8 @@ class Module(ComponentDict):
""" """
InstanceType = ModuleInstance # By default, we use build ModuleInstance InstanceType = ModuleInstance # By default, we use build ModuleInstance
def __init__(self): def __init__(self, *args, **kw):
super(Module, self).__init__() super(Module, self).__init__(*args, **kw)
self.__dict__["local_attr"]={} self.__dict__["local_attr"]={}
self.__dict__["_components"]={} self.__dict__["_components"]={}
...@@ -1075,7 +1077,7 @@ class Module(ComponentDict): ...@@ -1075,7 +1077,7 @@ class Module(ComponentDict):
""" """
self.make_module_instance(args,kwargs) self.make_module_instance(args,kwargs)
mode = kwargs.pop('mode', default_mode) mode = kwargs.pop('mode', get_mode.default_mode)
rval = self.make_no_init(mode) rval = self.make_no_init(mode)
if hasattr(rval, 'initialize'): if hasattr(rval, 'initialize'):
rval.initialize(*args, **kwargs) rval.initialize(*args, **kwargs)
......
...@@ -581,12 +581,8 @@ class Test_check_isfinite(unittest.TestCase): ...@@ -581,12 +581,8 @@ class Test_check_isfinite(unittest.TestCase):
x = theano.tensor.dvector() x = theano.tensor.dvector()
f = theano.function([x], (x+2) * 5, mode=debugmode.DebugMode(check_isfinite=False)) f = theano.function([x], (x+2) * 5, mode=debugmode.DebugMode(check_isfinite=False))
# the DestroyMap checker should be triggered by Nan != Nan #nan should go through
try: f(numpy.log([3, -4, 5]))
f(numpy.log([3, -4, 5]))
assert False
except debugmode.BadDestroyMap:
pass
#inf should go through #inf should go through
infs = numpy.asarray([1.0,1.,1.])/0 infs = numpy.asarray([1.0,1.,1.])/0
......
...@@ -410,7 +410,7 @@ class CSMGrad(gof.op.Op): ...@@ -410,7 +410,7 @@ class CSMGrad(gof.op.Op):
return 82345 ^ hash(type(self)) ^ _kmap_hash(self.kmap) return 82345 ^ hash(type(self)) ^ _kmap_hash(self.kmap)
def make_node(self, data, gout_data, gout_indices): def make_node(self, data, gout_data, gout_indices):
g_data = data.type() g_data = gout_data.type()
return gof.Apply(self, [data, gout_data, gout_indices], [g_data]) return gof.Apply(self, [data, gout_data, gout_indices], [g_data])
def perform(self, node, (data, gout_data, gout_indices), (g_data,)): def perform(self, node, (data, gout_data, gout_indices), (g_data,)):
...@@ -1134,6 +1134,7 @@ class StructuredDotCSR(gof.Op): ...@@ -1134,6 +1134,7 @@ class StructuredDotCSR(gof.Op):
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data; dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data;
const dtype_%(a_val)s* __restrict__ Dval = (dtype_%(a_val)s*)%(a_val)s->data;
const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data; const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data;
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data; const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data;
......
...@@ -161,17 +161,11 @@ class test_structureddot(unittest.TestCase): ...@@ -161,17 +161,11 @@ class test_structureddot(unittest.TestCase):
def test_structuredot(self): def test_structuredot(self):
bsize = 2 bsize = 2
typenames = 'int8', 'int32', 'int16', 'int64', 'float32', 'float64', 'complex64', 'complex128' typenames = 'float32', 'int64', 'int8', 'int32', 'int16', 'float64', 'complex64', 'complex128'
for sparse_dtype in typenames: for dense_dtype in typenames:
for dense_dtype in typenames: for sparse_dtype in typenames:
print >> sys.stderr, dense_dtype, sparse_dtype
output_dtype = theano.scalar.upcast(sparse_dtype, dense_dtype)
#print 'output_dtype = ', output_dtype
#print '** sparse_dtype = ', sparse_dtype
#print '** dense_dtype = ', dense_dtype
# iterate for a few different random graph patterns # iterate for a few different random graph patterns
for i in range(10): for i in range(10):
spmat = sp.csc_matrix((4,6), dtype=sparse_dtype) spmat = sp.csc_matrix((4,6), dtype=sparse_dtype)
...@@ -182,11 +176,10 @@ class test_structureddot(unittest.TestCase): ...@@ -182,11 +176,10 @@ class test_structureddot(unittest.TestCase):
spmat[x,y] = numpy.random.rand()*10 spmat[x,y] = numpy.random.rand()*10
spmat = sp.csc_matrix(spmat) spmat = sp.csc_matrix(spmat)
kerns = tensor.Tensor(sparse_dtype, broadcastable=[False])('kerns') kerns = tensor.Tensor(broadcastable=[False], dtype=sparse_dtype)('kerns')
images = tensor.Tensor(dense_dtype, broadcastable=[False, False])('images') images = tensor.Tensor(broadcastable=[False, False], dtype=dense_dtype)('images')
#print 'kerns.dtype = ', kerns.dtype
#print 'images.dtype = ', images.dtype output_dtype = theano.scalar.upcast(sparse_dtype, dense_dtype)
## ##
# Test compressed-sparse column matrices ### # Test compressed-sparse column matrices ###
## ##
...@@ -195,60 +188,64 @@ class test_structureddot(unittest.TestCase): ...@@ -195,60 +188,64 @@ class test_structureddot(unittest.TestCase):
def buildgraphCSC(kerns,images): def buildgraphCSC(kerns,images):
csc = CSC(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape) csc = CSC(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
assert csc.type.dtype == sparse_dtype assert csc.type.dtype == sparse_dtype
return structured_dot(csc, images.T) rval = structured_dot(csc, images.T)
assert rval.type.dtype == output_dtype
return rval
out = buildgraphCSC(kerns,images) out = buildgraphCSC(kerns,images)
f = theano.function([kerns,images], out) f = theano.function([kerns,images], out)
# compute theano outputs # compute theano outputs
#print 'spmat.data', spmat.data.dtype.num kernvals = spmat.data[:spmat.size]
kernvals = numpy.array(spmat.data[:spmat.size]) imvals = 1.0 + 1.0 * numpy.array(numpy.arange(bsize*spmat.shape[1]).\
#print 'kdtype', kernvals.dtype, kernvals.shape, kernvals.ndim, kernvals.dtype.num
#print 'type of kernvals = ', kernvals.dtype
imvals = 1.0 * numpy.array(numpy.arange(bsize*spmat.shape[1]).\
reshape(bsize,spmat.shape[1]), dtype=dense_dtype) reshape(bsize,spmat.shape[1]), dtype=dense_dtype)
print 'kerntype', str(kernvals.dtype), kernvals.dtype.num
outvals = f(kernvals,imvals) outvals = f(kernvals,imvals)
print 'YAY'
print spmat.todense()
print imvals.T
print "OUT1", outvals
# compare to scipy # compare to scipy
c = spmat * (imvals.T) c = spmat * (imvals.T)
assert _is_dense(c) assert _is_dense(c)
assert str(outvals.dtype) == output_dtype assert str(outvals.dtype) == output_dtype
assert numpy.all(numpy.abs(outvals -
numpy.array(c, dtype=output_dtype)) < 1e-4)
if sparse_dtype.startswith('float') and dense_dtype.startswith('float'):
utt.verify_grad(buildgraphCSC,
[kernvals, imvals])
print 'BBB'
##
# Test compressed-sparse row matrices ###
##
spmat = spmat.tocsr()
# build theano graph
def buildgraphCSR(kerns,images):
csr = CSR(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
return structured_dot(csr, images.T)
out = buildgraphCSR(kerns,images)
f = theano.function([kerns,images], out)
# compute theano output
kernvals[:] = spmat.data[:spmat.size]
#kernvals = numpy.empty(spmat.size, dtype=dense_dtype)
imvals = 1.0 * numpy.arange(bsize*spmat.shape[1]).reshape(bsize,spmat.shape[1])
print 'kerntype2', str(kernvals.dtype), kernvals.dtype.num
outvals = f(kernvals,imvals)
print 'YAYAGI'
# compare to scipy
c = spmat * (imvals.T)
assert _is_dense(c)
assert str(outvals.dtype) == output_dtype
assert numpy.all(numpy.abs(outvals - assert numpy.all(numpy.abs(outvals -
numpy.array(c, dtype=output_dtype)) < 1e-4) numpy.array(c, dtype=output_dtype)) < 1e-4)
#if sparse_dtype.startswith('float') and dense_dtype.startswith('float'): # we could test more, but hopefully this suffices?
#utt.verify_grad(buildgraphCSC, [kernvals,imvals]) if sparse_dtype.startswith('float') and dense_dtype.startswith('float'):
utt.verify_grad( buildgraphCSR, [kernvals,imvals])
def notest(self):
##
# Test compressed-sparse row matrices ###
##
spmat = spmat.tocsr()
# build theano graph
def buildgraphCSR(kerns,images):
csr = CSR(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
assert csr.type.dtype == sparse_dtype
return structured_dot(csr, images.T)
out = buildgraphCSR(kerns,images)
f = theano.function([kerns,images], out)
# compute theano output
kernvals = spmat.data[:spmat.size]
imvals = 1.0 * numpy.arange(bsize*spmat.shape[1]).reshape(bsize,spmat.shape[1])
outvals = f(kernvals,imvals)
# compare to scipy
c = spmat * (imvals.T)
assert _is_dense(c)
assert str(outvals.dtype) == output_dtype
if not numpy.all(numpy.abs(outvals - numpy.array(c, dtype=output_dtype)) < 1e-5):
print numpy.abs(outvals - numpy.array(c, dtype=output_dtype))
assert numpy.all(numpy.abs(outvals -
numpy.array(c, dtype=output_dtype)) < 1e-4)
# we could test more, but hopefully this suffices?
if sparse_dtype.startswith('float') and dense_dtype.startswith('float'):
utt.verify_grad( buildgraphCSR, [kernvals,imvals])
def test_opt_unpack(self): def test_opt_unpack(self):
kerns = tensor.Tensor(dtype='int64', broadcastable=[False])('kerns') kerns = tensor.Tensor(dtype='int64', broadcastable=[False])('kerns')
......
...@@ -5,6 +5,7 @@ from basic import * ...@@ -5,6 +5,7 @@ from basic import *
import opt import opt
import blas import blas
import xlogx
import raw_random, randomstreams import raw_random, randomstreams
from randomstreams import \ from randomstreams import \
......
...@@ -264,13 +264,43 @@ class TensorType(Type): ...@@ -264,13 +264,43 @@ class TensorType(Type):
if 'int' in str(a.dtype): if 'int' in str(a.dtype):
return numpy.all(a==b) return numpy.all(a==b)
elif a.shape == (): #for comparing scalars, use broadcasting. elif a.shape == (): #for comparing scalars, use broadcasting.
# Note: according to James B, there was a reason for the
# following two lines, that may seem weird at first glance.
# If someone can figure out what it is, please say it here!
ones = numpy.ones(2) ones = numpy.ones(2)
return numpy.allclose(ones * a, ones*b) return numpy.allclose(ones * a, ones*b)
#elif str(a.dtype).startswith('complex'): #elif str(a.dtype).startswith('complex'):
# print >> sys.stderr, 'WARNING: skipping comparison of complex' # print >> sys.stderr, 'WARNING: skipping comparison of complex'
# return True # return True
else: else:
return numpy.allclose(a,b) cmp = numpy.allclose(a,b)
if cmp:
# Numpy claims they are close, this is good enough for us.
return True
# Numpy is unhappy, but it does not necessarily mean that a and
# b are different. Indeed, Numpy does not like missing values
# and will return False whenever some are found in a or b.
# The proper way would be to use the MaskArray stuff available
# in Numpy. However, it looks like it has been added to Numpy's
# core recently, so it may not be available to everyone. Thus,
# for now we use a home-made recipe, that should probably be
# revisited in the future.
a_missing = numpy.isnan(a)
if not a_missing.any():
# There are no missing values in a, thus this is not the
# reason why numpy.allclose(a, b) returned False.
return False
# The following line is what numpy.allclose bases its decision
# upon, according to its documentation.
rtol = 1.0000000000000001e-05
atol = 1e-8
cmp_elemwise = (numpy.absolute(a - b) <=
(atol + rtol * numpy.absolute(b)))
# Find places where both a and b have missing values.
both_missing = a_missing * numpy.isnan(b)
# Combine all information.
return (cmp_elemwise + both_missing).all()
return False return False
def __hash__(self): def __hash__(self):
...@@ -967,6 +997,11 @@ def smallest(*args): ...@@ -967,6 +997,11 @@ def smallest(*args):
"""Return the [elementwise] smallest of a variable number of arguments (like python's min).""" """Return the [elementwise] smallest of a variable number of arguments (like python's min)."""
return min(stack(*args), axis=0) return min(stack(*args), axis=0)
@constructor
def largest(*args):
"""Return the [elementwise] largest of a variable number of arguments (like python's max)."""
return max(stack(*args), axis=0)
########################## ##########################
# Comparison # Comparison
...@@ -2355,7 +2390,10 @@ def grad(cost, wrt, g_cost=None, consider_constant=[]): ...@@ -2355,7 +2390,10 @@ def grad(cost, wrt, g_cost=None, consider_constant=[]):
class numeric_grad: class numeric_grad:
"""WRITEME""" """WRITEME"""
def __init__(self, f, pt, eps=1.0e-7): type_eps = {'float64': 1e-7,
'float32': 3e-3}
def __init__(self, f, pt, eps=None):
"""Return the gradient of f at pt. """Return the gradient of f at pt.
This function computes the gradient by a one-sided finite differences of a This function computes the gradient by a one-sided finite differences of a
...@@ -2363,6 +2401,9 @@ class numeric_grad: ...@@ -2363,6 +2401,9 @@ class numeric_grad:
It is assumed that f(...) will return a scalar. It is assumed that f(...) will return a scalar.
It is assumed that all f's inputs are numpy.ndarray objects. It is assumed that all f's inputs are numpy.ndarray objects.
:param eps: the stepsize for the finite differencing. None means input
dtype-dependent. See `type_eps`.
""" """
def prod(inputs): def prod(inputs):
...@@ -2388,9 +2429,15 @@ class numeric_grad: ...@@ -2388,9 +2429,15 @@ class numeric_grad:
total_size = __builtin__.sum(prod(sh) for sh in shapes) total_size = __builtin__.sum(prod(sh) for sh in shapes)
working_dtype = __builtin__.min((self.type_eps[dt], dt) for dt in dtypes)[1]
#create un-initialized memory #create un-initialized memory
x = numpy.ndarray((total_size,), dtype=dtypes[0]) x = numpy.ndarray((total_size,), dtype=working_dtype)
gx = numpy.ndarray((total_size,), dtype=dtypes[0]) gx = numpy.ndarray((total_size,), dtype=working_dtype)
if eps is None:
eps = __builtin__.max(self.type_eps[dt] for dt in dtypes)
#set up aliases so that apt[i] is backed by memory in x #set up aliases so that apt[i] is backed by memory in x
# and self.gf is backed by memory in gx # and self.gf is backed by memory in gx
...@@ -2430,10 +2477,10 @@ class numeric_grad: ...@@ -2430,10 +2477,10 @@ class numeric_grad:
errs = [] errs = []
for a, b in zip(g_pt, self.gf): for a, b in zip(g_pt, self.gf):
errs.append(numpy.max(numeric_grad.abs_rel_err(a,b))) errs.append(numpy.max(numeric_grad.abs_rel_err(a,b)))
return numpy.max(errs) return numpy.max(errs), numpy.argmax(errs)
def verify_grad(op, pt, n_tests=2, rng=None, eps=1.0e-7, tol=0.0001): def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None):
""" WRITEME """ WRITEME
Raises an Exception if the difference between the analytic gradient and Raises an Exception if the difference between the analytic gradient and
...@@ -2445,12 +2492,19 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=1.0e-7, tol=0.0001): ...@@ -2445,12 +2492,19 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=1.0e-7, tol=0.0001):
:param pt: the list of numpy.ndarrays to use as inputs to the op :param pt: the list of numpy.ndarrays to use as inputs to the op
:param n_tests: number of times to run the test :param n_tests: number of times to run the test
:param rng: random number generator from which to draw random samples :param rng: random number generator from which to draw random samples
:param eps: stepsize used in the Finite Difference Method :param eps: stepsize used in the Finite Difference Method (Default None is type-dependent)
:param tol: relative tolerance used as threshold for gradient comparison :param tol: relative tolerance used as threshold for gradient comparison
""" """
pt = [numpy.array(p) for p in pt] pt = [numpy.array(p) for p in pt]
_type_tol = dict( # relative error tolerances for different types
float32=1e-2,
float64=1e-4)
if tol is None:
tol = __builtin__.max(_type_tol[str(p.dtype)] for p in pt)
if rng is None: if rng is None:
rng = numpy.random rng = numpy.random
unittest_tools.seed_rng() unittest_tools.seed_rng()
...@@ -2493,9 +2547,9 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=1.0e-7, tol=0.0001): ...@@ -2493,9 +2547,9 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=1.0e-7, tol=0.0001):
if not isinstance(analytic_grad, (list, tuple)): if not isinstance(analytic_grad, (list, tuple)):
analytic_grad = [analytic_grad] analytic_grad = [analytic_grad]
max_err = num_grad.max_err(analytic_grad) max_err, max_err_pos = num_grad.max_err(analytic_grad)
if max_err > tol: if max_err > tol:
raise Exception(verify_grad.E_grad, (max_err, tol)) raise Exception(verify_grad.E_grad, (max_err, tol, max_err_pos))
verify_grad.E_grad = 'gradient error exceeded tolerance' verify_grad.E_grad = 'gradient error exceeded tolerance'
"""This error is raised when a gradient is calculated, but incorrect.""" """This error is raised when a gradient is calculated, but incorrect."""
...@@ -848,7 +848,13 @@ def binary_crossentropy(output, target): ...@@ -848,7 +848,13 @@ def binary_crossentropy(output, target):
return -(target * tensor.log(output) + (1.0 - target) * tensor.log(1.0 - output)) return -(target * tensor.log(output) + (1.0 - target) * tensor.log(1.0 - output))
def categorical_crossentropy(coding_dist, true_dist, axis=1): def categorical_crossentropy(coding_dist, true_dist, axis=1):
"""Return the cross-entropy between an approximating distribution and a true distribution """
WARNING: THIS FUNCTION IS UNNECESSARILY POLYMORPHIC.
We ultimately don't want the polymorphism, and will move this function to pylearn.algorithms.cost.
The 1hot version will be removed.
The length of the documentation here is a form of code smell.
Return the cross-entropy between an approximating distribution and a true distribution
The cross entropy between two probability distributions measures the average number of bits The cross entropy between two probability distributions measures the average number of bits
needed to identify an event from a set of possibilities, if a coding scheme is used based needed to identify an event from a set of possibilities, if a coding scheme is used based
...@@ -876,6 +882,7 @@ def categorical_crossentropy(coding_dist, true_dist, axis=1): ...@@ -876,6 +882,7 @@ def categorical_crossentropy(coding_dist, true_dist, axis=1):
:returns: the cross entropy between each coding and true distribution. :returns: the cross entropy between each coding and true distribution.
""" """
assert true_dist.ndim in (1,2)
if true_dist.ndim == 2: if true_dist.ndim == 2:
return -theano.sum(true_dist * log(coding_dist), axis=axis) return -theano.sum(true_dist * log(coding_dist), axis=axis)
else: else:
......
import theano import theano
from theano import tensor, scalar
import numpy import numpy
from elemwise import Elemwise
from theano import scalar
class XlogX(scalar.UnaryScalarOp): class XlogX(scalar.UnaryScalarOp):
""" """
Compute X * log(X), with special case 0 log(0) = 0. Compute X * log(X), with special case 0 log(0) = 0.
...@@ -24,7 +26,7 @@ class XlogX(scalar.UnaryScalarOp): ...@@ -24,7 +26,7 @@ class XlogX(scalar.UnaryScalarOp):
: %(x)s * log(%(x)s);""" % locals() : %(x)s * log(%(x)s);""" % locals()
raise NotImplementedError('only floatingpoint is implemented') raise NotImplementedError('only floatingpoint is implemented')
scalar_xlogx = XlogX(scalar.upgrade_to_float, name='scalar_xlogx') scalar_xlogx = XlogX(scalar.upgrade_to_float, name='scalar_xlogx')
xlogx = tensor.Elemwise(scalar_xlogx, name='xlogx') xlogx = Elemwise(scalar_xlogx, name='xlogx')
class XlogY0(scalar.BinaryScalarOp): class XlogY0(scalar.BinaryScalarOp):
...@@ -48,5 +50,4 @@ class XlogY0(scalar.BinaryScalarOp): ...@@ -48,5 +50,4 @@ class XlogY0(scalar.BinaryScalarOp):
: %(x)s * log(%(y)s);""" % locals() : %(x)s * log(%(y)s);""" % locals()
raise NotImplementedError('only floatingpoint is implemented') raise NotImplementedError('only floatingpoint is implemented')
scalar_xlogy0 = XlogY0(scalar.upgrade_to_float, name='scalar_xlogy0') scalar_xlogy0 = XlogY0(scalar.upgrade_to_float, name='scalar_xlogy0')
xlogy0 = tensor.Elemwise(scalar_xlogy0, name='xlogy0') xlogy0 = Elemwise(scalar_xlogy0, name='xlogy0')
...@@ -42,7 +42,7 @@ def seed_rng(pseed=None): ...@@ -42,7 +42,7 @@ def seed_rng(pseed=None):
numpy.random.seed(seed) numpy.random.seed(seed)
return seed return seed
def verify_grad(op, pt, n_tests=2, rng=None, eps=1.0e-7, tol=0.0001): def verify_grad(op, pt, n_tests=2, rng=None, *args, **kwargs):
""" """
Wrapper for tensor/basic.py:verify_grad Wrapper for tensor/basic.py:verify_grad
Takes care of seeding the random number generator if None is given Takes care of seeding the random number generator if None is given
...@@ -50,5 +50,5 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=1.0e-7, tol=0.0001): ...@@ -50,5 +50,5 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=1.0e-7, tol=0.0001):
if rng is None: if rng is None:
seed_rng() seed_rng()
rng = numpy.random rng = numpy.random
T.verify_grad(op, pt, n_tests, rng, eps, tol) T.verify_grad(op, pt, n_tests, rng, *args, **kwargs)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论