Commit f9ca8f9d, authored by Olivier Delalleau

Make grad more general (A. Bergeron)

The gradient code is moved from theano/tensor/tensor_grad.py to theano/gradient.py, which makes it work with sparse variables as well as tensors. This commit was originally written by Arnaud Bergeron; I re-authored it to avoid a big merge in the repository history.
Parent commit: 0e018bc4
""" """
Theano is an optimizing compiler in Python, built to evaluate complicated expressions Theano is an optimizing compiler in Python, built to evaluate
(especially matrix-valued ones) as quickly as possible. complicated expressions (especially matrix-valued ones) as quickly as
Theano compiles expression graphs (see :doc:`graph` ) that are built by Python code. possible. Theano compiles expression graphs (see :doc:`graph` ) that
The expressions in these graphs are called `Apply` nodes and the variables in these graphs are called `Variable` nodes. are built by Python code. The expressions in these graphs are called
`Apply` nodes and the variables in these graphs are called `Variable`
You compile a graph by calling `function`, which takes a graph, and returns a callable object. nodes.
One of theano's most important features is that `function` can transform your graph before
compiling it. You compile a graph by calling `function`, which takes a graph, and
It can replace simple expressions with faster or more numerically stable implementations. returns a callable object. One of theano's most important features is
that `function` can transform your graph before compiling it. It can
replace simple expressions with faster or more numerically stable
implementations.
To learn more, check out: To learn more, check out:
...@@ -37,7 +40,8 @@ logging_default_handler.setFormatter(logging_default_formatter) ...@@ -37,7 +40,8 @@ logging_default_handler.setFormatter(logging_default_formatter)
theano_logger.addHandler(logging_default_handler) theano_logger.addHandler(logging_default_handler)
theano_logger.setLevel(logging.WARNING) theano_logger.setLevel(logging.WARNING)
import configparser, configdefaults import configparser
import configdefaults
config = configparser.TheanoConfigParser() config = configparser.TheanoConfigParser()
...@@ -87,8 +91,10 @@ from updates import Updates ...@@ -87,8 +91,10 @@ from updates import Updates
import tensor import tensor
import scalar import scalar
#import sparse #we don't import by default as we don't want to force having scipy installed. #we don't import by default as we don't want to force having scipy installed.
#import sparse
import gradient import gradient
from gradient import Rop, Lop, grad
import gof import gof
if config.device.startswith('gpu') or config.init_gpu_device.startswith('gpu'): if config.device.startswith('gpu') or config.init_gpu_device.startswith('gpu'):
...@@ -126,8 +132,10 @@ del _all, _divide, _over, _under, _invalid ...@@ -126,8 +132,10 @@ del _all, _divide, _over, _under, _invalid
## import scalar_opt ## import scalar_opt
### This is defined here because it is designed to work across symbolic datatypes ### This is defined here because it is designed to work across symbolic
# (Sparse and Tensor) # datatypes (Sparse and Tensor)
def dot(l, r): def dot(l, r):
"""Return a symbolic matrix/dot product between l and r """ """Return a symbolic matrix/dot product between l and r """
rval = NotImplemented rval = NotImplemented
...@@ -144,5 +152,6 @@ def dot(l, r): ...@@ -144,5 +152,6 @@ def dot(l, r):
except Exception, e1: except Exception, e1:
rval = NotImplemented rval = NotImplemented
if rval == NotImplemented: if rval == NotImplemented:
raise NotImplementedError("Dot failed for the following reasons:", (e0, e1)) raise NotImplementedError("Dot failed for the following reasons:",
(e0, e1))
return rval return rval
差异被折叠。
...@@ -137,9 +137,13 @@ def sp_ones_like(x): ...@@ -137,9 +137,13 @@ def sp_ones_like(x):
data, indices, indptr, shape = csm_properties(x) #TODO: don't restrict to CSM formats data, indices, indptr, shape = csm_properties(x) #TODO: don't restrict to CSM formats
return CSM(format=x.format)(tensor.ones_like(data), indices, indptr, shape) return CSM(format=x.format)(tensor.ones_like(data), indices, indptr, shape)
def sp_zeros_like(x): def sp_zeros_like(x):
_, _, indptr, shape = csm_properties(x) #TODO: don't restrict to CSM formats #TODO: don't restrict to CSM formats
return CSM(format=x.format)(numpy.array([], dtype=x.type.dtype), numpy.array([]), tensor.zeros_like(indptr), shape) _, _, indptr, shape = csm_properties(x)
return CSM(format=x.format)(numpy.array([], dtype=x.type.dtype),
numpy.array([]), tensor.zeros_like(indptr),
shape)
class _sparse_py_operators: class _sparse_py_operators:
...@@ -177,6 +181,9 @@ class _sparse_py_operators: ...@@ -177,6 +181,9 @@ class _sparse_py_operators:
# that stored zeros *do* count in the size. # that stored zeros *do* count in the size.
size = property(lambda self: csm_data(self).size) size = property(lambda self: csm_data(self).size)
def zeros_like(model):
return sp_zeros_like(model)
class SparseVariable(gof.Variable, _sparse_py_operators): class SparseVariable(gof.Variable, _sparse_py_operators):
dtype = property(lambda self: self.type.dtype) dtype = property(lambda self: self.type.dtype)
...@@ -189,10 +196,6 @@ class SparseVariable(gof.Variable, _sparse_py_operators): ...@@ -189,10 +196,6 @@ class SparseVariable(gof.Variable, _sparse_py_operators):
def __repr__(self): def __repr__(self):
return str(self) return str(self)
def zeros_like(model, dtype=None):
# TODO: don't ignore dtype
return sp_zeros_like(model)
class SparseConstantSignature(tuple): class SparseConstantSignature(tuple):
def __eq__(self, other): def __eq__(self, other):
(a, b), (x,y) = self, other (a, b), (x,y) = self, other
......
...@@ -824,13 +824,16 @@ class test_zeros_like(unittest.TestCase): ...@@ -824,13 +824,16 @@ class test_zeros_like(unittest.TestCase):
def test(self): def test(self):
x = theano.sparse.csr_matrix() x = theano.sparse.csr_matrix()
f = theano.function([x], theano.sparse.sp_zeros_like(x)) f = theano.function([x], theano.sparse.sp_zeros_like(x))
vx = scipy.sparse.csr_matrix(numpy.asarray(numpy.random.binomial(1, 0.5, (100, 100)), dtype=theano.config.floatX)) vx = scipy.sparse.csr_matrix(numpy.asarray(
numpy.random.binomial(1, 0.5, (100, 100)),
dtype=theano.config.floatX))
fx = f(vx) fx = f(vx)
assert fx.nnz == 0 assert fx.nnz == 0
assert fx.shape == vx.shape assert fx.shape == vx.shape
def test_shape_i(): def test_shape_i():
sparse_dtype = 'float32' sparse_dtype = 'float32'
......
...@@ -30,7 +30,6 @@ import sharedvar # adds shared-variable constructors ...@@ -30,7 +30,6 @@ import sharedvar # adds shared-variable constructors
# `theano.shared` and `tensor._shared`. # `theano.shared` and `tensor._shared`.
from sharedvar import tensor_constructor as _shared from sharedvar import tensor_constructor as _shared
def shared(*args, **kw): def shared(*args, **kw):
""" """
Backward-compatibility wrapper around `tensor._shared`. Backward-compatibility wrapper around `tensor._shared`.
...@@ -50,6 +49,5 @@ def shared(*args, **kw): ...@@ -50,6 +49,5 @@ def shared(*args, **kw):
import nnet # used for softmax, sigmoid, etc. import nnet # used for softmax, sigmoid, etc.
from theano.gradient import Rop, Lop, grad, numeric_grad, verify_grad, \
from tensor_grad import Rop, Lop, grad, numeric_grad, verify_grad, \ jacobian, hessian
jacobian, hessian
...@@ -1450,16 +1450,12 @@ class _tensor_py_operators: ...@@ -1450,16 +1450,12 @@ class _tensor_py_operators:
def get_constant_value(self): def get_constant_value(self):
return get_constant_value(self) return get_constant_value(self)
def zeros_like(model):
return zeros_like(model)
class TensorVariable(_tensor_py_operators, Variable): class TensorVariable(_tensor_py_operators, Variable):
"""Subclass to add the tensor operators to the basic `Variable` class.""" """Subclass to add the tensor operators to the basic `Variable` class."""
def zeros_like(model, dtype=None):
"Used for grad, Lop and Rop"
# Tested through the zeros_like method below
if dtype is None:
dtype = model.type.dtype
return fill(model, constant(0.0, dtype=dtype))
TensorType.Variable = TensorVariable TensorType.Variable = TensorVariable
...@@ -2369,7 +2365,9 @@ def ones_like(model, dtype=None): ...@@ -2369,7 +2365,9 @@ def ones_like(model, dtype=None):
@constructor @constructor
def zeros_like(model, dtype=None): def zeros_like(model, dtype=None):
"""equivalent of numpy.zeros_like""" """equivalent of numpy.zeros_like"""
return TensorVariable.zeros_like(model, dtype=None) if dtype is None:
dtype = model.type.dtype
return fill(model, constant(0.0, dtype=dtype))
def zeros(shape, dtype=config.floatX): def zeros(shape, dtype=config.floatX):
""" """
......
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论