提交 94ccd2ae authored 作者: lamblin's avatar lamblin

Merge pull request #981 from nouiz/sparse_grad_import

Sparse grad import
......@@ -161,3 +161,22 @@ def dot(l, r):
raise NotImplementedError("Dot failed for the following reasons:",
(e0, e1))
return rval
def get_constant_value(v):
    """Return the constant scalar (0-D) value underlying variable `v`.

    The actual extraction is delegated to `tensor.get_constant_value`,
    which digs through dimshuffles, fills, allocs, rebroadcasts and casts.

    When `theano.sparse` is available and `v` is a sparse variable
    produced by a CSM op, the lookup is performed on the CSM's data
    input (its first input) instead of on `v` itself.

    If `v` is not some view of constant data, a TypeError is raised.
    """
    target = v
    # theano.sparse is optional: only inspect it when it has been loaded.
    if hasattr(theano, 'sparse'):
        sparse_mod = theano.sparse
        if isinstance(v.type, sparse_mod.SparseType):
            node = v.owner
            if node is not None and isinstance(node.op, sparse_mod.CSM):
                # Look through the CSM op at the data it wraps.
                target = node.inputs[0]
    return tensor.get_constant_value(target)
......@@ -804,36 +804,11 @@ def _populate_grad_dict(var_to_node_to_idx,
no_constant_value = True
try:
constant_value = tensor.get_constant_value(term)
constant_value = theano.get_constant_value(term)
no_constant_value = False
except TypeError:
pass
extra_msg = ''
# The above won't work if it's a sparse type, handle sparse
# types here
if no_constant_value:
if isinstance(term.type, theano.sparse.SparseType):
if term.owner is not None and isinstance(term.owner.op,
theano.sparse.CSM):
data = term.owner.inputs[0]
try:
constant_value = tensor.get_constant_value(data)
no_constant_value = False
except TypeError:
print theano.printing.min_informative_str(data)
extra_msg += " It is a CSM, but its data isn't constant."
pass
else:
extra_msg += " It is a SparseType but theano doesn't know how"
extra_msg += " to turn it into a constant."
#end if CSM
else:
extra_msg += " It is not a SparseType."
#end if SparseType
#end if no_constant_value
if no_constant_value:
msg = "%s.grad returned %s of type %s for input"
msg += " %d. This input's only connections to "
......@@ -844,7 +819,6 @@ def _populate_grad_dict(var_to_node_to_idx,
msg += "DisconnectedType and theano can't "
msg += "simplify it to a constant, so it's not "
msg += "verifiably zeros."
msg += extra_msg
msg = msg % (str(node.op), str(term),
str(type(term)), i)
......
import numpy
try:
import scipy
import scipy.sparse
imported_scipy = True
except ImportError:
imported_scipy = False
......@@ -8,6 +8,7 @@ except ImportError:
import theano
from theano import gof
def _is_sparse(x):
"""
@rtype: boolean
......
......@@ -65,7 +65,7 @@ def check_equal_numpy(x, y):
elif (isinstance(x, numpy.random.RandomState) and
isinstance(y, numpy.random.RandomState)):
return python_all(numpy.all(a == b) for a, b in
zip(x.__getstate__(), y.__getstate__()))
izip(x.__getstate__(), y.__getstate__()))
else:
return x == y
......@@ -3823,7 +3823,7 @@ class Subtensor(Op):
# infer the broadcasting pattern
padded = (idx_list
+ [slice(None, None, None)] * (x.type.ndim - len(idx_list)))
broadcastable = [bc for p, bc in zip(padded, x.type.broadcastable)
broadcastable = [bc for p, bc in izip(padded, x.type.broadcastable)
if isinstance(p, slice)]
input_types = Subtensor.collapse(idx_list,
......@@ -3832,7 +3832,7 @@ class Subtensor(Op):
raise IndexError(
"Not enough inputs to fill in the Subtensor template.",
inputs, idx_list)
for input, expected_type in zip(inputs, input_types):
for input, expected_type in izip(inputs, input_types):
if input.type != expected_type:
raise TypeError(
"Wrong type for Subtensor template. Expected %s, got %s."
......@@ -4458,7 +4458,7 @@ class IncSubtensor(Op):
raise IndexError(
"Not enough inputs to fill in the Subtensor template.",
inputs, idx_list)
for input, expected_type in zip(inputs, input_types):
for input, expected_type in izip(inputs, input_types):
if input.type != expected_type:
raise TypeError(
"Wrong type for Subtensor template. Expected %s, got %s."
......@@ -5830,7 +5830,7 @@ class PermuteRowElements(Op):
# Compute the broadcastable pattern of the output
out_broadcastable = [xb and yb for xb, yb in
zip(x.type.broadcastable, y.type.broadcastable)]
izip(x.type.broadcastable, y.type.broadcastable)]
out_type = tensor(dtype=x.type.dtype, broadcastable=out_broadcastable)
inputlist = [x, y, inverse]
......@@ -5897,7 +5897,7 @@ class PermuteRowElements(Op):
# Make sure the output is big enough
out_s = []
for xdim, ydim in zip(x_s, y_s):
for xdim, ydim in izip(x_s, y_s):
if xdim == ydim:
outdim = xdim
elif xdim == 1:
......
......@@ -15,6 +15,7 @@ from theano.printing import min_informative_str, pprint
from theano.gof.python25 import all, any
from theano.tensor.utils import hash_from_dict
from theano.gradient import DisconnectedType
from theano.gof.null_type import NullType
config = theano.config
......@@ -538,14 +539,14 @@ class Elemwise(Op):
# it is multiplied by nout because Elemwise supports multiple outputs
# (nout of them)
out_broadcastables = [[all(bcast)
for bcast in zip(*[input.type.broadcastable
for bcast in izip(*[input.type.broadcastable
for input in inputs])]] * shadow.nout
#inplace_pattern maps output idx -> input idx
inplace_pattern = self.inplace_pattern
if inplace_pattern:
for overwriter, overwritten in inplace_pattern.items():
for ob, ib in zip(out_broadcastables[overwriter],
for ob, ib in izip(out_broadcastables[overwriter],
inputs[overwritten].type.broadcastable):
if ib and not ob:
raise ValueError((
......@@ -560,7 +561,7 @@ class Elemwise(Op):
([i.type.dtype for i in inputs], out_dtypes, inplace_pattern)))
outputs = [TensorType(dtype=dtype, broadcastable=broadcastable)()
for dtype, broadcastable in zip(out_dtypes, out_broadcastables)
for dtype, broadcastable in izip(out_dtypes, out_broadcastables)
]
return Apply(self, inputs, outputs)
......@@ -608,7 +609,7 @@ class Elemwise(Op):
bgrads = self._bgrad(inputs, ograds)
rop_out = None
for jdx, (inp, eval_point) in enumerate(zip(inputs,
for jdx, (inp, eval_point) in enumerate(izip(inputs,
eval_points)):
# if None, then we can just ignore this branch ..
# what we do is to assume that for any non-differentiable
......@@ -638,9 +639,42 @@ class Elemwise(Op):
def grad(self, inputs, ograds):
outs = self(*inputs)
if not isinstance(outs, (list,tuple)):
outs = [ outs ]
#compute grad with respect to broadcasted input
rval = self._bgrad(inputs, ograds)
# TODO: make sure that zeros are clearly identifiable
# to the gradient.grad method when the outputs have
# some integer and some floating point outputs
if False in [str(out.type.dtype).find('int') == -1
for out in outs]:
# For integer output, return value may
# only be zero or undefined
# We don't bother with trying to check
# that the scalar ops correctly
# returned something that evaluates to 0,
# we just make the return
# value obviously zero so that gradient.grad
# can tell this op did
# the right thing.
new_rval = []
for elem, ipt in izip(rval, inputs):
if isinstance(elem.type, (NullType, DisconnectedType)):
new_rval.append(elem)
else:
elem = ipt.zeros_like()
if str(elem.type.dtype).find('int') != -1:
elem = elem.astype(theano.config.floatX)
assert str(elem.type.dtype).find('int') == -1
new_rval.append(elem)
return new_rval
#sum out the broadcasted dimensions
for i, ipt in enumerate(inputs):
if rval[i] is None:
......@@ -724,7 +758,7 @@ class Elemwise(Op):
*[transform(ipt) for ipt in node.inputs])
return new_r
ret = []
for scalar_igrad, ipt in zip(scalar_igrads, inputs):
for scalar_igrad, ipt in izip(scalar_igrads, inputs):
if scalar_igrad is None:
# undefined gradient
ret.append(None)
......@@ -735,7 +769,7 @@ class Elemwise(Op):
def perform(self, node, inputs, output_storage):
maxsize = max(len(input.shape) for input in inputs)
for dims in zip(*[([(1, True)] * (maxsize - len(input.shape))
for dims in izip(*[([(1, True)] * (maxsize - len(input.shape))
+ zip(input.shape, sinput.type.broadcastable))
for input, sinput in zip(inputs, node.inputs)]):
if max(d for d, b in dims) != 1 and (1, False) in dims:
......@@ -767,7 +801,7 @@ class Elemwise(Op):
# Determine the shape of outputs
out_shape = []
for values in zip(*[input.shape for input in inputs]):
for values in izip(*[input.shape for input in inputs]):
if numpy.prod(values) == 0:
# All non-broadcasted dimensions should be zero
assert max(values) <= 1
......@@ -777,7 +811,7 @@ class Elemwise(Op):
out_shape = tuple(out_shape)
if not self.inplace_pattern:
for output, storage in zip(node.outputs, output_storage):
for output, storage in izip(node.outputs, output_storage):
odat = storage[0]
if odat is not None:
if odat.shape != out_shape:
......@@ -789,7 +823,7 @@ class Elemwise(Op):
storage[0] = odat
else:
for i, (output, storage) in enumerate(
zip(node.outputs, output_storage)):
izip(node.outputs, output_storage)):
#i is an output idx
if i in self.inplace_pattern:
odat = inputs[self.inplace_pattern[i]]
......@@ -883,7 +917,7 @@ class Elemwise(Op):
else:
# there must be some input that is not broadcastable in
# dimension 'dim'
for ishp, i in zip(i_shapes, node.inputs):
for ishp, i in izip(i_shapes, node.inputs):
if isinstance(i.type, theano.scalar.Scalar):
continue # we skip scalar
if not i.type.broadcastable[dim]:
......@@ -926,7 +960,7 @@ class Elemwise(Op):
# These are the outputs that we will need to allocate
# (output, name, name of the c type), transposed
real = zip(*[(r, s, r.type.dtype_specs()[1])
for r, s in zip(node.outputs, onames) if r not in dmap])
for r, s in izip(node.outputs, onames) if r not in dmap])
if real:
real_outputs, real_onames, real_odtypes = real
else:
......@@ -936,7 +970,7 @@ class Elemwise(Op):
# (output, name), transposed (c type name not needed since we don't
# need to allocate).
aliased = zip(*[(r, s)
for (r, s) in zip(node.outputs, onames) if r in dmap])
for (r, s) in izip(node.outputs, onames) if r in dmap])
if aliased:
aliased_outputs, aliased_onames = aliased
else:
......@@ -952,7 +986,7 @@ class Elemwise(Op):
# dimensionality)
nnested = len(orders[0])
sub = dict(sub)
for i, (input, iname) in enumerate(zip(inputs, inames)):
for i, (input, iname) in enumerate(izip(inputs, inames)):
# the c generators will substitute the input names for
# references to loop variables lv0, lv1, ...
sub['lv%i' % i] = iname
......@@ -964,7 +998,7 @@ class Elemwise(Op):
# We loop over the "real" outputs, i.e., those that are not
# inplace (must be allocated) and we declare/allocate/check
# them
for output, oname, odtype in zip(
for output, oname, odtype in izip(
real_outputs, real_onames, real_odtypes):
i += 1 # before this loop, i = number of inputs
sub['lv%i' % i] = oname
......@@ -980,7 +1014,7 @@ class Elemwise(Op):
# inplace (overwrite the contents of one of the inputs) and
# make the output pointers point to their corresponding input
# pointers.
for output, oname in zip(aliased_outputs, aliased_onames):
for output, oname in izip(aliased_outputs, aliased_onames):
olv_index = inputs.index(dmap[output][0])
iname = inames[olv_index]
# We make the output point to the corresponding input and
......@@ -1006,7 +1040,7 @@ class Elemwise(Op):
# not be declared, as they are #defined in defines
task_decl = "".join([
"%(dtype)s& %(name)s_i = *%(name)s_iter;\n" % locals()
for name, dtype in zip(inames + list(real_onames),
for name, dtype in izip(inames + list(real_onames),
idtypes + list(real_odtypes))])
# We generate the C code of the inner loop using the scalar op
......@@ -1305,7 +1339,7 @@ class CAReduce(Op):
nnested = len(order1)
sub = dict(sub)
for i, (input, iname) in enumerate(zip(node.inputs, inames)):
for i, (input, iname) in enumerate(izip(node.inputs, inames)):
sub['lv%i' % i] = iname
decl = cgen.make_declare([order], [idtype], sub)
......
......@@ -848,6 +848,31 @@ class TestElemwise(unittest_tools.InferShapeTester):
[t_left_val, t_right_val], Elemwise)
def test_gt_grad():
    """Regression test for a failure reported by a user.

    Something in this graph made Elemwise.grad return an expression that
    was too complicated for get_constant_value to recognize as being 0,
    so gradient.grad reported that it was not a valid gradient of an
    integer.  The test only checks that T.grad runs without raising.
    """
    T = theano.tensor
    floatX = config.floatX

    # Fixed seed so the shared weights are reproducible.
    rng = numpy.random.RandomState(1234)
    W_values = numpy.asarray(rng.uniform(low=-1, high=1, size=(2, 2)),
                             dtype=floatX)
    W = theano.shared(value=W_values, name='weights')

    input_ = T.vector(dtype=floatX)
    correct_score = T.dot(input_, W)

    # Same scoring graph, evaluated on a second ("wrong") input.
    wrong_input = T.vector(dtype=floatX)
    wrong_score = theano.clone(correct_score, {input_: wrong_input})

    # Hinge loss
    scores = T.ones_like(correct_score) - correct_score + wrong_score
    is_positive = scores > 0
    cost = (scores * is_positive).sum()

    T.grad(cost, input_)
"""
if __name__ == '__main__':
#unittest.main()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论