提交 8eeaea6c authored 作者: abergeron's avatar abergeron

Merge pull request #1919 from nouiz/crash_fix_broadcast

Crash fix broadcast
......@@ -20,7 +20,6 @@ from theano.sandbox.gpuarray.elemwise import GpuElemwise
from theano.sandbox.gpuarray.comp import NVCC_compiler
class GpuSubtensor(HideC, Subtensor):
def make_node(self, x, *inputs):
rval = tensor.Subtensor.make_node(self, x, *inputs)
......@@ -32,15 +31,10 @@ class GpuSubtensor(HideC, Subtensor):
def perform(self, node, inputs, out_):
out, = out_
x = inputs[0]
if self.perform_cache_cdata is not None:
out[0] = x.__getitem__(self.perform_cache_cdata)
return
cdata = get_idx_list(inputs, self.idx_list)
if len(cdata) == 1:
cdata = cdata[0]
if len(inputs) == 1:
self.perform_cache_cdata = cdata
out[0] = x.__getitem__(cdata)
......@@ -232,7 +226,8 @@ class GpuIncSubtensor(IncSubtensor):
# scalar case
if not self.set_instead_of_inc:
#x.__setitem__(cdata, sub_x + y)
tmp = pygpu.elemwise.elemwise2(sub_x, '+', y, sub_x, broadcast=False)
tmp = pygpu.elemwise.elemwise2(sub_x, '+', y, sub_x,
broadcast=False)
x.__setitem__(cdata, tmp)
else:
x.__setitem__(cdata, y)
......@@ -592,4 +587,4 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
return;
}
""" %locals()
""" % locals()
......@@ -64,6 +64,7 @@ def make_constant(args):
return a
return tuple(map(conv, args))
def get_idx_list(inputs, idx_list):
'''
Given a list of inputs to the subtensor and its idx_list reorders
......@@ -81,8 +82,8 @@ def get_idx_list(inputs, idx_list):
return indices.pop()
elif isinstance(entry, slice):
return slice(convert(entry.start),
convert(entry.stop),
convert(entry.step))
convert(entry.stop),
convert(entry.step))
else:
return entry
cdata = tuple(map(convert, idx_list))
......@@ -125,13 +126,13 @@ def get_canonical_form_slice(theslice, length):
# in the generic case below.
if step == 1:
is_start_0 = (
start in [None, 0] or
(is_start_constant and is_length_constant and
start < 0 and start + length <= 0))
start in [None, 0] or
(is_start_constant and is_length_constant and
start < 0 and start + length <= 0))
is_stop_length = (
stop in [None, length, maxsize] or
(is_stop_constant and is_length_constant and
stop >= length))
stop in [None, length, maxsize] or
(is_stop_constant and is_length_constant and
stop >= length))
if is_start_0:
# 0:stop:1
if is_stop_length:
......@@ -395,6 +396,7 @@ class Subtensor(Op):
NotScalarConstantError: v
"""
real_idx = get_idx_list(inputs, self.idx_list)
def conv(val):
if val is None:
return None
......@@ -441,11 +443,12 @@ class Subtensor(Op):
raise exception
input_types = Subtensor.collapse(idx_list,
lambda entry: isinstance(entry, gof.Type))
lambda entry: isinstance(entry,
gof.Type))
if len(inputs) != len(input_types):
raise IndexError(
"Not enough inputs to fill in the Subtensor template.",
inputs, idx_list)
"Not enough inputs to fill in the Subtensor template.",
inputs, idx_list)
for input, expected_type in izip(inputs, input_types):
if input.type != expected_type:
raise TypeError(
......@@ -473,7 +476,7 @@ class Subtensor(Op):
return gof.Apply(self,
(x, ) + inputs,
[theano.tensor.tensor(dtype=x.type.dtype,
broadcastable=broadcastable)])
broadcastable=broadcastable)])
def perform(self, node, inputs, out_):
out, = out_
......@@ -592,7 +595,7 @@ class Subtensor(Op):
def helper_c_code(node, name, inputs, outputs, sub, idx_list, view_ndim,
c_prefix=None,
strides_mul=None,
):
):
"""
The parameters c_prefix are there to allow reusing this
function on PyArray and CudaNdarray object.
......@@ -637,23 +640,23 @@ class Subtensor(Op):
def init_entry(entry, depth=0):
if isinstance(entry, (numpy.integer, int)):
init_cmds.append(
"subtensor_spec[%i] = %i;" % (spec_pos(),
entry))
"subtensor_spec[%i] = %i;" % (spec_pos(),
entry))
inc_spec_pos(1)
if depth == 0:
is_slice.append(0)
elif isinstance(entry, Type):
init_cmds.append(
"subtensor_spec[%i] = %s;" % (spec_pos(),
inputs[input_pos()]))
"subtensor_spec[%i] = %s;" % (spec_pos(),
inputs[input_pos()]))
inc_spec_pos(1)
inc_input_pos(1)
if depth == 0:
is_slice.append(0)
elif entry is None:
init_cmds.append(
"subtensor_spec[%i] = %i;" % (spec_pos(),
NONE_CODE))
"subtensor_spec[%i] = %i;" % (spec_pos(),
NONE_CODE))
inc_spec_pos(1)
if depth == 0:
is_slice.append(0)
......@@ -686,26 +689,26 @@ class Subtensor(Op):
x, = inputs[:1]
z, = outputs
if view_ndim:
rval = """
rval = """
// Argument of the view
npy_intp xview_dims[%(view_ndim)s];
npy_intp xview_strides[%(view_ndim)s];
"""% locals()
""" % locals()
else:
rval = """
rval = """
// Argument of the view
npy_intp* xview_dims = NULL;
npy_intp* xview_strides = NULL;
"""
rval += """
// One more argument of the view
npy_intp xview_offset = 0;
// The subtensor is created by iterating over the dimensions
// and updating stride, shape, and data pointers
......@@ -716,7 +719,7 @@ class Subtensor(Op):
int inner_ii = 0; // the current dimension of zview
int outer_ii = 0; // current dimension of z
for (; outer_ii < %(len_is_slice)s; ++outer_ii)
{
if (is_slice[outer_ii])
......@@ -944,11 +947,11 @@ class SubtensorPrinter:
raise TypeError("Can only print Subtensor.")
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Subtensor),
SubtensorPrinter())
SubtensorPrinter())
def set_subtensor(x, y, inplace=False,
tolerate_inplace_aliasing=False):
tolerate_inplace_aliasing=False):
"""Return x with the given subtensor overwritten by y.
Example: To replicate the numpy expression "r[10:] = 5", type
......@@ -960,11 +963,11 @@ def set_subtensor(x, y, inplace=False,
:param tolerate_inplace_aliasing: see inc_subtensor for documentation.
"""
return inc_subtensor(x, y, inplace, set_instead_of_inc=True,
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
tolerate_inplace_aliasing=False):
tolerate_inplace_aliasing=False):
"""Return x with the given subtensor incremented by y.
:param x: the symbolic result of a Subtensor operation.
......@@ -987,7 +990,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
if y.ndim > x.ndim:
raise TypeError(("Trying to increment a %d-dimensional "
"subtensor with a %d-dimensional value.") % (x.ndim, y.ndim))
"subtensor with a %d-dimensional value.") % (x.ndim,
y.ndim))
for dim in range(y.ndim):
dim_offset = x.ndim - y.ndim
......@@ -1042,20 +1046,22 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
# return something that has the same shape as x, not as x.T (inner_x).
# So re-apply the outer dimshuffle on the new inc_subtensor,
# and return advanced_inc_subtensor1(x.T, i, y).T.
inner_incsubtensor = inc_subtensor(inner_x, y,
inplace=inplace,
set_instead_of_inc=set_instead_of_inc,
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
inner_incsubtensor = inc_subtensor(
inner_x, y,
inplace=inplace,
set_instead_of_inc=set_instead_of_inc,
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
return x.owner.op(inner_incsubtensor, *x.owner.inputs[1:])
elif isinstance(x.owner.op, theano.tensor.Reshape):
inner_x = x.owner.inputs[0]
# Try to apply inc_subtensor on inner_x.
# If it works, there is no need to reshape, as the inc_subtensor
# will have the same shape as inner_x, which is what we want.
inner_incsubtensor = inc_subtensor(inner_x, y.flatten(),
inplace=inplace,
set_instead_of_inc=set_instead_of_inc,
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
inner_incsubtensor = inc_subtensor(
inner_x, y.flatten(),
inplace=inplace,
set_instead_of_inc=set_instead_of_inc,
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
return inner_incsubtensor
else:
raise TypeError('x must be the result of a subtensor operation')
......@@ -1077,7 +1083,7 @@ class IncSubtensor(Op):
check_input = False
def __init__(self, idx_list, inplace=False, set_instead_of_inc=False,
destroyhandler_tolerate_aliased=None):
destroyhandler_tolerate_aliased=None):
if destroyhandler_tolerate_aliased is None:
destroyhandler_tolerate_aliased = []
self.idx_list = map(Subtensor.convert, idx_list)
......@@ -1085,7 +1091,7 @@ class IncSubtensor(Op):
if inplace:
self.destroy_map = {0: [0]}
self.destroyhandler_tolerate_aliased = list(
destroyhandler_tolerate_aliased)
destroyhandler_tolerate_aliased)
self.set_instead_of_inc = set_instead_of_inc
def __eq__(self, other):
......@@ -1109,7 +1115,7 @@ class IncSubtensor(Op):
# else entry
# for entry in self.idx_list)
return hashtype(self) ^ hash(idx_list) ^ hash(self.inplace) \
^ hash(self.set_instead_of_inc)
^ hash(self.set_instead_of_inc)
def __str__(self):
indices = []
......@@ -1126,10 +1132,10 @@ class IncSubtensor(Op):
msg += 'Inc'
else:
msg += 'Set'
return "%s{%s;%s}" % (
self.__class__.__name__,
msg,
", ".join(indices))
return "%s{%s;%s}" % (
self.__class__.__name__,
msg,
", ".join(indices))
def make_node(self, x, y, *inputs):
"""
......@@ -1140,25 +1146,26 @@ class IncSubtensor(Op):
x, y = map(theano.tensor.as_tensor_variable, [x, y])
if y.ndim > x.ndim:
raise ValueError(("Trying to increment a %d-dimensional "
"subtensor with a %d-dimensional value.") % (x.ndim,
y.ndim))
"subtensor with a %d-dimensional value.") % (
x.ndim, y.ndim))
inputs = tuple(map(Subtensor.my_as_scalar, inputs))
idx_list = list(self.idx_list)
if len(idx_list) > x.type.ndim:
exception = ValueError(
Subtensor.e_invalid % (
len(idx_list),
x.type.ndim))
Subtensor.e_invalid % (
len(idx_list),
x.type.ndim))
exception.subtensor_invalid = True
raise exception
input_types = Subtensor.collapse(idx_list,
lambda entry: isinstance(entry, gof.Type))
input_types = Subtensor.collapse(
idx_list,
lambda entry: isinstance(entry, gof.Type))
if len(inputs) != len(input_types):
raise IndexError(
"Not enough inputs to fill in the Subtensor template.",
inputs, idx_list)
"Not enough inputs to fill in the Subtensor template.",
inputs, idx_list)
for input, expected_type in izip(inputs, input_types):
if input.type != expected_type:
raise TypeError(
......@@ -1442,6 +1449,25 @@ class IncSubtensor(Op):
else:
gx = g_output
gy = Subtensor(idx_list=self.idx_list)(g_output, *idx_list)
if gy.broadcastable != y.broadcastable:
y_broad = (True,) * (gy.ndim - y.ndim) + y.broadcastable
assert sum(gy.broadcastable) < sum(y_broad)
axis_to_sum = []
for i in range(gy.ndim):
if gy.broadcastable[i] is False and y_broad[i] is True:
axis_to_sum.append(i)
elif (gy.broadcastable[i] is True and
y_broad[i] is False):
# This means that Theano was able to infer that
# gy.shape[i] is 1, so y.shape[i] is 1, but we
# didn't know it. It is fine.
pass
else:
assert gy.broadcastable[i] == y_broad[i]
gy = gy.sum(axis=axis_to_sum, keepdims=True)
if gy.ndim != y.ndim:
gy = gy.dimshuffle(*range(y.ndim, gy.ndim))
assert gy.broadcastable == y.broadcastable
return [gx, gy] + [DisconnectedType()()] * len(idx_list)
......
......@@ -88,7 +88,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
f = inplace_func([], t, mode=self.mode)
topo = f.maker.fgraph.toposort()
topo_ = [node for node in topo if not isinstance(node.op,
self.ignore_topo)]
self.ignore_topo)]
assert len(topo_) == 1
if not list:
assert isinstance(topo_[0].op, self.sub)
......@@ -365,19 +365,39 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
f = inplace_func([], gn, mode=self.mode)
topo = f.maker.fgraph.toposort()
topo_ = [node for node in topo if not isinstance(node.op,
self.ignore_topo)]
self.ignore_topo)]
if not self.fast_compile:
assert len(topo_) == 6
assert numpy.sum([isinstance(node.op, self.inc_sub)
for node in topo_]) == 1
for node in topo_]) == 1
assert numpy.sum([isinstance(node.op, self.sub)
for node in topo_]) == 1
for node in topo_]) == 1
gval = f()
good = numpy.zeros_like(data)
good[subi:, subi] = numpy.exp(data[subi:, subi])
self.assertTrue(numpy.allclose(gval, good), (gval, good))
def test_grad_2d_inc_set_subtensor(self):
    """Regression test for the broadcast crash fix: the gradient of
    inc_subtensor/set_subtensor must have the same broadcastable
    pattern as the increment value ``m``, including shapes where a
    dimension of ``m`` is broadcastable ((1, 2)) or where ``m`` has
    fewer dimensions than the sliced region ((2,)).
    """
    # Pairs of (shape of the sliced variable n, shape of the value m).
    # The slice taken below is n[:2, :2], so m must broadcast against
    # a (2, 2) region.
    for n_shape, m_shape in [
        [(2, 3), (2, 2)],
        [(3, 2), (2, 2)],
        [(3, 2), (1, 2)],   # broadcastable first dimension
        [(3, 2), (2,)],     # m has lower rank than the slice
    ]:
        # Exercise both the increment and the overwrite variants.
        for op in [inc_subtensor, set_subtensor]:
            subi = 2
            data = numpy.asarray(rand(*n_shape), dtype=self.dtype)
            n = self.shared(data)
            # Symbolic scalar index so a Subtensor op (not constant
            # folding) is actually built.
            z = scal.constant(subi)
            m = matrix('m', dtype=self.dtype)
            mv = numpy.asarray(rand(*m_shape), dtype=self.dtype)

            t = op(n[:z, :z], m)
            # Building the gradient used to crash before this fix when
            # m's broadcastable pattern differed from the slice's.
            gn, gm = theano.tensor.grad(theano.tensor.sum(t), [n, m])
            # Numerically verify the gradient w.r.t. both inputs.
            utt.verify_grad(lambda m: op(n[:z, :z], m), [mv])
            utt.verify_grad(lambda nn: op(nn[:z, :z], mv), [data])
def test_grad_0d(self):
data = numpy.asarray(rand(2, 3), dtype=self.dtype)
n = self.shared(data)
......
......@@ -643,10 +643,10 @@ theano.compile.register_shape_i_c_code(
TensorType,
"""
if(!%(oname)s)
%(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
%(oname)s=(PyArrayObject*)PyArray_EMPTY(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0]=PyArray_DIMS(%(iname)s)[%(i)s];
""",
version=1)
version=2)
# Register TensorType C code for DeepCopyOp
theano.compile.register_deep_copy_op_c_code(
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论