提交 8eeaea6c authored 作者: abergeron's avatar abergeron

Merge pull request #1919 from nouiz/crash_fix_broadcast

Crash fix broadcast
...@@ -20,7 +20,6 @@ from theano.sandbox.gpuarray.elemwise import GpuElemwise ...@@ -20,7 +20,6 @@ from theano.sandbox.gpuarray.elemwise import GpuElemwise
from theano.sandbox.gpuarray.comp import NVCC_compiler from theano.sandbox.gpuarray.comp import NVCC_compiler
class GpuSubtensor(HideC, Subtensor): class GpuSubtensor(HideC, Subtensor):
def make_node(self, x, *inputs): def make_node(self, x, *inputs):
rval = tensor.Subtensor.make_node(self, x, *inputs) rval = tensor.Subtensor.make_node(self, x, *inputs)
...@@ -32,15 +31,10 @@ class GpuSubtensor(HideC, Subtensor): ...@@ -32,15 +31,10 @@ class GpuSubtensor(HideC, Subtensor):
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_):
out, = out_ out, = out_
x = inputs[0] x = inputs[0]
if self.perform_cache_cdata is not None:
out[0] = x.__getitem__(self.perform_cache_cdata)
return
cdata = get_idx_list(inputs, self.idx_list) cdata = get_idx_list(inputs, self.idx_list)
if len(cdata) == 1: if len(cdata) == 1:
cdata = cdata[0] cdata = cdata[0]
if len(inputs) == 1:
self.perform_cache_cdata = cdata
out[0] = x.__getitem__(cdata) out[0] = x.__getitem__(cdata)
...@@ -232,7 +226,8 @@ class GpuIncSubtensor(IncSubtensor): ...@@ -232,7 +226,8 @@ class GpuIncSubtensor(IncSubtensor):
# scalar case # scalar case
if not self.set_instead_of_inc: if not self.set_instead_of_inc:
#x.__setitem__(cdata, sub_x + y) #x.__setitem__(cdata, sub_x + y)
tmp = pygpu.elemwise.elemwise2(sub_x, '+', y, sub_x, broadcast=False) tmp = pygpu.elemwise.elemwise2(sub_x, '+', y, sub_x,
broadcast=False)
x.__setitem__(cdata, tmp) x.__setitem__(cdata, tmp)
else: else:
x.__setitem__(cdata, y) x.__setitem__(cdata, y)
...@@ -592,4 +587,4 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1): ...@@ -592,4 +587,4 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
return; return;
} }
""" %locals() """ % locals()
...@@ -64,6 +64,7 @@ def make_constant(args): ...@@ -64,6 +64,7 @@ def make_constant(args):
return a return a
return tuple(map(conv, args)) return tuple(map(conv, args))
def get_idx_list(inputs, idx_list): def get_idx_list(inputs, idx_list):
''' '''
Given a list of inputs to the subtensor and its idx_list reorders Given a list of inputs to the subtensor and its idx_list reorders
...@@ -81,8 +82,8 @@ def get_idx_list(inputs, idx_list): ...@@ -81,8 +82,8 @@ def get_idx_list(inputs, idx_list):
return indices.pop() return indices.pop()
elif isinstance(entry, slice): elif isinstance(entry, slice):
return slice(convert(entry.start), return slice(convert(entry.start),
convert(entry.stop), convert(entry.stop),
convert(entry.step)) convert(entry.step))
else: else:
return entry return entry
cdata = tuple(map(convert, idx_list)) cdata = tuple(map(convert, idx_list))
...@@ -125,13 +126,13 @@ def get_canonical_form_slice(theslice, length): ...@@ -125,13 +126,13 @@ def get_canonical_form_slice(theslice, length):
# in the generic case below. # in the generic case below.
if step == 1: if step == 1:
is_start_0 = ( is_start_0 = (
start in [None, 0] or start in [None, 0] or
(is_start_constant and is_length_constant and (is_start_constant and is_length_constant and
start < 0 and start + length <= 0)) start < 0 and start + length <= 0))
is_stop_length = ( is_stop_length = (
stop in [None, length, maxsize] or stop in [None, length, maxsize] or
(is_stop_constant and is_length_constant and (is_stop_constant and is_length_constant and
stop >= length)) stop >= length))
if is_start_0: if is_start_0:
# 0:stop:1 # 0:stop:1
if is_stop_length: if is_stop_length:
...@@ -395,6 +396,7 @@ class Subtensor(Op): ...@@ -395,6 +396,7 @@ class Subtensor(Op):
NotScalarConstantError: v NotScalarConstantError: v
""" """
real_idx = get_idx_list(inputs, self.idx_list) real_idx = get_idx_list(inputs, self.idx_list)
def conv(val): def conv(val):
if val is None: if val is None:
return None return None
...@@ -441,11 +443,12 @@ class Subtensor(Op): ...@@ -441,11 +443,12 @@ class Subtensor(Op):
raise exception raise exception
input_types = Subtensor.collapse(idx_list, input_types = Subtensor.collapse(idx_list,
lambda entry: isinstance(entry, gof.Type)) lambda entry: isinstance(entry,
gof.Type))
if len(inputs) != len(input_types): if len(inputs) != len(input_types):
raise IndexError( raise IndexError(
"Not enough inputs to fill in the Subtensor template.", "Not enough inputs to fill in the Subtensor template.",
inputs, idx_list) inputs, idx_list)
for input, expected_type in izip(inputs, input_types): for input, expected_type in izip(inputs, input_types):
if input.type != expected_type: if input.type != expected_type:
raise TypeError( raise TypeError(
...@@ -473,7 +476,7 @@ class Subtensor(Op): ...@@ -473,7 +476,7 @@ class Subtensor(Op):
return gof.Apply(self, return gof.Apply(self,
(x, ) + inputs, (x, ) + inputs,
[theano.tensor.tensor(dtype=x.type.dtype, [theano.tensor.tensor(dtype=x.type.dtype,
broadcastable=broadcastable)]) broadcastable=broadcastable)])
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_):
out, = out_ out, = out_
...@@ -592,7 +595,7 @@ class Subtensor(Op): ...@@ -592,7 +595,7 @@ class Subtensor(Op):
def helper_c_code(node, name, inputs, outputs, sub, idx_list, view_ndim, def helper_c_code(node, name, inputs, outputs, sub, idx_list, view_ndim,
c_prefix=None, c_prefix=None,
strides_mul=None, strides_mul=None,
): ):
""" """
The parameters c_prefix are there to allow reusing this The parameters c_prefix are there to allow reusing this
function on PyArray and CudaNdarray object. function on PyArray and CudaNdarray object.
...@@ -637,23 +640,23 @@ class Subtensor(Op): ...@@ -637,23 +640,23 @@ class Subtensor(Op):
def init_entry(entry, depth=0): def init_entry(entry, depth=0):
if isinstance(entry, (numpy.integer, int)): if isinstance(entry, (numpy.integer, int)):
init_cmds.append( init_cmds.append(
"subtensor_spec[%i] = %i;" % (spec_pos(), "subtensor_spec[%i] = %i;" % (spec_pos(),
entry)) entry))
inc_spec_pos(1) inc_spec_pos(1)
if depth == 0: if depth == 0:
is_slice.append(0) is_slice.append(0)
elif isinstance(entry, Type): elif isinstance(entry, Type):
init_cmds.append( init_cmds.append(
"subtensor_spec[%i] = %s;" % (spec_pos(), "subtensor_spec[%i] = %s;" % (spec_pos(),
inputs[input_pos()])) inputs[input_pos()]))
inc_spec_pos(1) inc_spec_pos(1)
inc_input_pos(1) inc_input_pos(1)
if depth == 0: if depth == 0:
is_slice.append(0) is_slice.append(0)
elif entry is None: elif entry is None:
init_cmds.append( init_cmds.append(
"subtensor_spec[%i] = %i;" % (spec_pos(), "subtensor_spec[%i] = %i;" % (spec_pos(),
NONE_CODE)) NONE_CODE))
inc_spec_pos(1) inc_spec_pos(1)
if depth == 0: if depth == 0:
is_slice.append(0) is_slice.append(0)
...@@ -686,26 +689,26 @@ class Subtensor(Op): ...@@ -686,26 +689,26 @@ class Subtensor(Op):
x, = inputs[:1] x, = inputs[:1]
z, = outputs z, = outputs
if view_ndim: if view_ndim:
rval = """ rval = """
// Argument of the view // Argument of the view
npy_intp xview_dims[%(view_ndim)s]; npy_intp xview_dims[%(view_ndim)s];
npy_intp xview_strides[%(view_ndim)s]; npy_intp xview_strides[%(view_ndim)s];
"""% locals() """ % locals()
else: else:
rval = """ rval = """
// Argument of the view // Argument of the view
npy_intp* xview_dims = NULL; npy_intp* xview_dims = NULL;
npy_intp* xview_strides = NULL; npy_intp* xview_strides = NULL;
""" """
rval += """ rval += """
// One more argument of the view // One more argument of the view
npy_intp xview_offset = 0; npy_intp xview_offset = 0;
// The subtensor is created by iterating over the dimensions // The subtensor is created by iterating over the dimensions
// and updating stride, shape, and data pointers // and updating stride, shape, and data pointers
...@@ -716,7 +719,7 @@ class Subtensor(Op): ...@@ -716,7 +719,7 @@ class Subtensor(Op):
int inner_ii = 0; // the current dimension of zview int inner_ii = 0; // the current dimension of zview
int outer_ii = 0; // current dimension of z int outer_ii = 0; // current dimension of z
for (; outer_ii < %(len_is_slice)s; ++outer_ii) for (; outer_ii < %(len_is_slice)s; ++outer_ii)
{ {
if (is_slice[outer_ii]) if (is_slice[outer_ii])
...@@ -944,11 +947,11 @@ class SubtensorPrinter: ...@@ -944,11 +947,11 @@ class SubtensorPrinter:
raise TypeError("Can only print Subtensor.") raise TypeError("Can only print Subtensor.")
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Subtensor), pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Subtensor),
SubtensorPrinter()) SubtensorPrinter())
def set_subtensor(x, y, inplace=False, def set_subtensor(x, y, inplace=False,
tolerate_inplace_aliasing=False): tolerate_inplace_aliasing=False):
"""Return x with the given subtensor overwritten by y. """Return x with the given subtensor overwritten by y.
Example: To replicate the numpy expression "r[10:] = 5", type Example: To replicate the numpy expression "r[10:] = 5", type
...@@ -960,11 +963,11 @@ def set_subtensor(x, y, inplace=False, ...@@ -960,11 +963,11 @@ def set_subtensor(x, y, inplace=False,
:param tolerate_inplace_aliasing: see inc_subtensor for documentation. :param tolerate_inplace_aliasing: see inc_subtensor for documentation.
""" """
return inc_subtensor(x, y, inplace, set_instead_of_inc=True, return inc_subtensor(x, y, inplace, set_instead_of_inc=True,
tolerate_inplace_aliasing=tolerate_inplace_aliasing) tolerate_inplace_aliasing=tolerate_inplace_aliasing)
def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False, def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
tolerate_inplace_aliasing=False): tolerate_inplace_aliasing=False):
"""Return x with the given subtensor incremented by y. """Return x with the given subtensor incremented by y.
:param x: the symbolic result of a Subtensor operation. :param x: the symbolic result of a Subtensor operation.
...@@ -987,7 +990,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False, ...@@ -987,7 +990,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
if y.ndim > x.ndim: if y.ndim > x.ndim:
raise TypeError(("Trying to increment a %d-dimensional " raise TypeError(("Trying to increment a %d-dimensional "
"subtensor with a %d-dimensional value.") % (x.ndim, y.ndim)) "subtensor with a %d-dimensional value.") % (x.ndim,
y.ndim))
for dim in range(y.ndim): for dim in range(y.ndim):
dim_offset = x.ndim - y.ndim dim_offset = x.ndim - y.ndim
...@@ -1042,20 +1046,22 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False, ...@@ -1042,20 +1046,22 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
# return something that has the same shape as x, not as x.T (inner_x). # return something that has the same shape as x, not as x.T (inner_x).
# So re-apply the outer dimshuffle on the new inc_subtensor, # So re-apply the outer dimshuffle on the new inc_subtensor,
# and return advanced_inc_subtensor1(x.T, i, y).T. # and return advanced_inc_subtensor1(x.T, i, y).T.
inner_incsubtensor = inc_subtensor(inner_x, y, inner_incsubtensor = inc_subtensor(
inplace=inplace, inner_x, y,
set_instead_of_inc=set_instead_of_inc, inplace=inplace,
tolerate_inplace_aliasing=tolerate_inplace_aliasing) set_instead_of_inc=set_instead_of_inc,
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
return x.owner.op(inner_incsubtensor, *x.owner.inputs[1:]) return x.owner.op(inner_incsubtensor, *x.owner.inputs[1:])
elif isinstance(x.owner.op, theano.tensor.Reshape): elif isinstance(x.owner.op, theano.tensor.Reshape):
inner_x = x.owner.inputs[0] inner_x = x.owner.inputs[0]
# Try to apply inc_subtensor on inner_x. # Try to apply inc_subtensor on inner_x.
# If it works, there is no need to reshape, as the inc_subtensor # If it works, there is no need to reshape, as the inc_subtensor
# will have the same shape as inner_x, which is what we want. # will have the same shape as inner_x, which is what we want.
inner_incsubtensor = inc_subtensor(inner_x, y.flatten(), inner_incsubtensor = inc_subtensor(
inplace=inplace, inner_x, y.flatten(),
set_instead_of_inc=set_instead_of_inc, inplace=inplace,
tolerate_inplace_aliasing=tolerate_inplace_aliasing) set_instead_of_inc=set_instead_of_inc,
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
return inner_incsubtensor return inner_incsubtensor
else: else:
raise TypeError('x must be the result of a subtensor operation') raise TypeError('x must be the result of a subtensor operation')
...@@ -1077,7 +1083,7 @@ class IncSubtensor(Op): ...@@ -1077,7 +1083,7 @@ class IncSubtensor(Op):
check_input = False check_input = False
def __init__(self, idx_list, inplace=False, set_instead_of_inc=False, def __init__(self, idx_list, inplace=False, set_instead_of_inc=False,
destroyhandler_tolerate_aliased=None): destroyhandler_tolerate_aliased=None):
if destroyhandler_tolerate_aliased is None: if destroyhandler_tolerate_aliased is None:
destroyhandler_tolerate_aliased = [] destroyhandler_tolerate_aliased = []
self.idx_list = map(Subtensor.convert, idx_list) self.idx_list = map(Subtensor.convert, idx_list)
...@@ -1085,7 +1091,7 @@ class IncSubtensor(Op): ...@@ -1085,7 +1091,7 @@ class IncSubtensor(Op):
if inplace: if inplace:
self.destroy_map = {0: [0]} self.destroy_map = {0: [0]}
self.destroyhandler_tolerate_aliased = list( self.destroyhandler_tolerate_aliased = list(
destroyhandler_tolerate_aliased) destroyhandler_tolerate_aliased)
self.set_instead_of_inc = set_instead_of_inc self.set_instead_of_inc = set_instead_of_inc
def __eq__(self, other): def __eq__(self, other):
...@@ -1109,7 +1115,7 @@ class IncSubtensor(Op): ...@@ -1109,7 +1115,7 @@ class IncSubtensor(Op):
# else entry # else entry
# for entry in self.idx_list) # for entry in self.idx_list)
return hashtype(self) ^ hash(idx_list) ^ hash(self.inplace) \ return hashtype(self) ^ hash(idx_list) ^ hash(self.inplace) \
^ hash(self.set_instead_of_inc) ^ hash(self.set_instead_of_inc)
def __str__(self): def __str__(self):
indices = [] indices = []
...@@ -1126,10 +1132,10 @@ class IncSubtensor(Op): ...@@ -1126,10 +1132,10 @@ class IncSubtensor(Op):
msg += 'Inc' msg += 'Inc'
else: else:
msg += 'Set' msg += 'Set'
return "%s{%s;%s}" % ( return "%s{%s;%s}" % (
self.__class__.__name__, self.__class__.__name__,
msg, msg,
", ".join(indices)) ", ".join(indices))
def make_node(self, x, y, *inputs): def make_node(self, x, y, *inputs):
""" """
...@@ -1140,25 +1146,26 @@ class IncSubtensor(Op): ...@@ -1140,25 +1146,26 @@ class IncSubtensor(Op):
x, y = map(theano.tensor.as_tensor_variable, [x, y]) x, y = map(theano.tensor.as_tensor_variable, [x, y])
if y.ndim > x.ndim: if y.ndim > x.ndim:
raise ValueError(("Trying to increment a %d-dimensional " raise ValueError(("Trying to increment a %d-dimensional "
"subtensor with a %d-dimensional value.") % (x.ndim, "subtensor with a %d-dimensional value.") % (
y.ndim)) x.ndim, y.ndim))
inputs = tuple(map(Subtensor.my_as_scalar, inputs)) inputs = tuple(map(Subtensor.my_as_scalar, inputs))
idx_list = list(self.idx_list) idx_list = list(self.idx_list)
if len(idx_list) > x.type.ndim: if len(idx_list) > x.type.ndim:
exception = ValueError( exception = ValueError(
Subtensor.e_invalid % ( Subtensor.e_invalid % (
len(idx_list), len(idx_list),
x.type.ndim)) x.type.ndim))
exception.subtensor_invalid = True exception.subtensor_invalid = True
raise exception raise exception
input_types = Subtensor.collapse(idx_list, input_types = Subtensor.collapse(
lambda entry: isinstance(entry, gof.Type)) idx_list,
lambda entry: isinstance(entry, gof.Type))
if len(inputs) != len(input_types): if len(inputs) != len(input_types):
raise IndexError( raise IndexError(
"Not enough inputs to fill in the Subtensor template.", "Not enough inputs to fill in the Subtensor template.",
inputs, idx_list) inputs, idx_list)
for input, expected_type in izip(inputs, input_types): for input, expected_type in izip(inputs, input_types):
if input.type != expected_type: if input.type != expected_type:
raise TypeError( raise TypeError(
...@@ -1442,6 +1449,25 @@ class IncSubtensor(Op): ...@@ -1442,6 +1449,25 @@ class IncSubtensor(Op):
else: else:
gx = g_output gx = g_output
gy = Subtensor(idx_list=self.idx_list)(g_output, *idx_list) gy = Subtensor(idx_list=self.idx_list)(g_output, *idx_list)
if gy.broadcastable != y.broadcastable:
y_broad = (True,) * (gy.ndim - y.ndim) + y.broadcastable
assert sum(gy.broadcastable) < sum(y_broad)
axis_to_sum = []
for i in range(gy.ndim):
if gy.broadcastable[i] is False and y_broad[i] is True:
axis_to_sum.append(i)
elif (gy.broadcastable[i] is True and
y_broad[i] is False):
# This means that Theano was able to infer that
# gy.shape[i] is 1, so y.shape[i] is 1, but we
# didn't know it. It is fine.
pass
else:
assert gy.broadcastable[i] == y_broad[i]
gy = gy.sum(axis=axis_to_sum, keepdims=True)
if gy.ndim != y.ndim:
gy = gy.dimshuffle(*range(y.ndim, gy.ndim))
assert gy.broadcastable == y.broadcastable
return [gx, gy] + [DisconnectedType()()] * len(idx_list) return [gx, gy] + [DisconnectedType()()] * len(idx_list)
......
...@@ -88,7 +88,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -88,7 +88,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
f = inplace_func([], t, mode=self.mode) f = inplace_func([], t, mode=self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
topo_ = [node for node in topo if not isinstance(node.op, topo_ = [node for node in topo if not isinstance(node.op,
self.ignore_topo)] self.ignore_topo)]
assert len(topo_) == 1 assert len(topo_) == 1
if not list: if not list:
assert isinstance(topo_[0].op, self.sub) assert isinstance(topo_[0].op, self.sub)
...@@ -365,19 +365,39 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -365,19 +365,39 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
f = inplace_func([], gn, mode=self.mode) f = inplace_func([], gn, mode=self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
topo_ = [node for node in topo if not isinstance(node.op, topo_ = [node for node in topo if not isinstance(node.op,
self.ignore_topo)] self.ignore_topo)]
if not self.fast_compile: if not self.fast_compile:
assert len(topo_) == 6 assert len(topo_) == 6
assert numpy.sum([isinstance(node.op, self.inc_sub) assert numpy.sum([isinstance(node.op, self.inc_sub)
for node in topo_]) == 1 for node in topo_]) == 1
assert numpy.sum([isinstance(node.op, self.sub) assert numpy.sum([isinstance(node.op, self.sub)
for node in topo_]) == 1 for node in topo_]) == 1
gval = f() gval = f()
good = numpy.zeros_like(data) good = numpy.zeros_like(data)
good[subi:, subi] = numpy.exp(data[subi:, subi]) good[subi:, subi] = numpy.exp(data[subi:, subi])
self.assertTrue(numpy.allclose(gval, good), (gval, good)) self.assertTrue(numpy.allclose(gval, good), (gval, good))
def test_grad_2d_inc_set_subtensor(self):
    """Check gradients of inc_subtensor/set_subtensor on a 2-d slice.

    For every (n_shape, m_shape) pair and for both ops, the op is applied
    to the ``[:2, :2]`` slice of ``self.shared(<random n_shape array>)``
    with an update value ``m``.  The symbolic gradient of the summed
    result with respect to both variables is built, then
    ``utt.verify_grad`` is run once with the update value as the free
    input and once with the sliced array as the free input.
    """
    subi = 2
    # Each entry is (shape of the sliced array, shape of the update value).
    # NOTE(review): the last two update shapes have a length-1 leading axis
    # or one axis fewer than the slice -- presumably to exercise
    # broadcasting of the update value; confirm against IncSubtensor.grad.
    shape_pairs = [
        [(2, 3), (2, 2)],
        [(3, 2), (2, 2)],
        [(3, 2), (1, 2)],
        [(3, 2), (2,)],
    ]
    for n_shape, m_shape in shape_pairs:
        for op in [inc_subtensor, set_subtensor]:
            n_val = numpy.asarray(rand(*n_shape), dtype=self.dtype)
            n_var = self.shared(n_val)
            stop = scal.constant(subi)
            m_var = matrix('m', dtype=self.dtype)
            m_val = numpy.asarray(rand(*m_shape), dtype=self.dtype)

            # Building the symbolic gradient has to succeed and unpack
            # into exactly one gradient per requested variable.
            cost = theano.tensor.sum(op(n_var[:stop, :stop], m_var))
            grad_n, grad_m = theano.tensor.grad(cost, [n_var, m_var])

            # Gradient check with respect to the update value ...
            utt.verify_grad(
                lambda m_in: op(n_var[:stop, :stop], m_in),
                [m_val])
            # ... and with respect to the array being sliced.
            utt.verify_grad(
                lambda n_in: op(n_in[:stop, :stop], m_val),
                [n_val])
def test_grad_0d(self): def test_grad_0d(self):
data = numpy.asarray(rand(2, 3), dtype=self.dtype) data = numpy.asarray(rand(2, 3), dtype=self.dtype)
n = self.shared(data) n = self.shared(data)
......
...@@ -643,10 +643,10 @@ theano.compile.register_shape_i_c_code( ...@@ -643,10 +643,10 @@ theano.compile.register_shape_i_c_code(
TensorType, TensorType,
""" """
if(!%(oname)s) if(!%(oname)s)
%(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0); %(oname)s=(PyArrayObject*)PyArray_EMPTY(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0]=PyArray_DIMS(%(iname)s)[%(i)s]; ((npy_int64*)PyArray_DATA(%(oname)s))[0]=PyArray_DIMS(%(iname)s)[%(i)s];
""", """,
version=1) version=2)
# Register TensorType C code for DeepCopyOp # Register TensorType C code for DeepCopyOp
theano.compile.register_deep_copy_op_c_code( theano.compile.register_deep_copy_op_c_code(
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论