Commit 8eeaea6c authored by abergeron

Merge pull request #1919 from nouiz/crash_fix_broadcast

Crash fix broadcast
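This merge does two substantive things: it deletes the `perform_cache_cdata` caching from `GpuSubtensor.perform` (the crash fix), and it teaches `IncSubtensor.grad` to sum the incoming gradient over the axes along which `y` was broadcast, so `gy` ends up with the same broadcastable pattern as `y`. The remaining hunks are line-wrapping and whitespace cleanups, a new test, and a small change to the Shape_i C code.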
@@ -20,7 +20,6 @@ from theano.sandbox.gpuarray.elemwise import GpuElemwise
 from theano.sandbox.gpuarray.comp import NVCC_compiler


 class GpuSubtensor(HideC, Subtensor):
     def make_node(self, x, *inputs):
         rval = tensor.Subtensor.make_node(self, x, *inputs)
@@ -32,15 +31,10 @@ class GpuSubtensor(HideC, Subtensor):
     def perform(self, node, inputs, out_):
         out, = out_
         x = inputs[0]
-        if self.perform_cache_cdata is not None:
-            out[0] = x.__getitem__(self.perform_cache_cdata)
-            return

         cdata = get_idx_list(inputs, self.idx_list)
         if len(cdata) == 1:
             cdata = cdata[0]
-        if len(inputs) == 1:
-            self.perform_cache_cdata = cdata

         out[0] = x.__getitem__(cdata)
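The deleted lines cached the computed index tuple on the Op instance itself. One plausible reading of why such a cache is unsafe, sketched in plain Python below (hypothetical `MyOp`, not the Theano API): ops are compared and merged by their constructor parameters, so hidden mutable state makes two "equal" ops behave differently.

# Hedged sketch: `a` and `b` compare equal, but only `a` carries the
# hidden cache that a perform()-style method would consult.
class MyOp(object):
    def __init__(self, idx_list):
        self.idx_list = idx_list
        self.perform_cache_cdata = None  # mutable state, invisible to __eq__

    def __eq__(self, other):
        return self.idx_list == other.idx_list

    def __hash__(self):
        return hash(self.idx_list)

a = MyOp((0,))
b = MyOp((0,))
a.perform_cache_cdata = (0,)  # primed by a previous perform() call
assert a == b  # a graph optimizer may substitute one for the other,
               # so results would depend on which instance survived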
@@ -232,7 +226,8 @@ class GpuIncSubtensor(IncSubtensor):
             # scalar case
             if not self.set_instead_of_inc:
                 #x.__setitem__(cdata, sub_x + y)
-                tmp = pygpu.elemwise.elemwise2(sub_x, '+', y, sub_x, broadcast=False)
+                tmp = pygpu.elemwise.elemwise2(sub_x, '+', y, sub_x,
+                                               broadcast=False)
                 x.__setitem__(cdata, tmp)
             else:
                 x.__setitem__(cdata, y)
@@ -592,4 +587,4 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
             return;
         }
-        """ %locals()
+        """ % locals()
@@ -64,6 +64,7 @@ def make_constant(args):
         return a
     return tuple(map(conv, args))

+
 def get_idx_list(inputs, idx_list):
     '''
     Given a list of inputs to the subtensor and its idx_list reorders
@@ -395,6 +396,7 @@ class Subtensor(Op):
         NotScalarConstantError: v
         """
         real_idx = get_idx_list(inputs, self.idx_list)
+
         def conv(val):
             if val is None:
                 return None
@@ -441,7 +443,8 @@ class Subtensor(Op):
             raise exception

         input_types = Subtensor.collapse(idx_list,
-                                         lambda entry: isinstance(entry, gof.Type))
+                                         lambda entry: isinstance(entry,
+                                                                  gof.Type))
         if len(inputs) != len(input_types):
             raise IndexError(
                 "Not enough inputs to fill in the Subtensor template.",
@@ -693,7 +696,7 @@ class Subtensor(Op):
            npy_intp xview_dims[%(view_ndim)s];
            npy_intp xview_strides[%(view_ndim)s];
-        """% locals()
+        """ % locals()
     else:
         rval = """
         // Argument of the view
@@ -987,7 +990,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
     if y.ndim > x.ndim:
         raise TypeError(("Trying to increment a %d-dimensional "
-                         "subtensor with a %d-dimensional value.") % (x.ndim, y.ndim))
+                         "subtensor with a %d-dimensional value.") % (x.ndim,
+                                                                      y.ndim))

     for dim in range(y.ndim):
         dim_offset = x.ndim - y.ndim
@@ -1042,7 +1046,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
         # return something that has the same shape as x, not as x.T (inner_x).
         # So re-apply the outer dimshuffle on the new inc_subtensor,
         # and return advanced_inc_subtensor1(x.T, i, y).T.
-        inner_incsubtensor = inc_subtensor(inner_x, y,
+        inner_incsubtensor = inc_subtensor(
+            inner_x, y,
             inplace=inplace,
             set_instead_of_inc=set_instead_of_inc,
             tolerate_inplace_aliasing=tolerate_inplace_aliasing)
@@ -1052,7 +1057,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
         # Try to apply inc_subtensor on inner_x.
         # If it works, there is no need to reshape, as the inc_subtensor
         # will have the same shape as inner_x, which is what we want.
-        inner_incsubtensor = inc_subtensor(inner_x, y.flatten(),
+        inner_incsubtensor = inc_subtensor(
+            inner_x, y.flatten(),
             inplace=inplace,
             set_instead_of_inc=set_instead_of_inc,
             tolerate_inplace_aliasing=tolerate_inplace_aliasing)
@@ -1140,8 +1146,8 @@ class IncSubtensor(Op):
         x, y = map(theano.tensor.as_tensor_variable, [x, y])
         if y.ndim > x.ndim:
             raise ValueError(("Trying to increment a %d-dimensional "
-                              "subtensor with a %d-dimensional value.") % (x.ndim,
-                              y.ndim))
+                              "subtensor with a %d-dimensional value.") % (
+                                  x.ndim, y.ndim))
         inputs = tuple(map(Subtensor.my_as_scalar, inputs))

         idx_list = list(self.idx_list)
@@ -1153,7 +1159,8 @@ class IncSubtensor(Op):
             exception.subtensor_invalid = True
             raise exception

-        input_types = Subtensor.collapse(idx_list,
+        input_types = Subtensor.collapse(
+            idx_list,
             lambda entry: isinstance(entry, gof.Type))
         if len(inputs) != len(input_types):
             raise IndexError(
@@ -1442,6 +1449,25 @@ class IncSubtensor(Op):
         else:
             gx = g_output
         gy = Subtensor(idx_list=self.idx_list)(g_output, *idx_list)
+        if gy.broadcastable != y.broadcastable:
+            y_broad = (True,) * (gy.ndim - y.ndim) + y.broadcastable
+            assert sum(gy.broadcastable) < sum(y_broad)
+            axis_to_sum = []
+            for i in range(gy.ndim):
+                if gy.broadcastable[i] is False and y_broad[i] is True:
+                    axis_to_sum.append(i)
+                elif (gy.broadcastable[i] is True and
+                      y_broad[i] is False):
+                    # This means that Theano was able to infer that
+                    # gy.shape[i] is 1, so y.shape[i] is 1, but we
+                    # did not know it. It is fine.
+                    pass
+                else:
+                    assert gy.broadcastable[i] == y_broad[i]
+            gy = gy.sum(axis=axis_to_sum, keepdims=True)
+            if gy.ndim != y.ndim:
+                gy = gy.dimshuffle(*range(y.ndim, gy.ndim))
+            assert gy.broadcastable == y.broadcastable
         return [gx, gy] + [DisconnectedType()()] * len(idx_list)
...
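In the new grad branch above, the rule is: the gradient flowing back through `Subtensor` has the broadcasted shape of the slice, and the gradient with respect to `y` is recovered by summing over exactly the axes where `y` has size 1 but the slice does not (`keepdims=True` preserves them as size-1 axes; the `dimshuffle` then drops leading axes when `y` has lower rank). A small NumPy sketch of that rule, not part of the diff:

import numpy

gy = numpy.arange(6.0).reshape(3, 2)  # gradient w.r.t. the (3, 2) slice
y_shape = (1, 2)                      # y was broadcast along axis 0
axis_to_sum = [i for i, ys in enumerate(y_shape)
               if ys == 1 and gy.shape[i] != 1]
dy = gy.sum(axis=tuple(axis_to_sum), keepdims=True)
assert dy.shape == y_shape  # matches y, as the final assert above demands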
@@ -378,6 +378,26 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
         good[subi:, subi] = numpy.exp(data[subi:, subi])
         self.assertTrue(numpy.allclose(gval, good), (gval, good))

+    def test_grad_2d_inc_set_subtensor(self):
+        for n_shape, m_shape in [
+            [(2, 3), (2, 2)],
+            [(3, 2), (2, 2)],
+            [(3, 2), (1, 2)],
+            [(3, 2), (2,)],
+        ]:
+            for op in [inc_subtensor, set_subtensor]:
+                subi = 2
+                data = numpy.asarray(rand(*n_shape), dtype=self.dtype)
+                n = self.shared(data)
+                z = scal.constant(subi)
+                m = matrix('m', dtype=self.dtype)
+                mv = numpy.asarray(rand(*m_shape), dtype=self.dtype)
+
+                t = op(n[:z, :z], m)
+                gn, gm = theano.tensor.grad(theano.tensor.sum(t), [n, m])
+                utt.verify_grad(lambda m: op(n[:z, :z], m), [mv])
+                utt.verify_grad(lambda nn: op(nn[:z, :z], mv), [data])
+
     def test_grad_0d(self):
         data = numpy.asarray(rand(2, 3), dtype=self.dtype)
         n = self.shared(data)
...
@@ -643,10 +643,10 @@ theano.compile.register_shape_i_c_code(
     TensorType,
     """
     if(!%(oname)s)
-        %(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
+        %(oname)s=(PyArrayObject*)PyArray_EMPTY(0, NULL, NPY_INT64, 0);
     ((npy_int64*)PyArray_DATA(%(oname)s))[0]=PyArray_DIMS(%(iname)s)[%(i)s];
     """,
-    version=1)
+    version=2)

 # Register TensorType C code for DeepCopyOp
 theano.compile.register_deep_copy_op_c_code(
...
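Two notes on this last hunk: `PyArray_EMPTY` skips the zero-fill that `PyArray_ZEROS` performs, which is safe because the next line immediately overwrites the scalar's only element; and the bump from `version=1` to `version=2` tells Theano's C-code cache that the snippet changed, forcing recompilation instead of reusing a stale compiled module.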