提交 8eeaea6c 作者: abergeron

Merge pull request #1919 from nouiz/crash_fix_broadcast

Crash fix broadcast
......@@ -20,7 +20,6 @@ from theano.sandbox.gpuarray.elemwise import GpuElemwise
from theano.sandbox.gpuarray.comp import NVCC_compiler
class GpuSubtensor(HideC, Subtensor):
def make_node(self, x, *inputs):
rval = tensor.Subtensor.make_node(self, x, *inputs)
......@@ -32,15 +31,10 @@ class GpuSubtensor(HideC, Subtensor):
def perform(self, node, inputs, out_):
out, = out_
x = inputs[0]
if self.perform_cache_cdata is not None:
out[0] = x.__getitem__(self.perform_cache_cdata)
return
cdata = get_idx_list(inputs, self.idx_list)
if len(cdata) == 1:
cdata = cdata[0]
if len(inputs) == 1:
self.perform_cache_cdata = cdata
out[0] = x.__getitem__(cdata)
......@@ -232,7 +226,8 @@ class GpuIncSubtensor(IncSubtensor):
# scalar case
if not self.set_instead_of_inc:
#x.__setitem__(cdata, sub_x + y)
tmp = pygpu.elemwise.elemwise2(sub_x, '+', y, sub_x, broadcast=False)
tmp = pygpu.elemwise.elemwise2(sub_x, '+', y, sub_x,
broadcast=False)
x.__setitem__(cdata, tmp)
else:
x.__setitem__(cdata, y)
......@@ -592,4 +587,4 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
return;
}
""" %locals()
""" % locals()
差异被折叠。
......@@ -88,7 +88,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
f = inplace_func([], t, mode=self.mode)
topo = f.maker.fgraph.toposort()
topo_ = [node for node in topo if not isinstance(node.op,
self.ignore_topo)]
self.ignore_topo)]
assert len(topo_) == 1
if not list:
assert isinstance(topo_[0].op, self.sub)
......@@ -365,19 +365,39 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
f = inplace_func([], gn, mode=self.mode)
topo = f.maker.fgraph.toposort()
topo_ = [node for node in topo if not isinstance(node.op,
self.ignore_topo)]
self.ignore_topo)]
if not self.fast_compile:
assert len(topo_) == 6
assert numpy.sum([isinstance(node.op, self.inc_sub)
for node in topo_]) == 1
for node in topo_]) == 1
assert numpy.sum([isinstance(node.op, self.sub)
for node in topo_]) == 1
for node in topo_]) == 1
gval = f()
good = numpy.zeros_like(data)
good[subi:, subi] = numpy.exp(data[subi:, subi])
self.assertTrue(numpy.allclose(gval, good), (gval, good))
def test_grad_2d_inc_set_subtensor(self):
    """Check gradients of inc_subtensor / set_subtensor on a 2-d slice.

    For every (n_shape, m_shape) pair and for both ops, the slice
    ``n[:2, :2]`` of a shared variable is combined with a second value
    and the gradient is verified numerically with respect to each
    operand in turn.

    NOTE(review): m_shape values such as (1, 2) and (2,) differ from the
    shape of the slice, so this presumably exercises broadcasting of the
    second operand -- confirm against the ops' documentation.
    """
    shape_pairs = [
        [(2, 3), (2, 2)],
        [(3, 2), (2, 2)],
        [(3, 2), (1, 2)],
        [(3, 2), (2,)],
    ]
    for n_shape, m_shape in shape_pairs:
        for op in (inc_subtensor, set_subtensor):
            # Keep the order of the two rand() calls: data first, mv second.
            data = numpy.asarray(rand(*n_shape), dtype=self.dtype)
            n = self.shared(data)
            z = scal.constant(2)
            m = matrix('m', dtype=self.dtype)
            mv = numpy.asarray(rand(*m_shape), dtype=self.dtype)

            t = op(n[:z, :z], m)
            # The gradient variables are never used afterwards; this call
            # only exercises construction of the symbolic gradient.
            cost = theano.tensor.sum(t)
            gn, gm = theano.tensor.grad(cost, [n, m])

            # Numerical gradient check w.r.t. the value written into the
            # slice, then w.r.t. the tensor being sliced.
            utt.verify_grad(lambda m_val: op(n[:z, :z], m_val), [mv])
            utt.verify_grad(lambda n_val: op(n_val[:z, :z], mv), [data])
def test_grad_0d(self):
data = numpy.asarray(rand(2, 3), dtype=self.dtype)
n = self.shared(data)
......
......@@ -643,10 +643,10 @@ theano.compile.register_shape_i_c_code(
TensorType,
"""
if(!%(oname)s)
%(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
%(oname)s=(PyArrayObject*)PyArray_EMPTY(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0]=PyArray_DIMS(%(iname)s)[%(i)s];
""",
version=1)
version=2)
# Register TensorType C code for DeepCopyOp
theano.compile.register_deep_copy_op_c_code(
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论