提交 8eeaea6c 作者: abergeron

Merge pull request #1919 from nouiz/crash_fix_broadcast

Crash fix broadcast
...@@ -20,7 +20,6 @@ from theano.sandbox.gpuarray.elemwise import GpuElemwise ...@@ -20,7 +20,6 @@ from theano.sandbox.gpuarray.elemwise import GpuElemwise
from theano.sandbox.gpuarray.comp import NVCC_compiler from theano.sandbox.gpuarray.comp import NVCC_compiler
class GpuSubtensor(HideC, Subtensor): class GpuSubtensor(HideC, Subtensor):
def make_node(self, x, *inputs): def make_node(self, x, *inputs):
rval = tensor.Subtensor.make_node(self, x, *inputs) rval = tensor.Subtensor.make_node(self, x, *inputs)
...@@ -32,15 +31,10 @@ class GpuSubtensor(HideC, Subtensor): ...@@ -32,15 +31,10 @@ class GpuSubtensor(HideC, Subtensor):
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_):
out, = out_ out, = out_
x = inputs[0] x = inputs[0]
if self.perform_cache_cdata is not None:
out[0] = x.__getitem__(self.perform_cache_cdata)
return
cdata = get_idx_list(inputs, self.idx_list) cdata = get_idx_list(inputs, self.idx_list)
if len(cdata) == 1: if len(cdata) == 1:
cdata = cdata[0] cdata = cdata[0]
if len(inputs) == 1:
self.perform_cache_cdata = cdata
out[0] = x.__getitem__(cdata) out[0] = x.__getitem__(cdata)
...@@ -232,7 +226,8 @@ class GpuIncSubtensor(IncSubtensor): ...@@ -232,7 +226,8 @@ class GpuIncSubtensor(IncSubtensor):
# scalar case # scalar case
if not self.set_instead_of_inc: if not self.set_instead_of_inc:
#x.__setitem__(cdata, sub_x + y) #x.__setitem__(cdata, sub_x + y)
tmp = pygpu.elemwise.elemwise2(sub_x, '+', y, sub_x, broadcast=False) tmp = pygpu.elemwise.elemwise2(sub_x, '+', y, sub_x,
broadcast=False)
x.__setitem__(cdata, tmp) x.__setitem__(cdata, tmp)
else: else:
x.__setitem__(cdata, y) x.__setitem__(cdata, y)
...@@ -592,4 +587,4 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1): ...@@ -592,4 +587,4 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
return; return;
} }
""" %locals() """ % locals()
差异被折叠。
...@@ -88,7 +88,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -88,7 +88,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
f = inplace_func([], t, mode=self.mode) f = inplace_func([], t, mode=self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
topo_ = [node for node in topo if not isinstance(node.op, topo_ = [node for node in topo if not isinstance(node.op,
self.ignore_topo)] self.ignore_topo)]
assert len(topo_) == 1 assert len(topo_) == 1
if not list: if not list:
assert isinstance(topo_[0].op, self.sub) assert isinstance(topo_[0].op, self.sub)
...@@ -365,19 +365,39 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -365,19 +365,39 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
f = inplace_func([], gn, mode=self.mode) f = inplace_func([], gn, mode=self.mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
topo_ = [node for node in topo if not isinstance(node.op, topo_ = [node for node in topo if not isinstance(node.op,
self.ignore_topo)] self.ignore_topo)]
if not self.fast_compile: if not self.fast_compile:
assert len(topo_) == 6 assert len(topo_) == 6
assert numpy.sum([isinstance(node.op, self.inc_sub) assert numpy.sum([isinstance(node.op, self.inc_sub)
for node in topo_]) == 1 for node in topo_]) == 1
assert numpy.sum([isinstance(node.op, self.sub) assert numpy.sum([isinstance(node.op, self.sub)
for node in topo_]) == 1 for node in topo_]) == 1
gval = f() gval = f()
good = numpy.zeros_like(data) good = numpy.zeros_like(data)
good[subi:, subi] = numpy.exp(data[subi:, subi]) good[subi:, subi] = numpy.exp(data[subi:, subi])
self.assertTrue(numpy.allclose(gval, good), (gval, good)) self.assertTrue(numpy.allclose(gval, good), (gval, good))
def test_grad_2d_inc_set_subtensor(self):
    """Check gradients of inc/set_subtensor on a 2-D slice.

    For every (shared shape, increment shape) pair and for both
    ``inc_subtensor`` and ``set_subtensor``, the test:

    * builds ``op(n[:2, :2], m)`` and its symbolic gradient with
      respect to both ``n`` and ``m`` (graph construction only, the
      resulting gradient variables are not evaluated here);
    * numerically verifies the gradient with respect to the
      increment value and with respect to the sliced tensor via
      ``utt.verify_grad``.

    NOTE(review): the increment shapes ``(1, 2)`` and ``(2,)`` are
    smaller than the ``[:2, :2]`` slice, so this presumably covers the
    broadcasting case -- confirm against the accompanying fix.
    """
    shape_pairs = (
        ((2, 3), (2, 2)),
        ((3, 2), (2, 2)),
        ((3, 2), (1, 2)),
        ((3, 2), (2,)),
    )
    for n_shape, m_shape in shape_pairs:
        for op in (inc_subtensor, set_subtensor):
            subi = 2
            # Draw the tensor value first, then the increment value,
            # keeping the original order of the two ``rand`` calls.
            n_val = numpy.asarray(rand(*n_shape), dtype=self.dtype)
            n_shared = self.shared(n_val)
            stop = scal.constant(subi)
            m_sym = matrix('m', dtype=self.dtype)
            m_val = numpy.asarray(rand(*m_shape), dtype=self.dtype)

            # Building the symbolic gradient is itself part of what is
            # exercised; its outputs are intentionally left unused.
            updated = op(n_shared[:stop, :stop], m_sym)
            cost = theano.tensor.sum(updated)
            gn, gm = theano.tensor.grad(cost, [n_shared, m_sym])

            # Gradient with respect to the increment/set value.
            utt.verify_grad(
                lambda inc: op(n_shared[:stop, :stop], inc), [m_val])
            # Gradient with respect to the tensor being sliced.
            utt.verify_grad(
                lambda base: op(base[:stop, :stop], m_val), [n_val])
def test_grad_0d(self): def test_grad_0d(self):
data = numpy.asarray(rand(2, 3), dtype=self.dtype) data = numpy.asarray(rand(2, 3), dtype=self.dtype)
n = self.shared(data) n = self.shared(data)
......
...@@ -643,10 +643,10 @@ theano.compile.register_shape_i_c_code( ...@@ -643,10 +643,10 @@ theano.compile.register_shape_i_c_code(
TensorType, TensorType,
""" """
if(!%(oname)s) if(!%(oname)s)
%(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0); %(oname)s=(PyArrayObject*)PyArray_EMPTY(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0]=PyArray_DIMS(%(iname)s)[%(i)s]; ((npy_int64*)PyArray_DATA(%(oname)s))[0]=PyArray_DIMS(%(iname)s)[%(i)s];
""", """,
version=1) version=2)
# Register TensorType C code for DeepCopyOp # Register TensorType C code for DeepCopyOp
theano.compile.register_deep_copy_op_c_code( theano.compile.register_deep_copy_op_c_code(
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论