提交 a240803c authored 作者: Frederic's avatar Frederic

[BUG] Fix 2 bugs in the new faster GpuAdvancedIncSubtensor1

It was always running inplace, and it was always treating the input as 2d.
上级 32e5fa85
......@@ -2451,13 +2451,14 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
out[0] = x
def c_code_cache_version(self):
return (1,)
return (2,)
def c_code(self, node, name, inputs, outputs, sub):
active_device_no = theano.sandbox.cuda.active_device_number()
compute_capability = theano.sandbox.cuda.device_properties(active_device_no)['major']
compute_capability = device_properties(active_device_no)['major']
if (self.set_instead_of_inc) or \
(node.inputs[0].ndim != node.inputs[1].ndim) or \
(node.inputs[0].ndim != 2) or \
(compute_capability < 2):
raise NotImplementedError("This case does not have C code yet.")
......@@ -2477,12 +2478,12 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
Py_XINCREF(%(out)s);
}
CudaNdarray_vector_add_fast(%(x)s, %(y)s, %(ind)s);
CudaNdarray_vector_add_fast(%(out)s, %(y)s, %(ind)s);
if (!%(out)s) {
%(fail)s
}
""" %locals()
""" % locals()
def c_support_code_apply(self, node, nodename):
return """
......
......@@ -999,20 +999,23 @@ class T_subtensor(theano.tensor.tests.test_basic.T_subtensor):
def test_advinc_subtensor1():
""" Test the second case in the opt local_gpu_advanced_incsubtensor1 """
shared = cuda.shared_constructor
#shared = tensor.shared
xval = numpy.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
dtype='float32')
yval = numpy.asarray([[10, 10, 10], [10, 10, 10]],
dtype='float32')
x = shared(xval, name='x')
y = T.fmatrices('y')
expr = T.advanced_inc_subtensor1(x, y, [0, 2])
f = theano.function([y], expr, mode=mode_with_gpu)
assert sum([isinstance(node.op, cuda.GpuAdvancedIncSubtensor1)
for node in f.maker.fgraph.toposort()]) == 1
assert numpy.allclose(f(yval), [[11., 12., 13.], [4., 5., 6.],
[17., 18., 19.]])
for shp in [(3, 3), (3, 3, 3)]:
shared = cuda.shared_constructor
xval = numpy.arange(numpy.prod(shp), dtype='float32').reshape(shp) + 1
yval = numpy.empty((2,) + shp[1:], dtype='float32')
yval[:] = 10
x = shared(xval, name='x')
y = T.tensor(dtype='float32',
broadcastable=(False,) * len(shp),
name='y')
expr = T.advanced_inc_subtensor1(x, y, [0, 2])
f = theano.function([y], expr, mode=mode_with_gpu)
assert sum([isinstance(node.op, cuda.GpuAdvancedIncSubtensor1)
for node in f.maker.fgraph.toposort()]) == 1
rval = f(yval)
rep = xval.copy()
rep[[0, 2]] += yval
assert numpy.allclose(rval, rep)
def test_inc_subtensor():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论