提交 d0043caf authored 作者: Frederic Bastien's avatar Frederic Bastien

Introduce GpuAdvancedIncSubtensor1_dev20 for the vector_scalar case…

Introduce GpuAdvancedIncSubtensor1_dev20 instead of GpuAdvancedIncSubtensor1 for the vector_scalar case (x is a vector and y is a scalar).
上级 3d716fd6
...@@ -1036,7 +1036,13 @@ def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs): ...@@ -1036,7 +1036,13 @@ def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
set_instead_of_inc = op.set_instead_of_inc set_instead_of_inc = op.set_instead_of_inc
compute_capability = int(context.bin_id[-2]) compute_capability = int(context.bin_id[-2])
if compute_capability < 2 or x.ndim != 2 or y.ndim != 2: if compute_capability >= 2 and x.ndim == 1 and y.ndim == 0:
x = x.dimshuffle(0, 'x')
y = y.dimshuffle('x', 'x')
ret = GpuAdvancedIncSubtensor1_dev20(
set_instead_of_inc=set_instead_of_inc)(x, y, ilist).dimshuffle(0)
return ret
elif compute_capability < 2 or x.ndim != 2 or y.ndim != 2:
return GpuAdvancedIncSubtensor1( return GpuAdvancedIncSubtensor1(
set_instead_of_inc=set_instead_of_inc) set_instead_of_inc=set_instead_of_inc)
else: else:
......
...@@ -66,7 +66,7 @@ def test_advinc_subtensor1(): ...@@ -66,7 +66,7 @@ def test_advinc_subtensor1():
def test_advinc_subtensor1_dtype(): def test_advinc_subtensor1_dtype():
# Test the mixed dtype case # Test the mixed dtype case
shp = (3, 3) shp = (3, 4)
for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64')]: for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64')]:
shared = gpuarray_shared_constructor shared = gpuarray_shared_constructor
xval = numpy.arange(numpy.prod(shp), dtype=dtype1).reshape(shp) + 1 xval = numpy.arange(numpy.prod(shp), dtype=dtype1).reshape(shp) + 1
...@@ -74,7 +74,28 @@ def test_advinc_subtensor1_dtype(): ...@@ -74,7 +74,28 @@ def test_advinc_subtensor1_dtype():
yval[:] = 10 yval[:] = 10
x = shared(xval, name='x') x = shared(xval, name='x')
y = tensor.tensor(dtype=yval.dtype, y = tensor.tensor(dtype=yval.dtype,
broadcastable=(False,) * len(shp), broadcastable=(False,) * len(yval.shape),
name='y')
expr = tensor.advanced_inc_subtensor1(x, y, [0, 2])
f = theano.function([y], expr, mode=mode_with_gpu)
assert sum([isinstance(node.op, GpuAdvancedIncSubtensor1_dev20)
for node in f.maker.fgraph.toposort()]) == 1
rval = f(yval)
rep = xval.copy()
rep[[0, 2]] += yval
assert numpy.allclose(rval, rep)
def test_advinc_subtensor1_vector_scalar():
# Test the case where x is a vector and y a scalar
shp = (3,)
for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64')]:
shared = gpuarray_shared_constructor
xval = numpy.arange(numpy.prod(shp), dtype=dtype1).reshape(shp) + 1
yval = numpy.asarray(10, dtype=dtype2)
x = shared(xval, name='x')
y = tensor.tensor(dtype=yval.dtype,
broadcastable=(False,) * len(yval.shape),
name='y') name='y')
expr = tensor.advanced_inc_subtensor1(x, y, [0, 2]) expr = tensor.advanced_inc_subtensor1(x, y, [0, 2])
f = theano.function([y], expr, mode=mode_with_gpu) f = theano.function([y], expr, mode=mode_with_gpu)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论