提交 25089971 authored 作者: Balázs's avatar Balázs

Changed c code version from 6 to 7.

Added tests to test_basic_ops.py for GpuAdvancedIncSubtensor1 (general case) and GpuAdvancedIncSubtensor1_dev20 (2d case if compute capability >=2.0).
上级 61d11637
...@@ -2888,7 +2888,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp): ...@@ -2888,7 +2888,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
out[0] = x out[0] = x
def c_code_cache_version(self): def c_code_cache_version(self):
return (6,) return (7,)
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
if (node.inputs[0].ndim != node.inputs[1].ndim): if (node.inputs[0].ndim != node.inputs[1].ndim):
...@@ -3033,7 +3033,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1): ...@@ -3033,7 +3033,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
return Apply(self, [x_, y_, ilist_], [x_.type()]) return Apply(self, [x_, y_, ilist_], [x_.type()])
def c_code_cache_version(self): def c_code_cache_version(self):
return (6,) return (7,)
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
active_device_no = theano.sandbox.cuda.active_device_number() active_device_no = theano.sandbox.cuda.active_device_number()
......
...@@ -1108,6 +1108,41 @@ def test_advinc_subtensor1(): ...@@ -1108,6 +1108,41 @@ def test_advinc_subtensor1():
rep[[0, 2]] += yval rep[[0, 2]] += yval
utt.assert_allclose(rval, rep) utt.assert_allclose(rval, rep)
def test_advset_subtensor1():
""" Test GPU version of set_subtensor on vectors (uses GpuAdvancedIncSubtensor1) """
shp = (10,)
shared = cuda.shared_constructor
xval = numpy.arange(shp[0], dtype='float32').reshape(shp) + 1
idxs = numpy.array([0,2,5,7,3], dtype='int32')
yval = numpy.ones(len(idxs), dtype='float32')*10
x = shared(xval, name='x')
y = T.tensor(dtype='float32', broadcastable=(False,) * len(shp), name='y')
expr = T.advanced_set_subtensor1(x, y, idxs)
f = theano.function([y], expr, mode=mode_with_gpu)
assert sum([isinstance(node.op, cuda.GpuAdvancedIncSubtensor1)
for node in f.maker.fgraph.toposort()]) == 1
rval = f(yval)
rep = xval.copy()
rep[idxs] = yval
utt.assert_allclose(rval, rep)
def test_advset_subtensor1_2d():
""" Test GPU version of set_subtensor on matrices (uses GpuAdvancedIncSubtensor1_dev20 if compute capability >= 2.0) """
shp = (10,5)
shared = cuda.shared_constructor
xval = numpy.arange(numpy.prod(shp), dtype='float32').reshape(shp) + 1
idxs = numpy.array([0,2,5,7,3], dtype='int32')
yval = numpy.ones((len(idxs), shp[1]), dtype='float32')*10
x = shared(xval, name='x')
y = T.tensor(dtype='float32', broadcastable=(False,) * len(shp), name='y')
expr = T.advanced_set_subtensor1(x, y, idxs)
f = theano.function([y], expr, mode=mode_with_gpu)
assert sum([isinstance(node.op, cuda.GpuAdvancedIncSubtensor1)
for node in f.maker.fgraph.toposort()]) == 1
rval = f(yval)
rep = xval.copy()
rep[idxs] = yval
utt.assert_allclose(rval, rep)
def test_inc_subtensor(): def test_inc_subtensor():
shared = cuda.shared_constructor shared = cuda.shared_constructor
...@@ -1341,5 +1376,7 @@ def speed_reduce10(): ...@@ -1341,5 +1376,7 @@ def speed_reduce10():
if __name__ == '__main__': if __name__ == '__main__':
test_many_arg_elemwise() #test_many_arg_elemwise()
test_gpujoin_assert_cndas() #test_gpujoin_assert_cndas()
test_advset_subtensor1()
test_advset_subtensor1_2d()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论