提交 c5727d8c authored 作者: Frederic Bastien's avatar Frederic Bastien

Implemented GpuAdvancedSubtensor1 and tested it.

上级 cb15c1c6
......@@ -137,10 +137,12 @@ outdated!""")
import basic_ops
from basic_ops import (GpuFromHost, HostFromGpu, GpuElemwise,
GpuDimShuffle, GpuSum, GpuReshape, GpuContiguous,
GpuSubtensor, GpuIncSubtensor, GpuFlatten, GpuShape, GpuAlloc,
GpuJoin,fscalar, fscalar, fvector, fmatrix, frow, fcol, ftensor3, ftensor4
, scalar, vector, matrix, row, col, tensor3, tensor4)
GpuDimShuffle, GpuSum, GpuReshape, GpuContiguous,
GpuSubtensor, GpuAdvancedSubtensor1, GpuIncSubtensor,
GpuFlatten, GpuShape, GpuAlloc,
GpuJoin,fscalar, fscalar, fvector, fmatrix, frow, fcol,
ftensor3, ftensor4, scalar, vector, matrix, row, col,
tensor3, tensor4)
from basic_ops import host_from_gpu, gpu_from_host
import opt
import cuda_ndarray
......
......@@ -1720,6 +1720,32 @@ class GpuSubtensor(tensor.Subtensor):
cdata = cdata[0]
out[0] = x.__getitem__(cdata)
class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1):
    """GPU counterpart of ``tensor.AdvancedSubtensor1``.

    Selects entries along the first dimension of a CudaNdarray variable
    ``x`` using an integer vector ``ilist``.
    """

    def make_node(self, x, ilist):
        """Validate the inputs and build the Apply node.

        Raises TypeError when ``ilist`` is not an integer vector, when ``x``
        is a scalar, or when the first dimension of ``x`` is broadcastable.
        """
        gpu_x = as_cuda_ndarray_variable(x)
        index = tensor.as_tensor_variable(ilist)

        index_type = index.type
        if index_type.dtype[:3] not in ('int', 'uin'):
            raise TypeError('index must be integers')
        if index_type.broadcastable != (False,):
            raise TypeError('index must be vector')

        x_type = gpu_x.type
        # The ndim check must come first: a scalar has no broadcastable[0].
        if x_type.ndim == 0:
            raise TypeError('cannot index into a scalar')
        if x_type.broadcastable[0]:
            # the caller should have made a copy of x len(ilist) times
            raise TypeError('cannot index into a broadcastable dimension')

        # The output has the same type as the GPU input.
        return Apply(self, [gpu_x, index], [x_type()])

    def perform(self, node, inp, out_):
        """Copy ``x[i]`` for every ``i`` in the index vector into the output."""
        # The parent perform() cannot be reused here because
        # CudaNdarray_Subscript() does not support this kind of indexing.
        x, idx = inp
        out, = out_
        zeros = cuda_ndarray.cuda_ndarray.CudaNdarray.zeros
        # One output entry per index; the trailing dimensions are those of x.
        result = zeros((len(idx),) + x.shape[1:])
        for dest, src in enumerate(idx):
            result[dest] = x[src]
        out[0] = result
class GpuIncSubtensor(tensor.IncSubtensor):
def make_node(self, x, y, *inputs):
assert isinstance(x.type, CudaNdarrayType)
......
......@@ -500,6 +500,23 @@ def local_gpu_subtensor(node):
return [host_from_gpu(GpuSubtensor(node.op.idx_list)(gpu_x, *coords))]
return False
@register_opt()
@local_optimizer([])
def local_gpu_advanced_subtensor1(node):
    """Move ``tensor.AdvancedSubtensor1`` onto the GPU.

    Two patterns are rewritten:

    * ``gpu_from_host(AdvancedSubtensor1(x, idx))``
      -> ``GpuAdvancedSubtensor1(gpu_from_host(x), idx)``
    * ``AdvancedSubtensor1(host_from_gpu(gpu_x), idx)``
      -> ``host_from_gpu(GpuAdvancedSubtensor1(gpu_x, idx))``

    Returns the list of replacement outputs, or False when neither pattern
    matches.
    """
    op = node.op

    # Pattern 1: the result of a host-side indexing is sent to the GPU.
    if op == gpu_from_host:
        producer = node.inputs[0].owner
        if producer and isinstance(producer.op, tensor.AdvancedSubtensor1):
            host_x = producer.inputs[0]
            indices = producer.inputs[1:]
            gpu_op = GpuAdvancedSubtensor1()
            return [gpu_op(gpu_from_host(host_x), *indices)]

    # Pattern 2: a host-side indexing reads data that came from the GPU.
    if isinstance(op, tensor.AdvancedSubtensor1):
        source = node.inputs[0]
        indices = node.inputs[1:]
        source_node = source.owner
        if source_node and source_node.op == host_from_gpu:
            gpu_x, = source_node.inputs
            return [host_from_gpu(GpuAdvancedSubtensor1()(gpu_x, *indices))]

    return False
@register_opt()
@local_optimizer([])
def local_gpu_incsubtensor(node):
......
......@@ -783,6 +783,18 @@ def test_gpualloc_output_to_gpu():
assert numpy.allclose(numpy.ones(a.value.shape)+9,f_gpu(9))
assert numpy.allclose(f(5),f_gpu(5))
import theano.tensor.tests.test_basic
# Subclass the CPU test case so that its tests are not duplicated here.
# TODO: only the AdvancedSubtensor1 tests of the parent class run on the GPU; all the others are still tested only on the CPU!
class T_Adv_subtensor1(theano.tensor.tests.test_basic.T_subtensor):
    """Run the inherited T_subtensor tests with the GPU settings below."""

    # Class-level settings that the inherited tests read through ``self``.
    shared = staticmethod(cuda.shared_constructor)
    adv_sub1 = cuda.GpuAdvancedSubtensor1
    mode = mode_with_gpu
    dtype = 'float32'
    ignore_topo = (B.HostFromGpu, B.GpuFromHost)

    def __init__(self, name):
        # super() is anchored on T_subtensor itself, so T_subtensor.__init__
        # is skipped: that constructor assigns instance attributes with the
        # same names, which would hide the class-level settings above.
        parent = theano.tensor.tests.test_basic.T_subtensor
        super(parent, self).__init__(name)
def test_inc_subtensor():
shared = cuda.shared_constructor
#shared = tensor.shared
......
......@@ -1372,6 +1372,17 @@ class T_min_max(unittest.TestCase):
#check_grad_max(data,eval_outputs(grad(max_and_argmax(n,axis=1)[0],n)),axis=1)
class T_subtensor(unittest.TestCase):
def __init__(self, name, shared=shared,
             adv_sub1=theano.tensor.basic.AdvancedSubtensor1, mode=None,
             dtype=theano.config.floatX,
             ignore_topo=()):
    """Store the knobs that let these tests be re-run on another backend.

    :param name: name of the test method, forwarded to unittest.TestCase.
    :param shared: callable used by the tests to build shared variables.
    :param adv_sub1: Op class the tests look for in the compiled graph.
    :param mode: compilation mode passed to ``function``.
    :param dtype: dtype the test data is converted to.
    :param ignore_topo: Op classes filtered out of the compiled graph
        before it is inspected.
    """
    self.shared, self.adv_sub1 = shared, adv_sub1
    self.mode, self.dtype = mode, dtype
    self.ignore_topo = ignore_topo
    super(T_subtensor, self).__init__(name)
def setUp(self):
Subtensor.debug = False
utt.seed_rng()
......@@ -1582,47 +1593,56 @@ class T_subtensor(unittest.TestCase):
(numpy.random.rand(4,2,3), [0,3]),
(numpy.random.rand(4,2,3), [3,3,1,1,2,2,0,0]),
]:
n = shared(data)
data = numpy.asarray(data, dtype=self.dtype)
n = self.shared(data)
t = n[idx]
f = function([], t, mode=None)
f = function([], t, mode=self.mode)
topo = f.maker.env.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, theano.tensor.basic.AdvancedSubtensor1)
topo_ = [node for node in topo if not isinstance(node.op, self.ignore_topo)]
assert len(topo_) == 1
assert isinstance(topo_[0].op, self.adv_sub1)
val = f()
good = data[idx]
self.failUnless(val.ndim == data.ndim)
self.failUnless(numpy.allclose(val, good), (val, good))
def test_err_invalid_list(self):
    # Indexing a 0-d shared variable with a list must raise TypeError.
    # The variable is built through self.shared / self.dtype so that
    # subclasses can re-run this test on another backend; the earlier
    # ``n = shared(...)`` assignment was a dead store and is removed.
    n = self.shared(numpy.asarray(5, dtype=self.dtype))
    self.assertRaises(TypeError, n.__getitem__, [0,0])
def test_err_invalid_2list(self):
    # Indexing a matrix with a tuple of two lists must raise TypeError.
    # TODO the error message is not clear
    # The variable is built through self.shared / self.dtype only; the
    # earlier ``n = shared(...)`` assignment was a dead store and is removed.
    n = self.shared(numpy.ones((3,3), dtype=self.dtype)*5)
    self.assertRaises(TypeError, n.__getitem__, ([0,0],[1,1]))
def test_err_bound_list(self):
    # An out-of-bounds entry in the index list must raise IndexError when
    # the compiled function is run.
    n = self.shared(numpy.ones((2,3),dtype=self.dtype)*5)
    t = n[[0,4]]
    # Before compilation the graph is checked against the host
    # AdvancedSubtensor1 (original note: "as we transfer data to the cpu").
    self.failUnless(isinstance(t.owner.op, theano.tensor.basic.AdvancedSubtensor1))
    # Compile with self.mode so the configured backend is the one tested.
    f = function([], t, mode=self.mode)
    topo = f.maker.env.toposort()
    # Drop the ops listed in self.ignore_topo before counting nodes.
    topo_ = [node for node in topo if not isinstance(node.op, self.ignore_topo)]
    assert len(topo_)==1
    self.failUnless(isinstance(topo_[0].op, self.adv_sub1))
    self.assertRaises(IndexError, f)
def grad_list_(self, idxs, data):
n = shared(data)
n = self.shared(data)
fast_compile = theano.config.mode == 'FAST_COMPILE'
for idx in idxs:
# Should stay on the cpu.
idx_ = shared(numpy.asarray(idx))
t = n[idx_]
gn = grad(sum(exp(t)), n)
f = function([], [gn, gn.shape], mode=None)
f = function([], [gn, gn.shape], mode=self.mode)
topo = f.maker.env.toposort()
if not fast_compile:
assert any([isinstance(node.op, AdvancedIncSubtensor1) and node.op.inplace for node in topo])
else:
assert any([isinstance(node.op, AdvancedIncSubtensor1) for node in topo])
assert any([isinstance(node.op, AdvancedSubtensor1) for node in topo])
assert any([isinstance(node.op, self.adv_sub1) for node in topo])
gval, gshape = f()
good = numpy.zeros_like(data)
# good[idx] += numpy.exp(data[idx]) don't work when the same index is used many time
......@@ -1643,28 +1663,29 @@ class T_subtensor(unittest.TestCase):
# Test shape of AdvancedIncSubtensor1 and AdvancedSubtensor1
if idx is idxs[0]:
f = function([], [gn.shape, n[idx_].shape], mode=None)
f = function([], [gn.shape, n[idx_].shape], mode=self.mode)
topo = f.maker.env.toposort()
if not fast_compile:
self.failUnless(not any([isinstance(node.op, AdvancedIncSubtensor1) for node in topo]))
self.failUnless(not any([isinstance(node.op, AdvancedSubtensor1) for node in topo]))
self.failUnless(not any([isinstance(node.op, self.adv_sub1) for node in topo]))
f()
def test_grad_list(self):
    # Run grad_list_ on 1-d, 2-d and 3-d data that share the same first
    # dimension, so the same index lists are valid for all of them.
    shapes = [(4,), (4, 3), (4, 3, 2)]
    n_rows = shapes[0][0]

    # Every single index, then triples (some of which repeat an index).
    idxs = [[i] for i in range(n_rows)]
    for i in range(n_rows):
        for j in range(0, n_rows, 2):
            idxs.append([i, j, (i + 1) % n_rows])

    # Random data is drawn in the same order as before: one array per
    # shape, each immediately followed by its grad_list_ call.
    for shape in shapes:
        data = numpy.asarray(numpy.random.rand(*shape), dtype=self.dtype)
        self.grad_list_(idxs, data)
def test_shape_list(self):
......@@ -1674,7 +1695,8 @@ class T_subtensor(unittest.TestCase):
(numpy.random.rand(4,2,3), [0,3]),
(numpy.random.rand(4,2,3), [3,3,1,2,2,]),
]:
n = shared(data)
data = numpy.asarray(data, dtype=self.dtype)
n = self.shared(data)
t = n[idx]
f = function([], t.shape, mode=None)
topo = f.maker.env.toposort()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论