提交 c5727d8c authored 作者: Frederic Bastien's avatar Frederic Bastien

Implemented GpuAdvancedSubtensor1 and added tests for it.

上级 cb15c1c6
...@@ -137,10 +137,12 @@ outdated!""") ...@@ -137,10 +137,12 @@ outdated!""")
import basic_ops import basic_ops
from basic_ops import (GpuFromHost, HostFromGpu, GpuElemwise, from basic_ops import (GpuFromHost, HostFromGpu, GpuElemwise,
GpuDimShuffle, GpuSum, GpuReshape, GpuContiguous, GpuDimShuffle, GpuSum, GpuReshape, GpuContiguous,
GpuSubtensor, GpuIncSubtensor, GpuFlatten, GpuShape, GpuAlloc, GpuSubtensor, GpuAdvancedSubtensor1, GpuIncSubtensor,
GpuJoin,fscalar, fscalar, fvector, fmatrix, frow, fcol, ftensor3, ftensor4 GpuFlatten, GpuShape, GpuAlloc,
, scalar, vector, matrix, row, col, tensor3, tensor4) GpuJoin,fscalar, fscalar, fvector, fmatrix, frow, fcol,
ftensor3, ftensor4, scalar, vector, matrix, row, col,
tensor3, tensor4)
from basic_ops import host_from_gpu, gpu_from_host from basic_ops import host_from_gpu, gpu_from_host
import opt import opt
import cuda_ndarray import cuda_ndarray
......
...@@ -1720,6 +1720,32 @@ class GpuSubtensor(tensor.Subtensor): ...@@ -1720,6 +1720,32 @@ class GpuSubtensor(tensor.Subtensor):
cdata = cdata[0] cdata = cdata[0]
out[0] = x.__getitem__(cdata) out[0] = x.__getitem__(cdata)
class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1):
    """GPU counterpart of ``tensor.AdvancedSubtensor1``.

    Selects entries along the first dimension of ``x`` using a vector of
    integer indices.
    """

    def make_node(self, x, ilist):
        """Validate the inputs and build the Apply node.

        ``x`` is passed through ``as_cuda_ndarray_variable`` and ``ilist``
        through ``tensor.as_tensor_variable``. The node has a single output
        whose type is the type of the converted ``x``.

        Raises ``TypeError`` when the index is not an integer vector, when
        ``x`` is a scalar, or when the first dimension of ``x`` is
        broadcastable.
        """
        gpu_x = as_cuda_ndarray_variable(x)
        index_var = tensor.as_tensor_variable(ilist)

        # Accept both signed ('int*') and unsigned ('uint*') integer dtypes.
        if index_var.type.dtype[:3] not in ('int', 'uin'):
            raise TypeError('index must be integers')
        if index_var.type.broadcastable != (False,):
            raise TypeError('index must be vector')
        if gpu_x.type.ndim == 0:
            raise TypeError('cannot index into a scalar')
        if gpu_x.type.broadcastable[0]:
            # The caller should have made a copy of x len(ilist) times.
            raise TypeError('cannot index into a broadcastable dimension')

        return Apply(self, [gpu_x, index_var], [gpu_x.type()])

    def perform(self, node, inp, out_):
        """Gather ``x[i]`` for every ``i`` in the index vector.

        The parent class' ``perform`` is not reused because
        CudaNdarray_Subscript() does not support this kind of indexing, so
        the entries are copied one by one into a freshly allocated
        CudaNdarray.
        """
        source, indices = inp
        (storage,) = out_

        # One output entry per index; trailing dimensions are those of x.
        result_shape = (len(indices),) + source.shape[1:]
        gathered = cuda_ndarray.cuda_ndarray.CudaNdarray.zeros(result_shape)
        for dest, src in enumerate(indices):
            gathered[dest] = source[src]

        storage[0] = gathered
class GpuIncSubtensor(tensor.IncSubtensor): class GpuIncSubtensor(tensor.IncSubtensor):
def make_node(self, x, y, *inputs): def make_node(self, x, y, *inputs):
assert isinstance(x.type, CudaNdarrayType) assert isinstance(x.type, CudaNdarrayType)
......
...@@ -500,6 +500,23 @@ def local_gpu_subtensor(node): ...@@ -500,6 +500,23 @@ def local_gpu_subtensor(node):
return [host_from_gpu(GpuSubtensor(node.op.idx_list)(gpu_x, *coords))] return [host_from_gpu(GpuSubtensor(node.op.idx_list)(gpu_x, *coords))]
return False return False
@register_opt()
@local_optimizer([])
def local_gpu_advanced_subtensor1(node):
    """Replace ``tensor.AdvancedSubtensor1`` by ``GpuAdvancedSubtensor1``.

    Two patterns are rewritten:

    * ``gpu_from_host(AdvancedSubtensor1(x, *coords))`` becomes
      ``GpuAdvancedSubtensor1(gpu_from_host(x), *coords)``;
    * ``AdvancedSubtensor1(host_from_gpu(gpu_x), *coords)`` becomes
      ``host_from_gpu(GpuAdvancedSubtensor1(gpu_x, *coords))``.

    Returns a one-element list holding the replacement output, or ``False``
    when neither pattern matches.
    """
    if node.op == gpu_from_host:
        # The indexing happens on the host right before a transfer to the GPU.
        host_input = node.inputs[0]
        producer = host_input.owner
        if producer and isinstance(producer.op, tensor.AdvancedSubtensor1):
            x, coords = producer.inputs[0], producer.inputs[1:]
            gpu_result = GpuAdvancedSubtensor1()(gpu_from_host(x), *coords)
            return [gpu_result]

    if isinstance(node.op, tensor.AdvancedSubtensor1):
        # The indexed value was just transferred back from the GPU.
        x, coords = node.inputs[0], node.inputs[1:]
        if x.owner and x.owner.op == host_from_gpu:
            (gpu_x,) = x.owner.inputs
            gpu_result = GpuAdvancedSubtensor1()(gpu_x, *coords)
            return [host_from_gpu(gpu_result)]

    return False
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([])
def local_gpu_incsubtensor(node): def local_gpu_incsubtensor(node):
......
...@@ -783,6 +783,18 @@ def test_gpualloc_output_to_gpu(): ...@@ -783,6 +783,18 @@ def test_gpualloc_output_to_gpu():
assert numpy.allclose(numpy.ones(a.value.shape)+9,f_gpu(9)) assert numpy.allclose(numpy.ones(a.value.shape)+9,f_gpu(9))
assert numpy.allclose(f(5),f_gpu(5)) assert numpy.allclose(f(5),f_gpu(5))
import theano.tensor.tests.test_basic
# Reuse the CPU test class so that the tests are not duplicated.
# TODO: only the AdvancedSubtensor1 tests of the source class run on the GPU. All the others are tested only on the CPU!
class T_Adv_subtensor1(theano.tensor.tests.test_basic.T_subtensor):
    """Run the ``T_subtensor`` tests with the GPU settings below."""

    # Settings read by the inherited tests through ``self``.
    dtype = 'float32'
    mode = mode_with_gpu
    shared = staticmethod(cuda.shared_constructor)
    adv_sub1 = cuda.GpuAdvancedSubtensor1
    ignore_topo = (B.HostFromGpu, B.GpuFromHost)

    def __init__(self, name):
        # super() is deliberately anchored on T_subtensor, so the lookup
        # starts after it in the MRO and T_subtensor.__init__ is skipped.
        # NOTE(review): presumably this keeps the parent's constructor
        # defaults from shadowing the class attributes above -- confirm.
        parent = theano.tensor.tests.test_basic.T_subtensor
        super(parent, self).__init__(name)
def test_inc_subtensor(): def test_inc_subtensor():
shared = cuda.shared_constructor shared = cuda.shared_constructor
#shared = tensor.shared #shared = tensor.shared
......
...@@ -1372,6 +1372,17 @@ class T_min_max(unittest.TestCase): ...@@ -1372,6 +1372,17 @@ class T_min_max(unittest.TestCase):
#check_grad_max(data,eval_outputs(grad(max_and_argmax(n,axis=1)[0],n)),axis=1) #check_grad_max(data,eval_outputs(grad(max_and_argmax(n,axis=1)[0],n)),axis=1)
class T_subtensor(unittest.TestCase): class T_subtensor(unittest.TestCase):
def __init__(self, name, shared=shared,
             adv_sub1=theano.tensor.basic.AdvancedSubtensor1, mode=None,
             dtype=theano.config.floatX,
             ignore_topo=()):
    """Store the test configuration, then initialise the test case.

    name: forwarded to the parent class' ``__init__``.
    shared: callable used by the tests to build shared variables.
    adv_sub1: op class the tests expect in the compiled graph.
    mode: compilation mode handed to ``function`` by the tests.
    dtype: dtype the test data is converted to.
    ignore_topo: op classes filtered out of the toposort before the
        tests inspect it.
    """
    self.dtype = dtype
    self.mode = mode
    self.shared = shared
    self.adv_sub1 = adv_sub1
    self.ignore_topo = ignore_topo
    super(T_subtensor, self).__init__(name)
def setUp(self): def setUp(self):
Subtensor.debug = False Subtensor.debug = False
utt.seed_rng() utt.seed_rng()
...@@ -1582,47 +1593,56 @@ class T_subtensor(unittest.TestCase): ...@@ -1582,47 +1593,56 @@ class T_subtensor(unittest.TestCase):
(numpy.random.rand(4,2,3), [0,3]), (numpy.random.rand(4,2,3), [0,3]),
(numpy.random.rand(4,2,3), [3,3,1,1,2,2,0,0]), (numpy.random.rand(4,2,3), [3,3,1,1,2,2,0,0]),
]: ]:
n = shared(data) data = numpy.asarray(data, dtype=self.dtype)
n = self.shared(data)
t = n[idx] t = n[idx]
f = function([], t, mode=None) f = function([], t, mode=self.mode)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
assert len(topo) == 1 topo_ = [node for node in topo if not isinstance(node.op, self.ignore_topo)]
assert isinstance(topo[0].op, theano.tensor.basic.AdvancedSubtensor1) assert len(topo_) == 1
assert isinstance(topo_[0].op, self.adv_sub1)
val = f() val = f()
good = data[idx] good = data[idx]
self.failUnless(val.ndim == data.ndim) self.failUnless(val.ndim == data.ndim)
self.failUnless(numpy.allclose(val, good), (val, good)) self.failUnless(numpy.allclose(val, good), (val, good))
def test_err_invalid_list(self): def test_err_invalid_list(self):
n = shared(numpy.asarray(5)) n = self.shared(numpy.asarray(5, dtype=self.dtype))
self.assertRaises(TypeError, n.__getitem__, [0,0]) self.assertRaises(TypeError, n.__getitem__, [0,0])
def test_err_invalid_2list(self): def test_err_invalid_2list(self):
# TODO the error message is not clear # TODO the error message is not clear
n = shared(numpy.ones((3,3))*5) n = self.shared(numpy.ones((3,3), dtype=self.dtype)*5)
self.assertRaises(TypeError, n.__getitem__, ([0,0],[1,1])) self.assertRaises(TypeError, n.__getitem__, ([0,0],[1,1]))
def test_err_bound_list(self): def test_err_bound_list(self):
n = shared(numpy.ones((2,3))*5) n = self.shared(numpy.ones((2,3),dtype=self.dtype)*5)
t = n[[0,4]] t = n[[0,4]]
self.failUnless(isinstance(t.owner.op, AdvancedSubtensor1)) # We test again AdvancedSubtensor1 as we transfer data to the cpu.
self.assertRaises(IndexError, eval_outputs, [t]) self.failUnless(isinstance(t.owner.op, theano.tensor.basic.AdvancedSubtensor1))
f = function([], t, mode=self.mode)
topo = f.maker.env.toposort()
topo_ = [node for node in topo if not isinstance(node.op, self.ignore_topo)]
assert len(topo_)==1
self.failUnless(isinstance(topo_[0].op, self.adv_sub1))
self.assertRaises(IndexError, f)
def grad_list_(self, idxs, data): def grad_list_(self, idxs, data):
n = shared(data) n = self.shared(data)
fast_compile = theano.config.mode == 'FAST_COMPILE' fast_compile = theano.config.mode == 'FAST_COMPILE'
for idx in idxs: for idx in idxs:
# Should stay on the cpu.
idx_ = shared(numpy.asarray(idx)) idx_ = shared(numpy.asarray(idx))
t = n[idx_] t = n[idx_]
gn = grad(sum(exp(t)), n) gn = grad(sum(exp(t)), n)
f = function([], [gn, gn.shape], mode=None) f = function([], [gn, gn.shape], mode=self.mode)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
if not fast_compile: if not fast_compile:
assert any([isinstance(node.op, AdvancedIncSubtensor1) and node.op.inplace for node in topo]) assert any([isinstance(node.op, AdvancedIncSubtensor1) and node.op.inplace for node in topo])
else: else:
assert any([isinstance(node.op, AdvancedIncSubtensor1) for node in topo]) assert any([isinstance(node.op, AdvancedIncSubtensor1) for node in topo])
assert any([isinstance(node.op, AdvancedSubtensor1) for node in topo]) assert any([isinstance(node.op, self.adv_sub1) for node in topo])
gval, gshape = f() gval, gshape = f()
good = numpy.zeros_like(data) good = numpy.zeros_like(data)
# good[idx] += numpy.exp(data[idx]) don't work when the same index is used many time # good[idx] += numpy.exp(data[idx]) don't work when the same index is used many time
...@@ -1643,28 +1663,29 @@ class T_subtensor(unittest.TestCase): ...@@ -1643,28 +1663,29 @@ class T_subtensor(unittest.TestCase):
# Test shape of AdvancedIncSubtensor1 and AdvancedSubtensor1 # Test shape of AdvancedIncSubtensor1 and AdvancedSubtensor1
if idx is idxs[0]: if idx is idxs[0]:
f = function([], [gn.shape, n[idx_].shape], mode=None) f = function([], [gn.shape, n[idx_].shape], mode=self.mode)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
if not fast_compile: if not fast_compile:
self.failUnless(not any([isinstance(node.op, AdvancedIncSubtensor1) for node in topo])) self.failUnless(not any([isinstance(node.op, AdvancedIncSubtensor1) for node in topo]))
self.failUnless(not any([isinstance(node.op, AdvancedSubtensor1) for node in topo])) self.failUnless(not any([isinstance(node.op, self.adv_sub1) for node in topo]))
f() f()
def test_grad_list(self): def test_grad_list(self):
data = numpy.random.rand(4) data = numpy.random.rand(4)
data = numpy.asarray(data, dtype=self.dtype)
idxs = [[i] for i in range(data.shape[0])] idxs = [[i] for i in range(data.shape[0])]
debug_mode = isinstance(theano.compile.mode.get_default_mode(),
theano.compile.DebugMode)
for i in range(data.shape[0]): for i in range(data.shape[0]):
for j in range(0,data.shape[0],2): for j in range(0,data.shape[0],2):
idxs.append([i,j,(i+1)%data.shape[0]]) idxs.append([i,j,(i+1)%data.shape[0]])
self.grad_list_(idxs, data) self.grad_list_(idxs, data)
data = numpy.random.rand(4,3) data = numpy.random.rand(4,3)
data = numpy.asarray(data, dtype=self.dtype)
self.grad_list_(idxs, data) self.grad_list_(idxs, data)
data = numpy.random.rand(4,3,2) data = numpy.random.rand(4,3,2)
data = numpy.asarray(data, dtype=self.dtype)
self.grad_list_(idxs, data) self.grad_list_(idxs, data)
def test_shape_list(self): def test_shape_list(self):
...@@ -1674,7 +1695,8 @@ class T_subtensor(unittest.TestCase): ...@@ -1674,7 +1695,8 @@ class T_subtensor(unittest.TestCase):
(numpy.random.rand(4,2,3), [0,3]), (numpy.random.rand(4,2,3), [0,3]),
(numpy.random.rand(4,2,3), [3,3,1,2,2,]), (numpy.random.rand(4,2,3), [3,3,1,2,2,]),
]: ]:
n = shared(data) data = numpy.asarray(data, dtype=self.dtype)
n = self.shared(data)
t = n[idx] t = n[idx]
f = function([], t.shape, mode=None) f = function([], t.shape, mode=None)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论