提交 3929a557 authored 作者: Ying Zhang's avatar Ying Zhang 提交者: Arnaud Bergeron

GpuAdvancedSubtensor

上级 58e93f9b
...@@ -52,6 +52,7 @@ from .nnet import (gpu_crossentropy_softmax_1hot_with_bias_dx, ...@@ -52,6 +52,7 @@ from .nnet import (gpu_crossentropy_softmax_1hot_with_bias_dx,
from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda, from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda,
GpuCAReduceCPY, gpu_ca_reduce_cuda) GpuCAReduceCPY, gpu_ca_reduce_cuda)
from .subtensor import (GpuIncSubtensor, GpuSubtensor, from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor,
GpuAdvancedSubtensor1, GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20) GpuAdvancedIncSubtensor1_dev20)
...@@ -975,6 +976,12 @@ def local_gpua_advanced_subtensor(op, context_name, inputs, outputs): ...@@ -975,6 +976,12 @@ def local_gpua_advanced_subtensor(op, context_name, inputs, outputs):
return GpuAdvancedSubtensor1() return GpuAdvancedSubtensor1()
@register_opt('fast_compile')
@op_lifter([tensor.AdvancedSubtensor])
def local_gpua_advanced_subtensor_(node, context_name):
    """Lift a host ``AdvancedSubtensor`` node to ``GpuAdvancedSubtensor``."""
    # The op carries no state, so a fresh instance is always equivalent.
    return GpuAdvancedSubtensor()
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.AdvancedIncSubtensor1]) @op_lifter([tensor.AdvancedIncSubtensor1])
@register_opt2([tensor.AdvancedIncSubtensor1], 'fast_compile') @register_opt2([tensor.AdvancedIncSubtensor1], 'fast_compile')
......
...@@ -472,7 +472,122 @@ if (err != GA_NO_ERROR) { ...@@ -472,7 +472,122 @@ if (err != GA_NO_ERROR) {
return (0,) return (0,)
class GpuAdvancedSubtensor(HideC, tensor.AdvancedSubtensor):
    """
    AdvancedSubtensor on the GPU.

    Implements NumPy-style advanced (fancy) indexing for GPU arrays by
    reducing the general case to a single 1-d ``take`` on a partially
    flattened array.
    """

    def make_node(self, x, *inputs):
        """Build an Apply node whose output lives on the same GPU context as *x*.

        Delegates shape/broadcast inference to the host op, then replaces the
        output type with a ``GpuArrayType`` of the same dtype/broadcastable.
        """
        ctx_name = infer_context_name(x)
        rval = tensor.AdvancedSubtensor.make_node(self, x, *inputs)
        otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
                             broadcastable=rval.outputs[0].type.broadcastable,
                             context_name=ctx_name)
        x = as_gpuarray_variable(x, ctx_name)
        # Keep the host op's index inputs unchanged; only x moves to the GPU.
        return gof.Apply(self, [x] + rval.inputs[1:], [otype()])

    def perform(self, node, inputs, out_):
        """Compute the advanced subtensor.

        Strategy: move all advanced (array) index dimensions next to each
        other, flatten them into one axis, turn the index arrays into flat
        offsets via explicit strides, and do a single ``take``.

        NOTE(review): this uses ``tensor.prod``/``tensor.reshape``/
        ``tensor.sum`` on runtime values inside perform — presumably the
        inputs support these operations directly; confirm against the
        gpuarray ndarray API.
        """
        out, = out_
        x = inputs[0]
        idx = inputs[1:]
        # One index entry per dimension of x (extra entries can be None /
        # newaxis markers).
        assert len(idx) >= x.ndim
        dims = len(idx)
        # step 1: find smallest index
        # `start` = position of the first array index, `end` = one past the
        # last array index.  AdvancedSubtensor guarantees at least one array
        # index, so both loops break.
        for k, i in enumerate(idx):
            if isinstance(i, numpy.ndarray):
                start = k
                break
        for k, i in enumerate(idx[::-1]):
            if isinstance(i, numpy.ndarray):
                end = len(idx) - k
                break

        # step 2: transpose
        def get_indices(a, b, ind):
            """
            Get real indices for a list of indices.

            Builds a dimshuffle pattern that moves every array-indexed axis
            between positions *a* and *b* to be contiguous (right after the
            leading slice axes), plus the reordered index list and the
            position one past the last array-indexed axis after the shuffle.
            """
            dimshuffle_info = []
            new_ind = []
            k = 0
            # Axes before the first array index: keep slices in place,
            # expand None (newaxis) entries with a broadcastable 'x' axis.
            for i in range(0, a):
                if isinstance(ind[i], slice):
                    dimshuffle_info.append(k)
                    new_ind.append(ind[i])
                    k += 1
                elif ind[i] is None:
                    dimshuffle_info.append('x')
                    new_ind.append(slice(None))
            # The first array index itself stays at its (shifted) position.
            dimshuffle_info.append(k)
            new_ind.append(ind[a])
            k += 1
            # Between the first and last array index: collect slice axes
            # (idx_1/idx_2) so they can be moved after the array axes (idx_3).
            idx_1 = []
            idx_2 = []
            idx_3 = []
            for i in range(a+1, b):
                if isinstance(ind[i], slice):
                    idx_1.append(k)
                    idx_2.append(ind[i])
                    k += 1
                elif ind[i] is None:
                    idx_3.append('x')
                    new_ind.append(slice(None))
                else:
                    idx_3.append(k)
                    new_ind.append(ind[i])
                    k += 1
            # One past the last array-indexed axis once the shuffle is done.
            valid_end = a + len(idx_3) + 1
            dimshuffle_info.extend(idx_3)
            new_ind += idx_2
            dimshuffle_info.extend(idx_1)
            # Axes after the last array index: same treatment as the prefix.
            for i in range(b, len(ind)):
                if isinstance(ind[i], slice):
                    dimshuffle_info.append(k)
                    new_ind.append(ind[i])
                    k += 1
                elif ind[i] is None:
                    dimshuffle_info.append('x')
                    new_ind.append(slice(None))
            return dimshuffle_info, new_ind, valid_end

        (dimshuffle_idx, new_ind,
         end_) = get_indices(start, end, idx)
        x = x.transpose(*dimshuffle_idx)
        # step 3: partial flattening
        # Collapse the now-contiguous array-indexed axes [start, end_) into a
        # single axis so one flat take suffices.
        start_ = start
        shape = (x.shape[: start_] +
                 (tensor.prod(x.shape[start: end_]),) +
                 x.shape[end_:])
        input_flat = tensor.reshape(x, shape)
        # step 4: build the strides
        # Row-major strides (in elements) over the collapsed axes, built from
        # the innermost axis outward.
        strides = [1]
        for i in range(start_, end_-1)[::-1]:
            stride = x.shape[i+1] * strides[-1]
            strides.append(stride)
        # step 5: build the indices into x_flat
        # Non-array entries contribute offset 0; array entries are scaled by
        # their axis stride and summed into flat offsets (broadcasting the
        # index arrays against each other).
        items = [new_ind[i] if isinstance(new_ind[i], numpy.ndarray)
                 else 0 for i in range(start_, end_)]
        new_idx = tensor.sum([i * j for i,j
                              in zip(items, strides[::-1])],
                             axis=0)
        # step 6: advanced slicing
        out_flat = input_flat.take(new_idx.flatten())
        # step 7: reshape into right shape
        out_flat_shp = (x.shape[:start_] +
                        new_idx.shape + x.shape[end_:]).astype('int32')
        o = out_flat.reshape(out_flat_shp,
                             ndim=dims+new_idx.ndim-2)
        # Apply the remaining (slice) indices on the prefix/suffix axes; the
        # broadcast index-result axes in the middle are kept whole.
        idx_ = (new_ind[:start_] + [slice(None)] *
                (new_idx.ndim - 2 + end_ - start_) + new_ind[end_:])
        out[0] = o.__getitem__(idx_)
class GpuAdvancedIncSubtensor1(HideC, tensor.AdvancedIncSubtensor1):
""" """
Implement AdvancedIncSubtensor1 on the gpu. Implement AdvancedIncSubtensor1 on the gpu.
......
...@@ -10,9 +10,9 @@ from ..basic_ops import HostFromGpu, GpuFromHost ...@@ -10,9 +10,9 @@ from ..basic_ops import HostFromGpu, GpuFromHost
from ..elemwise import GpuDimShuffle from ..elemwise import GpuDimShuffle
from ..subtensor import (GpuIncSubtensor, GpuSubtensor, from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor1, GpuAdvancedSubtensor1,
GpuAdvancedSubtensor,
GpuAdvancedIncSubtensor1) GpuAdvancedIncSubtensor1)
from ..type import gpuarray_shared_constructor from ..type import gpuarray_shared_constructor
from .config import mode_with_gpu from .config import mode_with_gpu
...@@ -87,3 +87,43 @@ def test_incsub_f16(): ...@@ -87,3 +87,43 @@ def test_incsub_f16():
rep = xval.copy() rep = xval.copy()
rep[1:] += yval rep[1:] += yval
assert numpy.allclose(rval, rep) assert numpy.allclose(rval, rep)
class G_advancedsubtensor(test_subtensor.TestAdvancedSubtensor):
    """Reuse the host AdvancedSubtensor test suite with the GPU ops."""

    def __init__(self, name):
        # Point the generic suite at the gpuarray ops/shared constructor.
        test_subtensor.TestAdvancedSubtensor.__init__(
            self, name,
            shared=gpuarray_shared_constructor,
            sub=GpuAdvancedSubtensor,
            inc_sub=GpuIncSubtensor,
            adv_sub1=GpuAdvancedSubtensor1,
            adv_incsub1=GpuAdvancedIncSubtensor1,
            mode=mode_with_gpu,
            # avoid errors with limited devices
            dtype='float32',
            ignore_topo=(HostFromGpu, GpuFromHost, DeepCopyOp))
        # GPU opt can't run in fast_compile only.
        self.fast_compile = False
        assert self.sub == GpuAdvancedSubtensor

    def shortDescription(self):
        # Report the test name instead of the docstring in the runner output.
        return None
def test_adv_subtensor():
    """Test the advancedsubtensor on gpu."""
    data_shape = (2, 3, 4)
    data = numpy.arange(numpy.prod(data_shape),
                        dtype=theano.config.floatX).reshape(data_shape)
    idx1, idx2 = tensor.ivectors('idx1', 'idx2')
    idxs = [idx1, slice(0, 2, 1), idx2]
    x = gpuarray_shared_constructor(data, name='x')
    f = theano.function([idx1, idx2], x[idxs], mode=mode_with_gpu)
    # Exactly one GpuAdvancedSubtensor node must appear in the compiled graph.
    n_gpu_nodes = sum(isinstance(node.op, GpuAdvancedSubtensor)
                      for node in f.maker.fgraph.toposort())
    assert n_gpu_nodes == 1
    # Compare against plain NumPy advanced indexing on the host copy.
    idx1_val = [0, 1]
    idx2_val = [0, 1]
    result = f(idx1_val, idx2_val)
    expected = data[idx1_val, slice(0, 2, 1), idx2_val]
    assert numpy.allclose(result, expected)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论