提交 2f0ab791 authored 作者: Pascal Lamblin's avatar Pascal Lamblin 提交者: GitHub

Merge pull request #4763 from abergeron/gpuadvsub

Gpuadvsub
......@@ -52,6 +52,7 @@ from .nnet import (gpu_crossentropy_softmax_1hot_with_bias_dx,
from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda,
GpuCAReduceCPY, gpu_ca_reduce_cuda)
from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor,
GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20)
......@@ -971,10 +972,17 @@ def local_gpua_inc_subtensor(op, context_name, inputs, outputs):
@register_opt('fast_compile')
@op_lifter([tensor.AdvancedSubtensor1])
@register_opt2([tensor.AdvancedSubtensor1], 'fast_compile')
def local_gpua_advanced_subtensor(op, context_name, inputs, outputs):
def local_gpua_advanced_subtensor1(op, context_name, inputs, outputs):
return GpuAdvancedSubtensor1()
@register_opt('fast_compile')
@op_lifter([tensor.AdvancedSubtensor])
@register_opt2([tensor.AdvancedSubtensor], 'fast_compile')
def local_gpua_advanced_subtensor(op, context_name, inputs, outputs):
    """Lift a CPU ``AdvancedSubtensor`` node to its GPU counterpart.

    Registered both as a classic local optimizer and (via
    ``register_opt2``) for the newer lifter machinery; the decorators
    handle input/output rewiring, so returning the GPU op is enough.
    """
    return GpuAdvancedSubtensor()
@register_opt('fast_compile')
@op_lifter([tensor.AdvancedIncSubtensor1])
@register_opt2([tensor.AdvancedIncSubtensor1], 'fast_compile')
......
......@@ -472,6 +472,107 @@ if (err != GA_NO_ERROR) {
return (0,)
class GpuAdvancedSubtensor(HideC, tensor.AdvancedSubtensor):
    """
    AdvancedSubtensor on the GPU.

    Emulates numpy-style advanced indexing (integer index arrays mixed
    with scalars, slices and ``None``/newaxis) on GpuArray inputs by
    reducing the problem to a single flat gather (``take1``).
    """
    def make_node(self, x, *inputs):
        # Reuse the CPU op's make_node to infer the output dtype and
        # broadcastable pattern, then swap the output type for a
        # GpuArrayType in the same context as ``x``.
        ctx_name = infer_context_name(x)
        rval = tensor.AdvancedSubtensor.make_node(self, x, *inputs)
        otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
                             broadcastable=rval.outputs[0].type.broadcastable,
                             context_name=ctx_name)
        x = as_gpuarray_variable(x, ctx_name)
        return gof.Apply(self, [x] + rval.inputs[1:], [otype()])

    def perform(self, node, inputs, out_):
        # Strategy: move every array-indexed dimension to the front,
        # apply the remaining (non-array) indices, flatten the leading
        # dimensions, and gather with a single take1 call.
        out, = out_
        x = inputs[0]
        idx = inputs[1:]

        # detect and transpose array indices
        nidx = []
        nshp = list(x.shape)
        for k, i in enumerate(idx):
            if i is None:
                # None is numpy.newaxis: materialize it as a length-1
                # dimension so later bookkeeping only sees slices,
                # scalars and arrays.
                nidx.append(slice(None))
                nshp.insert(k, 1)
            else:
                nidx.append(i)

        x = x.reshape(nshp)

        narrays = 0
        transp = list(range(x.ndim))
        p = 0
        # ap gives the position of the array in case there is only one.
        # if there are more than one (narrays > 1) it should be ignored.
        ap = 0
        for k, i in enumerate(list(nidx)):
            if (isinstance(i, numpy.ndarray) and
                    i.ndim != 0):
                # Move this array index (and its dimension) to the
                # front, keeping relative order among the arrays.
                transp.remove(k)
                transp.insert(p, k)
                ap += k
                i = nidx.pop(k)
                nidx.insert(p, i)
                p += 1
                narrays += 1
            else:
                if narrays == 0:
                    try:
                        # Only integer-like indices reach past this
                        # line; slices raise and are ignored.
                        i.__index__()
                        # We shift back the position of the array by the
                        # number of dimensions that are removed by
                        # indexing. If ap is bigger than 0 it means we
                        # have encountered at least one array.
                        if ap >= 0:
                            ap -= 1
                        # If this index is before the first array then
                        # we will not move the array back to its
                        # position. Mark this by faking that there
                        # are more than two arrays. This is crazy
                        # numpy behaviour so blame them.
                        narrays = 2
                    except Exception:
                        pass

        x = x.transpose(*transp)
        # Leading p dims are array-indexed: keep them whole here and
        # resolve them via the flat gather below.
        idx_ = ([slice(None)] * p + nidx[p:])
        x = x.__getitem__(idx_)

        # flatten the array-indexed dimensions
        shape = ((numpy.prod(x.shape[0: p]),) +
                 x.shape[p:])
        input_flat = x.reshape(shape)

        # build the strides (row-major strides over the first p dims,
        # in elements, so index arrays can be combined into one flat
        # index per output element)
        strides = [1]
        for i in range(p - 1, 0, -1):
            stride = x.shape[i] * strides[-1]
            strides.insert(0, stride)

        # build the indices and use it
        take_idx = sum((i * s for i, s in zip(nidx, strides)))
        out_flat = input_flat.take1(pygpu.asarray(take_idx.flatten(),
                                                  context=x.context))

        # finish up: restore the broadcasted index-array shape in front
        # of the untouched trailing dims
        out_flat_shp = take_idx.shape + x.shape[p:]
        o = out_flat.reshape(out_flat_shp)

        # If there was only one array we need to move the indexed
        # dimension(s) back to the position of the array, which is
        # stored in ap. Note that ap is invalid if narrays != 1.
        if narrays == 1:
            ntransp = list(range(take_idx.ndim, o.ndim))
            ntransp[ap:ap] = list(range(take_idx.ndim))
            o = o.transpose(*ntransp)

        out[0] = o
class GpuAdvancedIncSubtensor1(Op):
"""
Implement AdvancedIncSubtensor1 on the gpu.
......
......@@ -10,6 +10,7 @@ from ..basic_ops import HostFromGpu, GpuFromHost
from ..elemwise import GpuDimShuffle
from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor1,
GpuAdvancedSubtensor,
GpuAdvancedIncSubtensor1)
from ..type import gpuarray_shared_constructor
......@@ -40,7 +41,7 @@ class G_subtensor(test_subtensor.T_subtensor):
def test_advinc_subtensor1():
""" Test the second case in the opt local_gpu_advanced_incsubtensor1 """
# Test the second case in the opt local_gpu_advanced_incsubtensor1
for shp in [(3, 3), (3, 3, 3)]:
shared = gpuarray_shared_constructor
xval = numpy.arange(numpy.prod(shp), dtype='float32').reshape(shp) + 1
......@@ -87,3 +88,41 @@ def test_incsub_f16():
rep = xval.copy()
rep[1:] += yval
assert numpy.allclose(rval, rep)
class G_advancedsubtensor(test_subtensor.TestAdvancedSubtensor):
    """Run the generic AdvancedSubtensor test suite on the GPU backend."""

    def __init__(self, name):
        super(G_advancedsubtensor, self).__init__(
            name,
            shared=gpuarray_shared_constructor,
            sub=GpuAdvancedSubtensor,
            mode=mode_with_gpu,
            # avoid errors with limited devices
            dtype='float32',
            ignore_topo=(HostFromGpu, GpuFromHost, DeepCopyOp))
        # GPU opt can't run in fast_compile only.
        self.fast_compile = False
        assert self.sub == GpuAdvancedSubtensor

    def shortDescription(self):
        # Force the runner to show test names instead of docstrings.
        return None
def test_adv_subtensor():
    # Check that advanced subtensor indexing is lifted to the GPU op
    # and matches the equivalent numpy fancy-indexing result.
    shape = (2, 3, 4)
    data = numpy.arange(numpy.prod(shape),
                        dtype=theano.config.floatX).reshape(shape)
    idx1, idx2 = tensor.ivectors('idx1', 'idx2')
    x = gpuarray_shared_constructor(data, name='x')
    expr = x[[idx1, None, slice(0, 2, 1), idx2, None]]
    f = theano.function([idx1, idx2], expr, mode=mode_with_gpu)
    # Exactly one GpuAdvancedSubtensor node must appear in the graph.
    gpu_nodes = [node for node in f.maker.fgraph.toposort()
                 if isinstance(node.op, GpuAdvancedSubtensor)]
    assert len(gpu_nodes) == 1
    i1 = [0, 1]
    i2 = [0, 1]
    result = f(i1, i2)
    expected = data[i1, None, slice(0, 2, 1), i2, None]
    assert numpy.allclose(result, expected)
......@@ -1009,23 +1009,20 @@ class T_subtensor(theano.tensor.tests.test_subtensor.T_subtensor):
def shortDescription(self):
    # NOTE(review): returning None presumably makes the test runner
    # print the test name instead of the docstring -- confirm against
    # the unittest/nose shortDescription contract.
    return None
shared = staticmethod(cuda.shared_constructor)
sub = cuda.GpuSubtensor
inc_sub = cuda.GpuIncSubtensor
adv_sub1 = cuda.GpuAdvancedSubtensor1
adv_incsub1 = cuda.GpuAdvancedIncSubtensor1
dimshuffle = cuda.GpuDimShuffle
mode = mode_with_gpu
dtype = 'float32'
type = tcn.CudaNdarrayType
ignore_topo = (B.HostFromGpu, B.GpuFromHost, theano.compile.DeepCopyOp)
fast_compile = False
ops = (cuda.GpuSubtensor, cuda.GpuIncSubtensor,
cuda.GpuAdvancedSubtensor1, cuda.GpuAdvancedIncSubtensor1)
def __init__(self, name):
return super(theano.tensor.tests.test_subtensor.T_subtensor,
self).__init__(name)
super(T_subtensor, self).__init__(
name,
shared=cuda.shared_constructor,
sub=cuda.GpuSubtensor,
inc_sub=cuda.GpuIncSubtensor,
adv_sub1=cuda.GpuAdvancedSubtensor1,
adv_incsub1=cuda.GpuAdvancedIncSubtensor1,
dimshuffle=cuda.GpuDimShuffle,
mode=mode_with_gpu,
dtype='float32',
type=tcn.CudaNdarrayType,
ignore_topo=(B.HostFromGpu, B.GpuFromHost, theano.compile.DeepCopyOp))
self.fast_compile = False
def test_adv_sub1_fast(self):
"""We check that the special cases of advanced indexing that
......
......@@ -20,7 +20,7 @@ from theano.tensor.basic import alloc
from theano.tensor.basic import (addbroadcast, clip, get_scalar_constant_value,
ARange, TensorType, NotScalarConstantError)
from theano.tensor.elemwise import DimShuffle
from theano.tensor.type_other import NoneConst, SliceType, make_slice
from theano.tensor.type_other import NoneConst, SliceType, NoneTypeT, make_slice
from theano import config
inplace_increment = None
......@@ -2077,6 +2077,8 @@ def as_index_variable(idx):
return make_slice(idx)
if isinstance(idx, gof.Variable) and isinstance(idx.type, SliceType):
return idx
if isinstance(idx, gof.Variable) and isinstance(idx.type, NoneTypeT):
return idx
idx = theano.tensor.as_tensor_variable(idx)
if idx.type.dtype[:3] not in ('int', 'uin'):
raise TypeError('index must be integers')
......@@ -2165,17 +2167,8 @@ class AdvancedSubtensor(Op):
# TODO: in general, we need to re-pack the inputs into a valid
# index, just like subtensor
out[0] = inputs[0].__getitem__(inputs[1:])
if (numpy.__version__ <= '1.6.1' and
out[0].size != numpy.uint32(out[0].size)):
warnings.warn(
'Numpy versions 1.6.1 and below have a bug preventing '
'advanced indexing from correctly filling arrays that '
'are too big (>= 2^32 elements). It is possible that '
'out[0] (%s), with shape %s, is not correctly filled.'
% (out[0], out[0].shape))
def connection_pattern(self, node):
rval = [[True]]
for ipt in node.inputs[1:]:
......
......@@ -6692,14 +6692,11 @@ class test_arithmetic_cast(unittest.TestCase):
config.int_division == 'floatX'):
assert theano_dtype == config.floatX
continue
numpy_version = [int(v) for v in
numpy.__version__.split('.')[:2]]
if (cfg == 'numpy+floatX' and
a_type == 'complex128' and
(b_type == 'float32' or
b_type == 'float16') and
combo == ('scalar', 'array') and
bool(numpy_version >= [1, 6]) and
theano_dtype == 'complex128' and
numpy_dtype == 'complex64'):
# In numpy 1.6.x adding a complex128 with
......@@ -6707,7 +6704,7 @@ class test_arithmetic_cast(unittest.TestCase):
# of 1.9.2. this is still the case so it is
# probably by design
raise SkipTest("Known issue with"
"numpy >= 1.6.x see #761")
"numpy see #761")
# In any other situation: something wrong is
# going on!
assert False
......
......@@ -20,8 +20,8 @@ from theano.compile import DeepCopyOp
from theano.tensor import (MakeSlice, NotScalarConstantError, _shared,
as_tensor_variable, cscalar, ctensor3, dmatrix,
dscalar, dtensor4, dvector, fmatrix, fscalar,
fvector, iscalar, lmatrix, lrow, lvector, matrix,
vector)
fvector, ftensor4, iscalar, lmatrix, lrow, lvector,
matrix, vector)
from theano.tensor.basic import DimShuffle
from theano.tensor.subtensor import (AdvancedIncSubtensor,
AdvancedIncSubtensor1, AdvancedSubtensor,
......@@ -55,6 +55,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
inc_sub=tensor.IncSubtensor,
adv_sub1=tensor.AdvancedSubtensor1,
adv_incsub1=tensor.AdvancedIncSubtensor1,
adv_sub=tensor.AdvancedSubtensor,
mode=None,
dtype=theano.config.floatX,
type=tensor.TensorType,
......@@ -65,6 +66,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
self.inc_sub = inc_sub
self.adv_sub1 = adv_sub1
self.adv_incsub1 = adv_incsub1
self.adv_sub = adv_sub
self.dimshuffle = dimshuffle
if mode is None:
mode = theano.compile.mode.get_default_mode()
......@@ -354,13 +356,9 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
(3, DimShuffle, self.dimshuffle,
numpy.index_exp[..., [0, 2, 3]]),
(1, DimShuffle, self.dimshuffle,
numpy.index_exp[numpy.newaxis, ...])]
# The following test case is not supported by numpy before 1.9
numpy_version = [int(v) for v in numpy.version.version.split('.')[0:2]]
if numpy_version >= [1, 9]:
test_cases.append(
(1, AdvancedSubtensor, AdvancedSubtensor,
numpy.index_exp[..., numpy.newaxis, [1, 2]]))
numpy.index_exp[numpy.newaxis, ...]),
(1, AdvancedSubtensor, self.adv_sub,
numpy.index_exp[..., numpy.newaxis, [1, 2]])]
for length, op_type, op_type_opt, slice_ in test_cases:
numpy_tval = numpy_n[slice_]
......@@ -1351,6 +1349,7 @@ class TestAdvancedSubtensor(unittest.TestCase):
self.v = fvector()
self.m = dmatrix()
self.t = ctensor3()
self.ft4 = ftensor4()
self.ix1 = lvector() # advanced 1d query
self.ix12 = lvector()
......@@ -1421,11 +1420,57 @@ class TestAdvancedSubtensor(unittest.TestCase):
a = inc_subtensor(subt, subt)
assert a.type == self.v.type, (a.type, self.v.type)
f = theano.function([self.v, self.ix2], a, allow_input_downcast=True)
f = theano.function([self.v, self.ix2], a, allow_input_downcast=True,
mode=self.mode)
aval = f([.4, .9, .1], [[1, 2],
[1, 2]])
assert numpy.allclose(aval, [.4, .9 * 3, .1 * 3])
def test_adv_subtensor_w_int_and_matrix(self):
    # Scalar index combined with an index matrix: x[0, :, ix2, :].
    expr = self.ft4[0, :, self.ix2, :]
    fn = theano.function([self.ft4, self.ix2], expr, mode=self.mode)
    data = numpy.random.random((2, 3, 4, 5)).astype('float32')
    indices = numpy.asarray([[0, 1], [1, 0]])
    expected = data[0, :, indices, :]
    actual = fn(data, indices)
    utt.assert_allclose(expected, actual)
def test_adv_subtensor_w_none_and_matrix(self):
    # newaxis combined with an index matrix: x[:, None, :, ix2, :].
    expr = self.ft4[:, None, :, self.ix2, :]
    fn = theano.function([self.ft4, self.ix2], expr, mode=self.mode)
    data = numpy.random.random((2, 3, 4, 5)).astype('float32')
    indices = numpy.asarray([[0, 1], [1, 0]])
    expected = data[:, None, :, indices, :]
    actual = fn(data, indices)
    utt.assert_allclose(expected, actual)
def test_adv_subtensor_w_slice_and_matrix(self):
    # Slice combined with an index matrix: x[:, 0:1, ix2, :].
    expr = self.ft4[:, 0:1, self.ix2, :]
    fn = theano.function([self.ft4, self.ix2], expr, mode=self.mode)
    data = numpy.random.random((2, 3, 4, 5)).astype('float32')
    indices = numpy.asarray([[0, 1], [1, 0]])
    expected = data[:, 0:1, indices, :]
    actual = fn(data, indices)
    utt.assert_allclose(expected, actual)
def test_adv_subtensor_w_matrix_and_int(self):
    # Index matrix followed by a scalar index: x[:, :, ix2, 0].
    expr = self.ft4[:, :, self.ix2, 0]
    fn = theano.function([self.ft4, self.ix2], expr, mode=self.mode)
    data = numpy.random.random((2, 3, 4, 5)).astype('float32')
    indices = numpy.asarray([[0, 1], [1, 0]])
    expected = data[:, :, indices, 0]
    actual = fn(data, indices)
    utt.assert_allclose(expected, actual)
def test_adv_subtensor_w_matrix_and_none(self):
    # Index matrix followed by newaxis: x[:, :, ix2, None, :].
    expr = self.ft4[:, :, self.ix2, None, :]
    fn = theano.function([self.ft4, self.ix2], expr, mode=self.mode)
    data = numpy.random.random((2, 3, 4, 5)).astype('float32')
    indices = numpy.asarray([[0, 1], [1, 0]])
    expected = data[:, :, indices, None, :]
    actual = fn(data, indices)
    utt.assert_allclose(expected, actual)
def test_inc_adv_subtensor_w_2vec(self):
if inplace_increment is None:
raise inplace_increment_missing
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论