提交 975e0d2b — 作者: Frédéric Bastien

Merge pull request #3227 from abergeron/gpua_advsub1

Implement GpuAdvancedSubtensor1 for gpuarray
......@@ -4,6 +4,8 @@
#include <string.h>
#include <gpuarray_api.h>
#include <numpy_compat.h>
#include <gpuarray/util.h>
static int theano_size_check(PyGpuArrayObject *a, unsigned int nd,
const size_t *dims, int typecode) {
......@@ -42,9 +44,14 @@ static PyGpuArrayObject *theano_try_copy(PyGpuArrayObject *out,
return out;
}
/* This is guaranteed to work and return the raw CUDA/OpenCL object on
* all recent (as of June 2015) version of libgpuarray. This is also
* promised to keep working in future versions. */
#define PyGpuArray_DEV_DATA(ary) (*(void **)((ary)->ga.data))
static inline void *PyGpuArray_DEV_DATA(PyGpuArrayObject *a) {
  /* Return the raw device pointer for array `a`, adjusted by its
   * element offset.
   *
   * Dereferencing ga.data yields the underlying CUDA/OpenCL object;
   * this layout is guaranteed on all recent (as of June 2015)
   * versions of libgpuarray and is promised to keep working in
   * future versions. */
  char *base = *(char **)a->ga.data;
  /* Adding ga.offset only works on CUDA, where the object is a real
   * pointer. */
  return (void *)(base + a->ga.offset);
}
#endif
......@@ -35,6 +35,7 @@ from .nnet import (GpuCrossentropySoftmaxArgmax1HotWithBias,
from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda,
GpuCAReduceCPY)
from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20)
......@@ -488,6 +489,12 @@ def local_gpua_incsubtensor(node):
node.op.destroyhandler_tolerate_aliased)
@register_opt('fast_compile')
@op_lifter([tensor.AdvancedSubtensor1])
def local_gpua_advanced_subtensor(node):
    """Lift ``AdvancedSubtensor1`` to its gpuarray counterpart.

    Returns the GPU op; ``op_lifter`` takes care of rebuilding the
    node with the lifted inputs.
    """
    gpu_op = GpuAdvancedSubtensor1()
    return gpu_op
@register_opt('fast_compile')
@op_lifter([tensor.AdvancedIncSubtensor1])
def local_gpua_advanced_incsubtensor(node):
......@@ -496,7 +503,16 @@ def local_gpua_advanced_incsubtensor(node):
if pygpu.get_default_context().kind != "cuda":
return None
x, y = node.inputs[0:2]
x, y, ilist = node.inputs
# Gpu Ops needs both inputs to have the same dtype
if (x.type.dtype != y.type.dtype):
dtype = scalar.upcast(x.type.dtype, y.type.dtype)
if x.type.dtype != dtype:
x = tensor.cast(x, dtype)
if y.type.dtype != dtype:
y = tensor.cast(y, dtype)
set_instead_of_inc = node.op.set_instead_of_inc
active_device_no = theano.sandbox.cuda.active_device_number()
device_properties = theano.sandbox.cuda.device_properties
......@@ -504,11 +520,11 @@ def local_gpua_advanced_incsubtensor(node):
compute_capability = device_properties(active_device_no)['major']
if (compute_capability < 2 or x.ndim != 2 or y.ndim != 2):
return GpuAdvancedIncSubtensor1(
set_instead_of_inc=set_instead_of_inc)
return [GpuAdvancedIncSubtensor1(
set_instead_of_inc=set_instead_of_inc)(x, y, ilist)]
else:
return GpuAdvancedIncSubtensor1_dev20(
set_instead_of_inc=set_instead_of_inc)
return [GpuAdvancedIncSubtensor1_dev20(
set_instead_of_inc=set_instead_of_inc)(x, y, ilist)]
@register_opt('fast_compile')
......
......@@ -7,6 +7,7 @@ from theano.tensor.tests import test_subtensor
from ..basic_ops import HostFromGpu, GpuFromHost
from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1)
from ..type import gpuarray_shared_constructor
......@@ -24,6 +25,7 @@ class G_subtensor(test_subtensor.T_subtensor):
shared=gpuarray_shared_constructor,
sub=GpuSubtensor,
inc_sub=GpuIncSubtensor,
adv_sub1=GpuAdvancedSubtensor1,
adv_incsub1=GpuAdvancedIncSubtensor1,
mode=mode_with_gpu,
# avoid errors with limited devices
......
......@@ -515,8 +515,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
self.assertRaises(IndexError, g, shp)
def test_adv_sub1_broadcast(self):
ones = numpy.ones((1, 3), dtype=self.dtype)
n = self.shared(ones * 5, broadcastable=(True, False))
v = numpy.arange(3, dtype=self.dtype).reshape((1, 3))
n = self.shared(v*5, broadcastable=(True, False))
idx = tensor.lvector()
t = n[idx]
self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor1))
......@@ -529,10 +529,10 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
self.assertTrue(isinstance(topo_[0].op, self.adv_sub1))
f_0 = f([0])
self.assertTrue(f_0.shape == (1, 3))
self.assertTrue(numpy.allclose(f_0, ones[0] * 5))
self.assertTrue(numpy.allclose(f_0, v*5))
f_00 = f([0, 0])
self.assertTrue(f_00.shape == (2, 3))
self.assertTrue(numpy.allclose(f_00, 5))
self.assertTrue(numpy.allclose(f_00, v*5))
self.assertRaises(IndexError, f, [0, 1])
# Test the gradient
......
......@@ -160,7 +160,6 @@ whitelist_flake8 = [
"sandbox/linalg/tests/test_linalg.py",
"sandbox/gpuarray/basic_ops.py",
"sandbox/gpuarray/nnet.py",
"sandbox/gpuarray/subtensor.py",
"sandbox/gpuarray/elemwise.py",
"sandbox/gpuarray/type.py",
"sandbox/gpuarray/__init__.py",
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论