提交 568af179 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5747 from abergeron/long_incsub

Add support for atomic{Exch,Add} on long longs.
...@@ -871,7 +871,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC, ...@@ -871,7 +871,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
return super(GpuAdvancedIncSubtensor1_dev20, self).perform(node, inp, out) return super(GpuAdvancedIncSubtensor1_dev20, self).perform(node, inp, out)
def c_code_cache_version(self): def c_code_cache_version(self):
return (13,) return (14,)
def c_headers(self): def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray_helper.h>', return ['<numpy_compat.h>', '<gpuarray_helper.h>',
...@@ -924,9 +924,8 @@ if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(params)s->set_instead_of ...@@ -924,9 +924,8 @@ if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(params)s->set_instead_of
*/ */
#if __CUDA_ARCH__ < 600 #if __CUDA_ARCH__ < 600
__device__ ga_double atomicAdd(ga_double* address, ga_double val) { __device__ ga_double atomicAdd(ga_double* address, ga_double val) {
unsigned long long int* address_as_ull = ga_ulong *address_as_ull = (ga_ulong *)address;
(unsigned long long int*)address; ga_ulong old = *address_as_ull, assumed;
unsigned long long int old = *address_as_ull, assumed;
do { do {
assumed = old; assumed = old;
old = atomicCAS(address_as_ull, assumed, old = atomicCAS(address_as_ull, assumed,
...@@ -938,17 +937,35 @@ __device__ ga_double atomicAdd(ga_double* address, ga_double val) { ...@@ -938,17 +937,35 @@ __device__ ga_double atomicAdd(ga_double* address, ga_double val) {
#endif #endif
__device__ ga_double atomicExch(ga_double *address, ga_double val) { __device__ ga_double atomicExch(ga_double *address, ga_double val) {
return atomicExch((unsigned long long int *)address, return atomicExch((ga_ulong *)address,
__double_as_longlong(val)); __double_as_longlong(val));
} }
/* GA_LONG */
/*
 * 64-bit signed atomic add, emulated with a compare-and-swap loop on the
 * unsigned representation (hardware atomicCAS is only provided for
 * unsigned 64-bit words).  Returns the value held at *address before the
 * addition, matching the usual atomicAdd contract.  Signed wrap-around
 * follows two's-complement behaviour via the unsigned round-trip.
 */
__device__ ga_long atomicAdd(ga_long* address, ga_long val) {
  ga_ulong *target = (ga_ulong *)address;
  ga_ulong seen = *target;
  ga_ulong expected;
  do {
    expected = seen;
    /* Attempt to publish expected + val; atomicCAS hands back the word
       that was actually in memory, so on contention we retry with it. */
    seen = atomicCAS(target, expected,
                     (ga_ulong)(val + (ga_long)expected));
  } while (seen != expected);
  return (ga_long)seen;
}
/*
 * 64-bit signed atomic exchange: forwards to the hardware unsigned
 * long-long atomicExch through bit-preserving casts and returns the
 * previously stored value reinterpreted as signed.
 */
__device__ ga_long atomicExch(ga_long *address, ga_long val) {
  ga_ulong previous = atomicExch((ga_ulong *)address, (ga_ulong)val);
  return (ga_long)previous;
}
/* GA_HALF */
/* /*
* This is a version of atomicAdd that works for half-floats. It may * This may read and write 2 bytes more than the size of the array
* read and write 2 bytes more than the size of the array if the array * if the array has an uneven number of elements. The actual value
* has an uneven number of elements. The actual value at that spot * at that spot will not be modified.
* will not be modified.
*/ */
__device__ ga_half atomicAdd(ga_half *addr, ga_half val) { __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
ga_uint *base = (ga_uint *)((ga_size)addr & ~2); ga_uint *base = (ga_uint *)((ga_size)addr & ~2);
ga_uint old, assumed, sum, new_; ga_uint old, assumed, sum, new_;
......
...@@ -93,7 +93,7 @@ def test_advinc_subtensor1(): ...@@ -93,7 +93,7 @@ def test_advinc_subtensor1():
for node in f.maker.fgraph.toposort()]) == 1 for node in f.maker.fgraph.toposort()]) == 1
rval = f(yval) rval = f(yval)
rep = xval.copy() rep = xval.copy()
rep[[0, 2]] += yval np.add.at(rep, [0, 2], yval)
assert np.allclose(rval, rep) assert np.allclose(rval, rep)
...@@ -101,6 +101,7 @@ def test_advinc_subtensor1_dtype(): ...@@ -101,6 +101,7 @@ def test_advinc_subtensor1_dtype():
# Test the mixed dtype case # Test the mixed dtype case
shp = (3, 4) shp = (3, 4)
for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64'), for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64'),
('uint64', 'int8'), ('int64', 'uint8'),
('float16', 'int8'), ('float16', 'float64'), ('float16', 'int8'), ('float16', 'float64'),
('float16', 'float16')]: ('float16', 'float16')]:
shared = gpuarray_shared_constructor shared = gpuarray_shared_constructor
...@@ -117,7 +118,7 @@ def test_advinc_subtensor1_dtype(): ...@@ -117,7 +118,7 @@ def test_advinc_subtensor1_dtype():
for node in f.maker.fgraph.toposort()]) == 1 for node in f.maker.fgraph.toposort()]) == 1
rval = f(yval) rval = f(yval)
rep = xval.copy() rep = xval.copy()
rep[[0, 2]] += yval np.add.at(rep, [[0, 2]], yval)
assert np.allclose(rval, rep) assert np.allclose(rval, rep)
...@@ -139,7 +140,7 @@ def test_deterministic_flag(): ...@@ -139,7 +140,7 @@ def test_deterministic_flag():
for node in f.maker.fgraph.toposort()]) == 1 for node in f.maker.fgraph.toposort()]) == 1
rval = f(yval) rval = f(yval)
rep = xval.copy() rep = xval.copy()
rep[[0, 2]] += yval np.add.at(rep, [[0, 2]], yval)
assert np.allclose(rval, rep) assert np.allclose(rval, rep)
...@@ -183,7 +184,7 @@ def test_incsub_f16(): ...@@ -183,7 +184,7 @@ def test_incsub_f16():
for node in f.maker.fgraph.toposort()]) == 1 for node in f.maker.fgraph.toposort()]) == 1
rval = f(yval) rval = f(yval)
rep = xval.copy() rep = xval.copy()
rep[[0, 2]] += yval np.add.at(rep, [[0, 2]], yval)
assert np.allclose(rval, rep) assert np.allclose(rval, rep)
expr = tensor.inc_subtensor(x[1:], y) expr = tensor.inc_subtensor(x[1:], y)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论