提交 fde1fdf1 作者: Pascal Lamblin

Only redefine atomicAdd on doubles for arch < 6

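This fixes a compilation issue on Pascal GPUs (arch 6.0 and later), where CUDA already provides a native atomicAdd for doubles and the custom definition conflicts with it.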
上级 14a89b67
...@@ -844,7 +844,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC, ...@@ -844,7 +844,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
return super(GpuAdvancedIncSubtensor1_dev20, self).perform(node, inp, out) return super(GpuAdvancedIncSubtensor1_dev20, self).perform(node, inp, out)
def c_code_cache_version(self): def c_code_cache_version(self):
return (11,) return (12,)
def c_headers(self): def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray_helper.h>', return ['<numpy_compat.h>', '<gpuarray_helper.h>',
...@@ -906,8 +906,9 @@ if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(set_instead_of_inc)s)) { ...@@ -906,8 +906,9 @@ if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(set_instead_of_inc)s)) {
code = """ code = """
/* /*
* This is an atomicAdd that works for doubles since that is not provided * This is an atomicAdd that works for doubles since that is not provided
* natively by cuda. * natively by cuda before arch 6.0.
*/ */
#if __CUDA_ARCH__ < 600
__device__ ga_double atomicAdd(ga_double* address, ga_double val) { __device__ ga_double atomicAdd(ga_double* address, ga_double val) {
unsigned long long int* address_as_ull = unsigned long long int* address_as_ull =
(unsigned long long int*)address; (unsigned long long int*)address;
...@@ -920,6 +921,7 @@ __device__ ga_double atomicAdd(ga_double* address, ga_double val) { ...@@ -920,6 +921,7 @@ __device__ ga_double atomicAdd(ga_double* address, ga_double val) {
} while (assumed != old); } while (assumed != old);
return __longlong_as_double(old); return __longlong_as_double(old);
} }
#endif
__device__ ga_double atomicExch(ga_double *address, ga_double val) { __device__ ga_double atomicExch(ga_double *address, ga_double val) {
return atomicExch((unsigned long long int *)address, return atomicExch((unsigned long long int *)address,
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请先注册或者登录后发表评论