提交 e3474eda，作者：Arnaud Bergeron

Some new fixes to address compilation problems.

上级 22aa4c69
...@@ -615,7 +615,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1): ...@@ -615,7 +615,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
def c_headers(self): def c_headers(self):
if pygpu.get_default_context().kind == 'opencl': if pygpu.get_default_context().kind == 'opencl':
raise MethodNotDefined('cuda only') raise MethodNotDefined('cuda only')
return ['cuda.h', '<numpy_compat.h>', '<gpuarray/ext_cuda.h>', return ['cuda.h', '<numpy_compat.h>', '<gpuarray_helper.h>',
'<gpuarray/types.h>'] '<gpuarray/types.h>']
def c_header_dirs(self): def c_header_dirs(self):
...@@ -627,11 +627,6 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1): ...@@ -627,11 +627,6 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
res.append(os.path.join(cuda_root, 'include')) res.append(os.path.join(cuda_root, 'include'))
return res return res
def c_init_code(self):
if pygpu.get_default_context().kind == 'opencl':
raise MethodNotDefined('cuda only')
return ['setup_ext_cuda();']
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
active_device_no = theano.sandbox.cuda.active_device_number() active_device_no = theano.sandbox.cuda.active_device_number()
device_properties = theano.sandbox.cuda.device_properties device_properties = theano.sandbox.cuda.device_properties
...@@ -748,10 +743,10 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) { ...@@ -748,10 +743,10 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
{ {
for(int j = (threadIdx.x); j < numColsX;j += blockDim.x) for(int j = (threadIdx.x); j < numColsX;j += blockDim.x)
{ {
ssize_t x_row = indices_arr[i * stridesIndices]; ga_ssize x_row = indices_arr[i * stridesIndices];
if (x_row < 0) if (x_row < 0)
x_row += numRowsX; x_row += numRowsX;
ssize_t y_row = i; ga_ssize y_row = i;
if (x_row < numRowsX && x_row >= 0) { if (x_row < numRowsX && x_row >= 0) {
atomicAdd(&X[(x_row * stridesX0) + (j * stridesX1)], Y[(y_row * stridesY0) + (j * stridesY1)]); atomicAdd(&X[(x_row * stridesX0) + (j * stridesX1)], Y[(y_row * stridesY0) + (j * stridesY1)]);
} else { } else {
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论