提交 11ebaeb5 authored 作者: Frederic's avatar Frederic

Better indentation

上级 10d9a034
...@@ -3059,14 +3059,18 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1): ...@@ -3059,14 +3059,18 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
return 0; return 0;
} }
unsigned int numcolsX = shapeX[1]; unsigned int numcolsX = shapeX[1];
unsigned int num_threads_per_block = std::min(numcolsX, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK); unsigned int num_threads_per_block = std::min(
unsigned int num_blocks = std::min(size ,(unsigned int)NUM_VECTOR_OP_BLOCKS); numcolsX, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK);
unsigned int num_blocks = std::min(
size, (unsigned int)NUM_VECTOR_OP_BLOCKS);
dim3 n_blocks(num_blocks); dim3 n_blocks(num_blocks);
dim3 n_threads(num_threads_per_block); dim3 n_threads(num_threads_per_block);
long *d_indices_arr = NULL; long *d_indices_arr = NULL;
PyArrayObject *cpu_indices_arr = PyArray_GETCONTIGUOUS(indices_arr); PyArrayObject *cpu_indices_arr = PyArray_GETCONTIGUOUS(
d_indices_arr = (long*)device_malloc(PyArray_NBYTES(cpu_indices_arr)); indices_arr);
d_indices_arr = (long*)device_malloc(
PyArray_NBYTES(cpu_indices_arr));
if(!d_indices_arr) if(!d_indices_arr)
return -1; return -1;
...@@ -3078,12 +3082,14 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1): ...@@ -3078,12 +3082,14 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
if(err != cudaSuccess){ if(err != cudaSuccess){
PyErr_Format( PyErr_Format(
PyExc_RuntimeError, PyExc_RuntimeError,
"GpuAdvancedIncSubtensor1_dev20: cudaMemcpy returned an error: %%s", "GpuAdvancedIncSubtensor1_dev20:"
" cudaMemcpy returned an error: %%s",
cudaGetErrorString(err)); cudaGetErrorString(err));
return -1; return -1;
} }
k_vector_add_fast<<<n_blocks, n_threads>>>(shapeX[0], k_vector_add_fast<<<n_blocks, n_threads>>>(
shapeX[0],
shapeX[1], shapeX[1],
strX[0], strX[0],
strX[1], strX[1],
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论