提交 4a241779 作者: Ian Goodfellow

Fred's fix to k_copy_4d

上级 068c416c
...@@ -3238,14 +3238,17 @@ static __global__ void k_copy_4d(const int N1, ...@@ -3238,14 +3238,17 @@ static __global__ void k_copy_4d(const int N1,
const int sx4, float * y, const int sy1, const int sy2, const int sx4, float * y, const int sy1, const int sy2,
const int sy3, const int sy4) const int sy3, const int sy4)
{ {
// These must be made int instead of unsigned int due to a bug in nvcc
int bx = blockIdx.x;
int by = blockIdx.y;
// N1 and N2 are kept in case a future implementation needs to // N1 and N2 are kept in case a future implementation needs to
// loop on the first two dimensions if there are not enough blocks // loop on the first two dimensions if there are not enough blocks
for (int i = threadIdx.x; i < N3; i += blockDim.x) for (int i = threadIdx.x; i < N3; i += (int) blockDim.x)
{ {
for (int j = threadIdx.y; j < N4; j += blockDim.y) for (int j = threadIdx.y; j < (int) N4; j += (int) blockDim.y)
{ {
y[blockIdx.x*sy1 + blockIdx.y*sy2 + i*sy3 + j*sy4] = y[bx * sy1 + by * sy2 + i * sy3 + j * sy4] =
x[blockIdx.x*sx1 + blockIdx.y*sx2 + i*sx3 + j*sx4]; x[bx * sx1 + by * sx2 + i * sx3 + j * sx4];
} }
} }
} }
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论