Commit 0dae8f43 authored by Frederic Bastien

move the convolution from cuda_ndarray to the GpuConvOp.

Parent ccc01a40
Diff is collapsed.
#include"conv_kernel.cu"
//we store the full image and the full kernel in the shared memory
//each thread compute only one value for the output
//thread block size=out_wid, out_len/nb_split
......
......@@ -28,8 +28,6 @@ for (int iter_m=0; iter_m < Os[0]; iter_m++) {
#ifndef CONV_KERNEL_CU
#define CONV_KERNEL_CU
#include <stdio.h>
/*
#define CHECK_BANK_CONFLICTS 0
#if CHECK_BANK_CONFLICTS
......
......@@ -26,6 +26,16 @@ typedef float real;
#endif
#ifndef SHARED_SIZE
#define SHARED_SIZE (16*1024)
#endif
/**
 * Integer ceiling division: smallest integer >= a/b.
 *
 * Written as quotient-plus-correction rather than (a+b-1)/b so it
 * cannot overflow for large `a`. Assumes b != 0; intended for the
 * non-negative sizes used when computing grid/block dimensions.
 */
template <typename T>
static T ceil_intdiv(T a, T b)
{
    T quotient = a / b;
    if (a % b != 0)
        ++quotient;
    return quotient;
}
/**
* struct CudaNdarray
*
......@@ -408,14 +418,6 @@ int CudaNdarray_reduce_max(CudaNdarray * self, CudaNdarray * A);
int CudaNdarray_dimshuffle(CudaNdarray * self, unsigned int len, const int * pattern);
enum { ConvMode_FULL, ConvMode_VALID };
PyObject * CudaNdarray_Conv(const CudaNdarray *img, const CudaNdarray * kern, CudaNdarray * out, const int mode, const int subsample_rows, const int subsample_cols, const int version, const int verbose);
/**
 * Convenience overload of CudaNdarray_Conv: convolve `img` with `kern`
 * into `out` using `mode` (ConvMode_FULL / ConvMode_VALID), delegating
 * to the full 7-argument version with its default settings.
 */
PyObject * CudaNdarray_Conv(const CudaNdarray *img, const CudaNdarray * kern, CudaNdarray * out, const int mode)
{
    // Defaults forwarded to the full implementation:
    const int subsample_rows = 1;  // no row subsampling
    const int subsample_cols = 1;  // no column subsampling
    const int version = -1;        // -1: let the implementation pick the kernel version
    const int verbose = 0;         // quiet
    return CudaNdarray_Conv(img, kern, out, mode,
                            subsample_rows, subsample_cols,
                            version, verbose);
}
int CudaNdarray_conv(const CudaNdarray *img, const CudaNdarray * kern, CudaNdarray * out, const int mode);
// Debug helper: print a CudaNdarray's identity to stream `fd` — the
// struct's address, its device-data pointer, and its number of
// dimensions. (Definition continues beyond this excerpt; only the
// header line of the dump is visible here.)
// NOTE(review): `%p` formally requires a `void*` argument, and `%i`
// assumes `nd` is an int — confirm against the struct definition.
void fprint_CudaNdarray(FILE * fd, const CudaNdarray *self)
{
    fprintf(fd, "CudaNdarray <%p, %p> nd=%i \n", self, self->devdata, self->nd);
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment