提交 0dae8f43 authored 作者: Frederic Bastien's avatar Frederic Bastien

move the convolution from cuda_ndarray to the GpuConvOp.

上级 ccc01a40
差异被折叠。
#include"conv_kernel.cu"
//we store the full image and the full kernel in the shared memory
//each thread computes only one value for the output
//thread block size=out_wid, out_len/nb_split
......
...@@ -28,8 +28,6 @@ for (int iter_m=0; iter_m < Os[0]; iter_m++) { ...@@ -28,8 +28,6 @@ for (int iter_m=0; iter_m < Os[0]; iter_m++) {
#ifndef CONV_KERNEL_CU #ifndef CONV_KERNEL_CU
#define CONV_KERNEL_CU #define CONV_KERNEL_CU
#include <stdio.h>
/* /*
#define CHECK_BANK_CONFLICTS 0 #define CHECK_BANK_CONFLICTS 0
#if CHECK_BANK_CONFLICTS #if CHECK_BANK_CONFLICTS
......
...@@ -26,6 +26,16 @@ typedef float real; ...@@ -26,6 +26,16 @@ typedef float real;
#endif #endif
// Fallback value for SHARED_SIZE (16*1024 = 16384) used when the macro has
// not already been defined before this point, e.g. on the compiler command line.
// NOTE(review): presumably the per-block shared-memory budget in bytes -- confirm
// against the kernels that consume it.
#ifndef SHARED_SIZE
#define SHARED_SIZE (16*1024)
#endif
/**
 * Integer division of a by b, rounded up to the next whole number.
 *
 * Returns a/b when b divides a exactly, and a/b + 1 otherwise.
 * Intended for a non-negative `a` and a strictly positive `b`: with negative
 * operands the truncating integer division does not yield a true ceiling,
 * and b == 0 is undefined behaviour.
 */
template <typename T>
static T ceil_intdiv(T a, T b)
{
    const T quotient = a / b;
    const T remainder = a % b;
    // Any non-zero remainder means one extra (partial) chunk is needed.
    if (remainder)
        return quotient + 1;
    return quotient;
}
/** /**
* struct CudaNdarray * struct CudaNdarray
* *
...@@ -408,14 +418,6 @@ int CudaNdarray_reduce_max(CudaNdarray * self, CudaNdarray * A); ...@@ -408,14 +418,6 @@ int CudaNdarray_reduce_max(CudaNdarray * self, CudaNdarray * A);
int CudaNdarray_dimshuffle(CudaNdarray * self, unsigned int len, const int * pattern); int CudaNdarray_dimshuffle(CudaNdarray * self, unsigned int len, const int * pattern);
// Convolution mode constants.
// NOTE(review): presumably the accepted values for the `mode` argument of
// CudaNdarray_Conv below -- confirm against its implementation.
enum { ConvMode_FULL, ConvMode_VALID };
// Full-argument convolution entry point; defined elsewhere. In addition to the
// image, kernel, output and mode it takes row/column subsampling factors, a
// `version` selector and a `verbose` level.
PyObject * CudaNdarray_Conv(const CudaNdarray *img, const CudaNdarray * kern, CudaNdarray * out, const int mode, const int subsample_rows, const int subsample_cols, const int version, const int verbose);
// Convenience overload of CudaNdarray_Conv: forwards to the full-argument
// version with subsampling of 1 in both directions, version -1 and verbose 0.
// NOTE(review): version == -1 presumably lets the implementation choose the
// kernel variant itself -- confirm against the full overload.
PyObject * CudaNdarray_Conv(const CudaNdarray *img, const CudaNdarray * kern, CudaNdarray * out, const int mode)
{
    const int subsample_rows = 1;
    const int subsample_cols = 1;
    const int version = -1;
    const int verbose = 0;
    return CudaNdarray_Conv(img, kern, out, mode,
                            subsample_rows, subsample_cols, version, verbose);
}
int CudaNdarray_conv(const CudaNdarray *img, const CudaNdarray * kern, CudaNdarray * out, const int mode);
void fprint_CudaNdarray(FILE * fd, const CudaNdarray *self) void fprint_CudaNdarray(FILE * fd, const CudaNdarray *self)
{ {
fprintf(fd, "CudaNdarray <%p, %p> nd=%i \n", self, self->devdata, self->nd); fprintf(fd, "CudaNdarray <%p, %p> nd=%i \n", self, self->devdata, self->nd);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论