提交 497a2d9f authored 作者: Arjun Jain's avatar Arjun Jain

Including support for 'full' convolutions. It uses the existing pad…

Adds support for 'full' convolutions by reusing the existing pad functionality of the C/CUDA code. If mode == 'full', pad is set to filter_size - 1 (computed from the kernel's row dimension) and passed to the C code; everything else is then handled automatically. The theano-nose run of test_full does not pass; however, the test function test_gemm() passes. @nouiz: do you know why?
上级 46f64a65
......@@ -597,7 +597,6 @@ class GpuConvMM(GpuOp):
border_mode = self.border_mode
sub = sub.copy()
pad = self.pad
sub.update(locals())
return """
......@@ -607,6 +606,7 @@ class GpuConvMM(GpuOp):
//Optional args
int dx = %(dx)s;
int dy = %(dy)s;
int pad = 0;
CudaNdarray * img = %(img)s;
CudaNdarray * kern = %(kern)s;
CudaNdarray * out2 = NULL;
......@@ -640,6 +640,7 @@ class GpuConvMM(GpuOp):
{
logical_rows = CudaNdarray_HOST_DIMS(img)[2] + CudaNdarray_HOST_DIMS(kern)[2] - 1;
logical_cols = CudaNdarray_HOST_DIMS(img)[3] + CudaNdarray_HOST_DIMS(kern)[3] - 1;
pad = CudaNdarray_HOST_DIMS(kern)[2] - 1;
}
out_dim[2] = ceil_intdiv(logical_rows, dx);
out_dim[3] = ceil_intdiv(logical_cols, dy);
......@@ -657,7 +658,7 @@ class GpuConvMM(GpuOp):
}
out2 = validMM(%(img)s, %(kern)s, %(out)s);
out2 = validMM(%(img)s, %(kern)s, %(out)s, pad);
if (out2==NULL){
%(fail)s
}
......@@ -669,6 +670,7 @@ class GpuConvMM(GpuOp):
##
# Not really a BLAS operation, but whatever.
#
class GpuConv(GpuOp):
"""
Implement the batched and stacked 2d convolution on the gpu.
......
......@@ -78,7 +78,8 @@ void im2col(const float* data_im, const int channels,
// Author: Arjun Jain
CudaNdarray* validMM(const CudaNdarray *input,
CudaNdarray *weight,
CudaNdarray *output)
CudaNdarray *output,
int padding = 0)
{
cublasStatus_t status;
......@@ -96,7 +97,6 @@ CudaNdarray* validMM(const CudaNdarray *input,
// TODO: stride(dW, dH) and padding as function parameter
int dH = 1;
int dW = 1;
int padding = 0;
int kH = CudaNdarray_HOST_DIMS(weight)[2];
int kW = CudaNdarray_HOST_DIMS(weight)[3];
int nInputPlane = CudaNdarray_HOST_DIMS(input)[1];
......
......@@ -839,7 +839,7 @@ def test_gemm():
kshape = (nf, ch, rFlt, rFlt)
print "ishape: ", ishape
print "kshape: ", kshape
mode = 'valid'
mode = 'full'
subsample = (1, 1)
npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
......@@ -938,4 +938,3 @@ def test_stack_rows_segfault_070312():
nkern=1, bsize=1)
f = theano.function([], [], updates=[(out, op(img, kern))], mode=theano_mode)
f()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论