提交 497a2d9f authored 作者: Arjun Jain's avatar Arjun Jain

Including support for 'full' convolutions. It uses the existing pad…

Including support for 'full' convolutions. It uses the existing pad functionality of the c/cuda code. If mode == 'full', I set pad = filter_size - 1. This is passed to the C code, and everything else gets taken care of automatically. The theano-nose for test_full does not pass; however, the test function test_gemm() passes. @nouiz: do you know why?
上级 46f64a65
...@@ -597,7 +597,6 @@ class GpuConvMM(GpuOp): ...@@ -597,7 +597,6 @@ class GpuConvMM(GpuOp):
border_mode = self.border_mode border_mode = self.border_mode
sub = sub.copy() sub = sub.copy()
pad = self.pad pad = self.pad
sub.update(locals()) sub.update(locals())
return """ return """
...@@ -607,6 +606,7 @@ class GpuConvMM(GpuOp): ...@@ -607,6 +606,7 @@ class GpuConvMM(GpuOp):
//Optional args //Optional args
int dx = %(dx)s; int dx = %(dx)s;
int dy = %(dy)s; int dy = %(dy)s;
int pad = 0;
CudaNdarray * img = %(img)s; CudaNdarray * img = %(img)s;
CudaNdarray * kern = %(kern)s; CudaNdarray * kern = %(kern)s;
CudaNdarray * out2 = NULL; CudaNdarray * out2 = NULL;
...@@ -640,6 +640,7 @@ class GpuConvMM(GpuOp): ...@@ -640,6 +640,7 @@ class GpuConvMM(GpuOp):
{ {
logical_rows = CudaNdarray_HOST_DIMS(img)[2] + CudaNdarray_HOST_DIMS(kern)[2] - 1; logical_rows = CudaNdarray_HOST_DIMS(img)[2] + CudaNdarray_HOST_DIMS(kern)[2] - 1;
logical_cols = CudaNdarray_HOST_DIMS(img)[3] + CudaNdarray_HOST_DIMS(kern)[3] - 1; logical_cols = CudaNdarray_HOST_DIMS(img)[3] + CudaNdarray_HOST_DIMS(kern)[3] - 1;
pad = CudaNdarray_HOST_DIMS(kern)[2] - 1;
} }
out_dim[2] = ceil_intdiv(logical_rows, dx); out_dim[2] = ceil_intdiv(logical_rows, dx);
out_dim[3] = ceil_intdiv(logical_cols, dy); out_dim[3] = ceil_intdiv(logical_cols, dy);
...@@ -657,7 +658,7 @@ class GpuConvMM(GpuOp): ...@@ -657,7 +658,7 @@ class GpuConvMM(GpuOp):
} }
out2 = validMM(%(img)s, %(kern)s, %(out)s); out2 = validMM(%(img)s, %(kern)s, %(out)s, pad);
if (out2==NULL){ if (out2==NULL){
%(fail)s %(fail)s
} }
...@@ -669,6 +670,7 @@ class GpuConvMM(GpuOp): ...@@ -669,6 +670,7 @@ class GpuConvMM(GpuOp):
## ##
# Not really a BLAS operation, but whatever. # Not really a BLAS operation, but whatever.
# #
class GpuConv(GpuOp): class GpuConv(GpuOp):
""" """
Implement the batched and stacked 2d convolution on the gpu. Implement the batched and stacked 2d convolution on the gpu.
......
...@@ -78,7 +78,8 @@ void im2col(const float* data_im, const int channels, ...@@ -78,7 +78,8 @@ void im2col(const float* data_im, const int channels,
// Author: Arjun Jain // Author: Arjun Jain
CudaNdarray* validMM(const CudaNdarray *input, CudaNdarray* validMM(const CudaNdarray *input,
CudaNdarray *weight, CudaNdarray *weight,
CudaNdarray *output) CudaNdarray *output,
int padding = 0)
{ {
cublasStatus_t status; cublasStatus_t status;
...@@ -96,7 +97,6 @@ CudaNdarray* validMM(const CudaNdarray *input, ...@@ -96,7 +97,6 @@ CudaNdarray* validMM(const CudaNdarray *input,
// TODO: stride(dW, dH) and padding as function parameter // TODO: stride(dW, dH) and padding as function parameter
int dH = 1; int dH = 1;
int dW = 1; int dW = 1;
int padding = 0;
int kH = CudaNdarray_HOST_DIMS(weight)[2]; int kH = CudaNdarray_HOST_DIMS(weight)[2];
int kW = CudaNdarray_HOST_DIMS(weight)[3]; int kW = CudaNdarray_HOST_DIMS(weight)[3];
int nInputPlane = CudaNdarray_HOST_DIMS(input)[1]; int nInputPlane = CudaNdarray_HOST_DIMS(input)[1];
......
...@@ -839,7 +839,7 @@ def test_gemm(): ...@@ -839,7 +839,7 @@ def test_gemm():
kshape = (nf, ch, rFlt, rFlt) kshape = (nf, ch, rFlt, rFlt)
print "ishape: ", ishape print "ishape: ", ishape
print "kshape: ", kshape print "kshape: ", kshape
mode = 'valid' mode = 'full'
subsample = (1, 1) subsample = (1, 1)
npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32') npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
...@@ -938,4 +938,3 @@ def test_stack_rows_segfault_070312(): ...@@ -938,4 +938,3 @@ def test_stack_rows_segfault_070312():
nkern=1, bsize=1) nkern=1, bsize=1)
f = theano.function([], [], updates=[(out, op(img, kern))], mode=theano_mode) f = theano.function([], [], updates=[(out, op(img, kern))], mode=theano_mode)
f() f()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论