Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
925a4eb6
提交
925a4eb6
authored
11月 05, 2010
作者:
Olivier Delalleau
浏览文件
操作
浏览文件
下载
差异文件
Merged
上级
646bd761
15aebd1c
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
10 个修改的文件
包含
190 行增加
和
78 行删除
+190
-78
profilemode.py
theano/compile/profilemode.py
+0
-0
blas.py
theano/sandbox/cuda/blas.py
+1
-1
conv.cu
theano/sandbox/cuda/conv.cu
+1
-0
test_conv_cuda_ndarray.py
theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
+27
-5
neighbours.py
theano/sandbox/neighbours.py
+61
-29
test_neighbours.py
theano/sandbox/test_neighbours.py
+24
-20
basic.py
theano/tensor/basic.py
+2
-2
conv.py
theano/tensor/nnet/conv.py
+37
-4
test_conv.py
theano/tensor/nnet/tests/test_conv.py
+30
-15
test_conv.py
theano/tensor/signal/tests/test_conv.py
+7
-2
没有找到文件。
theano/compile/profilemode.py
浏览文件 @
925a4eb6
差异被折叠。
点击展开。
theano/sandbox/cuda/blas.py
浏览文件 @
925a4eb6
...
...
@@ -421,7 +421,7 @@ class GpuDownsampleFactorMax(Op):
#def perform(self, node, input_storage, output_storage):
#raise NotImplementedError('only C is implemented')
def
c_code_cache_version
(
self
):
return
()
return
(
1
)
def
c_code
(
self
,
node
,
nodename
,
(
x
,),
(
z
,),
sub
):
fail
=
sub
[
'fail'
]
ds0
,
ds1
=
self
.
ds
...
...
theano/sandbox/cuda/conv.cu
浏览文件 @
925a4eb6
...
...
@@ -521,6 +521,7 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
}
if (1 && (version==6||version==-1) &&
kern_len<=320 &&
!work_complete) //conv_valid_row_reduce
{
int outsize = CudaNdarray_SIZE(out);
...
...
theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
浏览文件 @
925a4eb6
import
sys
,
time
import
numpy
from
nose.plugins.skip
import
SkipTest
imported_scipy_convolve2d
=
False
try
:
from
scipy.signal
import
convolve2d
imported_scipy_convolve2d
=
True
except
ImportError
:
pass
import
theano
# Skip test if cuda_ndarray is not available.
from
nose.plugins.skip
import
SkipTest
import
theano.sandbox.cuda
as
cuda_ndarray
if
cuda_ndarray
.
cuda_available
==
False
:
raise
SkipTest
(
'Optional package cuda disabled'
)
...
...
@@ -38,9 +46,23 @@ def py_conv_full_numpy(img, kern):
pad_cols
=
2
*
(
kern
.
shape
[
3
]
-
1
)
+
img
.
shape
[
3
]
padded_img
=
numpy
.
zeros
((
img
.
shape
[
0
],
img
.
shape
[
1
],
pad_rows
,
pad_cols
),
dtype
=
img
.
dtype
)
padded_img
[:,:,
kern
.
shape
[
2
]
-
1
:
kern
.
shape
[
2
]
-
1
+
img
.
shape
[
2
],
kern
.
shape
[
3
]
-
1
:
kern
.
shape
[
3
]
-
1
+
img
.
shape
[
3
]]
=
img
return
py_conv_valid
(
padded_img
,
kern
)
return
py_conv_valid_numpy
(
padded_img
,
kern
)
def
py_conv
(
img
,
kern
,
mode
,
subsample
):
"""
use a scipy or numpy implementation depending is scipy is available.
The scipy version is faster.
"""
if
imported_scipy_convolve2d
:
return
py_conv_scipy
(
img
,
kern
,
mode
,
subsample
)
elif
mode
==
'valid'
:
return
py_conv_valid_numpy
(
img
,
kern
)[:,:,::
subsample
[
0
],::
subsample
[
1
]]
elif
mode
==
'full'
:
return
py_conv_full_numpy
(
img
,
kern
)[:,:,::
subsample
[
0
],::
subsample
[
1
]]
else
:
raise
Exception
(
"Can't execute this kernel."
)
def
py_conv_scipy
(
img
,
kern
,
mode
,
subsample
):
from
scipy.signal
import
convolve2d
assert
img
.
shape
[
1
]
==
kern
.
shape
[
1
]
if
mode
==
'valid'
:
outshp
=
(
img
.
shape
[
0
],
kern
.
shape
[
0
],
...
...
@@ -89,7 +111,7 @@ def _params_allgood(ishape, kshape, mode, subsample=(1,1), img_stride=(1,1), ker
rval
=
True
try
:
t0
=
time
.
time
()
cpuval
=
py_conv
_scipy
(
npy_img
,
npy_kern
,
mode
,
subsample
)
cpuval
=
py_conv
(
npy_img
,
npy_kern
,
mode
,
subsample
)
t1
=
time
.
time
()
i
=
cuda_tensor4
()
k
=
cuda_tensor4
()
...
...
@@ -550,7 +572,7 @@ def _test_dummy():
rval
=
True
t0
=
time
.
time
()
cpuval
=
py_conv
_scipy
(
npy_img
,
npy_kern
,
mode
,
subsample
)
cpuval
=
py_conv
(
npy_img
,
npy_kern
,
mode
,
subsample
)
t1
=
time
.
time
()
gpuval
=
cuda_ndarray
.
conv
(
img
,
kern
,
mode
,
subsample
)
t2
=
time
.
time
()
...
...
theano/sandbox/neighbours.py
浏览文件 @
925a4eb6
...
...
@@ -252,13 +252,13 @@ class GpuImages2Neibs(Images2Neibs):
dtype
=
ten4
.
type
.
dtype
)()])
def
c_code_cache_version
(
self
):
return
()
return
(
2
,)
return
(
6
,)
def
c_support_code_apply
(
self
,
node
,
nodename
):
if
self
.
mode
==
"valid"
:
return
"""
static __global__ void k_multi_warp_
%(nodename)
s(
mode
=
self
.
mode
return
"""
//a version that use less register but don't work in all case.
static __global__ void k_multi_warp_less_
%(nodename)
s(
const int nb_batch,
const int nb_stack,
const int height,
...
...
@@ -274,8 +274,10 @@ class GpuImages2Neibs(Images2Neibs):
float * global_out
)
{
for(int tblock = blockIdx.x;tblock<nb_batch*nb_stack*grid_c*grid_d;tblock+=gridDim.x){
const int wrap_centered_idx_shift_x = c/2;
const int wrap_centered_idx_shift_y = d/2;
for(int tblock = blockIdx.x*blockDim.z+threadIdx.z;tblock<nb_batch*nb_stack*grid_c*grid_d;tblock+=gridDim.x*blockDim.z){
const int b = tblock
%%
grid_d;
int left = tblock/grid_d;
const int a = left
%%
grid_c;
...
...
@@ -289,12 +291,23 @@ class GpuImages2Neibs(Images2Neibs):
if(a>grid_c)continue;
if(b>grid_d)continue;
int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n));
for (int i = 0; i < c; i++)
// loop over c
int i = threadIdx.y;
// loop over c
{
int ten4_2 = i + a * step_x;
for (int j = threadIdx.x; j < d; j+=blockDim.x) // loop over d
if("
%(mode)
s"=="wrap_centered"){
ten4_2 -= wrap_centered_idx_shift_x;
if ( ten4_2 < 0 ) ten4_2 += height;
else if (ten4_2 >= height) ten4_2 -= height;
}
int j = threadIdx.x; // loop over d
{
int ten4_3 = j + b * step_y;
if("
%(mode)
s"=="wrap_centered"){
ten4_3 -= wrap_centered_idx_shift_y;
if ( ten4_3 < 0 ) ten4_3 += width;
else if (ten4_3 >= width) ten4_3 -= width;
}
//int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
//int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;
...
...
@@ -307,9 +320,6 @@ class GpuImages2Neibs(Images2Neibs):
}
}
"""
%
locals
()
if
self
.
mode
==
"wrap_centered"
:
return
"""
static __global__ void k_multi_warp_
%(nodename)
s(
const int nb_batch,
const int nb_stack,
...
...
@@ -329,7 +339,7 @@ class GpuImages2Neibs(Images2Neibs):
const int wrap_centered_idx_shift_x = c/2;
const int wrap_centered_idx_shift_y = d/2;
for(int tblock = blockIdx.x
;tblock<nb_batch*nb_stack*grid_c*grid_d;tblock+=gridDim.x
){
for(int tblock = blockIdx.x
*blockDim.z+threadIdx.z;tblock<nb_batch*nb_stack*grid_c*grid_d;tblock+=gridDim.x*blockDim.z
){
const int b = tblock
%%
grid_d;
int left = tblock/grid_d;
const int a = left
%%
grid_c;
...
...
@@ -343,19 +353,23 @@ class GpuImages2Neibs(Images2Neibs):
if(a>grid_c)continue;
if(b>grid_d)continue;
int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n));
for (int i =
0; i < c; i++
) // loop over c
for (int i =
threadIdx.y; i < c; i+=blockDim.y
) // loop over c
{
int ten4_2 = i + a * step_x;
ten4_2 -= wrap_centered_idx_shift_x;
if ( ten4_2 < 0 ) ten4_2 += height;
else if (ten4_2 >= height) ten4_2 -= height;
if("
%(mode)
s"=="wrap_centered"){
ten4_2 -= wrap_centered_idx_shift_x;
if ( ten4_2 < 0 ) ten4_2 += height;
else if (ten4_2 >= height) ten4_2 -= height;
}
for (int j = threadIdx.x; j < d; j+=blockDim.x) // loop over d
{
int ten4_3 = j + b * step_y;
ten4_3 -= wrap_centered_idx_shift_y;
if ( ten4_3 < 0 ) ten4_3 += width;
else if (ten4_3 >= width) ten4_3 -= width;
if("
%(mode)
s"=="wrap_centered"){
ten4_3 -= wrap_centered_idx_shift_y;
if ( ten4_3 < 0 ) ten4_3 += width;
else if (ten4_3 >= width) ten4_3 -= width;
}
//int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
//int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;
...
...
@@ -370,7 +384,6 @@ class GpuImages2Neibs(Images2Neibs):
"""
%
locals
()
def
c_code
(
self
,
node
,
name
,
(
ten4
,
neib_shape
,
neib_step
),
(
z
,),
sub
):
fail
=
sub
[
'fail'
]
mode
=
self
.
mode
...
...
@@ -473,17 +486,36 @@ class GpuImages2Neibs(Images2Neibs):
const npy_intp step_x = (npy_intp) *(dtype_
%(neib_step)
s*) PyArray_GETPTR1(
%(neib_step)
s, 0);
const npy_intp step_y = (npy_intp) *(dtype_
%(neib_step)
s*) PyArray_GETPTR1(
%(neib_step)
s, 1);
dim3 n_threads(d,c,1);
//Their is a max of 512 threads per blocks
while(n_threads.x*n_threads.y>512 && n_threads.y>1)n_threads.y--;
while(n_threads.x*n_threads.y>512 && n_threads.x>1)n_threads.x--;
//Make bigger block to have better memory access pattern and a higher core utilisation.
//for smaller patch size
while(c*d*(n_threads.z+1) < 128 && n_threads.z<64 && n_threads.z<CudaNdarray_HOST_DIMS(
%(z)
s)[0]){
n_threads.z++;
}
int nb_block;
if (
nb_batch
%% 32
== 0)
nb_block =
nb_batch/32
;
if (
CudaNdarray_HOST_DIMS(
%(z)
s)[0]
%%
n_threads.z
== 0)
nb_block =
CudaNdarray_HOST_DIMS(
%(z)
s)[0] / n_threads.z
;
else
nb_block = (int)((float)nb_batch/32. + 1.);
dim3 n_blocks(std::min(32*1024,CudaNdarray_HOST_DIMS(
%(z)
s)[0]),1,1);
dim3 n_threads(32,1,1);
nb_block = (CudaNdarray_HOST_DIMS(
%(z)
s)[0] / n_threads.z) + 1;
dim3 n_blocks(std::min(32*1024,nb_block));
int n_shared = 0;
k_multi_warp_
%(name)
s<<<n_blocks, n_threads, n_shared>>>(
void (*f)(int, int, int ,int,
int, int, int ,int,
int, int,
int, int, int, int,
float*, float*);
if(n_threads.x==d && n_threads.y==c){
f = k_multi_warp_less_
%(name)
s;
}else{
f = k_multi_warp_
%(name)
s;
}
f<<<n_blocks, n_threads, n_shared>>>(
nb_batch,
nb_stack,
height, width,
...
...
theano/sandbox/test_neighbours.py
浏览文件 @
925a4eb6
...
...
@@ -278,26 +278,30 @@ def test_neibs_wrap_centered_step_manual():
def
test_neibs_gpu
():
if
cuda
.
cuda_available
==
False
:
raise
SkipTest
(
'Optional package cuda disabled'
)
shape
=
(
100
,
40
,
18
,
18
)
images
=
shared
(
numpy
.
arange
(
numpy
.
prod
(
shape
),
dtype
=
'float32'
)
.
reshape
(
shape
))
neib_shape
=
T
.
as_tensor_variable
((
2
,
2
))
#(array((2,2), dtype='float32'))
from
theano.sandbox.cuda.basic_ops
import
gpu_from_host
f
=
function
([],
images2neibs
(
images
,
neib_shape
),
mode
=
mode_with_gpu
)
f_gpu
=
function
([],
images2neibs
(
images
,
neib_shape
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
GpuImages2Neibs
)
for
node
in
f_gpu
.
maker
.
env
.
toposort
()])
#print images.value
neibs
=
numpy
.
asarray
(
f_gpu
())
assert
numpy
.
allclose
(
neibs
,
f
())
#print neibs
g
=
function
([],
neibs2images
(
neibs
,
neib_shape
,
images
.
shape
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
GpuImages2Neibs
)
for
node
in
f
.
maker
.
env
.
toposort
()])
#print numpy.asarray(g())
assert
numpy
.
allclose
(
images
.
value
,
g
())
for
shape
,
pshape
in
[((
100
,
40
,
18
,
18
),(
2
,
2
)),
((
100
,
40
,
6
,
18
),(
3
,
2
)),
((
10
,
40
,
66
,
66
),(
33
,
33
)),
((
10
,
40
,
68
,
66
),(
34
,
33
))
]:
images
=
shared
(
numpy
.
arange
(
numpy
.
prod
(
shape
),
dtype
=
'float32'
)
.
reshape
(
shape
))
neib_shape
=
T
.
as_tensor_variable
(
pshape
)
from
theano.sandbox.cuda.basic_ops
import
gpu_from_host
f
=
function
([],
images2neibs
(
images
,
neib_shape
),
mode
=
mode_with_gpu
)
f_gpu
=
function
([],
images2neibs
(
images
,
neib_shape
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
GpuImages2Neibs
)
for
node
in
f_gpu
.
maker
.
env
.
toposort
()])
#print images.value
neibs
=
numpy
.
asarray
(
f_gpu
())
assert
numpy
.
allclose
(
neibs
,
f
())
#print neibs
g
=
function
([],
neibs2images
(
neibs
,
neib_shape
,
images
.
shape
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
GpuImages2Neibs
)
for
node
in
f
.
maker
.
env
.
toposort
()])
#print numpy.asarray(g())
assert
numpy
.
allclose
(
images
.
value
,
g
())
def
speed_neibs
():
...
...
theano/tensor/basic.py
浏览文件 @
925a4eb6
...
...
@@ -12,7 +12,7 @@ import numpy, theano
#from copy import copy as python_copy
from
theano
import
gof
,
shared
from
theano.gof
import
Variable
,
Op
,
utils
,
Type
,
Constant
,
Value
from
theano.gof
import
Variable
,
Op
,
Type
,
Constant
,
Value
from
theano.tensor.tsor_apply
import
Apply
from
theano
import
gradient
...
...
@@ -21,7 +21,7 @@ import elemwise
from
theano
import
scalar
as
scal
from
theano.gof.python25
import
partial
,
any
,
all
from
theano
import
compile
,
printing
from
theano.printing
import
pprint
,
Print
from
theano.printing
import
pprint
### set up the external interface
from
elemwise
import
Elemwise
,
DimShuffle
,
CAReduce
,
Sum
...
...
theano/tensor/nnet/conv.py
浏览文件 @
925a4eb6
...
...
@@ -18,6 +18,16 @@ from theano import gof, Op, tensor, config
from
theano.tensor.tsor_apply
import
Apply
from
theano.gof.python25
import
any
imported_scipy_signal
=
False
try
:
# TODO: move these back out to global scope when they no longer cause an atexit error
from
scipy.signal.signaltools
import
_valfrommode
,
_bvalfromboundary
from
scipy.signal.sigtools
import
_convolve2d
imported_scipy_signal
=
True
except
ImportError
:
pass
_logger
=
logging
.
getLogger
(
"theano.signal.conv"
)
def
_debug
(
*
msg
):
_logger
.
debug
(
' '
.
join
([
str
(
x
)
for
x
in
msg
]))
...
...
@@ -547,9 +557,12 @@ class ConvOp(Op):
"""
By default if len(img2d.shape)==3, we
"""
if
not
imported_scipy_signal
:
raise
theano
.
gof
.
utils
.
MethodNotDefined
(
"c_headers"
,
type
(
self
),
self
.
__class__
.
__name__
,
"Need the python package for scipy.signal to be installed for the python implementation. You can use the C implementation instead."
)
# TODO: move these back out to global scope when they no longer cause an atexit error
from
scipy.signal.signaltools
import
_valfrommode
,
_bvalfromboundary
from
scipy.signal.sigtools
import
_convolve2d
imshp
=
self
.
imshp
if
imshp
is
None
or
any
([
x
is
None
for
x
in
imshp
]):
imshp
=
tuple
(
img2d
.
shape
[
1
:])
...
...
@@ -584,8 +597,6 @@ class ConvOp(Op):
z
[
0
]
=
numpy
.
zeros
((
bsize
,)
+
(
nkern
,)
+
fulloutshp
,
dtype
=
img2d
.
dtype
)
zz
=
z
[
0
]
val
=
_valfrommode
(
self
.
out_mode
)
bval
=
_bvalfromboundary
(
'fill'
)
stacklen
=
imshp
[
0
]
...
...
@@ -616,6 +627,9 @@ class ConvOp(Op):
filtersflipped
=
buf
del
buf
,
rstride
,
cstride
val
=
_valfrommode
(
self
.
out_mode
)
bval
=
_bvalfromboundary
(
'fill'
)
for
b
in
range
(
bsize
):
for
n
in
range
(
nkern
):
zz
[
b
,
n
,
...
]
.
fill
(
0
)
...
...
@@ -623,6 +637,25 @@ class ConvOp(Op):
zz
[
b
,
n
,
...
]
+=
_convolve2d
(
\
img2d
[
b
,
im0
,
...
],
filtersflipped
[
n
,
im0
,
...
],
1
,
val
,
bval
,
0
)
if
False
:
if
False
and
self
.
out_mode
==
"full"
:
img2d2
=
numpy
.
zeros
((
bsize
,
stacklen
,
imshp
[
1
]
+
2
*
kshp
[
0
]
-
2
,
imshp
[
2
]
+
2
*
kshp
[
1
]
-
2
))
img2d2
[:,:,
kshp
[
0
]
-
1
:
kshp
[
0
]
-
1
+
imshp
[
1
],
kshp
[
1
]
-
1
:
kshp
[
1
]
-
1
+
imshp
[
2
]]
=
img2d
img2d
=
img2d2
#N_image_shape = image_data.shape
for
b
in
range
(
bsize
):
for
n
in
range
(
nkern
):
zz
[
b
,
n
,
...
]
.
fill
(
0
)
for
im0
in
range
(
stacklen
):
for
row
in
range
(
0
,
zz
.
shape
[
2
],
self
.
dx
):
for
col
in
range
(
0
,
zz
.
shape
[
3
],
self
.
dy
):
zz
[
b
,
n
,
row
,
col
]
+=
(
img2d
[
b
,
im0
,
row
:
row
+
kshp
[
0
],
col
:
col
+
kshp
[
1
]]
*
\
filtersflipped
[
n
,
im0
,::
-
1
,::
-
1
])
.
sum
()
#We copy it to remove the Stride mismatch warning from DEBUG_MODE.
#The copy make that we return an object with the same stride as the c version.
#The copy don't affect the performence during our experience as in that case we
...
...
theano/tensor/nnet/tests/test_conv.py
浏览文件 @
925a4eb6
import
sys
,
time
,
unittest
import
numpy
from
scipy
import
signal
import
theano
import
theano.tensor
as
T
...
...
@@ -60,6 +59,7 @@ class TestConv2D(unittest.TestCase):
############# REFERENCE IMPLEMENTATION ############
s
=
1.
orig_image_data
=
image_data
if
border_mode
is
not
'full'
:
s
=
-
1.
out_shape2d
=
numpy
.
array
(
N_image_shape
[
-
2
:])
+
\
s
*
numpy
.
array
(
N_filter_shape
[
-
2
:])
-
s
...
...
@@ -68,26 +68,41 @@ class TestConv2D(unittest.TestCase):
ref_output
=
numpy
.
zeros
(
out_shape
)
# loop over output feature maps
for
k
in
range
(
N_filter_shape
[
0
]):
# loop over input feature maps
for
l
in
range
(
N_filter_shape
[
1
]):
filter2d
=
filter_data
[
k
,
l
,:,:]
# loop over mini-batches
for
b
in
range
(
N_image_shape
[
0
]):
image2d
=
image_data
[
b
,
l
,:,:]
output2d
=
signal
.
convolve2d
(
image2d
,
filter2d
,
border_mode
)
ref_output
[
b
,
k
,:,:]
+=
\
output2d
[::
subsample
[
0
],::
subsample
[
1
]]
ref_output
.
fill
(
0
)
if
border_mode
==
'full'
:
image_data2
=
numpy
.
zeros
((
N_image_shape
[
0
],
N_image_shape
[
1
],
N_image_shape
[
2
]
+
2
*
N_filter_shape
[
2
]
-
2
,
N_image_shape
[
3
]
+
2
*
N_filter_shape
[
3
]
-
2
))
image_data2
[:,:,
N_filter_shape
[
2
]
-
1
:
N_filter_shape
[
2
]
-
1
+
N_image_shape
[
2
],
N_filter_shape
[
3
]
-
1
:
N_filter_shape
[
3
]
-
1
+
N_image_shape
[
3
]]
=
image_data
image_data
=
image_data2
N_image_shape
=
image_data
.
shape
for
bb
in
range
(
N_image_shape
[
0
]):
for
nn
in
range
(
N_filter_shape
[
0
]):
for
im0
in
range
(
N_image_shape
[
1
]):
filter2d
=
filter_data
[
nn
,
im0
,:,:]
image2d
=
image_data
[
bb
,
im0
,:,:]
for
row
in
range
(
ref_output
.
shape
[
2
]):
irow
=
row
*
subsample
[
0
]
#image row
for
col
in
range
(
ref_output
.
shape
[
3
]):
icol
=
col
*
subsample
[
1
]
#image col
ref_output
[
bb
,
nn
,
row
,
col
]
+=
(
image2d
[
irow
:
irow
+
N_filter_shape
[
2
],
icol
:
icol
+
N_filter_shape
[
3
]]
*
filter2d
[::
-
1
,::
-
1
]
)
.
sum
()
self
.
failUnless
(
_allclose
(
theano_output
,
ref_output
))
############# TEST GRADIENT ############
if
verify_grad
:
utt
.
verify_grad
(
sym_conv2d
,
[
image_data
,
filter_data
])
utt
.
verify_grad
(
sym_conv2d
,
[
orig_image_data
,
filter_data
])
def
test_basic1
(
self
):
"""
Tests that basic convolutions work for odd and even dimensions of image and filter
shapes, as well as rectangular images and filters.
"""
self
.
validate
((
2
,
2
,
3
,
3
),
(
2
,
2
,
2
,
2
),
'valid'
,
verify_grad
=
False
)
def
test_basic
(
self
):
"""
...
...
theano/tensor/signal/tests/test_conv.py
浏览文件 @
925a4eb6
import
sys
,
time
,
unittest
import
numpy
from
scipy
import
signal
import
theano
import
theano.tensor
as
T
...
...
@@ -59,7 +58,13 @@ class TestSignalConv2D(unittest.TestCase):
image2d
=
image_data3d
[
b
,:,:]
filter2d
=
filter_data3d
[
k
,:,:]
output2d
=
signal
.
convolve2d
(
image2d
,
filter2d
,
'valid'
)
output2d
=
numpy
.
zeros
(
ref_output
.
shape
)
for
row
in
range
(
ref_output
.
shape
[
0
]):
for
col
in
range
(
ref_output
.
shape
[
1
]):
output2d
[
row
,
col
]
+=
(
image2d
[
row
:
row
+
filter2d
.
shape
[
0
],
col
:
col
+
filter2d
.
shape
[
1
]]
*
filter2d
[::
-
1
,::
-
1
]
)
.
sum
()
self
.
failUnless
(
_allclose
(
theano_output4d
[
b
,
k
,:,:],
output2d
))
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论