Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
925a4eb6
提交
925a4eb6
authored
11月 05, 2010
作者:
Olivier Delalleau
浏览文件
操作
浏览文件
下载
差异文件
Merged
上级
646bd761
15aebd1c
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
10 个修改的文件
包含
190 行增加
和
78 行删除
+190
-78
profilemode.py
theano/compile/profilemode.py
+0
-0
blas.py
theano/sandbox/cuda/blas.py
+1
-1
conv.cu
theano/sandbox/cuda/conv.cu
+1
-0
test_conv_cuda_ndarray.py
theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
+27
-5
neighbours.py
theano/sandbox/neighbours.py
+61
-29
test_neighbours.py
theano/sandbox/test_neighbours.py
+24
-20
basic.py
theano/tensor/basic.py
+2
-2
conv.py
theano/tensor/nnet/conv.py
+37
-4
test_conv.py
theano/tensor/nnet/tests/test_conv.py
+30
-15
test_conv.py
theano/tensor/signal/tests/test_conv.py
+7
-2
没有找到文件。
theano/compile/profilemode.py
浏览文件 @
925a4eb6
差异被折叠。
点击展开。
theano/sandbox/cuda/blas.py
浏览文件 @
925a4eb6
...
@@ -421,7 +421,7 @@ class GpuDownsampleFactorMax(Op):
...
@@ -421,7 +421,7 @@ class GpuDownsampleFactorMax(Op):
#def perform(self, node, input_storage, output_storage):
#def perform(self, node, input_storage, output_storage):
#raise NotImplementedError('only C is implemented')
#raise NotImplementedError('only C is implemented')
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
()
return
(
1
)
def
c_code
(
self
,
node
,
nodename
,
(
x
,),
(
z
,),
sub
):
def
c_code
(
self
,
node
,
nodename
,
(
x
,),
(
z
,),
sub
):
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
ds0
,
ds1
=
self
.
ds
ds0
,
ds1
=
self
.
ds
...
...
theano/sandbox/cuda/conv.cu
浏览文件 @
925a4eb6
...
@@ -521,6 +521,7 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
...
@@ -521,6 +521,7 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
}
}
if (1 && (version==6||version==-1) &&
if (1 && (version==6||version==-1) &&
kern_len<=320 &&
!work_complete) //conv_valid_row_reduce
!work_complete) //conv_valid_row_reduce
{
{
int outsize = CudaNdarray_SIZE(out);
int outsize = CudaNdarray_SIZE(out);
...
...
theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
浏览文件 @
925a4eb6
import
sys
,
time
import
sys
,
time
import
numpy
import
numpy
from
nose.plugins.skip
import
SkipTest
imported_scipy_convolve2d
=
False
try
:
from
scipy.signal
import
convolve2d
imported_scipy_convolve2d
=
True
except
ImportError
:
pass
import
theano
import
theano
# Skip test if cuda_ndarray is not available.
# Skip test if cuda_ndarray is not available.
from
nose.plugins.skip
import
SkipTest
import
theano.sandbox.cuda
as
cuda_ndarray
import
theano.sandbox.cuda
as
cuda_ndarray
if
cuda_ndarray
.
cuda_available
==
False
:
if
cuda_ndarray
.
cuda_available
==
False
:
raise
SkipTest
(
'Optional package cuda disabled'
)
raise
SkipTest
(
'Optional package cuda disabled'
)
...
@@ -38,9 +46,23 @@ def py_conv_full_numpy(img, kern):
...
@@ -38,9 +46,23 @@ def py_conv_full_numpy(img, kern):
pad_cols
=
2
*
(
kern
.
shape
[
3
]
-
1
)
+
img
.
shape
[
3
]
pad_cols
=
2
*
(
kern
.
shape
[
3
]
-
1
)
+
img
.
shape
[
3
]
padded_img
=
numpy
.
zeros
((
img
.
shape
[
0
],
img
.
shape
[
1
],
pad_rows
,
pad_cols
),
dtype
=
img
.
dtype
)
padded_img
=
numpy
.
zeros
((
img
.
shape
[
0
],
img
.
shape
[
1
],
pad_rows
,
pad_cols
),
dtype
=
img
.
dtype
)
padded_img
[:,:,
kern
.
shape
[
2
]
-
1
:
kern
.
shape
[
2
]
-
1
+
img
.
shape
[
2
],
kern
.
shape
[
3
]
-
1
:
kern
.
shape
[
3
]
-
1
+
img
.
shape
[
3
]]
=
img
padded_img
[:,:,
kern
.
shape
[
2
]
-
1
:
kern
.
shape
[
2
]
-
1
+
img
.
shape
[
2
],
kern
.
shape
[
3
]
-
1
:
kern
.
shape
[
3
]
-
1
+
img
.
shape
[
3
]]
=
img
return
py_conv_valid
(
padded_img
,
kern
)
return
py_conv_valid_numpy
(
padded_img
,
kern
)
def
py_conv
(
img
,
kern
,
mode
,
subsample
):
"""
use a scipy or numpy implementation depending is scipy is available.
The scipy version is faster.
"""
if
imported_scipy_convolve2d
:
return
py_conv_scipy
(
img
,
kern
,
mode
,
subsample
)
elif
mode
==
'valid'
:
return
py_conv_valid_numpy
(
img
,
kern
)[:,:,::
subsample
[
0
],::
subsample
[
1
]]
elif
mode
==
'full'
:
return
py_conv_full_numpy
(
img
,
kern
)[:,:,::
subsample
[
0
],::
subsample
[
1
]]
else
:
raise
Exception
(
"Can't execute this kernel."
)
def
py_conv_scipy
(
img
,
kern
,
mode
,
subsample
):
def
py_conv_scipy
(
img
,
kern
,
mode
,
subsample
):
from
scipy.signal
import
convolve2d
assert
img
.
shape
[
1
]
==
kern
.
shape
[
1
]
assert
img
.
shape
[
1
]
==
kern
.
shape
[
1
]
if
mode
==
'valid'
:
if
mode
==
'valid'
:
outshp
=
(
img
.
shape
[
0
],
kern
.
shape
[
0
],
outshp
=
(
img
.
shape
[
0
],
kern
.
shape
[
0
],
...
@@ -89,7 +111,7 @@ def _params_allgood(ishape, kshape, mode, subsample=(1,1), img_stride=(1,1), ker
...
@@ -89,7 +111,7 @@ def _params_allgood(ishape, kshape, mode, subsample=(1,1), img_stride=(1,1), ker
rval
=
True
rval
=
True
try
:
try
:
t0
=
time
.
time
()
t0
=
time
.
time
()
cpuval
=
py_conv
_scipy
(
npy_img
,
npy_kern
,
mode
,
subsample
)
cpuval
=
py_conv
(
npy_img
,
npy_kern
,
mode
,
subsample
)
t1
=
time
.
time
()
t1
=
time
.
time
()
i
=
cuda_tensor4
()
i
=
cuda_tensor4
()
k
=
cuda_tensor4
()
k
=
cuda_tensor4
()
...
@@ -550,7 +572,7 @@ def _test_dummy():
...
@@ -550,7 +572,7 @@ def _test_dummy():
rval
=
True
rval
=
True
t0
=
time
.
time
()
t0
=
time
.
time
()
cpuval
=
py_conv
_scipy
(
npy_img
,
npy_kern
,
mode
,
subsample
)
cpuval
=
py_conv
(
npy_img
,
npy_kern
,
mode
,
subsample
)
t1
=
time
.
time
()
t1
=
time
.
time
()
gpuval
=
cuda_ndarray
.
conv
(
img
,
kern
,
mode
,
subsample
)
gpuval
=
cuda_ndarray
.
conv
(
img
,
kern
,
mode
,
subsample
)
t2
=
time
.
time
()
t2
=
time
.
time
()
...
...
theano/sandbox/neighbours.py
浏览文件 @
925a4eb6
...
@@ -252,13 +252,13 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -252,13 +252,13 @@ class GpuImages2Neibs(Images2Neibs):
dtype
=
ten4
.
type
.
dtype
)()])
dtype
=
ten4
.
type
.
dtype
)()])
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
()
return
(
6
,)
return
(
2
,)
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
if
self
.
mode
==
"valid"
:
mode
=
self
.
mode
return
"""
return
"""
static __global__ void k_multi_warp_
%(nodename)
s(
//a version that use less register but don't work in all case.
static __global__ void k_multi_warp_less_
%(nodename)
s(
const int nb_batch,
const int nb_batch,
const int nb_stack,
const int nb_stack,
const int height,
const int height,
...
@@ -274,8 +274,10 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -274,8 +274,10 @@ class GpuImages2Neibs(Images2Neibs):
float * global_out
float * global_out
)
)
{
{
const int wrap_centered_idx_shift_x = c/2;
for(int tblock = blockIdx.x;tblock<nb_batch*nb_stack*grid_c*grid_d;tblock+=gridDim.x){
const int wrap_centered_idx_shift_y = d/2;
for(int tblock = blockIdx.x*blockDim.z+threadIdx.z;tblock<nb_batch*nb_stack*grid_c*grid_d;tblock+=gridDim.x*blockDim.z){
const int b = tblock
%%
grid_d;
const int b = tblock
%%
grid_d;
int left = tblock/grid_d;
int left = tblock/grid_d;
const int a = left
%%
grid_c;
const int a = left
%%
grid_c;
...
@@ -289,12 +291,23 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -289,12 +291,23 @@ class GpuImages2Neibs(Images2Neibs):
if(a>grid_c)continue;
if(a>grid_c)continue;
if(b>grid_d)continue;
if(b>grid_d)continue;
int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n));
int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n));
for (int i = 0; i < c; i++)
// loop over c
int i = threadIdx.y;
// loop over c
{
{
int ten4_2 = i + a * step_x;
int ten4_2 = i + a * step_x;
for (int j = threadIdx.x; j < d; j+=blockDim.x) // loop over d
if("
%(mode)
s"=="wrap_centered"){
ten4_2 -= wrap_centered_idx_shift_x;
if ( ten4_2 < 0 ) ten4_2 += height;
else if (ten4_2 >= height) ten4_2 -= height;
}
int j = threadIdx.x; // loop over d
{
{
int ten4_3 = j + b * step_y;
int ten4_3 = j + b * step_y;
if("
%(mode)
s"=="wrap_centered"){
ten4_3 -= wrap_centered_idx_shift_y;
if ( ten4_3 < 0 ) ten4_3 += width;
else if (ten4_3 >= width) ten4_3 -= width;
}
//int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
//int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
//int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
//int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;
int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;
...
@@ -307,9 +320,6 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -307,9 +320,6 @@ class GpuImages2Neibs(Images2Neibs):
}
}
}
}
"""
%
locals
()
if
self
.
mode
==
"wrap_centered"
:
return
"""
static __global__ void k_multi_warp_
%(nodename)
s(
static __global__ void k_multi_warp_
%(nodename)
s(
const int nb_batch,
const int nb_batch,
const int nb_stack,
const int nb_stack,
...
@@ -329,7 +339,7 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -329,7 +339,7 @@ class GpuImages2Neibs(Images2Neibs):
const int wrap_centered_idx_shift_x = c/2;
const int wrap_centered_idx_shift_x = c/2;
const int wrap_centered_idx_shift_y = d/2;
const int wrap_centered_idx_shift_y = d/2;
for(int tblock = blockIdx.x
;tblock<nb_batch*nb_stack*grid_c*grid_d;tblock+=gridDim.x
){
for(int tblock = blockIdx.x
*blockDim.z+threadIdx.z;tblock<nb_batch*nb_stack*grid_c*grid_d;tblock+=gridDim.x*blockDim.z
){
const int b = tblock
%%
grid_d;
const int b = tblock
%%
grid_d;
int left = tblock/grid_d;
int left = tblock/grid_d;
const int a = left
%%
grid_c;
const int a = left
%%
grid_c;
...
@@ -343,19 +353,23 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -343,19 +353,23 @@ class GpuImages2Neibs(Images2Neibs):
if(a>grid_c)continue;
if(a>grid_c)continue;
if(b>grid_d)continue;
if(b>grid_d)continue;
int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n));
int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n));
for (int i =
0; i < c; i++
) // loop over c
for (int i =
threadIdx.y; i < c; i+=blockDim.y
) // loop over c
{
{
int ten4_2 = i + a * step_x;
int ten4_2 = i + a * step_x;
ten4_2 -= wrap_centered_idx_shift_x;
if("
%(mode)
s"=="wrap_centered"){
if ( ten4_2 < 0 ) ten4_2 += height;
ten4_2 -= wrap_centered_idx_shift_x;
else if (ten4_2 >= height) ten4_2 -= height;
if ( ten4_2 < 0 ) ten4_2 += height;
else if (ten4_2 >= height) ten4_2 -= height;
}
for (int j = threadIdx.x; j < d; j+=blockDim.x) // loop over d
for (int j = threadIdx.x; j < d; j+=blockDim.x) // loop over d
{
{
int ten4_3 = j + b * step_y;
int ten4_3 = j + b * step_y;
ten4_3 -= wrap_centered_idx_shift_y;
if("
%(mode)
s"=="wrap_centered"){
if ( ten4_3 < 0 ) ten4_3 += width;
ten4_3 -= wrap_centered_idx_shift_y;
else if (ten4_3 >= width) ten4_3 -= width;
if ( ten4_3 < 0 ) ten4_3 += width;
else if (ten4_3 >= width) ten4_3 -= width;
}
//int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
//int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
//int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
//int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;
int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;
...
@@ -370,7 +384,6 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -370,7 +384,6 @@ class GpuImages2Neibs(Images2Neibs):
"""
%
locals
()
"""
%
locals
()
def
c_code
(
self
,
node
,
name
,
(
ten4
,
neib_shape
,
neib_step
),
(
z
,),
sub
):
def
c_code
(
self
,
node
,
name
,
(
ten4
,
neib_shape
,
neib_step
),
(
z
,),
sub
):
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
mode
=
self
.
mode
mode
=
self
.
mode
...
@@ -473,17 +486,36 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -473,17 +486,36 @@ class GpuImages2Neibs(Images2Neibs):
const npy_intp step_x = (npy_intp) *(dtype_
%(neib_step)
s*) PyArray_GETPTR1(
%(neib_step)
s, 0);
const npy_intp step_x = (npy_intp) *(dtype_
%(neib_step)
s*) PyArray_GETPTR1(
%(neib_step)
s, 0);
const npy_intp step_y = (npy_intp) *(dtype_
%(neib_step)
s*) PyArray_GETPTR1(
%(neib_step)
s, 1);
const npy_intp step_y = (npy_intp) *(dtype_
%(neib_step)
s*) PyArray_GETPTR1(
%(neib_step)
s, 1);
dim3 n_threads(d,c,1);
//Their is a max of 512 threads per blocks
while(n_threads.x*n_threads.y>512 && n_threads.y>1)n_threads.y--;
while(n_threads.x*n_threads.y>512 && n_threads.x>1)n_threads.x--;
//Make bigger block to have better memory access pattern and a higher core utilisation.
//for smaller patch size
while(c*d*(n_threads.z+1) < 128 && n_threads.z<64 && n_threads.z<CudaNdarray_HOST_DIMS(
%(z)
s)[0]){
n_threads.z++;
}
int nb_block;
int nb_block;
if (
nb_batch
%% 32
== 0)
if (
CudaNdarray_HOST_DIMS(
%(z)
s)[0]
%%
n_threads.z
== 0)
nb_block =
nb_batch/32
;
nb_block =
CudaNdarray_HOST_DIMS(
%(z)
s)[0] / n_threads.z
;
else
else
nb_block = (int)((float)nb_batch/32. + 1.);
nb_block = (CudaNdarray_HOST_DIMS(
%(z)
s)[0] / n_threads.z) + 1;
dim3 n_blocks(std::min(32*1024,nb_block));
dim3 n_blocks(std::min(32*1024,CudaNdarray_HOST_DIMS(
%(z)
s)[0]),1,1);
dim3 n_threads(32,1,1);
int n_shared = 0;
int n_shared = 0;
k_multi_warp_
%(name)
s<<<n_blocks, n_threads, n_shared>>>(
void (*f)(int, int, int ,int,
int, int, int ,int,
int, int,
int, int, int, int,
float*, float*);
if(n_threads.x==d && n_threads.y==c){
f = k_multi_warp_less_
%(name)
s;
}else{
f = k_multi_warp_
%(name)
s;
}
f<<<n_blocks, n_threads, n_shared>>>(
nb_batch,
nb_batch,
nb_stack,
nb_stack,
height, width,
height, width,
...
...
theano/sandbox/test_neighbours.py
浏览文件 @
925a4eb6
...
@@ -278,26 +278,30 @@ def test_neibs_wrap_centered_step_manual():
...
@@ -278,26 +278,30 @@ def test_neibs_wrap_centered_step_manual():
def
test_neibs_gpu
():
def
test_neibs_gpu
():
if
cuda
.
cuda_available
==
False
:
if
cuda
.
cuda_available
==
False
:
raise
SkipTest
(
'Optional package cuda disabled'
)
raise
SkipTest
(
'Optional package cuda disabled'
)
for
shape
,
pshape
in
[((
100
,
40
,
18
,
18
),(
2
,
2
)),
shape
=
(
100
,
40
,
18
,
18
)
((
100
,
40
,
6
,
18
),(
3
,
2
)),
images
=
shared
(
numpy
.
arange
(
numpy
.
prod
(
shape
),
dtype
=
'float32'
)
.
reshape
(
shape
))
((
10
,
40
,
66
,
66
),(
33
,
33
)),
neib_shape
=
T
.
as_tensor_variable
((
2
,
2
))
#(array((2,2), dtype='float32'))
((
10
,
40
,
68
,
66
),(
34
,
33
))
]:
from
theano.sandbox.cuda.basic_ops
import
gpu_from_host
images
=
shared
(
numpy
.
arange
(
numpy
.
prod
(
shape
),
dtype
=
'float32'
)
.
reshape
(
shape
))
f
=
function
([],
images2neibs
(
images
,
neib_shape
),
neib_shape
=
T
.
as_tensor_variable
(
pshape
)
mode
=
mode_with_gpu
)
f_gpu
=
function
([],
images2neibs
(
images
,
neib_shape
),
from
theano.sandbox.cuda.basic_ops
import
gpu_from_host
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
GpuImages2Neibs
)
for
node
in
f_gpu
.
maker
.
env
.
toposort
()])
f
=
function
([],
images2neibs
(
images
,
neib_shape
),
#print images.value
mode
=
mode_with_gpu
)
neibs
=
numpy
.
asarray
(
f_gpu
())
f_gpu
=
function
([],
images2neibs
(
images
,
neib_shape
),
assert
numpy
.
allclose
(
neibs
,
f
())
mode
=
mode_with_gpu
)
#print neibs
assert
any
([
isinstance
(
node
.
op
,
GpuImages2Neibs
)
for
node
in
f_gpu
.
maker
.
env
.
toposort
()])
g
=
function
([],
neibs2images
(
neibs
,
neib_shape
,
images
.
shape
),
mode
=
mode_with_gpu
)
#print images.value
assert
any
([
isinstance
(
node
.
op
,
GpuImages2Neibs
)
for
node
in
f
.
maker
.
env
.
toposort
()])
neibs
=
numpy
.
asarray
(
f_gpu
())
#print numpy.asarray(g())
assert
numpy
.
allclose
(
neibs
,
f
())
assert
numpy
.
allclose
(
images
.
value
,
g
())
#print neibs
g
=
function
([],
neibs2images
(
neibs
,
neib_shape
,
images
.
shape
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
GpuImages2Neibs
)
for
node
in
f
.
maker
.
env
.
toposort
()])
#print numpy.asarray(g())
assert
numpy
.
allclose
(
images
.
value
,
g
())
def
speed_neibs
():
def
speed_neibs
():
...
...
theano/tensor/basic.py
浏览文件 @
925a4eb6
...
@@ -12,7 +12,7 @@ import numpy, theano
...
@@ -12,7 +12,7 @@ import numpy, theano
#from copy import copy as python_copy
#from copy import copy as python_copy
from
theano
import
gof
,
shared
from
theano
import
gof
,
shared
from
theano.gof
import
Variable
,
Op
,
utils
,
Type
,
Constant
,
Value
from
theano.gof
import
Variable
,
Op
,
Type
,
Constant
,
Value
from
theano.tensor.tsor_apply
import
Apply
from
theano.tensor.tsor_apply
import
Apply
from
theano
import
gradient
from
theano
import
gradient
...
@@ -21,7 +21,7 @@ import elemwise
...
@@ -21,7 +21,7 @@ import elemwise
from
theano
import
scalar
as
scal
from
theano
import
scalar
as
scal
from
theano.gof.python25
import
partial
,
any
,
all
from
theano.gof.python25
import
partial
,
any
,
all
from
theano
import
compile
,
printing
from
theano
import
compile
,
printing
from
theano.printing
import
pprint
,
Print
from
theano.printing
import
pprint
### set up the external interface
### set up the external interface
from
elemwise
import
Elemwise
,
DimShuffle
,
CAReduce
,
Sum
from
elemwise
import
Elemwise
,
DimShuffle
,
CAReduce
,
Sum
...
...
theano/tensor/nnet/conv.py
浏览文件 @
925a4eb6
...
@@ -18,6 +18,16 @@ from theano import gof, Op, tensor, config
...
@@ -18,6 +18,16 @@ from theano import gof, Op, tensor, config
from
theano.tensor.tsor_apply
import
Apply
from
theano.tensor.tsor_apply
import
Apply
from
theano.gof.python25
import
any
from
theano.gof.python25
import
any
imported_scipy_signal
=
False
try
:
# TODO: move these back out to global scope when they no longer cause an atexit error
from
scipy.signal.signaltools
import
_valfrommode
,
_bvalfromboundary
from
scipy.signal.sigtools
import
_convolve2d
imported_scipy_signal
=
True
except
ImportError
:
pass
_logger
=
logging
.
getLogger
(
"theano.signal.conv"
)
_logger
=
logging
.
getLogger
(
"theano.signal.conv"
)
def
_debug
(
*
msg
):
def
_debug
(
*
msg
):
_logger
.
debug
(
' '
.
join
([
str
(
x
)
for
x
in
msg
]))
_logger
.
debug
(
' '
.
join
([
str
(
x
)
for
x
in
msg
]))
...
@@ -547,9 +557,12 @@ class ConvOp(Op):
...
@@ -547,9 +557,12 @@ class ConvOp(Op):
"""
"""
By default if len(img2d.shape)==3, we
By default if len(img2d.shape)==3, we
"""
"""
if
not
imported_scipy_signal
:
raise
theano
.
gof
.
utils
.
MethodNotDefined
(
"c_headers"
,
type
(
self
),
self
.
__class__
.
__name__
,
"Need the python package for scipy.signal to be installed for the python implementation. You can use the C implementation instead."
)
# TODO: move these back out to global scope when they no longer cause an atexit error
# TODO: move these back out to global scope when they no longer cause an atexit error
from
scipy.signal.signaltools
import
_valfrommode
,
_bvalfromboundary
from
scipy.signal.sigtools
import
_convolve2d
imshp
=
self
.
imshp
imshp
=
self
.
imshp
if
imshp
is
None
or
any
([
x
is
None
for
x
in
imshp
]):
if
imshp
is
None
or
any
([
x
is
None
for
x
in
imshp
]):
imshp
=
tuple
(
img2d
.
shape
[
1
:])
imshp
=
tuple
(
img2d
.
shape
[
1
:])
...
@@ -584,8 +597,6 @@ class ConvOp(Op):
...
@@ -584,8 +597,6 @@ class ConvOp(Op):
z
[
0
]
=
numpy
.
zeros
((
bsize
,)
+
(
nkern
,)
+
fulloutshp
,
z
[
0
]
=
numpy
.
zeros
((
bsize
,)
+
(
nkern
,)
+
fulloutshp
,
dtype
=
img2d
.
dtype
)
dtype
=
img2d
.
dtype
)
zz
=
z
[
0
]
zz
=
z
[
0
]
val
=
_valfrommode
(
self
.
out_mode
)
bval
=
_bvalfromboundary
(
'fill'
)
stacklen
=
imshp
[
0
]
stacklen
=
imshp
[
0
]
...
@@ -616,6 +627,9 @@ class ConvOp(Op):
...
@@ -616,6 +627,9 @@ class ConvOp(Op):
filtersflipped
=
buf
filtersflipped
=
buf
del
buf
,
rstride
,
cstride
del
buf
,
rstride
,
cstride
val
=
_valfrommode
(
self
.
out_mode
)
bval
=
_bvalfromboundary
(
'fill'
)
for
b
in
range
(
bsize
):
for
b
in
range
(
bsize
):
for
n
in
range
(
nkern
):
for
n
in
range
(
nkern
):
zz
[
b
,
n
,
...
]
.
fill
(
0
)
zz
[
b
,
n
,
...
]
.
fill
(
0
)
...
@@ -623,6 +637,25 @@ class ConvOp(Op):
...
@@ -623,6 +637,25 @@ class ConvOp(Op):
zz
[
b
,
n
,
...
]
+=
_convolve2d
(
\
zz
[
b
,
n
,
...
]
+=
_convolve2d
(
\
img2d
[
b
,
im0
,
...
],
filtersflipped
[
n
,
im0
,
...
],
1
,
val
,
bval
,
0
)
img2d
[
b
,
im0
,
...
],
filtersflipped
[
n
,
im0
,
...
],
1
,
val
,
bval
,
0
)
if
False
:
if
False
and
self
.
out_mode
==
"full"
:
img2d2
=
numpy
.
zeros
((
bsize
,
stacklen
,
imshp
[
1
]
+
2
*
kshp
[
0
]
-
2
,
imshp
[
2
]
+
2
*
kshp
[
1
]
-
2
))
img2d2
[:,:,
kshp
[
0
]
-
1
:
kshp
[
0
]
-
1
+
imshp
[
1
],
kshp
[
1
]
-
1
:
kshp
[
1
]
-
1
+
imshp
[
2
]]
=
img2d
img2d
=
img2d2
#N_image_shape = image_data.shape
for
b
in
range
(
bsize
):
for
n
in
range
(
nkern
):
zz
[
b
,
n
,
...
]
.
fill
(
0
)
for
im0
in
range
(
stacklen
):
for
row
in
range
(
0
,
zz
.
shape
[
2
],
self
.
dx
):
for
col
in
range
(
0
,
zz
.
shape
[
3
],
self
.
dy
):
zz
[
b
,
n
,
row
,
col
]
+=
(
img2d
[
b
,
im0
,
row
:
row
+
kshp
[
0
],
col
:
col
+
kshp
[
1
]]
*
\
filtersflipped
[
n
,
im0
,::
-
1
,::
-
1
])
.
sum
()
#We copy it to remove the Stride mismatch warning from DEBUG_MODE.
#We copy it to remove the Stride mismatch warning from DEBUG_MODE.
#The copy make that we return an object with the same stride as the c version.
#The copy make that we return an object with the same stride as the c version.
#The copy don't affect the performence during our experience as in that case we
#The copy don't affect the performence during our experience as in that case we
...
...
theano/tensor/nnet/tests/test_conv.py
浏览文件 @
925a4eb6
import
sys
,
time
,
unittest
import
sys
,
time
,
unittest
import
numpy
import
numpy
from
scipy
import
signal
import
theano
import
theano
import
theano.tensor
as
T
import
theano.tensor
as
T
...
@@ -60,6 +59,7 @@ class TestConv2D(unittest.TestCase):
...
@@ -60,6 +59,7 @@ class TestConv2D(unittest.TestCase):
############# REFERENCE IMPLEMENTATION ############
############# REFERENCE IMPLEMENTATION ############
s
=
1.
s
=
1.
orig_image_data
=
image_data
if
border_mode
is
not
'full'
:
s
=
-
1.
if
border_mode
is
not
'full'
:
s
=
-
1.
out_shape2d
=
numpy
.
array
(
N_image_shape
[
-
2
:])
+
\
out_shape2d
=
numpy
.
array
(
N_image_shape
[
-
2
:])
+
\
s
*
numpy
.
array
(
N_filter_shape
[
-
2
:])
-
s
s
*
numpy
.
array
(
N_filter_shape
[
-
2
:])
-
s
...
@@ -68,26 +68,41 @@ class TestConv2D(unittest.TestCase):
...
@@ -68,26 +68,41 @@ class TestConv2D(unittest.TestCase):
ref_output
=
numpy
.
zeros
(
out_shape
)
ref_output
=
numpy
.
zeros
(
out_shape
)
# loop over output feature maps
# loop over output feature maps
for
k
in
range
(
N_filter_shape
[
0
]):
ref_output
.
fill
(
0
)
# loop over input feature maps
if
border_mode
==
'full'
:
for
l
in
range
(
N_filter_shape
[
1
]):
image_data2
=
numpy
.
zeros
((
N_image_shape
[
0
],
N_image_shape
[
1
],
N_image_shape
[
2
]
+
2
*
N_filter_shape
[
2
]
-
2
,
filter2d
=
filter_data
[
k
,
l
,:,:]
N_image_shape
[
3
]
+
2
*
N_filter_shape
[
3
]
-
2
))
image_data2
[:,:,
N_filter_shape
[
2
]
-
1
:
N_filter_shape
[
2
]
-
1
+
N_image_shape
[
2
],
# loop over mini-batches
N_filter_shape
[
3
]
-
1
:
N_filter_shape
[
3
]
-
1
+
N_image_shape
[
3
]]
=
image_data
for
b
in
range
(
N_image_shape
[
0
]):
image_data
=
image_data2
image2d
=
image_data
[
b
,
l
,:,:]
N_image_shape
=
image_data
.
shape
output2d
=
signal
.
convolve2d
(
image2d
,
filter2d
,
border_mode
)
for
bb
in
range
(
N_image_shape
[
0
]):
for
nn
in
range
(
N_filter_shape
[
0
]):
ref_output
[
b
,
k
,:,:]
+=
\
for
im0
in
range
(
N_image_shape
[
1
]):
output2d
[::
subsample
[
0
],::
subsample
[
1
]]
filter2d
=
filter_data
[
nn
,
im0
,:,:]
image2d
=
image_data
[
bb
,
im0
,:,:]
for
row
in
range
(
ref_output
.
shape
[
2
]):
irow
=
row
*
subsample
[
0
]
#image row
for
col
in
range
(
ref_output
.
shape
[
3
]):
icol
=
col
*
subsample
[
1
]
#image col
ref_output
[
bb
,
nn
,
row
,
col
]
+=
(
image2d
[
irow
:
irow
+
N_filter_shape
[
2
],
icol
:
icol
+
N_filter_shape
[
3
]]
*
filter2d
[::
-
1
,::
-
1
]
)
.
sum
()
self
.
failUnless
(
_allclose
(
theano_output
,
ref_output
))
self
.
failUnless
(
_allclose
(
theano_output
,
ref_output
))
############# TEST GRADIENT ############
############# TEST GRADIENT ############
if
verify_grad
:
if
verify_grad
:
utt
.
verify_grad
(
sym_conv2d
,
[
image_data
,
filter_data
])
utt
.
verify_grad
(
sym_conv2d
,
[
orig_image_data
,
filter_data
])
def
test_basic1
(
self
):
"""
Tests that basic convolutions work for odd and even dimensions of image and filter
shapes, as well as rectangular images and filters.
"""
self
.
validate
((
2
,
2
,
3
,
3
),
(
2
,
2
,
2
,
2
),
'valid'
,
verify_grad
=
False
)
def
test_basic
(
self
):
def
test_basic
(
self
):
"""
"""
...
...
theano/tensor/signal/tests/test_conv.py
浏览文件 @
925a4eb6
import
sys
,
time
,
unittest
import
sys
,
time
,
unittest
import
numpy
import
numpy
from
scipy
import
signal
import
theano
import
theano
import
theano.tensor
as
T
import
theano.tensor
as
T
...
@@ -59,7 +58,13 @@ class TestSignalConv2D(unittest.TestCase):
...
@@ -59,7 +58,13 @@ class TestSignalConv2D(unittest.TestCase):
image2d
=
image_data3d
[
b
,:,:]
image2d
=
image_data3d
[
b
,:,:]
filter2d
=
filter_data3d
[
k
,:,:]
filter2d
=
filter_data3d
[
k
,:,:]
output2d
=
signal
.
convolve2d
(
image2d
,
filter2d
,
'valid'
)
output2d
=
numpy
.
zeros
(
ref_output
.
shape
)
for
row
in
range
(
ref_output
.
shape
[
0
]):
for
col
in
range
(
ref_output
.
shape
[
1
]):
output2d
[
row
,
col
]
+=
(
image2d
[
row
:
row
+
filter2d
.
shape
[
0
],
col
:
col
+
filter2d
.
shape
[
1
]]
*
filter2d
[::
-
1
,::
-
1
]
)
.
sum
()
self
.
failUnless
(
_allclose
(
theano_output4d
[
b
,
k
,:,:],
output2d
))
self
.
failUnless
(
_allclose
(
theano_output4d
[
b
,
k
,:,:],
output2d
))
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论