Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
1ebe9fd6
提交
1ebe9fd6
authored
8月 23, 2012
作者:
Frederic Bastien
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
pep8 code redability.
上级
97e12dae
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
555 行增加
和
422 行删除
+555
-422
conv.py
theano/tensor/nnet/conv.py
+555
-422
没有找到文件。
theano/tensor/nnet/conv.py
浏览文件 @
1ebe9fd6
...
...
@@ -16,25 +16,26 @@ import numpy
import
theano
from
theano.tensor
import
(
as_tensor_variable
,
blas
,
get_constant_value
,
patternbroadcast
)
patternbroadcast
)
from
theano
import
Op
,
config
from
theano.gof
import
Apply
from
theano.gof.python25
import
any
imported_scipy_signal
=
False
try
:
# TODO: move these back out to global scope when they no longer cause an atexit error
from
scipy.signal.signaltools
import
_valfrommode
,
_bvalfromboundary
# TODO: move these back out to global scope when they no longer
# cause an atexit error
from
scipy.signal.signaltools
import
_valfrommode
,
_bvalfromboundary
from
scipy.signal.sigtools
import
_convolve2d
imported_scipy_signal
=
True
except
ImportError
:
pass
_logger
=
logging
.
getLogger
(
"theano.tensor.nnet.conv"
)
_logger
=
logging
.
getLogger
(
"theano.tensor.nnet.conv"
)
def
conv2d
(
input
,
filters
,
image_shape
=
None
,
filter_shape
=
None
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
**
kargs
):
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
**
kargs
):
"""This function will build the symbolic graph for convolving a stack of input
images with a set of filters. The implementation is modelled after
Convolutional Neural Networks (CNN). It is simply a wrapper to the ConvOp but
...
...
@@ -62,8 +63,10 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
:param filter_shape: (nb filters, stack size, nb row, nb col)
Optional, used for optimization.
:param kwargs: kwargs are passed onto ConvOp. Can be used to set the following:
unroll_batch, unroll_kern, unroll_patch, openmp (see ConvOp doc)
:param kwargs: kwargs are passed onto ConvOp.
Can be used to set the following:
unroll_batch, unroll_kern, unroll_patch,
openmp (see ConvOp doc)
openmp: By default have the same value as
config.openmp. For small image, filter,
...
...
@@ -77,8 +80,8 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
with openmp on a core 2 duo.
:rtype: symbolic 4D tensor
:return: set of feature maps generated by convolutional layer. Tensor is
of shape
(batch size, nb filters, output row, output col)
:return: set of feature maps generated by convolutional layer. Tensor is
of shape
(batch size, nb filters, output row, output col)
"""
...
...
@@ -87,20 +90,22 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
image_shape
=
list
(
image_shape
)
for
i
in
xrange
(
len
(
image_shape
)):
if
image_shape
[
i
]
is
not
None
:
image_shape
[
i
]
=
get_constant_value
(
as_tensor_variable
(
image_shape
[
i
]))
image_shape
[
i
]
=
get_constant_value
(
as_tensor_variable
(
image_shape
[
i
]))
assert
str
(
image_shape
[
i
]
.
dtype
)
.
startswith
(
'int'
)
image_shape
[
i
]
=
int
(
image_shape
[
i
])
if
filter_shape
is
not
None
:
filter_shape
=
list
(
filter_shape
)
for
i
in
xrange
(
len
(
filter_shape
)):
if
filter_shape
[
i
]
is
not
None
:
filter_shape
[
i
]
=
get_constant_value
(
as_tensor_variable
(
filter_shape
[
i
]))
filter_shape
[
i
]
=
get_constant_value
(
as_tensor_variable
(
filter_shape
[
i
]))
assert
str
(
filter_shape
[
i
]
.
dtype
)
.
startswith
(
'int'
)
filter_shape
[
i
]
=
int
(
filter_shape
[
i
])
if
image_shape
and
filter_shape
:
try
:
assert
image_shape
[
1
]
==
filter_shape
[
1
]
assert
image_shape
[
1
]
==
filter_shape
[
1
]
except
Exception
:
print
'image '
,
image_shape
,
' filters '
,
filter_shape
raise
...
...
@@ -118,7 +123,7 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
bsize
,
imshp
=
None
,
None
op
=
ConvOp
(
output_mode
=
border_mode
,
dx
=
subsample
[
0
],
dy
=
subsample
[
1
],
imshp
=
imshp
,
kshp
=
kshp
,
nkern
=
nkern
,
bsize
=
bsize
,
**
kargs
)
imshp
=
imshp
,
kshp
=
kshp
,
nkern
=
nkern
,
bsize
=
bsize
,
**
kargs
)
return
op
(
input
,
filters
)
...
...
@@ -141,13 +146,13 @@ class ConvOp(Op):
The output of ConvOp is a 4D tensor, generated as follows:
output[b,k,:,:] = \sum_i input[b,i,:,:] * filter[k,i,:,:] \forall b,k
where b is the mini-batch index, k the filter index and * is the
convolution
operator.
where b is the mini-batch index, k the filter index and * is the
convolution
operator.
"""
__attrnames
=
[
'imshp'
,
'kshp'
,
'nkern'
,
'bsize'
,
'dx'
,
'dy'
,
'out_mode'
,
'unroll_batch'
,
'unroll_kern'
,
'unroll_patch'
,
'imshp_logical'
,
'kshp_logical'
,
'kshp_logical_top_aligned'
]
'unroll_batch'
,
'unroll_kern'
,
'unroll_patch'
,
'imshp_logical'
,
'kshp_logical'
,
'kshp_logical_top_aligned'
]
"""These attributes uniquely identify the behaviour of this op for
given inputs. Do not set openmp here.
"""
...
...
@@ -161,63 +166,63 @@ class ConvOp(Op):
# using the real shape and the same dtype could also help.
#unroll_batch, unroll_kern, valid time, full time
speed_unroll_batch_kern
=
[(
1
,
1
,
2.4661250114440918
,
6.5472931861877441
)
,
(
1
,
2
,
1.5869178771972656
,
5.1499760150909424
)
,
(
1
,
3
,
1.4270510673522949
,
3.6593470573425293
)
,
(
1
,
4
,
1.3373479843139648
,
3.3451821804046631
)
,
(
1
,
5
,
1.2818830013275146
,
3.1444568634033203
)
,
(
1
,
6
,
1.2521560192108154
,
3.0256359577178955
)
,
(
1
,
10
,
1.2134110927581787
,
2.9174180030822754
)
,
(
2
,
1
,
1.657214879989624
,
4.5261678695678711
)
,
(
2
,
2
,
1.2123160362243652
,
2.9747390747070312
)
,
(
2
,
3
,
1.0758891105651855
,
2.5690360069274902
)
,
(
2
,
4
,
1.0683329105377197
,
2.4233770370483398
)
,
(
2
,
5
,
1.0955719947814941
,
2.3999948501586914
)
,
(
2
,
6
,
1.5935721397399902
,
2.6878271102905273
)
,
(
2
,
10
,
1.8511250019073486
,
3.2417428493499756
)
,
(
3
,
1
,
1.5948119163513184
,
3.631148099899292
)
,
(
3
,
2
,
1.0761330127716064
,
2.6011371612548828
)
,
(
3
,
3
,
1.0551531314849854
,
2.4200370311737061
)
,
(
3
,
4
,
1.3930759429931641
,
2.5211219787597656
)
,
(
3
,
5
,
1.4330689907073975
,
2.5704989433288574
)
,
(
3
,
6
,
1.362138032913208
,
2.5964410305023193
)
,
(
3
,
10
,
1.6582000255584717
,
2.9907989501953125
)
,
(
4
,
1
,
1.4793620109558105
,
3.3473429679870605
)
,
(
4
,
2
,
1.0671560764312744
,
2.4171769618988037
)
,
(
4
,
3
,
1.2569692134857178
,
2.2807950973510742
)
,
(
4
,
4
,
1.3456289768218994
,
2.6219108104705811
)
,
(
4
,
5
,
1.4055080413818359
,
2.4606490135192871
)
,
(
4
,
6
,
1.372107982635498
,
2.551663875579834
)
,
(
4
,
10
,
1.599470853805542
,
2.9172940254211426
)
,
(
5
,
1
,
1.4115700721740723
,
3.2077109813690186
)
,
(
5
,
2
,
1.0635769367218018
,
2.2648060321807861
)
,
(
5
,
3
,
1.3842809200286865
,
2.6135518550872803
)
,
(
5
,
4
,
1.3470511436462402
,
2.3852400779724121
)
,
(
5
,
5
,
1.3539440631866455
,
2.5245928764343262
)
,
(
5
,
6
,
1.4037849903106689
,
2.5985310077667236
)
,
(
5
,
10
,
1.6120610237121582
,
2.8127608299255371
)
,
(
6
,
1
,
1.3623628616333008
,
3.021122932434082
)
,
(
6
,
2
,
1.1697649955749512
,
2.6285450458526611
)
,
(
6
,
3
,
1.2980999946594238
,
2.4746189117431641
)
,
(
6
,
4
,
1.3739941120147705
,
2.5579929351806641
)
,
(
6
,
5
,
1.3967819213867188
,
2.5522029399871826
)
,
(
6
,
6
,
1.4279270172119141
,
2.6127138137817383
)
,
(
6
,
10
,
1.605496883392334
,
2.864037036895752
)
,
(
10
,
1
,
1.6401121616363525
,
2.970099925994873
)
,
(
10
,
2
,
1.46710205078125
,
2.7231831550598145
)
,
(
10
,
3
,
1.4193780422210693
,
2.6087639331817627
)
,
(
10
,
4
,
1.4657118320465088
,
2.6246678829193115
)
,
(
10
,
5
,
1.5052611827850342
,
2.6542458534240723
)
,
(
10
,
6
,
1.5214400291442871
,
2.7243161201477051
)
,
(
10
,
10
,
1.6116268634796143
,
2.956165075302124
)]
speed_unroll_batch_kern
=
[(
1
,
1
,
2.4661250114440918
,
6.5472931861877441
)
,
(
1
,
2
,
1.5869178771972656
,
5.1499760150909424
)
,
(
1
,
3
,
1.4270510673522949
,
3.6593470573425293
)
,
(
1
,
4
,
1.3373479843139648
,
3.3451821804046631
)
,
(
1
,
5
,
1.2818830013275146
,
3.1444568634033203
)
,
(
1
,
6
,
1.2521560192108154
,
3.0256359577178955
)
,
(
1
,
10
,
1.2134110927581787
,
2.9174180030822754
)
,
(
2
,
1
,
1.657214879989624
,
4.5261678695678711
)
,
(
2
,
2
,
1.2123160362243652
,
2.9747390747070312
)
,
(
2
,
3
,
1.0758891105651855
,
2.5690360069274902
)
,
(
2
,
4
,
1.0683329105377197
,
2.4233770370483398
)
,
(
2
,
5
,
1.0955719947814941
,
2.3999948501586914
)
,
(
2
,
6
,
1.5935721397399902
,
2.6878271102905273
)
,
(
2
,
10
,
1.8511250019073486
,
3.2417428493499756
)
,
(
3
,
1
,
1.5948119163513184
,
3.631148099899292
)
,
(
3
,
2
,
1.0761330127716064
,
2.6011371612548828
)
,
(
3
,
3
,
1.0551531314849854
,
2.4200370311737061
)
,
(
3
,
4
,
1.3930759429931641
,
2.5211219787597656
)
,
(
3
,
5
,
1.4330689907073975
,
2.5704989433288574
)
,
(
3
,
6
,
1.362138032913208
,
2.5964410305023193
)
,
(
3
,
10
,
1.6582000255584717
,
2.9907989501953125
)
,
(
4
,
1
,
1.4793620109558105
,
3.3473429679870605
)
,
(
4
,
2
,
1.0671560764312744
,
2.4171769618988037
)
,
(
4
,
3
,
1.2569692134857178
,
2.2807950973510742
)
,
(
4
,
4
,
1.3456289768218994
,
2.6219108104705811
)
,
(
4
,
5
,
1.4055080413818359
,
2.4606490135192871
)
,
(
4
,
6
,
1.372107982635498
,
2.551663875579834
)
,
(
4
,
10
,
1.599470853805542
,
2.9172940254211426
)
,
(
5
,
1
,
1.4115700721740723
,
3.2077109813690186
)
,
(
5
,
2
,
1.0635769367218018
,
2.2648060321807861
)
,
(
5
,
3
,
1.3842809200286865
,
2.6135518550872803
)
,
(
5
,
4
,
1.3470511436462402
,
2.3852400779724121
)
,
(
5
,
5
,
1.3539440631866455
,
2.5245928764343262
)
,
(
5
,
6
,
1.4037849903106689
,
2.5985310077667236
)
,
(
5
,
10
,
1.6120610237121582
,
2.8127608299255371
)
,
(
6
,
1
,
1.3623628616333008
,
3.021122932434082
)
,
(
6
,
2
,
1.1697649955749512
,
2.6285450458526611
)
,
(
6
,
3
,
1.2980999946594238
,
2.4746189117431641
)
,
(
6
,
4
,
1.3739941120147705
,
2.5579929351806641
)
,
(
6
,
5
,
1.3967819213867188
,
2.5522029399871826
)
,
(
6
,
6
,
1.4279270172119141
,
2.6127138137817383
)
,
(
6
,
10
,
1.605496883392334
,
2.864037036895752
)
,
(
10
,
1
,
1.6401121616363525
,
2.970099925994873
)
,
(
10
,
2
,
1.46710205078125
,
2.7231831550598145
)
,
(
10
,
3
,
1.4193780422210693
,
2.6087639331817627
)
,
(
10
,
4
,
1.4657118320465088
,
2.6246678829193115
)
,
(
10
,
5
,
1.5052611827850342
,
2.6542458534240723
)
,
(
10
,
6
,
1.5214400291442871
,
2.7243161201477051
)
,
(
10
,
10
,
1.6116268634796143
,
2.956165075302124
)]
#valid time, full time
speed_unroll_patch_noshape
=
[
2.0109100341796875
,
5.8175678253173828
]
speed_unroll_patch_noshape
=
[
2.0109100341796875
,
5.8175678253173828
]
#valid time, full time
speed_unroll_patch_shape
=
[
1.2967290878295898
,
5.5283889770507812
]
speed_unroll_patch_shape
=
[
1.2967290878295898
,
5.5283889770507812
]
@staticmethod
def
getOutputShape
(
inshp
,
kshp
,
stride
=
(
1
,
1
),
mode
=
'valid'
):
def
getOutputShape
(
inshp
,
kshp
,
stride
=
(
1
,
1
),
mode
=
'valid'
):
"""
Computes the output dimensions of convolving an image of shape "inshp"
with kernels of shape "kshp".
...
...
@@ -228,26 +233,27 @@ class ConvOp(Op):
:return: (rows,cols) of output image
"""
dx
,
dy
=
stride
if
mode
==
'valid'
:
s
=
-
1
else
:
s
=
1
if
mode
==
'valid'
:
s
=
-
1
else
:
s
=
1
inshp
,
kshp
=
numpy
.
array
(
inshp
),
numpy
.
array
(
kshp
)
return
numpy
.
int64
(
numpy
.
ceil
((
inshp
+
s
*
kshp
-
s
*
1
)
/
\
numpy
.
array
([
dx
,
dy
],
dtype
=
'float'
)))
return
numpy
.
int64
(
numpy
.
ceil
((
inshp
+
s
*
kshp
-
s
*
1
)
/
numpy
.
array
([
dx
,
dy
],
dtype
=
'float'
)))
def
__init__
(
self
,
imshp
=
None
,
kshp
=
None
,
nkern
=
None
,
bsize
=
None
,
dx
=
1
,
dy
=
1
,
output_mode
=
'valid'
,
unroll_batch
=
None
,
unroll_kern
=
None
,
unroll_patch
=
None
,
imshp_logical
=
None
,
kshp_logical
=
None
,
kshp_logical_top_aligned
=
True
,
verbose
=
0
,
version
=-
1
,
openmp
=
None
):
dx
=
1
,
dy
=
1
,
output_mode
=
'valid'
,
unroll_batch
=
None
,
unroll_kern
=
None
,
unroll_patch
=
None
,
imshp_logical
=
None
,
kshp_logical
=
None
,
kshp_logical_top_aligned
=
True
,
verbose
=
0
,
version
=-
1
,
openmp
=
None
):
"""
Initializes a ConvOp with given output_mode (full/valid). All other
parameters are optional and are only used to generate more optimized c
...
...
@@ -259,12 +265,13 @@ class ConvOp(Op):
By default we try to select the fastest version. You can specify it
with the unroll_batch, unroll_kern, and unroll_patch parameter.
The second type of optimization is hardcoding some dimensions into the
code
when all shape are know.
The second type of optimization is hardcoding some dimensions into the
code
when all shape are know.
This make a significant difference for the 'full' output_mode.
Some times, the fastest implementation on x86-64 uses {unroll_batch=4, unroll_kern=4,
unroll_patch=False} with all other shape parameters being provided.
Some times, the fastest implementation on x86-64 uses
{unroll_batch=4, unroll_kern=4, unroll_patch=False}
with all other shape parameters being provided.
For optimizing other architectures, see:
Kazushige Goto and Robert A. Van De Geijn, Anatomy of High-Performance
...
...
@@ -278,7 +285,8 @@ class ConvOp(Op):
Optional parameters: (will generate more optimal c code)
:type imshp: tuple of len 2 or 3: 2 for 2d image, 3 for a stack of 2d images.
:type imshp: tuple of len 2 or 3: 2 for 2d image,
3 for a stack of 2d images.
:param imshp: (stacksize, nb image row, nb image col)
:type kshp: tuple of len 2
:param kshp: (nb kernel row, nb kernel col)
...
...
@@ -294,16 +302,18 @@ class ConvOp(Op):
Params which select the version of code used:
:type unroll_patch: bool
:param unroll_patch: use a version of c_code that unroll the patch loop that don't
request all shape information to work, but if all shape information are present, will
:param unroll_patch: use a version of c_code that unroll the patch loop
that don't request all shape information to work, but if all shape
information are present, will
use it to hardcode the value in the code for faster code.
:type unroll_batch:int
:param unroll_batch: use a version of c_code that unroll the batch
(by unroll_batch) and
the nkern(by unroll_kern) loop. The size must by a multiple of bsize or nkern
respectively.
:param unroll_batch: use a version of c_code that unroll the batch
(by unroll_batch) and the nkern(by unroll_kern) loop. The size
must by a multiple of bsize or nkern
respectively.
:type unroll_kern:int
:param unroll_kern: use a version of c_code that unroll the batch(by unroll_batch) and
the nkern(by unroll_kern) loop. The size must by a multiple of bsize or nkern
:param unroll_kern: use a version of c_code that unroll the batch
(by unroll_batch) and the nkern(by unroll_kern) loop. The size
must by a multiple of bsize or nkern
respectively.
:type verbose: int
...
...
@@ -316,8 +326,10 @@ class ConvOp(Op):
:param kshp_logical_top_aligned: idem
"""
# We must continue to consider None as 1 for backward compatibility.
if
dx
is
None
:
dx
=
1
if
dy
is
None
:
dy
=
1
if
dx
is
None
:
dx
=
1
if
dy
is
None
:
dy
=
1
if
int
(
dx
)
!=
dx
:
raise
TypeError
(
'ConvOp.__init__ param dx must be an int'
,
dx
)
...
...
@@ -330,8 +342,9 @@ class ConvOp(Op):
all_shape
=
imshp
is
not
None
and
kshp
is
not
None
and
\
nkern
is
not
None
and
bsize
is
not
None
if
(
unroll_batch
>
0
or
unroll_kern
>
0
)
and
not
all_shape
:
raise
Exception
(
"In ConvOp, when using unroll_batch and unroll_nkern, all shape are needed"
)
if
(
unroll_batch
>
0
or
unroll_kern
>
0
)
and
not
all_shape
:
raise
Exception
(
"In ConvOp, when using unroll_batch and"
" unroll_nkern, all shape are needed"
)
if
openmp
is
None
:
openmp
=
theano
.
config
.
openmp
...
...
@@ -343,9 +356,9 @@ class ConvOp(Op):
if
imshp
is
not
None
:
imshp
=
tuple
(
imshp
)
if
len
(
imshp
)
==
2
:
imshp
=
(
1
,)
+
imshp
elif
len
(
imshp
)
==
3
:
if
len
(
imshp
)
==
2
:
imshp
=
(
1
,)
+
imshp
elif
len
(
imshp
)
==
3
:
imshp
=
imshp
else
:
raise
Exception
(
"bad len for imshp"
)
...
...
@@ -356,73 +369,83 @@ class ConvOp(Op):
self
.
kshp
=
kshp
self
.
nkern
=
nkern
self
.
bsize
=
bsize
self
.
dx
=
dx
self
.
dy
=
dy
self
.
verbose
=
verbose
self
.
version
=
version
self
.
bsize
=
bsize
self
.
dx
=
dx
self
.
dy
=
dy
self
.
verbose
=
verbose
self
.
version
=
version
if
openmp
is
None
:
openmp
=
config
.
openmp
self
.
openmp
=
openmp
# a triple
self
.
imshp_logical
=
self
.
imshp
if
imshp_logical
is
not
None
:
self
.
imshp_logical
=
tuple
(
imshp_logical
)
if
imshp_logical
is
not
None
:
self
.
imshp_logical
=
tuple
(
imshp_logical
)
assert
(
self
.
imshp
is
None
and
self
.
imshp_logical
is
None
)
or
\
(
len
(
self
.
imshp
)
==
len
(
self
.
imshp_logical
))
# a pair
self
.
kshp_logical
=
self
.
kshp
if
kshp_logical
is
not
None
:
self
.
kshp_logical
=
tuple
(
kshp_logical
)
if
kshp_logical
is
not
None
:
self
.
kshp_logical
=
tuple
(
kshp_logical
)
self
.
kshp_logical_top_aligned
=
kshp_logical_top_aligned
self
.
unroll_batch
=
unroll_batch
self
.
unroll_kern
=
unroll_kern
self
.
unroll_patch
=
unroll_patch
self
.
unroll_batch
=
unroll_batch
self
.
unroll_kern
=
unroll_kern
self
.
unroll_patch
=
unroll_patch
if
self
.
unroll_batch
and
not
self
.
unroll_kern
:
self
.
unroll_kern
=
1
if
self
.
unroll_kern
and
not
self
.
unroll_batch
:
self
.
unroll_batch
=
1
if
self
.
unroll_batch
and
not
self
.
unroll_kern
:
self
.
unroll_kern
=
1
if
self
.
unroll_kern
and
not
self
.
unroll_batch
:
self
.
unroll_batch
=
1
#downcast unroll_batch if not a divisor of batch size
if
self
.
unroll_batch
>
0
and
self
.
bsize
%
self
.
unroll_batch
!=
0
:
if
self
.
unroll_batch
>
0
and
self
.
bsize
%
self
.
unroll_batch
!=
0
:
if
self
.
bsize
<=
self
.
unroll_batch
:
if
self
.
bsize
<=
self
.
unroll_batch
:
self
.
unroll_batch
=
self
.
bsize
else
:
#find the maximum value under unroll_batch that would work
new
=
self
.
unroll_batch
assert
(
new
>=
1
)
while
self
.
bsize
%
new
!=
0
:
new
-=
1
warnstr
=
"OPTIMISATION WARNING: in ConvOp.__init__() unroll_batch(
%
i)"
\
"must be 0 or a divisor of bsize(
%
i). We revert it to
%
i. This"
\
" won't change the result, but may make it slower."
new
=
self
.
unroll_batch
assert
(
new
>=
1
)
while
self
.
bsize
%
new
!=
0
:
new
-=
1
warnstr
=
(
"OPTIMISATION WARNING: in ConvOp.__init__() "
"unroll_batch(
%
i) must be 0 or a divisor of"
" bsize(
%
i). We revert it to
%
i. This"
" won't change the result, but may make it slower."
)
_logger
.
warn
(
warnstr
,
self
.
unroll_batch
,
self
.
bsize
,
new
)
self
.
unroll_batch
=
new
self
.
unroll_batch
=
new
#downcast unroll_kern if not a divisor of nb of kernel
if
self
.
unroll_kern
>
0
and
self
.
nkern
%
self
.
unroll_kern
!=
0
:
if
self
.
unroll_kern
>
0
and
self
.
nkern
%
self
.
unroll_kern
!=
0
:
if
self
.
nkern
<=
self
.
unroll_kern
:
if
self
.
nkern
<=
self
.
unroll_kern
:
self
.
unroll_kern
=
self
.
nkern
else
:
#find the maximum value under unroll_kern that would work
new
=
self
.
unroll_kern
assert
(
new
>=
1
)
while
self
.
nkern
%
new
!=
0
:
new
-=
1
warnstr
=
"OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(
%
i)"
\
"should be 0 or a divisor of nkern(
%
i). We revert it to
%
i."
\
"This won't change the result, but may make it slower."
new
=
self
.
unroll_kern
assert
(
new
>=
1
)
while
self
.
nkern
%
new
!=
0
:
new
-=
1
warnstr
=
(
"OPTIMISATION WARNING: in ConvOp.__init__()"
" unroll_kern(
%
i) should be 0 or a divisor of"
" nkern(
%
i). We revert it to
%
i. This"
" won't change the result, but may make it slower."
)
_logger
.
warn
(
warnstr
,
self
.
unroll_kern
,
self
.
nkern
,
new
)
self
.
unroll_kern
=
new
self
.
unroll_kern
=
new
if
all_shape
:
self
.
outshp
=
ConvOp
.
getOutputShape
(
self
.
imshp_logical
[
1
:],
self
.
kshp_logical
,
(
dx
,
dy
),
output_mode
)
self
.
fulloutshp
=
ConvOp
.
getOutputShape
(
self
.
imshp_logical
[
1
:],
self
.
kshp_logical
,
(
1
,
1
),
output_mode
)
self
.
outshp
=
ConvOp
.
getOutputShape
(
self
.
imshp_logical
[
1
:],
self
.
kshp_logical
,
(
dx
,
dy
),
output_mode
)
self
.
fulloutshp
=
ConvOp
.
getOutputShape
(
self
.
imshp_logical
[
1
:],
self
.
kshp_logical
,
(
1
,
1
),
output_mode
)
else
:
self
.
outshp
=
None
self
.
fulloutshp
=
None
...
...
@@ -430,51 +453,59 @@ class ConvOp(Op):
self
.
out_mode
=
output_mode
if
not
self
.
out_mode
in
[
"valid"
,
"full"
]:
raise
Exception
(
"Mode
%
s not implemented"
%
self
.
out_mode
)
raise
Exception
(
"Mode
%
s not implemented"
%
self
.
out_mode
)
if
all_shape
and
not
(
self
.
outshp
>
0
)
.
all
():
raise
Exception
((
"Bad size for the output shape. Verify that [post-"
\
"supersampling] input shape (
%
s) and kern shape(
%
s) are ok. "
\
"(Hint: kerns must fit inside image in valid mode)"
)
%
(
self
.
imshp_logical
,
self
.
kshp_logical
))
raise
Exception
(
"Bad size for the output shape. Verify that [post-"
"supersampling] input shape (
%
s) and kern"
" shape(
%
s) are ok. (Hint: kerns must fit inside"
" image in valid mode)"
%
(
self
.
imshp_logical
,
self
.
kshp_logical
))
if
(
self
.
unroll_kern
is
None
and
self
.
unroll_batch
is
None
and
self
.
unroll_patch
is
None
):
if
self
.
unroll_kern
is
None
and
self
.
unroll_batch
is
None
and
self
.
unroll_patch
is
None
:
#no version specified. Find the faster we have
if
self
.
bsize
is
None
and
self
.
nkern
is
None
:
self
.
unroll_patch
=
True
elif
self
.
bsize
is
not
None
and
self
.
nkern
is
not
None
:
bsize
=
self
.
bsize
nkern
=
self
.
nkern
bsize
=
self
.
bsize
nkern
=
self
.
nkern
if
bsize
is
None
:
bsize
=
1
bsize
=
1
if
nkern
is
None
:
nkern
=
1
mode_idx
=
0
if
self
.
out_mode
!=
"valid"
:
mode_idx
=
1
nkern
=
1
mode_idx
=
0
if
self
.
out_mode
!=
"valid"
:
mode_idx
=
1
if
all_shape
:
time_unroll_patch
=
self
.
speed_unroll_patch_shape
[
mode_idx
]
else
:
time_unroll_patch
=
self
.
speed_unroll_patch_noshape
[
mode_idx
]
time_unroll_patch
=
self
.
speed_unroll_patch_noshape
[
mode_idx
]
time_unroll_batch_kern
=
9999999
for
i
in
xrange
(
len
(
self
.
speed_unroll_batch_kern
)):
if
bsize
%
self
.
speed_unroll_batch_kern
[
i
][
0
]
==
0
and
nkern
%
self
.
speed_unroll_batch_kern
[
i
][
1
]
==
0
:
if
self
.
speed_unroll_batch_kern
[
i
][
2
+
mode_idx
]
<
time_unroll_batch_kern
:
time_unroll_batch_kern
=
self
.
speed_unroll_batch_kern
[
i
][
2
+
mode_idx
]
time_unroll_batch_kern_idx
=
i
if
(
bsize
%
self
.
speed_unroll_batch_kern
[
i
][
0
]
==
0
and
nkern
%
self
.
speed_unroll_batch_kern
[
i
][
1
]
==
0
):
if
self
.
speed_unroll_batch_kern
[
i
][
2
+
mode_idx
]
<
time_unroll_batch_kern
:
time_unroll_batch_kern
=
self
.
speed_unroll_batch_kern
[
i
][
2
+
mode_idx
]
time_unroll_batch_kern_idx
=
i
if
time_unroll_patch
<
time_unroll_batch_kern
:
self
.
unroll_patch
=
True
else
:
self
.
unroll_batch
=
self
.
speed_unroll_batch_kern
[
time_unroll_batch_kern_idx
][
0
]
self
.
unroll_kern
=
self
.
speed_unroll_batch_kern
[
time_unroll_batch_kern_idx
][
1
]
self
.
unroll_batch
=
self
.
speed_unroll_batch_kern
[
time_unroll_batch_kern_idx
][
0
]
self
.
unroll_kern
=
self
.
speed_unroll_batch_kern
[
time_unroll_batch_kern_idx
][
1
]
self
.
unroll_patch
=
False
_logger
.
debug
(
"AUTO FIND VERSION OF C_CODE OF CONV OP "
"
%
s
%
s
%
s
%
s
%
s
%
s
%
s"
,
self
.
unroll_batch
,
self
.
unroll_kern
,
self
.
unroll_patch
,
self
.
bsize
,
self
.
nkern
,
time_
unroll_patch
,
time_unroll_batch_kern
)
"
%
s
%
s
%
s
%
s
%
s
%
s
%
s"
,
self
.
unroll_batch
,
self
.
unroll_kern
,
self
.
unroll_patch
,
self
.
bsize
,
self
.
nkern
,
time_unroll_patch
,
time_unroll_batch_kern
)
self
.
_rehash
()
if
config
.
op
.
set_flops
:
...
...
@@ -504,41 +535,46 @@ class ConvOp(Op):
return
self
.
__hashval
def
__str__
(
self
):
return
"ConvOp{"
+
","
.
join
(
str
((
a
,
getattr
(
self
,
a
)))
for
a
in
self
.
__attrnames
)
+
"}"
return
"ConvOp{"
+
","
.
join
(
str
((
a
,
getattr
(
self
,
a
)))
for
a
in
self
.
__attrnames
)
+
"}"
def
set_flops
(
self
):
""" Useful with the hack in profilemode to print the MFlops"""
if
self
.
out_mode
==
"valid"
:
self
.
flops
=
self
.
kshp
[
0
]
*
self
.
kshp
[
1
]
*
2
#nb mul and add by output pixed
self
.
flops
*=
self
.
outshp
[
0
]
*
self
.
outshp
[
1
]
#nb flops by output image
self
.
flops
*=
self
.
imshp
[
0
]
*
self
.
nkern
*
self
.
bsize
#for all outputs images#n_stack==self.imshp[0]
else
:
#full mode not implemented
self
.
flops
=
0
for
out_row
in
xrange
(
self
.
outshp
[
0
]):
#loop over output row
for
out_col
in
xrange
(
self
.
outshp
[
0
]):
#loop over output col
for
row
in
xrange
(
self
.
kshp
[
0
]):
#loop over kern row
if
(
row
+
out_row
-
self
.
kshp
[
0
]
+
1
<
0
or
row
+
out_row
-
self
.
kshp
[
0
]
+
1
>=
self
.
imshp
[
1
]):
if
self
.
out_mode
==
"valid"
:
# nb mul and add by output pixed
self
.
flops
=
self
.
kshp
[
0
]
*
self
.
kshp
[
1
]
*
2
#nb flops by output image
self
.
flops
*=
self
.
outshp
[
0
]
*
self
.
outshp
[
1
]
# for all outputs images#n_stack==self.imshp[0]
self
.
flops
*=
self
.
imshp
[
0
]
*
self
.
nkern
*
self
.
bsize
else
:
# full mode not implemented
self
.
flops
=
0
for
out_row
in
xrange
(
self
.
outshp
[
0
]):
# loop over output row
for
out_col
in
xrange
(
self
.
outshp
[
0
]):
# loop over output col
for
row
in
xrange
(
self
.
kshp
[
0
]):
# loop over kern row
if
(
row
+
out_row
-
self
.
kshp
[
0
]
+
1
<
0
or
row
+
out_row
-
self
.
kshp
[
0
]
+
1
>=
self
.
imshp
[
1
]):
continue
col
=
0
max_col
=
self
.
kshp
[
1
]
img_col
=
out_col
-
self
.
kshp
[
1
]
+
1
max_col
=
min
(
max_col
,
self
.
imshp
[
2
]
-
img_col
)
col
=
0
max_col
=
self
.
kshp
[
1
]
img_col
=
out_col
-
self
.
kshp
[
1
]
+
1
max_col
=
min
(
max_col
,
self
.
imshp
[
2
]
-
img_col
)
if
img_col
<
0
:
col
=
-
img_col
img_col
+=
col
while
col
<
max_col
:
#
loop over kern col
self
.
flops
+=
2
col
+=
1
self
.
flops
*=
self
.
imshp
[
0
]
*
self
.
nkern
*
self
.
bsize
#for all outputs images#n_stack==self.imshp[0]
if
img_col
<
0
:
col
=
-
img_col
img_col
+=
col
while
col
<
max_col
:
#
loop over kern col
self
.
flops
+=
2
col
+=
1
# for all outputs images#n_stack==self.imshp[0]
self
.
flops
*=
self
.
imshp
[
0
]
*
self
.
nkern
*
self
.
bsize
assert
self
.
flops
==
self
.
bsize
*
self
.
nkern
*
self
.
imshp
[
0
]
*
\
self
.
kshp
[
0
]
*
self
.
kshp
[
1
]
*
self
.
imshp
[
1
]
*
self
.
imshp
[
2
]
*
2
self
.
kshp
[
0
]
*
self
.
kshp
[
1
]
*
\
self
.
imshp
[
1
]
*
self
.
imshp
[
2
]
*
2
def
make_node
(
self
,
inputs
,
kerns
):
# TODO: find a way to make ConvOp work for N-D (after NIPS09)
...
...
@@ -551,19 +587,23 @@ class ConvOp(Op):
_kerns
=
as_tensor_variable
(
kerns
)
# TODO: lift this restriction by upcasting either inputs or kerns
if
_inputs
.
ndim
!=
4
:
raise
TypeError
(
'ConvOp (make_node) requires input be a 4D tensor; received "
%
s" (
%
i dims)'
%
(
inputs
,
_inputs
.
ndim
))
raise
TypeError
(
'ConvOp (make_node) requires input be a 4D tensor;'
' received "
%
s" (
%
i dims)'
%
(
inputs
,
_inputs
.
ndim
))
if
_kerns
.
ndim
!=
4
:
raise
TypeError
(
'make_node requires 4D tensor of kernels'
)
if
_inputs
.
type
.
dtype
!=
_kerns
.
type
.
dtype
:
raise
NotImplementedError
(
"The image and the kernel must have the same type."
"inputs(
%
s), kerns(
%
s)"
%
(
_inputs
.
dtype
,
_kerns
.
dtype
))
raise
NotImplementedError
(
"The image and the kernel must have the same type."
"inputs(
%
s), kerns(
%
s)"
%
(
_inputs
.
dtype
,
_kerns
.
dtype
))
if
self
.
outshp
is
not
None
:
bcastable23
=
[
self
.
outshp
[
0
]
==
1
,
self
.
outshp
[
1
]
==
1
]
bcastable23
=
[
self
.
outshp
[
0
]
==
1
,
self
.
outshp
[
1
]
==
1
]
else
:
bcastable23
=
[
False
,
False
]
output
=
theano
.
tensor
.
tensor
(
dtype
=
_inputs
.
type
.
dtype
,
broadcastable
=
[
_inputs
.
broadcastable
[
0
],
_kerns
.
broadcastable
[
0
]]
+
bcastable23
);
_kerns
.
broadcastable
[
0
]]
+
bcastable23
)
return
Apply
(
self
,
[
_inputs
,
_kerns
],
[
output
])
...
...
@@ -582,10 +622,12 @@ class ConvOp(Op):
if
self
.
kshp_logical
:
kshp
=
self
.
kshp_logical
try
:
fmshp
=
ConvOp
.
getOutputShape
(
imshp
[
1
:],
kshp
,
(
self
.
dx
,
self
.
dy
),
self
.
out_mode
)
fmshp
=
ConvOp
.
getOutputShape
(
imshp
[
1
:],
kshp
,
(
self
.
dx
,
self
.
dy
),
self
.
out_mode
)
except
TypeError
:
raise
theano
.
tensor
.
ShapeError
()
outshp
=
(
batch_size
,
fmo
)
+
tuple
(
fmshp
)
outshp
=
(
batch_size
,
fmo
)
+
tuple
(
fmshp
)
return
[
outshp
]
else
:
# Haven't implemented this case. imshp and kshp may be symbollic
...
...
@@ -593,8 +635,7 @@ class ConvOp(Op):
# we simply let the default function do its work.
raise
theano
.
tensor
.
ShapeError
()
def
perform
(
self
,
node
,
inp
,
out
):
def
perform
(
self
,
node
,
inp
,
out
):
"""
By default if len(img2d.shape)==3, we
"""
...
...
@@ -603,9 +644,12 @@ class ConvOp(Op):
if
not
imported_scipy_signal
:
raise
theano
.
gof
.
utils
.
MethodNotDefined
(
"c_headers"
,
type
(
self
),
self
.
__class__
.
__name__
,
"Need the python package for scipy.signal to be installed for the python implementation. You can use the C implementation instead."
)
"Need the python package for scipy.signal to be installed "
"for the python implementation. You can use the C"
" implementation instead."
)
# TODO: move these back out to global scope when they no longer cause an atexit error
# TODO: move these back out to global scope when they no longer
# cause an atexit error
imshp
=
self
.
imshp
if
imshp
is
None
or
any
([
x
is
None
for
x
in
imshp
]):
imshp
=
tuple
(
img2d
.
shape
[
1
:])
...
...
@@ -634,39 +678,43 @@ class ConvOp(Op):
if
self
.
fulloutshp
is
not
None
:
fulloutshp
=
tuple
(
self
.
fulloutshp
)
else
:
fulloutshp
=
tuple
(
ConvOp
.
getOutputShape
(
imshp_logical
[
1
:],
kshp_logical
,
(
1
,
1
),
self
.
out_mode
))
fulloutshp
=
tuple
(
ConvOp
.
getOutputShape
(
imshp_logical
[
1
:],
kshp_logical
,
(
1
,
1
),
self
.
out_mode
))
if
z
[
0
]
is
None
or
z
[
0
]
.
shape
!=
(
bsize
,)
+
(
nkern
,)
+
fulloutshp
:
z
[
0
]
=
numpy
.
zeros
((
bsize
,)
+
(
nkern
,)
+
fulloutshp
,
dtype
=
img2d
.
dtype
)
zz
=
z
[
0
]
if
z
[
0
]
is
None
or
z
[
0
]
.
shape
!=
(
bsize
,)
+
(
nkern
,)
+
fulloutshp
:
z
[
0
]
=
numpy
.
zeros
((
bsize
,)
+
(
nkern
,)
+
fulloutshp
,
dtype
=
img2d
.
dtype
)
zz
=
z
[
0
]
stacklen
=
imshp
[
0
]
img2d
=
img2d
.
reshape
((
bsize
,)
+
imshp
)
filtersflipped
=
filtersflipped
.
reshape
((
nkern
,
stacklen
)
+
kshp
)
img2d
=
img2d
.
reshape
((
bsize
,)
+
imshp
)
filtersflipped
=
filtersflipped
.
reshape
((
nkern
,
stacklen
)
+
kshp
)
if
self
.
imshp
!=
self
.
imshp_logical
:
# assuming that to get from imshp to imshp logical we insert zeros in missing spots
rstride
=
int
(
numpy
.
ceil
(
imshp_logical
[
1
]
/
float
(
imshp
[
1
])))
cstride
=
int
(
numpy
.
ceil
(
imshp_logical
[
2
]
/
float
(
imshp
[
2
])))
buf
=
numpy
.
zeros
((
bsize
,)
+
imshp_logical
,
dtype
=
img2d
.
dtype
)
buf
[:,
:,
::
rstride
,
::
cstride
]
=
img2d
buf
=
numpy
.
zeros
((
bsize
,)
+
imshp_logical
,
dtype
=
img2d
.
dtype
)
buf
[:,
:,
::
rstride
,
::
cstride
]
=
img2d
img2d
=
buf
del
buf
,
rstride
,
cstride
if
kshp
!=
kshp_logical
:
rstride
=
int
(
numpy
.
ceil
(
kshp_logical
[
0
]
/
float
(
kshp
[
0
])))
cstride
=
int
(
numpy
.
ceil
(
kshp_logical
[
1
]
/
float
(
kshp
[
1
])))
buf
=
numpy
.
zeros
((
nkern
,
stacklen
)
+
self
.
kshp_logical
,
dtype
=
filtersflipped
.
dtype
)
buf
=
numpy
.
zeros
((
nkern
,
stacklen
)
+
self
.
kshp_logical
,
dtype
=
filtersflipped
.
dtype
)
if
self
.
kshp_logical_top_aligned
:
roffset
=
coffset
=
0
roffset
=
coffset
=
0
else
:
roffset
=
(
kshp_logical
[
0
]
-
(
kshp
[
0
]
*
rstride
)
-
1
+
rstride
)
%
rstride
coffset
=
(
kshp_logical
[
1
]
-
(
kshp
[
1
]
*
cstride
)
-
1
+
cstride
)
%
cstride
roffset
=
(
kshp_logical
[
0
]
-
(
kshp
[
0
]
*
rstride
)
-
1
+
rstride
)
%
rstride
coffset
=
(
kshp_logical
[
1
]
-
(
kshp
[
1
]
*
cstride
)
-
1
+
cstride
)
%
cstride
assert
roffset
>=
0
assert
coffset
>=
0
buf
[:,
:,
roffset
::
rstride
,
coffset
::
cstride
]
=
filtersflipped
buf
[:,
:,
roffset
::
rstride
,
coffset
::
cstride
]
=
filtersflipped
filtersflipped
=
buf
del
buf
,
rstride
,
cstride
...
...
@@ -675,39 +723,39 @@ class ConvOp(Op):
for
b
in
xrange
(
bsize
):
for
n
in
xrange
(
nkern
):
zz
[
b
,
n
,
...
]
.
fill
(
0
)
zz
[
b
,
n
,
...
]
.
fill
(
0
)
for
im0
in
xrange
(
stacklen
):
zz
[
b
,
n
,
...
]
+=
_convolve2d
(
\
img2d
[
b
,
im0
,
...
],
filtersflipped
[
n
,
im0
,
...
],
1
,
val
,
bval
,
0
)
zz
[
b
,
n
,
...
]
+=
_convolve2d
(
img2d
[
b
,
im0
,
...
],
filtersflipped
[
n
,
im0
,
...
],
1
,
val
,
bval
,
0
)
if
False
:
if
False
and
self
.
out_mode
==
"full"
:
img2d2
=
numpy
.
zeros
((
bsize
,
stacklen
,
imshp
[
1
]
+
2
*
kshp
[
0
]
-
2
,
imshp
[
2
]
+
2
*
kshp
[
1
]
-
2
))
img2d2
[:,
:,
kshp
[
0
]
-
1
:
kshp
[
0
]
-
1
+
imshp
[
1
],
kshp
[
1
]
-
1
:
kshp
[
1
]
-
1
+
imshp
[
2
]]
=
img2d
if
False
and
self
.
out_mode
==
"full"
:
img2d2
=
numpy
.
zeros
((
bsize
,
stacklen
,
imshp
[
1
]
+
2
*
kshp
[
0
]
-
2
,
imshp
[
2
]
+
2
*
kshp
[
1
]
-
2
))
img2d2
[:,
:,
kshp
[
0
]
-
1
:
kshp
[
0
]
-
1
+
imshp
[
1
],
kshp
[
1
]
-
1
:
kshp
[
1
]
-
1
+
imshp
[
2
]]
=
img2d
img2d
=
img2d2
#N_image_shape = image_data.shape
for
b
in
xrange
(
bsize
):
for
n
in
xrange
(
nkern
):
zz
[
b
,
n
,
...
]
.
fill
(
0
)
zz
[
b
,
n
,
...
]
.
fill
(
0
)
for
im0
in
xrange
(
stacklen
):
for
row
in
xrange
(
0
,
zz
.
shape
[
2
],
self
.
dx
):
for
col
in
xrange
(
0
,
zz
.
shape
[
3
],
self
.
dy
):
zz
[
b
,
n
,
row
,
col
]
+=
(
img2d
[
b
,
im0
,
row
:
row
+
kshp
[
0
],
col
:
col
+
kshp
[
1
]]
*
\
filtersflipped
[
n
,
im0
,::
-
1
,
::
-
1
])
.
sum
()
for
row
in
xrange
(
0
,
zz
.
shape
[
2
],
self
.
dx
):
for
col
in
xrange
(
0
,
zz
.
shape
[
3
],
self
.
dy
):
zz
[
b
,
n
,
row
,
col
]
+=
(
img2d
[
b
,
im0
,
row
:
row
+
kshp
[
0
],
col
:
col
+
kshp
[
1
]]
*
filtersflipped
[
n
,
im0
,
::
-
1
,
::
-
1
])
.
sum
()
#We copy it to remove the Stride mismatch warning from DEBUG_MODE.
#The copy make that we return an object with the same stride as the c version.
#The copy don't affect the performence during our experience as in that case we
#execute the c version which is much faster.
if
self
.
dx
>
1
or
self
.
dy
>
1
:
zz
=
zz
[:,:,
0
::
self
.
dx
,
0
::
self
.
dy
]
.
copy
()
z
[
0
]
=
zz
if
self
.
dx
>
1
or
self
.
dy
>
1
:
zz
=
zz
[:,
:,
0
::
self
.
dx
,
0
::
self
.
dy
]
.
copy
()
z
[
0
]
=
zz
def
grad
(
self
,
inp
,
grads
):
inputs
,
kerns
=
inp
...
...
@@ -724,34 +772,38 @@ class ConvOp(Op):
# build a "node", that should be equivalent to the one given by
# self.make_node, but using conv3D instead of self.
tmp_node
=
theano
.
tensor
.
nnet
.
conv3D
(
V
=
inputs
.
dimshuffle
(
0
,
2
,
3
,
'x'
,
1
),
W
=
kerns
[:,
:,
::
-
1
,
::
-
1
]
.
dimshuffle
(
0
,
2
,
3
,
'x'
,
1
),
b
=
theano
.
tensor
.
alloc
(
numpy
.
asarray
(
0
,
dtype
=
kerns
.
dtype
),
kerns
.
shape
[
0
]),
d
=
(
self
.
dx
,
self
.
dy
,
1
))
node
=
theano
.
tensor
.
addbroadcast
(
tmp_node
,
3
)
.
dimshuffle
(
0
,
4
,
1
,
2
)
V
=
inputs
.
dimshuffle
(
0
,
2
,
3
,
'x'
,
1
),
W
=
kerns
[:,
:,
::
-
1
,
::
-
1
]
.
dimshuffle
(
0
,
2
,
3
,
'x'
,
1
),
b
=
theano
.
tensor
.
alloc
(
numpy
.
asarray
(
0
,
dtype
=
kerns
.
dtype
),
kerns
.
shape
[
0
]),
d
=
(
self
.
dx
,
self
.
dy
,
1
))
node
=
theano
.
tensor
.
addbroadcast
(
tmp_node
,
3
)
.
dimshuffle
(
0
,
4
,
1
,
2
)
# mimic what happens inside theano.grad: get the input gradient
# of the final cost wrt all variables involved.
tmp_gmap
=
theano
.
gradient
.
grad_sources_inputs
([(
node
,
gz
)],
[
inputs
,
kerns
])
tmp_gmap
=
theano
.
gradient
.
grad_sources_inputs
(
[(
node
,
gz
)],
[
inputs
,
kerns
])
return
[
tmp_gmap
[
inputs
],
tmp_gmap
[
kerns
]]
if
self
.
dx
not
in
(
1
,
2
)
or
self
.
dy
not
in
(
1
,
2
):
raise
NotImplementedError
(
"ERROR: We disable ConvOp.grad now when dx or "
\
"dy are different from 1 and 2, as there is a bug in it."
)
raise
NotImplementedError
(
"ERROR: We disable ConvOp.grad now when dx or "
"dy are different from 1 and 2, as there is a bug in it."
)
all_shape
=
self
.
imshp
is
not
None
and
self
.
kshp
is
not
None
and
\
self
.
nkern
is
not
None
and
self
.
bsize
is
not
None
all_shape
=
(
self
.
imshp
is
not
None
and
self
.
kshp
is
not
None
and
self
.
nkern
is
not
None
and
self
.
bsize
is
not
None
)
if
not
all_shape
and
(
self
.
dx
!=
1
or
self
.
dy
!=
1
):
raise
Exception
(
"ConvOp.grad when dx!=1 or dy!=1 we must have all "
\
if
not
all_shape
and
(
self
.
dx
!=
1
or
self
.
dy
!=
1
):
raise
Exception
(
"ConvOp.grad when dx!=1 or dy!=1 we must have all "
"the optional shape information"
)
####### Determine gradient on kernels ########
assert
inputs
.
ndim
==
4
and
kerns
.
ndim
==
4
assert
inputs
.
ndim
==
4
and
kerns
.
ndim
==
4
newin
=
inputs
.
dimshuffle
((
1
,
0
,
2
,
3
))
newgz
=
gz
.
dimshuffle
((
1
,
0
,
2
,
3
))
newin
=
inputs
.
dimshuffle
((
1
,
0
,
2
,
3
))
newgz
=
gz
.
dimshuffle
((
1
,
0
,
2
,
3
))
(
bsize
,
nkern
)
=
None
,
None
imshp
=
None
...
...
@@ -762,48 +814,55 @@ class ConvOp(Op):
if
self
.
out_mode
==
'valid'
:
(
img
,
filters
)
=
(
newin
,
newgz
)
kshp_logical
=
self
.
fulloutshp
kshp_logical_top_aligned
=
False
kshp_logical_top_aligned
=
False
if
all_shape
:
(
bsize
,
nkern
)
=
(
self
.
imshp
[
0
],
self
.
nkern
)
imshp
=
(
self
.
bsize
,
self
.
imshp
[
1
],
self
.
imshp
[
2
])
kshp
=
self
.
outshp
kshp
=
self
.
outshp
un_b
=
self
.
unroll_batch
un_k
=
self
.
unroll_kern
elif
self
.
out_mode
==
'full'
:
(
img
,
filters
)
=
(
newgz
,
newin
)
kshp_logical
=
None
kshp_logical_top_aligned
=
True
kshp_logical_top_aligned
=
True
if
all_shape
:
imshp_logical
=
(
self
.
bsize
,
self
.
fulloutshp
[
0
],
self
.
fulloutshp
[
1
])
imshp_logical
=
(
self
.
bsize
,
self
.
fulloutshp
[
0
],
self
.
fulloutshp
[
1
])
(
bsize
,
nkern
)
=
(
self
.
nkern
,
self
.
imshp
[
0
])
imshp
=
(
self
.
bsize
,
self
.
outshp
[
0
],
self
.
outshp
[
1
])
kshp
=
self
.
imshp
[
1
:]
kshp
=
self
.
imshp
[
1
:]
un_b
=
self
.
unroll_kern
un_k
=
self
.
unroll_batch
else
:
raise
NotImplementedError
(
'Only [full,valid] modes are currently supported.'
)
raise
NotImplementedError
(
'Only [full,valid] modes are currently supported.'
)
filters
=
filters
[:,
:,::
-
1
,::
-
1
]
#
flip them
filters
=
filters
[:,
:,
::
-
1
,
::
-
1
]
#
flip them
if
0
:
#
find good value for the unroll
if
0
:
#
find good value for the unroll
if
all_shape
and
un_b
!=
0
and
bsize
%
un_b
!=
0
:
if
bsize
<
un_b
:
if
all_shape
and
un_b
!=
0
and
bsize
%
un_b
!=
0
:
if
bsize
<
un_b
:
un_b
=
bsize
else
:
un_b
=
1
_logger
.
warn
(
"Optimization Warning: in ConvOp.grad() we can't determine "
\
"a good unroll value for the batch. Maybe you can optimize this!"
)
_logger
.
warn
(
"Optimization Warning: in ConvOp.grad() we can't "
" determine a good unroll value for the batch."
" Maybe you can optimize this!"
)
if
all_shape
and
un_k
!=
0
and
nkern
%
un_k
!=
0
:
if
nkern
<
un_k
:
if
all_shape
and
un_k
!=
0
and
nkern
%
un_k
!=
0
:
if
nkern
<
un_k
:
un_k
=
nkern
else
:
un_k
=
1
_logger
.
warn
(
"Optimization Warning: in ConvOp.grad() we can't determine "
\
"a good unroll value for the kernel. Maybe you can optimize this!"
)
_logger
.
warn
(
"Optimization Warning: in ConvOp.grad() we can't"
" determine a good unroll value for the kernel. Maybe"
" you can optimize this!"
)
dw
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
output_mode
=
'valid'
,
dw
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
output_mode
=
'valid'
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
,
unroll_patch
=
un_p
,
imshp_logical
=
imshp_logical
,
kshp_logical
=
kshp_logical
,
...
...
@@ -811,8 +870,8 @@ class ConvOp(Op):
version
=
self
.
version
,
verbose
=
self
.
verbose
)
else
:
# let __init__ choose c params be chosen automatically from shapes
dw
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
output_mode
=
'valid'
,
else
:
# let __init__ choose c params be chosen automatically from shapes
dw
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
output_mode
=
'valid'
,
unroll_batch
=
None
,
unroll_kern
=
None
,
unroll_patch
=
None
,
imshp_logical
=
imshp_logical
,
kshp_logical
=
kshp_logical
,
...
...
@@ -820,26 +879,25 @@ class ConvOp(Op):
version
=
self
.
version
,
verbose
=
self
.
verbose
)
if
hasattr
(
self
,
'flops'
):
if
hasattr
(
self
,
'flops'
):
dw
.
set_flops
()
dw
=
dw
(
img
,
filters
)
dw
=
dw
(
img
,
filters
)
if
all_shape
:
assert
(
dw
.
owner
.
op
.
outshp
==
self
.
kshp
)
.
all
()
assert
(
dw
.
owner
.
op
.
outshp
==
self
.
kshp
)
.
all
()
if
self
.
out_mode
==
'valid'
:
# before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1]
dw
=
dw
.
dimshuffle
((
1
,
0
,
2
,
3
))
dw
=
dw
[:,
:,::
-
1
,
::
-
1
]
dw
=
dw
.
dimshuffle
((
1
,
0
,
2
,
3
))
dw
=
dw
[:,
:,
::
-
1
,
::
-
1
]
####### Determine gradient on inputs ########
mode
=
'valid'
if
not
self
.
out_mode
==
'full'
:
mode
=
'full'
filters
=
kerns
.
dimshuffle
((
1
,
0
,
2
,
3
))
filters
=
filters
[:,
:,::
-
1
,
::
-
1
]
filters
=
kerns
.
dimshuffle
((
1
,
0
,
2
,
3
))
filters
=
filters
[:,
:,
::
-
1
,
::
-
1
]
nkern
=
None
imshp
=
None
imshp_logical
=
None
...
...
@@ -848,33 +906,36 @@ class ConvOp(Op):
if
all_shape
:
nkern
=
self
.
imshp
[
0
]
imshp
=
(
self
.
nkern
,
self
.
outshp
[
0
],
self
.
outshp
[
1
])
imshp_logical
=
(
self
.
nkern
,
self
.
fulloutshp
[
0
],
self
.
fulloutshp
[
1
])
imshp_logical
=
(
self
.
nkern
,
self
.
fulloutshp
[
0
],
self
.
fulloutshp
[
1
])
if
0
:
# hard-code c generation parameters
if
0
:
# hard-code c generation parameters
din
=
ConvOp
(
imshp
,
self
.
kshp
,
nkern
,
self
.
bsize
,
1
,
1
,
output_mode
=
mode
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
,
unroll_patch
=
un_p
,
1
,
1
,
output_mode
=
mode
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
,
unroll_patch
=
un_p
,
imshp_logical
=
imshp_logical
,
kshp_logical
=
None
,
version
=-
1
,
#
we we change the mode, we don't forward the version.
version
=-
1
,
#
we we change the mode, we don't forward the version.
verbose
=
self
.
verbose
)
else
:
# let __init__ figure out the unrolling / patch sizes
else
:
# let __init__ figure out the unrolling / patch sizes
din
=
ConvOp
(
imshp
,
self
.
kshp
,
nkern
,
self
.
bsize
,
1
,
1
,
output_mode
=
mode
,
unroll_batch
=
None
,
unroll_kern
=
None
,
unroll_patch
=
None
,
1
,
1
,
output_mode
=
mode
,
unroll_batch
=
None
,
unroll_kern
=
None
,
unroll_patch
=
None
,
imshp_logical
=
imshp_logical
,
kshp_logical
=
None
,
version
=-
1
,
#
we we change the mode, we don't forward the version.
version
=-
1
,
#
we we change the mode, we don't forward the version.
verbose
=
self
.
verbose
)
if
hasattr
(
self
,
'flops'
):
if
hasattr
(
self
,
'flops'
):
din
.
set_flops
()
din
=
din
(
gz
,
filters
)
din
=
din
(
gz
,
filters
)
assert
(
din
.
owner
.
op
.
outshp
is
None
and
self
.
imshp
is
None
)
or
\
(
din
.
owner
.
op
.
outshp
is
None
)
or
\
(
din
.
owner
.
op
.
outshp
==
self
.
imshp
[
1
:])
.
all
()
(
din
.
owner
.
op
.
outshp
==
self
.
imshp
[
1
:])
.
all
()
# din and dw should have the same broadcasting pattern as the
# parameters they are the gradient of (resp. inputs and kerns).
...
...
@@ -902,10 +963,14 @@ using namespace std;
""" Return True if we will generate code that use gemm.
"""
#the gemm version only support that case
if
self
.
out_mode
==
'valid'
and
self
.
dx
==
0
and
self
.
dy
==
0
:
if
self
.
out_mode
==
'valid'
and
self
.
dx
==
0
and
self
.
dy
==
0
:
#We use a faster version in those case.
if
(
self
.
imshp
!=
self
.
imshp_logical
or
self
.
kshp
!=
self
.
kshp_logical
or
self
.
unroll_patch
or
self
.
unroll_batch
>
0
or
self
.
unroll_kern
>
0
):
if
(
self
.
imshp
!=
self
.
imshp_logical
or
self
.
kshp
!=
self
.
kshp_logical
or
self
.
unroll_patch
or
self
.
unroll_batch
>
0
or
self
.
unroll_kern
>
0
):
return
False
return
True
return
False
...
...
@@ -918,7 +983,9 @@ using namespace std;
def
c_no_compile_args
(
self
):
#when the ksph==(1,1) gcc 4.3.0 segfault during the
#compilation with -O3. This don't happen at -O2
if
theano
.
gof
.
cmodule
.
gcc_version
()
in
[
'4.3.0'
]
and
self
.
kshp
==
(
1
,
1
):
if
(
theano
.
gof
.
cmodule
.
gcc_version
()
in
[
'4.3.0'
]
and
self
.
kshp
==
(
1
,
1
)):
return
[
'-O3'
]
else
:
return
[]
...
...
@@ -928,7 +995,8 @@ using namespace std;
if
self
.
use_blas
():
ret
=
blas
.
ldflags
(
libs
=
False
,
flags
=
True
)
if
theano
.
gof
.
cmodule
.
gcc_version
()
in
[
'4.3.0'
]
and
self
.
kshp
==
(
1
,
1
):
if
(
theano
.
gof
.
cmodule
.
gcc_version
()
in
[
'4.3.0'
]
and
self
.
kshp
==
(
1
,
1
)):
ret
+=
[
'-O2'
]
if
self
.
openmp
:
ret
+=
[
'-fopenmp'
]
...
...
@@ -951,123 +1019,140 @@ using namespace std;
if
node
.
inputs
[
0
]
.
type
.
dtype
!=
node
.
inputs
[
1
]
.
type
.
dtype
:
raise
NotImplementedError
()
assert
node
.
inputs
[
0
]
.
type
.
dtype
==
node
.
inputs
[
1
]
.
type
.
dtype
d
=
locals
()
d
=
locals
()
d
.
update
(
sub
)
all_shape
=
self
.
imshp
is
not
None
and
self
.
kshp
is
not
None
and
\
self
.
nkern
is
not
None
and
self
.
bsize
is
not
None
all_shape
=
(
self
.
imshp
is
not
None
and
self
.
kshp
is
not
None
and
self
.
nkern
is
not
None
and
self
.
bsize
is
not
None
)
d
[
"self_out_mode"
]
=
self
.
out_mode
d
[
"self_dx"
]
=
self
.
dx
d
[
"self_dy"
]
=
self
.
dy
d
[
"mode"
]
=
self
.
out_mode
.
upper
()
d
[
"affectation"
]
=
"="
d
[
"self_out_mode"
]
=
self
.
out_mode
d
[
"self_dx"
]
=
self
.
dx
d
[
"self_dy"
]
=
self
.
dy
d
[
"mode"
]
=
self
.
out_mode
.
upper
()
d
[
"affectation"
]
=
"="
if
all_shape
:
d
[
"self_bsize"
]
=
self
.
bsize
d
[
"self_nkern"
]
=
self
.
nkern
d
[
"self_outshp0"
]
=
self
.
outshp
[
0
]
d
[
"self_outshp1"
]
=
self
.
outshp
[
1
]
d
[
"self_imshp0"
]
=
self
.
imshp
[
0
]
d
[
"self_imshp1"
]
=
self
.
imshp
[
1
]
d
[
"self_imshp2"
]
=
self
.
imshp
[
2
]
d
[
"self_kshp0"
]
=
self
.
kshp
[
0
]
d
[
"self_kshp1"
]
=
self
.
kshp
[
1
]
d
[
"self_bsize"
]
=
self
.
bsize
d
[
"self_nkern"
]
=
self
.
nkern
d
[
"self_outshp0"
]
=
self
.
outshp
[
0
]
d
[
"self_outshp1"
]
=
self
.
outshp
[
1
]
d
[
"self_imshp0"
]
=
self
.
imshp
[
0
]
d
[
"self_imshp1"
]
=
self
.
imshp
[
1
]
d
[
"self_imshp2"
]
=
self
.
imshp
[
2
]
d
[
"self_kshp0"
]
=
self
.
kshp
[
0
]
d
[
"self_kshp1"
]
=
self
.
kshp
[
1
]
d
[
"self_kshp_logical_r"
]
=
self
.
kshp_logical
[
0
]
d
[
"self_kshp_logical_c"
]
=
self
.
kshp_logical
[
1
]
d
[
"self_kshp_logical_stride_r"
]
=
int
(
numpy
.
ceil
(
self
.
kshp_logical
[
0
]
/
float
(
self
.
kshp
[
0
])))
d
[
"self_kshp_logical_stride_c"
]
=
int
(
numpy
.
ceil
(
self
.
kshp_logical
[
1
]
/
float
(
self
.
kshp
[
1
])))
d
[
"self_imshp_logical_r"
]
=
self
.
imshp_logical
[
1
]
#numpy.B. 1 not 0
d
[
"self_imshp_logical_c"
]
=
self
.
imshp_logical
[
2
]
#numpy.B. 2 not 1
d
[
"self_imshp_logical_stride_r"
]
=
int
(
numpy
.
ceil
(
self
.
imshp_logical
[
1
]
/
float
(
self
.
imshp
[
1
])))
d
[
"self_imshp_logical_stride_c"
]
=
int
(
numpy
.
ceil
(
self
.
imshp_logical
[
2
]
/
float
(
self
.
imshp
[
2
])))
if
not
self
.
imshp
[
0
]
==
1
:
d
[
"affectation"
]
=
"+="
d
[
"all_shape"
]
=
"1"
d
[
"dim_zz_const"
]
=
"const"
d
[
"dim_zz_affect"
]
=
""
d
[
"assert_size"
]
=
"""
d
[
"self_kshp_logical_stride_r"
]
=
int
(
numpy
.
ceil
(
self
.
kshp_logical
[
0
]
/
float
(
self
.
kshp
[
0
])))
d
[
"self_kshp_logical_stride_c"
]
=
int
(
numpy
.
ceil
(
self
.
kshp_logical
[
1
]
/
float
(
self
.
kshp
[
1
])))
d
[
"self_imshp_logical_r"
]
=
self
.
imshp_logical
[
1
]
#numpy.B. 1 not 0
d
[
"self_imshp_logical_c"
]
=
self
.
imshp_logical
[
2
]
# numpy.B. 2 not 1
d
[
"self_imshp_logical_stride_r"
]
=
int
(
numpy
.
ceil
(
self
.
imshp_logical
[
1
]
/
float
(
self
.
imshp
[
1
])))
d
[
"self_imshp_logical_stride_c"
]
=
int
(
numpy
.
ceil
(
self
.
imshp_logical
[
2
]
/
float
(
self
.
imshp
[
2
])))
if
not
self
.
imshp
[
0
]
==
1
:
d
[
"affectation"
]
=
"+="
d
[
"all_shape"
]
=
"1"
d
[
"dim_zz_const"
]
=
"const"
d
[
"dim_zz_affect"
]
=
""
d
[
"assert_size"
]
=
"""
// Check the batch size and the number of kernels (sometimes constant in the graph)
if(img2d_dim[0] !=
%(self_bsize)
s!=0){
PyErr_Format(PyExc_ValueError,
"the batch size in the image (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
(long)img2d_dim[0], (long)
%(self_bsize)
s);
"the batch size in the image (
%%
ld) at run time is different"
" than at build time (
%%
ld) for the ConvOp.",
(long)img2d_dim[0], (long)
%(self_bsize)
s);
%(fail)
s;
}
if(kerns_dim[0] !=
%(self_nkern)
s!=0){
PyErr_Format(PyExc_ValueError,
"the number of kernels in the filter (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
(long)kerns_dim[0], (long)
%(self_nkern)
s);
"the number of kernels in the filter (
%%
ld) at run time is"
" different than at build time (
%%
ld) for the ConvOp.",
(long)kerns_dim[0], (long)
%(self_nkern)
s);
%(fail)
s;
}
// Check the size of the image (sometimes constant in the graph)
if(img2d_dim[1] !=
%(self_imshp0)
s){
PyErr_Format(PyExc_ValueError,
"the image stack size (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
(long)img2d_dim[1], (long)
%(self_imshp0)
s);
"the image stack size (
%%
ld) at run time is different than"
" at build time (
%%
ld) for the ConvOp.",
(long)img2d_dim[1], (long)
%(self_imshp0)
s);
%(fail)
s;
}
if(img2d_dim[2] !=
%(self_imshp1)
s){
PyErr_Format(PyExc_ValueError,
"the number of rows in the image (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
(long)img2d_dim[2], (long)
%(self_imshp1)
s);
"the number of rows in the image (
%%
ld) at run time is different"
" than at build time (
%%
ld) for the ConvOp.",
(long)img2d_dim[2], (long)
%(self_imshp1)
s);
%(fail)
s;
}
if(img2d_dim[3] !=
%(self_imshp2)
s){
PyErr_Format(PyExc_ValueError,
"the number of columns in the image (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
(long)img2d_dim[3], (long)
%(self_imshp2)
s);
"the number of columns in the image (
%%
ld) at run time is"
" different than at build time (
%%
ld) for the ConvOp.",
(long)img2d_dim[3], (long)
%(self_imshp2)
s);
%(fail)
s;
}
// Check the size of the output (sometimes constant in the graph)
if(dim_zz[0] !=
%(self_outshp0)
s!=0){
PyErr_Format(PyExc_ValueError,
"the precomputed number of rows in the output (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
(long)dim_zz[0], (long)
%(self_outshp0)
s);
"the precomputed number of rows in the output (
%%
ld) at run time"
" is different than at build time (
%%
ld) for the ConvOp.",
(long)dim_zz[0], (long)
%(self_outshp0)
s);
%(fail)
s;
}
if(dim_zz[1] !=
%(self_outshp1)
s!=0){
PyErr_Format(PyExc_ValueError,
"the precomputed number of columns in the output (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
(long)dim_zz[1], (long)
%(self_outshp1)
s);
"the precomputed number of columns in the output (
%%
ld) at run"
" time is different than at build time (
%%
ld) for the ConvOp.",
(long)dim_zz[1], (long)
%(self_outshp1)
s);
%(fail)
s;
}
// Check the size of the filter (sometimes constant in the graph)
if(kerns_dim[1]
%% %(self_imshp0)
s!=0){
PyErr_Format(PyExc_ValueError,
"the filter stack size (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
(long)kerns_dim[1], (long)
%(self_imshp0)
s);
"the filter stack size (
%%
ld) at run time is different than at"
" build time (
%%
ld) for the ConvOp.",
(long)kerns_dim[1], (long)
%(self_imshp0)
s);
%(fail)
s;
}
if(kerns_dim[2]
%% %(self_kshp0)
s!=0){
PyErr_Format(PyExc_ValueError,
"the number of rows in the filter (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
(long)kerns_dim[2], (long)
%(self_kshp0)
s);
"the number of rows in the filter (
%%
ld) at run time is different"
" than at build time (
%%
ld) for the ConvOp.",
(long)kerns_dim[2], (long)
%(self_kshp0)
s);
%(fail)
s;
}
if(kerns_dim[3]
%% %(self_kshp1)
s!=0){
PyErr_Format(PyExc_ValueError,
"the number of columns in the filter (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
(long)kerns_dim[3], (long)
%(self_kshp1)
s);
"the number of columns in the filter (
%%
ld) at run time is"
" different than at build time (
%%
ld) for the ConvOp.",
(long)kerns_dim[3], (long)
%(self_kshp1)
s);
%(fail)
s;
}
"""
%
(
locals
())
"""
%
(
locals
())
else
:
d
[
"self_bsize"
]
=
"
%(img2d)
s->dimensions[0]"
%
d
d
[
"self_nkern"
]
=
"
%(filtersflipped)
s->dimensions[0]"
%
d
d
[
"self_outshp0"
]
=
"-1"
d
[
"self_outshp1"
]
=
"-1"
d
[
"self_imshp0"
]
=
"
%(img2d)
s->dimensions[1]"
%
d
d
[
"self_imshp1"
]
=
"
%(img2d)
s->dimensions[2]"
%
d
d
[
"self_imshp2"
]
=
"
%(img2d)
s->dimensions[3]"
%
d
d
[
"self_kshp0"
]
=
"
%(filtersflipped)
s->dimensions[2]"
%
d
d
[
"self_kshp1"
]
=
"
%(filtersflipped)
s->dimensions[3]"
%
d
d
[
"affectation"
]
=
"+="
d
[
"all_shape"
]
=
"0"
d
[
"dim_zz_const"
]
=
""
d
[
"dim_zz_affect"
]
=
"""
d
[
"self_bsize"
]
=
"
%(img2d)
s->dimensions[0]"
%
d
d
[
"self_nkern"
]
=
"
%(filtersflipped)
s->dimensions[0]"
%
d
d
[
"self_outshp0"
]
=
"-1"
d
[
"self_outshp1"
]
=
"-1"
d
[
"self_imshp0"
]
=
"
%(img2d)
s->dimensions[1]"
%
d
d
[
"self_imshp1"
]
=
"
%(img2d)
s->dimensions[2]"
%
d
d
[
"self_imshp2"
]
=
"
%(img2d)
s->dimensions[3]"
%
d
d
[
"self_kshp0"
]
=
"
%(filtersflipped)
s->dimensions[2]"
%
d
d
[
"self_kshp1"
]
=
"
%(filtersflipped)
s->dimensions[3]"
%
d
d
[
"affectation"
]
=
"+="
d
[
"all_shape"
]
=
"0"
d
[
"dim_zz_const"
]
=
""
d
[
"dim_zz_affect"
]
=
"""
if (mode == FULL) {
dim_zz[0] = (int)ceil((dim_im[0]+dim_ker0-1)/float(
%(self_dx)
s));
dim_zz[1] = (int)ceil((dim_im[1]+dim_ker1-1)/float(
%(self_dy)
s));
...
...
@@ -1075,8 +1160,8 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){
dim_zz[0] = (int)ceil((dim_im[0]-dim_ker0+1)/float(
%(self_dx)
s));
dim_zz[1] = (int)ceil((dim_im[1]-dim_ker1+1)/float(
%(self_dy)
s));
}
"""
%
d
d
[
"assert_size"
]
=
""
"""
%
d
d
[
"assert_size"
]
=
""
if
self
.
kshp_logical_top_aligned
:
d
[
"self_kshp_logical_offset_r"
]
=
0
...
...
@@ -1084,36 +1169,47 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){
elif
all_shape
:
rstride
=
d
[
"self_kshp_logical_stride_r"
]
cstride
=
d
[
"self_kshp_logical_stride_c"
]
d
[
"self_kshp_logical_offset_r"
]
=
(
self
.
kshp_logical
[
0
]
-
(
self
.
kshp
[
0
]
*
rstride
)
-
1
+
rstride
)
%
rstride
d
[
"self_kshp_logical_offset_c"
]
=
(
self
.
kshp_logical
[
1
]
-
(
self
.
kshp
[
1
]
*
cstride
)
-
1
+
cstride
)
%
cstride
d
[
"self_kshp_logical_offset_r"
]
=
(
self
.
kshp_logical
[
0
]
-
(
self
.
kshp
[
0
]
*
rstride
)
-
1
+
rstride
)
%
rstride
d
[
"self_kshp_logical_offset_c"
]
=
(
self
.
kshp_logical
[
1
]
-
(
self
.
kshp
[
1
]
*
cstride
)
-
1
+
cstride
)
%
cstride
del
rstride
,
cstride
if
node
.
inputs
[
0
]
.
type
.
dtype
==
"float32"
:
d
[
"type"
]
=
"float"
elif
node
.
inputs
[
0
]
.
type
.
dtype
==
"float64"
:
d
[
"type"
]
=
"double"
else
:
raise
Exception
(
"Type
%
s not implemented"
%
node
.
inputs
[
0
]
.
type
.
dtype
)
d
[
"gemm"
]
=
'dgemm_'
if
not
d
[
"type"
]
==
"double"
:
d
[
"gemm"
]
=
'sgemm_'
if
node
.
inputs
[
0
]
.
type
.
dtype
==
"float32"
:
d
[
"type"
]
=
"float"
elif
node
.
inputs
[
0
]
.
type
.
dtype
==
"float64"
:
d
[
"type"
]
=
"double"
else
:
raise
Exception
(
"Type
%
s not implemented"
%
node
.
inputs
[
0
]
.
type
.
dtype
)
d
[
"gemm"
]
=
'dgemm_'
if
not
d
[
"type"
]
==
"double"
:
d
[
"gemm"
]
=
'sgemm_'
if
self
.
imshp
!=
self
.
imshp_logical
or
self
.
kshp
!=
self
.
kshp_logical
:
if
self
.
verbose
:
_logger
.
debug
(
"return imshp!=imshp_logical or self.kshp != self.kshp_logical shape version"
)
_logger
.
debug
(
"return imshp!=imshp_logical or"
" self.kshp != self.kshp_logical shape version"
)
return
_conv_op_code_a
%
d
if
self
.
unroll_patch
:
if
self
.
verbose
:
_logger
.
debug
(
"return unroll patch version. all_shape=
%
s"
,
all_shape
)
return
_conv_op_code_unroll_patch
%
d
if
self
.
unroll_batch
>
0
or
self
.
unroll_kern
>
0
:
assert
self
.
unroll_batch
>
0
assert
self
.
unroll_kern
>
0
_logger
.
debug
(
"return unroll patch version. all_shape=
%
s"
,
all_shape
)
return
_conv_op_code_unroll_patch
%
d
if
self
.
unroll_batch
>
0
or
self
.
unroll_kern
>
0
:
assert
self
.
unroll_batch
>
0
assert
self
.
unroll_kern
>
0
if
self
.
verbose
:
_logger
.
debug
(
"return unrolled batch (
%
s) and kern code (
%
s)"
,
str
(
self
.
unroll_batch
),
str
(
self
.
unroll_kern
))
str
(
self
.
unroll_batch
),
str
(
self
.
unroll_kern
))
return
gen_conv_code_unroll_batch_kern
(
d
,
self
.
unroll_batch
,
self
.
unroll_kern
)
#TODO: should we choose the unroll size automatically with the bigger divisor under 5?
if
self
.
out_mode
==
'valid'
and
self
.
dx
==
0
and
self
.
dy
==
0
:
if
self
.
out_mode
==
'valid'
and
self
.
dx
==
0
and
self
.
dy
==
0
:
if
self
.
verbose
:
_logger
.
debug
(
"return gemm version"
)
return
_conv_op_code_valid_gemm
%
d
...
...
@@ -1126,7 +1222,8 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){
_conv_op_code_a
=
"""
const int mode=
%(mode)
s;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
PyArrayObject *ain1=NULL, *ain2=NULL;
PyArrayObject *filtersflipped_arr=NULL, *img2d_arr=NULL;
const
%(type)
s fill_value = 0;
int type_im=PyArray_TYPE(
%(img2d)
s);
...
...
@@ -1216,12 +1313,17 @@ if ((filtersflipped_arr->strides[3] != (npy_intp)sizeof(%(type)s))
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported");
%(fail)
s;
PyErr_SetString(PyExc_ValueError,
"invalid mode, only full and valid are supported");
%(fail)
s;
}
typenum = PyArray_ObjectType((PyObject*)
%(img2d)
s, 0);
typenum_f = PyArray_ObjectType((PyObject*)
%(filtersflipped)
s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type");
%(fail)
s;}
if (typenum != typenum_f) {PyErr_SetString(PyExc_ValueError, "Input types must match");
%(fail)
s;}
if (typenum != typenum_f) {
PyErr_SetString(PyExc_ValueError, "Input types must match");
%(fail)
s;
}
if (!img2d)
%(fail)
s;
if (!filtersflipped)
%(fail)
s;
...
...
@@ -1249,10 +1351,19 @@ Os[0]=%(self_outshp0)s;
Os[1]=
%(self_outshp1)
s;
//assertions
if (
%(z)
s->strides[0] !=
%(z)
s->dimensions[1] *
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] * (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[1] !=
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] * (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[2] !=
%(z)
s->dimensions[3] * (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[3] != (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[0] !=
%(z)
s->dimensions[1] *
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] *
(npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[1] !=
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] *
(npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[2] !=
%(z)
s->dimensions[3] * (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[3] != (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
for(int b=0;b<
%(self_bsize)
s;b++){
for(int n_kern=0;n_kern<
%(self_nkern)
s;n_kern++){
...
...
@@ -1267,34 +1378,41 @@ for(int b=0;b< %(self_bsize)s;b++){
for (int iter_m=0; iter_m < Os[0]; iter_m++) {
/// Reposition index into input image based on requested output size
int pos_m = iter_m*
%(self_dx)
s; //row position in logical output image
int new_m; //row anchor in logical input image (we will loop upward from here)
// Reposition index into input image based on requested output size
//row position in logical output image
int pos_m = iter_m*
%(self_dx)
s;
//row anchor in logical input image (we will loop upward from here)
int new_m;
if (mode == FULL) new_m = pos_m ;
else new_m = (pos_m+dim_ker_log[0]-1);
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
int pos_n=iter_n*
%(self_dy)
s; // current col position in logical output image
// current col position in logical output image
int pos_n=iter_n*
%(self_dy)
s;
%(type)
s sum=0;
// Sum over kernel, if index into image is out of bounds
// fill with the value
for (int j_log=0; j_log <
%(self_kshp_logical_r)
s; j_log++) { // loop over logical rows in kernel
// loop over logical rows in kernel
for (int j_log=0; j_log <
%(self_kshp_logical_r)
s; j_log++) {
// ind0_log: row position in logical input image
int ind0_log = (new_m-j_log);
int ind0_log = (new_m-j_log); // ind0_log: row position in logical input image
if ((j_log <
%(self_kshp_logical_offset_r)
s) || (j_log -
%(self_kshp_logical_offset_r)
s) MOD
%(self_kshp_logical_stride_r)
s)
if ((j_log <
%(self_kshp_logical_offset_r)
s) ||
(j_log -
%(self_kshp_logical_offset_r)
s) MOD
%(self_kshp_logical_stride_r)
s)
continue;
if (ind0_log MOD
%(self_imshp_logical_stride_r)
s)
continue;
int j_phys = ((j_log-
%(self_kshp_logical_offset_r)
s) /
%(self_kshp_logical_stride_r)
s);
int j_phys = ((j_log-
%(self_kshp_logical_offset_r)
s) /
%(self_kshp_logical_stride_r)
s);
int ind0_phys = (ind0_log /
%(self_imshp_logical_stride_r)
s);
//std::cerr <<"j_log" << j_log << " j_phys " << j_phys << " " << ind0_phys << "
\\
n";
if(mode==FULL){
const
%(type)
s * idx_hvals=&hvals[j_phys*dim_ker_phys[1]]; //This is a pointer to the current row of the kernel
//This is a pointer to the current row of the kernel
const
%(type)
s * idx_hvals=&hvals[j_phys*dim_ker_phys[1]];
if(ind0_log < 0 || ind0_log >= dim_im_log[0]){
// the current row of the kernel is off the image
}else{
...
...
@@ -1304,30 +1422,40 @@ for(int b=0;b< %(self_bsize)s;b++){
for (int ind1_log=pos_n-k; k<max_k; k++,ind1_log--) {
if (1)
{
if ((k <
%(self_kshp_logical_offset_c)
s) || (k -
%(self_kshp_logical_offset_c)
s) MOD
%(self_kshp_logical_stride_c)
s)
if ((k <
%(self_kshp_logical_offset_c)
s) ||
(k -
%(self_kshp_logical_offset_c)
s) MOD
%(self_kshp_logical_stride_c)
s)
continue;
if (ind1_log MOD
%(self_imshp_logical_stride_c)
s)
if (ind1_log MOD
%(self_imshp_logical_stride_c)
s)
continue;
}
sum+= idx_hvals[(k-
%(self_kshp_logical_offset_c)
s) /
%(self_kshp_logical_stride_c)
s] * idx_in[ind1_log /
%(self_imshp_logical_stride_c)
s];
sum += idx_hvals[(k-
%(self_kshp_logical_offset_c)
s) /
%(self_kshp_logical_stride_c)
s] *
idx_in[ind1_log /
%(self_imshp_logical_stride_c)
s];
}
}
}else{
const
%(type)
s* idx_in=&in[ind0_phys*dim_im_phys[1]]; //JB: should be dim_im[1] right? (was dim_im[0])
}else{ // mode==VALID
//JB: should be dim_im[1] right? (was dim_im[0])
const
%(type)
s* idx_in=&in[ind0_phys*dim_im_phys[1]];
const
%(type)
s* idx_hvals=&hvals[j_phys*dim_ker_phys[1]];
int new_n = (pos_n+dim_ker_log[1]-1);
if (
%(self_imshp_logical_stride_c)
s != 1) // a general loop
{
for (int k=0,last=new_n; k < dim_ker_log[1]; k++,last--) {
if ((k <
%(self_kshp_logical_offset_c)
s) || (k -
%(self_kshp_logical_offset_c)
s) MOD
%(self_kshp_logical_stride_c)
s)
if ((k <
%(self_kshp_logical_offset_c)
s) ||
(k -
%(self_kshp_logical_offset_c)
s) MOD
%(self_kshp_logical_stride_c)
s)
continue;
else if (last MOD
%(self_imshp_logical_stride_c)
s)
continue;
else
{
sum+=idx_hvals[(k-
%(self_kshp_logical_offset_c)
s) /
%(self_kshp_logical_stride_c)
s]*idx_in[last/
%(self_imshp_logical_stride_c)
s];
sum+=idx_hvals[(k-
%(self_kshp_logical_offset_c)
s) /
%(self_kshp_logical_stride_c)
s] *
idx_in[last/
%(self_imshp_logical_stride_c)
s];
}
}
}
...
...
@@ -1335,7 +1463,8 @@ for(int b=0;b< %(self_bsize)s;b++){
{
int offset =
%(self_kshp_logical_offset_c)
s;
int k_phys=0;
for (int k_log=offset,last=new_n-offset; k_log < dim_ker_log[1]; ) {
for (int k_log=offset,last=new_n-offset;
k_log < dim_ker_log[1]; ) {
sum += idx_hvals[k_phys]*idx_in[last];
++k_phys;
last -=
%(self_kshp_logical_stride_c)
s;
...
...
@@ -1343,10 +1472,10 @@ for(int b=0;b< %(self_bsize)s;b++){
}
}
}
}//for j
}//for j
_log
out[iter_m*dim_zz[1]+iter_n]
%(affectation)
s sum;
}//for n
}//for m
}//for
iter_
n
}//for
iter_
m
}//for stack_size
if (0 && (mode==FULL)){
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i)
...
...
@@ -1585,33 +1714,36 @@ free(kbuf);
Py_XDECREF(img2d);
"""
def
gen_conv_code_unroll_batch_kern
(
d
,
unroll_bsize
=
1
,
unroll_ksize
=
1
):
def
gen_conv_code_unroll_batch_kern
(
d
,
unroll_bsize
=
1
,
unroll_ksize
=
1
):
""" c_code for ConvOp that unroll the batch size loop
"""
assert
unroll_bsize
>
0
and
unroll_ksize
>
0
if
d
.
has_key
(
"unroll_bsize"
)
or
d
.
has_key
(
"unroll_ksize"
)
or
d
.
has_key
(
"unroll_iter"
)
or
d
.
has_key
(
"unroll_biter"
)
or
d
.
has_key
(
"unroll_kiter"
)
:
assert
unroll_bsize
>
0
and
unroll_ksize
>
0
if
"unroll_bsize"
in
d
or
"unroll_ksize"
in
d
or
"unroll_iter"
in
d
or
"unroll_biter"
in
d
or
"unroll_kiter"
in
d
:
raise
Exception
(
"We can't use this dictionnary as we will overwrite some of its containt"
)
d
=
d
.
copy
()
d
=
d
.
copy
()
d
[
"unroll_bsize"
]
=
unroll_bsize
d
[
"unroll_ksize"
]
=
unroll_ksize
def
my_dup
(
st
,
size
):
s
=
""
d
[
"unroll_bsize"
]
=
unroll_bsize
d
[
"unroll_ksize"
]
=
unroll_ksize
def
my_dup
(
st
,
size
):
s
=
""
for
i
in
xrange
(
size
):
d
[
"unroll_iter"
]
=
i
s
+=
st
%
d
return
s
+
"
\n
"
d
[
"unroll_iter"
]
=
i
s
+=
st
%
d
return
s
+
"
\n
"
def
my_dup2
(
st
):
s
=
""
iter
=
0
s
=
""
iter
=
0
for
i
in
xrange
(
unroll_bsize
):
d
[
"unroll_biter"
]
=
i
d
[
"unroll_biter"
]
=
i
for
j
in
xrange
(
unroll_ksize
):
d
[
"unroll_kiter"
]
=
j
d
[
"unroll_iter"
]
=
iter
iter
+=
1
s
+=
st
%
d
return
s
+
"
\n
"
d
[
"unroll_kiter"
]
=
j
d
[
"unroll_iter"
]
=
iter
iter
+=
1
s
+=
st
%
d
return
s
+
"
\n
"
ret
=
"""
const int mode=
%(mode)
s;
int typenum=0, typenum_f=0;
...
...
@@ -1765,7 +1897,8 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
int pos_n=iter_n*
%(self_dy)
s;
"""
%
d
ret
+=
my_dup
(
"
%(type)
s sum
%(unroll_iter)
s=0;"
,
unroll_bsize
*
unroll_ksize
)
ret
+=
my_dup
(
"
%(type)
s sum
%(unroll_iter)
s=0;"
,
unroll_bsize
*
unroll_ksize
)
ret
+=
"""
// Sum over kernel, if index into image is out of bounds
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论