Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
1ebe9fd6
提交
1ebe9fd6
authored
8月 23, 2012
作者:
Frederic Bastien
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
pep8 code redability.
上级
97e12dae
显示空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
515 行增加
和
382 行删除
+515
-382
conv.py
theano/tensor/nnet/conv.py
+515
-382
没有找到文件。
theano/tensor/nnet/conv.py
浏览文件 @
1ebe9fd6
...
@@ -23,18 +23,19 @@ from theano.gof.python25 import any
...
@@ -23,18 +23,19 @@ from theano.gof.python25 import any
imported_scipy_signal
=
False
imported_scipy_signal
=
False
try
:
try
:
# TODO: move these back out to global scope when they no longer cause an atexit error
# TODO: move these back out to global scope when they no longer
# cause an atexit error
from
scipy.signal.signaltools
import
_valfrommode
,
_bvalfromboundary
from
scipy.signal.signaltools
import
_valfrommode
,
_bvalfromboundary
from
scipy.signal.sigtools
import
_convolve2d
from
scipy.signal.sigtools
import
_convolve2d
imported_scipy_signal
=
True
imported_scipy_signal
=
True
except
ImportError
:
except
ImportError
:
pass
pass
_logger
=
logging
.
getLogger
(
"theano.tensor.nnet.conv"
)
_logger
=
logging
.
getLogger
(
"theano.tensor.nnet.conv"
)
def
conv2d
(
input
,
filters
,
image_shape
=
None
,
filter_shape
=
None
,
def
conv2d
(
input
,
filters
,
image_shape
=
None
,
filter_shape
=
None
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
**
kargs
):
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
**
kargs
):
"""This function will build the symbolic graph for convolving a stack of input
"""This function will build the symbolic graph for convolving a stack of input
images with a set of filters. The implementation is modelled after
images with a set of filters. The implementation is modelled after
Convolutional Neural Networks (CNN). It is simply a wrapper to the ConvOp but
Convolutional Neural Networks (CNN). It is simply a wrapper to the ConvOp but
...
@@ -62,8 +63,10 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
...
@@ -62,8 +63,10 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
:param filter_shape: (nb filters, stack size, nb row, nb col)
:param filter_shape: (nb filters, stack size, nb row, nb col)
Optional, used for optimization.
Optional, used for optimization.
:param kwargs: kwargs are passed onto ConvOp. Can be used to set the following:
:param kwargs: kwargs are passed onto ConvOp.
unroll_batch, unroll_kern, unroll_patch, openmp (see ConvOp doc)
Can be used to set the following:
unroll_batch, unroll_kern, unroll_patch,
openmp (see ConvOp doc)
openmp: By default have the same value as
openmp: By default have the same value as
config.openmp. For small image, filter,
config.openmp. For small image, filter,
...
@@ -77,8 +80,8 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
...
@@ -77,8 +80,8 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
with openmp on a core 2 duo.
with openmp on a core 2 duo.
:rtype: symbolic 4D tensor
:rtype: symbolic 4D tensor
:return: set of feature maps generated by convolutional layer. Tensor is
of shape
:return: set of feature maps generated by convolutional layer. Tensor is
(batch size, nb filters, output row, output col)
of shape
(batch size, nb filters, output row, output col)
"""
"""
...
@@ -87,20 +90,22 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
...
@@ -87,20 +90,22 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
image_shape
=
list
(
image_shape
)
image_shape
=
list
(
image_shape
)
for
i
in
xrange
(
len
(
image_shape
)):
for
i
in
xrange
(
len
(
image_shape
)):
if
image_shape
[
i
]
is
not
None
:
if
image_shape
[
i
]
is
not
None
:
image_shape
[
i
]
=
get_constant_value
(
as_tensor_variable
(
image_shape
[
i
]))
image_shape
[
i
]
=
get_constant_value
(
as_tensor_variable
(
image_shape
[
i
]))
assert
str
(
image_shape
[
i
]
.
dtype
)
.
startswith
(
'int'
)
assert
str
(
image_shape
[
i
]
.
dtype
)
.
startswith
(
'int'
)
image_shape
[
i
]
=
int
(
image_shape
[
i
])
image_shape
[
i
]
=
int
(
image_shape
[
i
])
if
filter_shape
is
not
None
:
if
filter_shape
is
not
None
:
filter_shape
=
list
(
filter_shape
)
filter_shape
=
list
(
filter_shape
)
for
i
in
xrange
(
len
(
filter_shape
)):
for
i
in
xrange
(
len
(
filter_shape
)):
if
filter_shape
[
i
]
is
not
None
:
if
filter_shape
[
i
]
is
not
None
:
filter_shape
[
i
]
=
get_constant_value
(
as_tensor_variable
(
filter_shape
[
i
]))
filter_shape
[
i
]
=
get_constant_value
(
as_tensor_variable
(
filter_shape
[
i
]))
assert
str
(
filter_shape
[
i
]
.
dtype
)
.
startswith
(
'int'
)
assert
str
(
filter_shape
[
i
]
.
dtype
)
.
startswith
(
'int'
)
filter_shape
[
i
]
=
int
(
filter_shape
[
i
])
filter_shape
[
i
]
=
int
(
filter_shape
[
i
])
if
image_shape
and
filter_shape
:
if
image_shape
and
filter_shape
:
try
:
try
:
assert
image_shape
[
1
]
==
filter_shape
[
1
]
assert
image_shape
[
1
]
==
filter_shape
[
1
]
except
Exception
:
except
Exception
:
print
'image '
,
image_shape
,
' filters '
,
filter_shape
print
'image '
,
image_shape
,
' filters '
,
filter_shape
raise
raise
...
@@ -118,7 +123,7 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
...
@@ -118,7 +123,7 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
bsize
,
imshp
=
None
,
None
bsize
,
imshp
=
None
,
None
op
=
ConvOp
(
output_mode
=
border_mode
,
dx
=
subsample
[
0
],
dy
=
subsample
[
1
],
op
=
ConvOp
(
output_mode
=
border_mode
,
dx
=
subsample
[
0
],
dy
=
subsample
[
1
],
imshp
=
imshp
,
kshp
=
kshp
,
nkern
=
nkern
,
bsize
=
bsize
,
**
kargs
)
imshp
=
imshp
,
kshp
=
kshp
,
nkern
=
nkern
,
bsize
=
bsize
,
**
kargs
)
return
op
(
input
,
filters
)
return
op
(
input
,
filters
)
...
@@ -141,8 +146,8 @@ class ConvOp(Op):
...
@@ -141,8 +146,8 @@ class ConvOp(Op):
The output of ConvOp is a 4D tensor, generated as follows:
The output of ConvOp is a 4D tensor, generated as follows:
output[b,k,:,:] =
\
sum_i input[b,i,:,:] * filter[k,i,:,:]
\f
orall b,k
output[b,k,:,:] =
\
sum_i input[b,i,:,:] * filter[k,i,:,:]
\f
orall b,k
where b is the mini-batch index, k the filter index and * is the
convolution
where b is the mini-batch index, k the filter index and * is the
operator.
convolution
operator.
"""
"""
__attrnames
=
[
'imshp'
,
'kshp'
,
'nkern'
,
'bsize'
,
'dx'
,
'dy'
,
'out_mode'
,
__attrnames
=
[
'imshp'
,
'kshp'
,
'nkern'
,
'bsize'
,
'dx'
,
'dy'
,
'out_mode'
,
...
@@ -161,63 +166,63 @@ class ConvOp(Op):
...
@@ -161,63 +166,63 @@ class ConvOp(Op):
# using the real shape and the same dtype could also help.
# using the real shape and the same dtype could also help.
#unroll_batch, unroll_kern, valid time, full time
#unroll_batch, unroll_kern, valid time, full time
speed_unroll_batch_kern
=
[(
1
,
1
,
2.4661250114440918
,
6.5472931861877441
)
,
speed_unroll_batch_kern
=
[(
1
,
1
,
2.4661250114440918
,
6.5472931861877441
)
,
(
1
,
2
,
1.5869178771972656
,
5.1499760150909424
)
,
(
1
,
2
,
1.5869178771972656
,
5.1499760150909424
)
,
(
1
,
3
,
1.4270510673522949
,
3.6593470573425293
)
,
(
1
,
3
,
1.4270510673522949
,
3.6593470573425293
)
,
(
1
,
4
,
1.3373479843139648
,
3.3451821804046631
)
,
(
1
,
4
,
1.3373479843139648
,
3.3451821804046631
)
,
(
1
,
5
,
1.2818830013275146
,
3.1444568634033203
)
,
(
1
,
5
,
1.2818830013275146
,
3.1444568634033203
)
,
(
1
,
6
,
1.2521560192108154
,
3.0256359577178955
)
,
(
1
,
6
,
1.2521560192108154
,
3.0256359577178955
)
,
(
1
,
10
,
1.2134110927581787
,
2.9174180030822754
)
,
(
1
,
10
,
1.2134110927581787
,
2.9174180030822754
)
,
(
2
,
1
,
1.657214879989624
,
4.5261678695678711
)
,
(
2
,
1
,
1.657214879989624
,
4.5261678695678711
)
,
(
2
,
2
,
1.2123160362243652
,
2.9747390747070312
)
,
(
2
,
2
,
1.2123160362243652
,
2.9747390747070312
)
,
(
2
,
3
,
1.0758891105651855
,
2.5690360069274902
)
,
(
2
,
3
,
1.0758891105651855
,
2.5690360069274902
)
,
(
2
,
4
,
1.0683329105377197
,
2.4233770370483398
)
,
(
2
,
4
,
1.0683329105377197
,
2.4233770370483398
)
,
(
2
,
5
,
1.0955719947814941
,
2.3999948501586914
)
,
(
2
,
5
,
1.0955719947814941
,
2.3999948501586914
)
,
(
2
,
6
,
1.5935721397399902
,
2.6878271102905273
)
,
(
2
,
6
,
1.5935721397399902
,
2.6878271102905273
)
,
(
2
,
10
,
1.8511250019073486
,
3.2417428493499756
)
,
(
2
,
10
,
1.8511250019073486
,
3.2417428493499756
)
,
(
3
,
1
,
1.5948119163513184
,
3.631148099899292
)
,
(
3
,
1
,
1.5948119163513184
,
3.631148099899292
)
,
(
3
,
2
,
1.0761330127716064
,
2.6011371612548828
)
,
(
3
,
2
,
1.0761330127716064
,
2.6011371612548828
)
,
(
3
,
3
,
1.0551531314849854
,
2.4200370311737061
)
,
(
3
,
3
,
1.0551531314849854
,
2.4200370311737061
)
,
(
3
,
4
,
1.3930759429931641
,
2.5211219787597656
)
,
(
3
,
4
,
1.3930759429931641
,
2.5211219787597656
)
,
(
3
,
5
,
1.4330689907073975
,
2.5704989433288574
)
,
(
3
,
5
,
1.4330689907073975
,
2.5704989433288574
)
,
(
3
,
6
,
1.362138032913208
,
2.5964410305023193
)
,
(
3
,
6
,
1.362138032913208
,
2.5964410305023193
)
,
(
3
,
10
,
1.6582000255584717
,
2.9907989501953125
)
,
(
3
,
10
,
1.6582000255584717
,
2.9907989501953125
)
,
(
4
,
1
,
1.4793620109558105
,
3.3473429679870605
)
,
(
4
,
1
,
1.4793620109558105
,
3.3473429679870605
)
,
(
4
,
2
,
1.0671560764312744
,
2.4171769618988037
)
,
(
4
,
2
,
1.0671560764312744
,
2.4171769618988037
)
,
(
4
,
3
,
1.2569692134857178
,
2.2807950973510742
)
,
(
4
,
3
,
1.2569692134857178
,
2.2807950973510742
)
,
(
4
,
4
,
1.3456289768218994
,
2.6219108104705811
)
,
(
4
,
4
,
1.3456289768218994
,
2.6219108104705811
)
,
(
4
,
5
,
1.4055080413818359
,
2.4606490135192871
)
,
(
4
,
5
,
1.4055080413818359
,
2.4606490135192871
)
,
(
4
,
6
,
1.372107982635498
,
2.551663875579834
)
,
(
4
,
6
,
1.372107982635498
,
2.551663875579834
)
,
(
4
,
10
,
1.599470853805542
,
2.9172940254211426
)
,
(
4
,
10
,
1.599470853805542
,
2.9172940254211426
)
,
(
5
,
1
,
1.4115700721740723
,
3.2077109813690186
)
,
(
5
,
1
,
1.4115700721740723
,
3.2077109813690186
)
,
(
5
,
2
,
1.0635769367218018
,
2.2648060321807861
)
,
(
5
,
2
,
1.0635769367218018
,
2.2648060321807861
)
,
(
5
,
3
,
1.3842809200286865
,
2.6135518550872803
)
,
(
5
,
3
,
1.3842809200286865
,
2.6135518550872803
)
,
(
5
,
4
,
1.3470511436462402
,
2.3852400779724121
)
,
(
5
,
4
,
1.3470511436462402
,
2.3852400779724121
)
,
(
5
,
5
,
1.3539440631866455
,
2.5245928764343262
)
,
(
5
,
5
,
1.3539440631866455
,
2.5245928764343262
)
,
(
5
,
6
,
1.4037849903106689
,
2.5985310077667236
)
,
(
5
,
6
,
1.4037849903106689
,
2.5985310077667236
)
,
(
5
,
10
,
1.6120610237121582
,
2.8127608299255371
)
,
(
5
,
10
,
1.6120610237121582
,
2.8127608299255371
)
,
(
6
,
1
,
1.3623628616333008
,
3.021122932434082
)
,
(
6
,
1
,
1.3623628616333008
,
3.021122932434082
)
,
(
6
,
2
,
1.1697649955749512
,
2.6285450458526611
)
,
(
6
,
2
,
1.1697649955749512
,
2.6285450458526611
)
,
(
6
,
3
,
1.2980999946594238
,
2.4746189117431641
)
,
(
6
,
3
,
1.2980999946594238
,
2.4746189117431641
)
,
(
6
,
4
,
1.3739941120147705
,
2.5579929351806641
)
,
(
6
,
4
,
1.3739941120147705
,
2.5579929351806641
)
,
(
6
,
5
,
1.3967819213867188
,
2.5522029399871826
)
,
(
6
,
5
,
1.3967819213867188
,
2.5522029399871826
)
,
(
6
,
6
,
1.4279270172119141
,
2.6127138137817383
)
,
(
6
,
6
,
1.4279270172119141
,
2.6127138137817383
)
,
(
6
,
10
,
1.605496883392334
,
2.864037036895752
)
,
(
6
,
10
,
1.605496883392334
,
2.864037036895752
)
,
(
10
,
1
,
1.6401121616363525
,
2.970099925994873
)
,
(
10
,
1
,
1.6401121616363525
,
2.970099925994873
)
,
(
10
,
2
,
1.46710205078125
,
2.7231831550598145
)
,
(
10
,
2
,
1.46710205078125
,
2.7231831550598145
)
,
(
10
,
3
,
1.4193780422210693
,
2.6087639331817627
)
,
(
10
,
3
,
1.4193780422210693
,
2.6087639331817627
)
,
(
10
,
4
,
1.4657118320465088
,
2.6246678829193115
)
,
(
10
,
4
,
1.4657118320465088
,
2.6246678829193115
)
,
(
10
,
5
,
1.5052611827850342
,
2.6542458534240723
)
,
(
10
,
5
,
1.5052611827850342
,
2.6542458534240723
)
,
(
10
,
6
,
1.5214400291442871
,
2.7243161201477051
)
,
(
10
,
6
,
1.5214400291442871
,
2.7243161201477051
)
,
(
10
,
10
,
1.6116268634796143
,
2.956165075302124
)]
(
10
,
10
,
1.6116268634796143
,
2.956165075302124
)]
#valid time, full time
#valid time, full time
speed_unroll_patch_noshape
=
[
2.0109100341796875
,
5.8175678253173828
]
speed_unroll_patch_noshape
=
[
2.0109100341796875
,
5.8175678253173828
]
#valid time, full time
#valid time, full time
speed_unroll_patch_shape
=
[
1.2967290878295898
,
5.5283889770507812
]
speed_unroll_patch_shape
=
[
1.2967290878295898
,
5.5283889770507812
]
@staticmethod
@staticmethod
def
getOutputShape
(
inshp
,
kshp
,
stride
=
(
1
,
1
),
mode
=
'valid'
):
def
getOutputShape
(
inshp
,
kshp
,
stride
=
(
1
,
1
),
mode
=
'valid'
):
"""
"""
Computes the output dimensions of convolving an image of shape "inshp"
Computes the output dimensions of convolving an image of shape "inshp"
with kernels of shape "kshp".
with kernels of shape "kshp".
...
@@ -228,12 +233,13 @@ class ConvOp(Op):
...
@@ -228,12 +233,13 @@ class ConvOp(Op):
:return: (rows,cols) of output image
:return: (rows,cols) of output image
"""
"""
dx
,
dy
=
stride
dx
,
dy
=
stride
if
mode
==
'valid'
:
s
=
-
1
if
mode
==
'valid'
:
else
:
s
=
1
s
=
-
1
else
:
s
=
1
inshp
,
kshp
=
numpy
.
array
(
inshp
),
numpy
.
array
(
kshp
)
inshp
,
kshp
=
numpy
.
array
(
inshp
),
numpy
.
array
(
kshp
)
return
numpy
.
int64
(
numpy
.
ceil
((
inshp
+
s
*
kshp
-
s
*
1
)
/
\
return
numpy
.
int64
(
numpy
.
ceil
((
inshp
+
s
*
kshp
-
s
*
1
)
/
numpy
.
array
([
dx
,
dy
],
dtype
=
'float'
)))
numpy
.
array
([
dx
,
dy
],
dtype
=
'float'
)))
def
__init__
(
self
,
imshp
=
None
,
kshp
=
None
,
nkern
=
None
,
bsize
=
None
,
def
__init__
(
self
,
imshp
=
None
,
kshp
=
None
,
nkern
=
None
,
bsize
=
None
,
dx
=
1
,
dy
=
1
,
dx
=
1
,
dy
=
1
,
...
@@ -259,12 +265,13 @@ class ConvOp(Op):
...
@@ -259,12 +265,13 @@ class ConvOp(Op):
By default we try to select the fastest version. You can specify it
By default we try to select the fastest version. You can specify it
with the unroll_batch, unroll_kern, and unroll_patch parameter.
with the unroll_batch, unroll_kern, and unroll_patch parameter.
The second type of optimization is hardcoding some dimensions into the
code
The second type of optimization is hardcoding some dimensions into the
when all shape are know.
code
when all shape are know.
This make a significant difference for the 'full' output_mode.
This make a significant difference for the 'full' output_mode.
Some times, the fastest implementation on x86-64 uses {unroll_batch=4, unroll_kern=4,
Some times, the fastest implementation on x86-64 uses
unroll_patch=False} with all other shape parameters being provided.
{unroll_batch=4, unroll_kern=4, unroll_patch=False}
with all other shape parameters being provided.
For optimizing other architectures, see:
For optimizing other architectures, see:
Kazushige Goto and Robert A. Van De Geijn, Anatomy of High-Performance
Kazushige Goto and Robert A. Van De Geijn, Anatomy of High-Performance
...
@@ -278,7 +285,8 @@ class ConvOp(Op):
...
@@ -278,7 +285,8 @@ class ConvOp(Op):
Optional parameters: (will generate more optimal c code)
Optional parameters: (will generate more optimal c code)
:type imshp: tuple of len 2 or 3: 2 for 2d image, 3 for a stack of 2d images.
:type imshp: tuple of len 2 or 3: 2 for 2d image,
3 for a stack of 2d images.
:param imshp: (stacksize, nb image row, nb image col)
:param imshp: (stacksize, nb image row, nb image col)
:type kshp: tuple of len 2
:type kshp: tuple of len 2
:param kshp: (nb kernel row, nb kernel col)
:param kshp: (nb kernel row, nb kernel col)
...
@@ -294,16 +302,18 @@ class ConvOp(Op):
...
@@ -294,16 +302,18 @@ class ConvOp(Op):
Params which select the version of code used:
Params which select the version of code used:
:type unroll_patch: bool
:type unroll_patch: bool
:param unroll_patch: use a version of c_code that unroll the patch loop that don't
:param unroll_patch: use a version of c_code that unroll the patch loop
request all shape information to work, but if all shape information are present, will
that don't request all shape information to work, but if all shape
information are present, will
use it to hardcode the value in the code for faster code.
use it to hardcode the value in the code for faster code.
:type unroll_batch:int
:type unroll_batch:int
:param unroll_batch: use a version of c_code that unroll the batch
(by unroll_batch) and
:param unroll_batch: use a version of c_code that unroll the batch
the nkern(by unroll_kern) loop. The size must by a multiple of bsize or nkern
(by unroll_batch) and the nkern(by unroll_kern) loop. The size
respectively.
must by a multiple of bsize or nkern
respectively.
:type unroll_kern:int
:type unroll_kern:int
:param unroll_kern: use a version of c_code that unroll the batch(by unroll_batch) and
:param unroll_kern: use a version of c_code that unroll the batch
the nkern(by unroll_kern) loop. The size must by a multiple of bsize or nkern
(by unroll_batch) and the nkern(by unroll_kern) loop. The size
must by a multiple of bsize or nkern
respectively.
respectively.
:type verbose: int
:type verbose: int
...
@@ -316,8 +326,10 @@ class ConvOp(Op):
...
@@ -316,8 +326,10 @@ class ConvOp(Op):
:param kshp_logical_top_aligned: idem
:param kshp_logical_top_aligned: idem
"""
"""
# We must continue to consider None as 1 for backward compatibility.
# We must continue to consider None as 1 for backward compatibility.
if
dx
is
None
:
dx
=
1
if
dx
is
None
:
if
dy
is
None
:
dy
=
1
dx
=
1
if
dy
is
None
:
dy
=
1
if
int
(
dx
)
!=
dx
:
if
int
(
dx
)
!=
dx
:
raise
TypeError
(
'ConvOp.__init__ param dx must be an int'
,
dx
)
raise
TypeError
(
'ConvOp.__init__ param dx must be an int'
,
dx
)
...
@@ -330,8 +342,9 @@ class ConvOp(Op):
...
@@ -330,8 +342,9 @@ class ConvOp(Op):
all_shape
=
imshp
is
not
None
and
kshp
is
not
None
and
\
all_shape
=
imshp
is
not
None
and
kshp
is
not
None
and
\
nkern
is
not
None
and
bsize
is
not
None
nkern
is
not
None
and
bsize
is
not
None
if
(
unroll_batch
>
0
or
unroll_kern
>
0
)
and
not
all_shape
:
if
(
unroll_batch
>
0
or
unroll_kern
>
0
)
and
not
all_shape
:
raise
Exception
(
"In ConvOp, when using unroll_batch and unroll_nkern, all shape are needed"
)
raise
Exception
(
"In ConvOp, when using unroll_batch and"
" unroll_nkern, all shape are needed"
)
if
openmp
is
None
:
if
openmp
is
None
:
openmp
=
theano
.
config
.
openmp
openmp
=
theano
.
config
.
openmp
...
@@ -343,9 +356,9 @@ class ConvOp(Op):
...
@@ -343,9 +356,9 @@ class ConvOp(Op):
if
imshp
is
not
None
:
if
imshp
is
not
None
:
imshp
=
tuple
(
imshp
)
imshp
=
tuple
(
imshp
)
if
len
(
imshp
)
==
2
:
if
len
(
imshp
)
==
2
:
imshp
=
(
1
,)
+
imshp
imshp
=
(
1
,)
+
imshp
elif
len
(
imshp
)
==
3
:
elif
len
(
imshp
)
==
3
:
imshp
=
imshp
imshp
=
imshp
else
:
else
:
raise
Exception
(
"bad len for imshp"
)
raise
Exception
(
"bad len for imshp"
)
...
@@ -356,73 +369,83 @@ class ConvOp(Op):
...
@@ -356,73 +369,83 @@ class ConvOp(Op):
self
.
kshp
=
kshp
self
.
kshp
=
kshp
self
.
nkern
=
nkern
self
.
nkern
=
nkern
self
.
bsize
=
bsize
self
.
bsize
=
bsize
self
.
dx
=
dx
self
.
dx
=
dx
self
.
dy
=
dy
self
.
dy
=
dy
self
.
verbose
=
verbose
self
.
verbose
=
verbose
self
.
version
=
version
self
.
version
=
version
if
openmp
is
None
:
if
openmp
is
None
:
openmp
=
config
.
openmp
openmp
=
config
.
openmp
self
.
openmp
=
openmp
self
.
openmp
=
openmp
# a triple
# a triple
self
.
imshp_logical
=
self
.
imshp
self
.
imshp_logical
=
self
.
imshp
if
imshp_logical
is
not
None
:
self
.
imshp_logical
=
tuple
(
imshp_logical
)
if
imshp_logical
is
not
None
:
self
.
imshp_logical
=
tuple
(
imshp_logical
)
assert
(
self
.
imshp
is
None
and
self
.
imshp_logical
is
None
)
or
\
assert
(
self
.
imshp
is
None
and
self
.
imshp_logical
is
None
)
or
\
(
len
(
self
.
imshp
)
==
len
(
self
.
imshp_logical
))
(
len
(
self
.
imshp
)
==
len
(
self
.
imshp_logical
))
# a pair
# a pair
self
.
kshp_logical
=
self
.
kshp
self
.
kshp_logical
=
self
.
kshp
if
kshp_logical
is
not
None
:
self
.
kshp_logical
=
tuple
(
kshp_logical
)
if
kshp_logical
is
not
None
:
self
.
kshp_logical
=
tuple
(
kshp_logical
)
self
.
kshp_logical_top_aligned
=
kshp_logical_top_aligned
self
.
kshp_logical_top_aligned
=
kshp_logical_top_aligned
self
.
unroll_batch
=
unroll_batch
self
.
unroll_batch
=
unroll_batch
self
.
unroll_kern
=
unroll_kern
self
.
unroll_kern
=
unroll_kern
self
.
unroll_patch
=
unroll_patch
self
.
unroll_patch
=
unroll_patch
if
self
.
unroll_batch
and
not
self
.
unroll_kern
:
self
.
unroll_kern
=
1
if
self
.
unroll_batch
and
not
self
.
unroll_kern
:
if
self
.
unroll_kern
and
not
self
.
unroll_batch
:
self
.
unroll_batch
=
1
self
.
unroll_kern
=
1
if
self
.
unroll_kern
and
not
self
.
unroll_batch
:
self
.
unroll_batch
=
1
#downcast unroll_batch if not a divisor of batch size
#downcast unroll_batch if not a divisor of batch size
if
self
.
unroll_batch
>
0
and
self
.
bsize
%
self
.
unroll_batch
!=
0
:
if
self
.
unroll_batch
>
0
and
self
.
bsize
%
self
.
unroll_batch
!=
0
:
if
self
.
bsize
<=
self
.
unroll_batch
:
if
self
.
bsize
<=
self
.
unroll_batch
:
self
.
unroll_batch
=
self
.
bsize
self
.
unroll_batch
=
self
.
bsize
else
:
else
:
#find the maximum value under unroll_batch that would work
#find the maximum value under unroll_batch that would work
new
=
self
.
unroll_batch
new
=
self
.
unroll_batch
assert
(
new
>=
1
)
assert
(
new
>=
1
)
while
self
.
bsize
%
new
!=
0
:
while
self
.
bsize
%
new
!=
0
:
new
-=
1
new
-=
1
warnstr
=
"OPTIMISATION WARNING: in ConvOp.__init__() unroll_batch(
%
i)"
\
warnstr
=
(
"OPTIMISATION WARNING: in ConvOp.__init__() "
"must be 0 or a divisor of bsize(
%
i). We revert it to
%
i. This"
\
"unroll_batch(
%
i) must be 0 or a divisor of"
" won't change the result, but may make it slower."
" bsize(
%
i). We revert it to
%
i. This"
" won't change the result, but may make it slower."
)
_logger
.
warn
(
warnstr
,
self
.
unroll_batch
,
self
.
bsize
,
new
)
_logger
.
warn
(
warnstr
,
self
.
unroll_batch
,
self
.
bsize
,
new
)
self
.
unroll_batch
=
new
self
.
unroll_batch
=
new
#downcast unroll_kern if not a divisor of nb of kernel
#downcast unroll_kern if not a divisor of nb of kernel
if
self
.
unroll_kern
>
0
and
self
.
nkern
%
self
.
unroll_kern
!=
0
:
if
self
.
unroll_kern
>
0
and
self
.
nkern
%
self
.
unroll_kern
!=
0
:
if
self
.
nkern
<=
self
.
unroll_kern
:
if
self
.
nkern
<=
self
.
unroll_kern
:
self
.
unroll_kern
=
self
.
nkern
self
.
unroll_kern
=
self
.
nkern
else
:
else
:
#find the maximum value under unroll_kern that would work
#find the maximum value under unroll_kern that would work
new
=
self
.
unroll_kern
new
=
self
.
unroll_kern
assert
(
new
>=
1
)
assert
(
new
>=
1
)
while
self
.
nkern
%
new
!=
0
:
while
self
.
nkern
%
new
!=
0
:
new
-=
1
new
-=
1
warnstr
=
"OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(
%
i)"
\
warnstr
=
(
"OPTIMISATION WARNING: in ConvOp.__init__()"
"should be 0 or a divisor of nkern(
%
i). We revert it to
%
i."
\
" unroll_kern(
%
i) should be 0 or a divisor of"
"This won't change the result, but may make it slower."
" nkern(
%
i). We revert it to
%
i. This"
" won't change the result, but may make it slower."
)
_logger
.
warn
(
warnstr
,
self
.
unroll_kern
,
self
.
nkern
,
new
)
_logger
.
warn
(
warnstr
,
self
.
unroll_kern
,
self
.
nkern
,
new
)
self
.
unroll_kern
=
new
self
.
unroll_kern
=
new
if
all_shape
:
if
all_shape
:
self
.
outshp
=
ConvOp
.
getOutputShape
(
self
.
imshp_logical
[
1
:],
self
.
kshp_logical
,
(
dx
,
dy
),
output_mode
)
self
.
outshp
=
ConvOp
.
getOutputShape
(
self
.
imshp_logical
[
1
:],
self
.
fulloutshp
=
ConvOp
.
getOutputShape
(
self
.
imshp_logical
[
1
:],
self
.
kshp_logical
,
(
1
,
1
),
output_mode
)
self
.
kshp_logical
,
(
dx
,
dy
),
output_mode
)
self
.
fulloutshp
=
ConvOp
.
getOutputShape
(
self
.
imshp_logical
[
1
:],
self
.
kshp_logical
,
(
1
,
1
),
output_mode
)
else
:
else
:
self
.
outshp
=
None
self
.
outshp
=
None
self
.
fulloutshp
=
None
self
.
fulloutshp
=
None
...
@@ -430,52 +453,60 @@ class ConvOp(Op):
...
@@ -430,52 +453,60 @@ class ConvOp(Op):
self
.
out_mode
=
output_mode
self
.
out_mode
=
output_mode
if
not
self
.
out_mode
in
[
"valid"
,
"full"
]:
if
not
self
.
out_mode
in
[
"valid"
,
"full"
]:
raise
Exception
(
"Mode
%
s not implemented"
%
self
.
out_mode
)
raise
Exception
(
"Mode
%
s not implemented"
%
self
.
out_mode
)
if
all_shape
and
not
(
self
.
outshp
>
0
)
.
all
():
if
all_shape
and
not
(
self
.
outshp
>
0
)
.
all
():
raise
Exception
((
"Bad size for the output shape. Verify that [post-"
\
raise
Exception
(
"Bad size for the output shape. Verify that [post-"
"supersampling] input shape (
%
s) and kern shape(
%
s) are ok. "
\
"supersampling] input shape (
%
s) and kern"
"(Hint: kerns must fit inside image in valid mode)"
)
%
" shape(
%
s) are ok. (Hint: kerns must fit inside"
(
self
.
imshp_logical
,
self
.
kshp_logical
))
" image in valid mode)"
%
(
self
.
imshp_logical
,
self
.
kshp_logical
))
if
(
self
.
unroll_kern
is
None
and
self
.
unroll_batch
is
None
and
self
.
unroll_patch
is
None
):
if
self
.
unroll_kern
is
None
and
self
.
unroll_batch
is
None
and
self
.
unroll_patch
is
None
:
#no version specified. Find the faster we have
#no version specified. Find the faster we have
if
self
.
bsize
is
None
and
self
.
nkern
is
None
:
if
self
.
bsize
is
None
and
self
.
nkern
is
None
:
self
.
unroll_patch
=
True
self
.
unroll_patch
=
True
elif
self
.
bsize
is
not
None
and
self
.
nkern
is
not
None
:
elif
self
.
bsize
is
not
None
and
self
.
nkern
is
not
None
:
bsize
=
self
.
bsize
bsize
=
self
.
bsize
nkern
=
self
.
nkern
nkern
=
self
.
nkern
if
bsize
is
None
:
if
bsize
is
None
:
bsize
=
1
bsize
=
1
if
nkern
is
None
:
if
nkern
is
None
:
nkern
=
1
nkern
=
1
mode_idx
=
0
mode_idx
=
0
if
self
.
out_mode
!=
"valid"
:
if
self
.
out_mode
!=
"valid"
:
mode_idx
=
1
mode_idx
=
1
if
all_shape
:
if
all_shape
:
time_unroll_patch
=
self
.
speed_unroll_patch_shape
[
mode_idx
]
time_unroll_patch
=
self
.
speed_unroll_patch_shape
[
mode_idx
]
else
:
else
:
time_unroll_patch
=
self
.
speed_unroll_patch_noshape
[
mode_idx
]
time_unroll_patch
=
self
.
speed_unroll_patch_noshape
[
mode_idx
]
time_unroll_batch_kern
=
9999999
time_unroll_batch_kern
=
9999999
for
i
in
xrange
(
len
(
self
.
speed_unroll_batch_kern
)):
for
i
in
xrange
(
len
(
self
.
speed_unroll_batch_kern
)):
if
bsize
%
self
.
speed_unroll_batch_kern
[
i
][
0
]
==
0
and
nkern
%
self
.
speed_unroll_batch_kern
[
i
][
1
]
==
0
:
if
(
bsize
%
self
.
speed_unroll_batch_kern
[
i
][
0
]
==
0
and
if
self
.
speed_unroll_batch_kern
[
i
][
2
+
mode_idx
]
<
time_unroll_batch_kern
:
nkern
%
self
.
speed_unroll_batch_kern
[
i
][
1
]
==
0
):
time_unroll_batch_kern
=
self
.
speed_unroll_batch_kern
[
i
][
2
+
mode_idx
]
if
self
.
speed_unroll_batch_kern
[
i
][
2
+
mode_idx
]
<
time_unroll_batch_kern
:
time_unroll_batch_kern_idx
=
i
time_unroll_batch_kern
=
self
.
speed_unroll_batch_kern
[
i
][
2
+
mode_idx
]
time_unroll_batch_kern_idx
=
i
if
time_unroll_patch
<
time_unroll_batch_kern
:
if
time_unroll_patch
<
time_unroll_batch_kern
:
self
.
unroll_patch
=
True
self
.
unroll_patch
=
True
else
:
else
:
self
.
unroll_batch
=
self
.
speed_unroll_batch_kern
[
time_unroll_batch_kern_idx
][
0
]
self
.
unroll_batch
=
self
.
speed_unroll_batch_kern
[
self
.
unroll_kern
=
self
.
speed_unroll_batch_kern
[
time_unroll_batch_kern_idx
][
1
]
time_unroll_batch_kern_idx
][
0
]
self
.
unroll_kern
=
self
.
speed_unroll_batch_kern
[
time_unroll_batch_kern_idx
][
1
]
self
.
unroll_patch
=
False
self
.
unroll_patch
=
False
_logger
.
debug
(
"AUTO FIND VERSION OF C_CODE OF CONV OP "
_logger
.
debug
(
"AUTO FIND VERSION OF C_CODE OF CONV OP "
"
%
s
%
s
%
s
%
s
%
s
%
s
%
s"
,
"
%
s
%
s
%
s
%
s
%
s
%
s
%
s"
,
self
.
unroll_batch
,
self
.
unroll_kern
,
self
.
unroll_patch
,
self
.
unroll_batch
,
self
.
unroll_kern
,
self
.
unroll_patch
,
self
.
bsize
,
self
.
nkern
,
time_unroll_patch
,
self
.
bsize
,
self
.
nkern
,
time_unroll_patch
,
time_unroll_batch_kern
)
time_unroll_batch_kern
)
self
.
_rehash
()
self
.
_rehash
()
if
config
.
op
.
set_flops
:
if
config
.
op
.
set_flops
:
self
.
set_flops
()
self
.
set_flops
()
...
@@ -504,41 +535,46 @@ class ConvOp(Op):
...
@@ -504,41 +535,46 @@ class ConvOp(Op):
return
self
.
__hashval
return
self
.
__hashval
def
__str__
(
self
):
def
__str__
(
self
):
return
"ConvOp{"
+
","
.
join
(
str
((
a
,
getattr
(
self
,
a
)))
for
a
in
self
.
__attrnames
)
+
"}"
return
"ConvOp{"
+
","
.
join
(
str
((
a
,
getattr
(
self
,
a
)))
for
a
in
self
.
__attrnames
)
+
"}"
def
set_flops
(
self
):
def
set_flops
(
self
):
""" Useful with the hack in profilemode to print the MFlops"""
""" Useful with the hack in profilemode to print the MFlops"""
if
self
.
out_mode
==
"valid"
:
if
self
.
out_mode
==
"valid"
:
self
.
flops
=
self
.
kshp
[
0
]
*
self
.
kshp
[
1
]
*
2
#nb mul and add by output pixed
# nb mul and add by output pixed
self
.
flops
*=
self
.
outshp
[
0
]
*
self
.
outshp
[
1
]
#nb flops by output image
self
.
flops
=
self
.
kshp
[
0
]
*
self
.
kshp
[
1
]
*
2
self
.
flops
*=
self
.
imshp
[
0
]
*
self
.
nkern
*
self
.
bsize
#for all outputs images#n_stack==self.imshp[0]
#nb flops by output image
else
:
#full mode not implemented
self
.
flops
*=
self
.
outshp
[
0
]
*
self
.
outshp
[
1
]
# for all outputs images#n_stack==self.imshp[0]
self
.
flops
=
0
self
.
flops
*=
self
.
imshp
[
0
]
*
self
.
nkern
*
self
.
bsize
for
out_row
in
xrange
(
self
.
outshp
[
0
]):
#loop over output row
else
:
# full mode not implemented
for
out_col
in
xrange
(
self
.
outshp
[
0
]):
#loop over output col
for
row
in
xrange
(
self
.
kshp
[
0
]):
#loop over kern row
self
.
flops
=
0
for
out_row
in
xrange
(
self
.
outshp
[
0
]):
# loop over output row
if
(
row
+
out_row
-
self
.
kshp
[
0
]
+
1
<
0
or
for
out_col
in
xrange
(
self
.
outshp
[
0
]):
# loop over output col
row
+
out_row
-
self
.
kshp
[
0
]
+
1
>=
self
.
imshp
[
1
]):
for
row
in
xrange
(
self
.
kshp
[
0
]):
# loop over kern row
if
(
row
+
out_row
-
self
.
kshp
[
0
]
+
1
<
0
or
row
+
out_row
-
self
.
kshp
[
0
]
+
1
>=
self
.
imshp
[
1
]):
continue
continue
col
=
0
col
=
0
max_col
=
self
.
kshp
[
1
]
max_col
=
self
.
kshp
[
1
]
img_col
=
out_col
-
self
.
kshp
[
1
]
+
1
img_col
=
out_col
-
self
.
kshp
[
1
]
+
1
max_col
=
min
(
max_col
,
self
.
imshp
[
2
]
-
img_col
)
max_col
=
min
(
max_col
,
self
.
imshp
[
2
]
-
img_col
)
if
img_col
<
0
:
col
=-
img_col
img_col
+=
col
while
col
<
max_col
:
#loop over kern col
self
.
flops
+=
2
col
+=
1
self
.
flops
*=
self
.
imshp
[
0
]
*
self
.
nkern
*
self
.
bsize
#for all outputs images#n_stack==self.imshp[0]
if
img_col
<
0
:
col
=
-
img_col
img_col
+=
col
while
col
<
max_col
:
# loop over kern col
self
.
flops
+=
2
col
+=
1
# for all outputs images#n_stack==self.imshp[0]
self
.
flops
*=
self
.
imshp
[
0
]
*
self
.
nkern
*
self
.
bsize
assert
self
.
flops
==
self
.
bsize
*
self
.
nkern
*
self
.
imshp
[
0
]
*
\
assert
self
.
flops
==
self
.
bsize
*
self
.
nkern
*
self
.
imshp
[
0
]
*
\
self
.
kshp
[
0
]
*
self
.
kshp
[
1
]
*
self
.
imshp
[
1
]
*
self
.
imshp
[
2
]
*
2
self
.
kshp
[
0
]
*
self
.
kshp
[
1
]
*
\
self
.
imshp
[
1
]
*
self
.
imshp
[
2
]
*
2
def
make_node
(
self
,
inputs
,
kerns
):
def
make_node
(
self
,
inputs
,
kerns
):
# TODO: find a way to make ConvOp work for N-D (after NIPS09)
# TODO: find a way to make ConvOp work for N-D (after NIPS09)
...
@@ -551,19 +587,23 @@ class ConvOp(Op):
...
@@ -551,19 +587,23 @@ class ConvOp(Op):
_kerns
=
as_tensor_variable
(
kerns
)
_kerns
=
as_tensor_variable
(
kerns
)
# TODO: lift this restriction by upcasting either inputs or kerns
# TODO: lift this restriction by upcasting either inputs or kerns
if
_inputs
.
ndim
!=
4
:
if
_inputs
.
ndim
!=
4
:
raise
TypeError
(
'ConvOp (make_node) requires input be a 4D tensor; received "
%
s" (
%
i dims)'
%
(
inputs
,
_inputs
.
ndim
))
raise
TypeError
(
'ConvOp (make_node) requires input be a 4D tensor;'
' received "
%
s" (
%
i dims)'
%
(
inputs
,
_inputs
.
ndim
))
if
_kerns
.
ndim
!=
4
:
if
_kerns
.
ndim
!=
4
:
raise
TypeError
(
'make_node requires 4D tensor of kernels'
)
raise
TypeError
(
'make_node requires 4D tensor of kernels'
)
if
_inputs
.
type
.
dtype
!=
_kerns
.
type
.
dtype
:
if
_inputs
.
type
.
dtype
!=
_kerns
.
type
.
dtype
:
raise
NotImplementedError
(
"The image and the kernel must have the same type."
raise
NotImplementedError
(
"inputs(
%
s), kerns(
%
s)"
%
(
_inputs
.
dtype
,
_kerns
.
dtype
))
"The image and the kernel must have the same type."
"inputs(
%
s), kerns(
%
s)"
%
(
_inputs
.
dtype
,
_kerns
.
dtype
))
if
self
.
outshp
is
not
None
:
if
self
.
outshp
is
not
None
:
bcastable23
=
[
self
.
outshp
[
0
]
==
1
,
self
.
outshp
[
1
]
==
1
]
bcastable23
=
[
self
.
outshp
[
0
]
==
1
,
self
.
outshp
[
1
]
==
1
]
else
:
else
:
bcastable23
=
[
False
,
False
]
bcastable23
=
[
False
,
False
]
output
=
theano
.
tensor
.
tensor
(
dtype
=
_inputs
.
type
.
dtype
,
output
=
theano
.
tensor
.
tensor
(
dtype
=
_inputs
.
type
.
dtype
,
broadcastable
=
[
_inputs
.
broadcastable
[
0
],
broadcastable
=
[
_inputs
.
broadcastable
[
0
],
_kerns
.
broadcastable
[
0
]]
+
bcastable23
);
_kerns
.
broadcastable
[
0
]]
+
bcastable23
)
return
Apply
(
self
,
[
_inputs
,
_kerns
],
[
output
])
return
Apply
(
self
,
[
_inputs
,
_kerns
],
[
output
])
...
@@ -582,10 +622,12 @@ class ConvOp(Op):
...
@@ -582,10 +622,12 @@ class ConvOp(Op):
if
self
.
kshp_logical
:
if
self
.
kshp_logical
:
kshp
=
self
.
kshp_logical
kshp
=
self
.
kshp_logical
try
:
try
:
fmshp
=
ConvOp
.
getOutputShape
(
imshp
[
1
:],
kshp
,
(
self
.
dx
,
self
.
dy
),
self
.
out_mode
)
fmshp
=
ConvOp
.
getOutputShape
(
imshp
[
1
:],
kshp
,
(
self
.
dx
,
self
.
dy
),
self
.
out_mode
)
except
TypeError
:
except
TypeError
:
raise
theano
.
tensor
.
ShapeError
()
raise
theano
.
tensor
.
ShapeError
()
outshp
=
(
batch_size
,
fmo
)
+
tuple
(
fmshp
)
outshp
=
(
batch_size
,
fmo
)
+
tuple
(
fmshp
)
return
[
outshp
]
return
[
outshp
]
else
:
else
:
# Haven't implemented this case. imshp and kshp may be symbollic
# Haven't implemented this case. imshp and kshp may be symbollic
...
@@ -593,8 +635,7 @@ class ConvOp(Op):
...
@@ -593,8 +635,7 @@ class ConvOp(Op):
# we simply let the default function do its work.
# we simply let the default function do its work.
raise
theano
.
tensor
.
ShapeError
()
raise
theano
.
tensor
.
ShapeError
()
def
perform
(
self
,
node
,
inp
,
out
):
def
perform
(
self
,
node
,
inp
,
out
):
"""
"""
By default if len(img2d.shape)==3, we
By default if len(img2d.shape)==3, we
"""
"""
...
@@ -603,9 +644,12 @@ class ConvOp(Op):
...
@@ -603,9 +644,12 @@ class ConvOp(Op):
if
not
imported_scipy_signal
:
if
not
imported_scipy_signal
:
raise
theano
.
gof
.
utils
.
MethodNotDefined
(
raise
theano
.
gof
.
utils
.
MethodNotDefined
(
"c_headers"
,
type
(
self
),
self
.
__class__
.
__name__
,
"c_headers"
,
type
(
self
),
self
.
__class__
.
__name__
,
"Need the python package for scipy.signal to be installed for the python implementation. You can use the C implementation instead."
)
"Need the python package for scipy.signal to be installed "
"for the python implementation. You can use the C"
" implementation instead."
)
# TODO: move these back out to global scope when they no longer cause an atexit error
# TODO: move these back out to global scope when they no longer
# cause an atexit error
imshp
=
self
.
imshp
imshp
=
self
.
imshp
if
imshp
is
None
or
any
([
x
is
None
for
x
in
imshp
]):
if
imshp
is
None
or
any
([
x
is
None
for
x
in
imshp
]):
imshp
=
tuple
(
img2d
.
shape
[
1
:])
imshp
=
tuple
(
img2d
.
shape
[
1
:])
...
@@ -634,39 +678,43 @@ class ConvOp(Op):
...
@@ -634,39 +678,43 @@ class ConvOp(Op):
if
self
.
fulloutshp
is
not
None
:
if
self
.
fulloutshp
is
not
None
:
fulloutshp
=
tuple
(
self
.
fulloutshp
)
fulloutshp
=
tuple
(
self
.
fulloutshp
)
else
:
else
:
fulloutshp
=
tuple
(
ConvOp
.
getOutputShape
(
imshp_logical
[
1
:],
kshp_logical
,
(
1
,
1
),
self
.
out_mode
))
fulloutshp
=
tuple
(
ConvOp
.
getOutputShape
(
imshp_logical
[
1
:],
kshp_logical
,
(
1
,
1
),
self
.
out_mode
))
if
z
[
0
]
is
None
or
z
[
0
]
.
shape
!=
(
bsize
,)
+
(
nkern
,)
+
fulloutshp
:
if
z
[
0
]
is
None
or
z
[
0
]
.
shape
!=
(
bsize
,)
+
(
nkern
,)
+
fulloutshp
:
z
[
0
]
=
numpy
.
zeros
((
bsize
,)
+
(
nkern
,)
+
fulloutshp
,
z
[
0
]
=
numpy
.
zeros
((
bsize
,)
+
(
nkern
,)
+
fulloutshp
,
dtype
=
img2d
.
dtype
)
dtype
=
img2d
.
dtype
)
zz
=
z
[
0
]
zz
=
z
[
0
]
stacklen
=
imshp
[
0
]
stacklen
=
imshp
[
0
]
img2d
=
img2d
.
reshape
((
bsize
,)
+
imshp
)
img2d
=
img2d
.
reshape
((
bsize
,)
+
imshp
)
filtersflipped
=
filtersflipped
.
reshape
((
nkern
,
stacklen
)
+
kshp
)
filtersflipped
=
filtersflipped
.
reshape
((
nkern
,
stacklen
)
+
kshp
)
if
self
.
imshp
!=
self
.
imshp_logical
:
if
self
.
imshp
!=
self
.
imshp_logical
:
# assuming that to get from imshp to imshp logical we insert zeros in missing spots
# assuming that to get from imshp to imshp logical we insert zeros in missing spots
rstride
=
int
(
numpy
.
ceil
(
imshp_logical
[
1
]
/
float
(
imshp
[
1
])))
rstride
=
int
(
numpy
.
ceil
(
imshp_logical
[
1
]
/
float
(
imshp
[
1
])))
cstride
=
int
(
numpy
.
ceil
(
imshp_logical
[
2
]
/
float
(
imshp
[
2
])))
cstride
=
int
(
numpy
.
ceil
(
imshp_logical
[
2
]
/
float
(
imshp
[
2
])))
buf
=
numpy
.
zeros
((
bsize
,)
+
imshp_logical
,
dtype
=
img2d
.
dtype
)
buf
=
numpy
.
zeros
((
bsize
,)
+
imshp_logical
,
dtype
=
img2d
.
dtype
)
buf
[:,
:,
::
rstride
,
::
cstride
]
=
img2d
buf
[:,
:,
::
rstride
,
::
cstride
]
=
img2d
img2d
=
buf
img2d
=
buf
del
buf
,
rstride
,
cstride
del
buf
,
rstride
,
cstride
if
kshp
!=
kshp_logical
:
if
kshp
!=
kshp_logical
:
rstride
=
int
(
numpy
.
ceil
(
kshp_logical
[
0
]
/
float
(
kshp
[
0
])))
rstride
=
int
(
numpy
.
ceil
(
kshp_logical
[
0
]
/
float
(
kshp
[
0
])))
cstride
=
int
(
numpy
.
ceil
(
kshp_logical
[
1
]
/
float
(
kshp
[
1
])))
cstride
=
int
(
numpy
.
ceil
(
kshp_logical
[
1
]
/
float
(
kshp
[
1
])))
buf
=
numpy
.
zeros
((
nkern
,
stacklen
)
+
self
.
kshp_logical
,
dtype
=
filtersflipped
.
dtype
)
buf
=
numpy
.
zeros
((
nkern
,
stacklen
)
+
self
.
kshp_logical
,
dtype
=
filtersflipped
.
dtype
)
if
self
.
kshp_logical_top_aligned
:
if
self
.
kshp_logical_top_aligned
:
roffset
=
coffset
=
0
roffset
=
coffset
=
0
else
:
else
:
roffset
=
(
kshp_logical
[
0
]
-
(
kshp
[
0
]
*
rstride
)
-
1
+
rstride
)
%
rstride
roffset
=
(
kshp_logical
[
0
]
-
(
kshp
[
0
]
*
coffset
=
(
kshp_logical
[
1
]
-
(
kshp
[
1
]
*
cstride
)
-
1
+
cstride
)
%
cstride
rstride
)
-
1
+
rstride
)
%
rstride
coffset
=
(
kshp_logical
[
1
]
-
(
kshp
[
1
]
*
cstride
)
-
1
+
cstride
)
%
cstride
assert
roffset
>=
0
assert
roffset
>=
0
assert
coffset
>=
0
assert
coffset
>=
0
buf
[:,
:,
roffset
::
rstride
,
coffset
::
cstride
]
=
filtersflipped
buf
[:,
:,
roffset
::
rstride
,
coffset
::
cstride
]
=
filtersflipped
filtersflipped
=
buf
filtersflipped
=
buf
del
buf
,
rstride
,
cstride
del
buf
,
rstride
,
cstride
...
@@ -675,39 +723,39 @@ class ConvOp(Op):
...
@@ -675,39 +723,39 @@ class ConvOp(Op):
for
b
in
xrange
(
bsize
):
for
b
in
xrange
(
bsize
):
for
n
in
xrange
(
nkern
):
for
n
in
xrange
(
nkern
):
zz
[
b
,
n
,
...
]
.
fill
(
0
)
zz
[
b
,
n
,
...
]
.
fill
(
0
)
for
im0
in
xrange
(
stacklen
):
for
im0
in
xrange
(
stacklen
):
zz
[
b
,
n
,
...
]
+=
_convolve2d
(
\
zz
[
b
,
n
,
...
]
+=
_convolve2d
(
img2d
[
b
,
im0
,
...
],
img2d
[
b
,
im0
,
...
],
filtersflipped
[
n
,
im0
,
...
],
1
,
val
,
bval
,
0
)
filtersflipped
[
n
,
im0
,
...
],
1
,
val
,
bval
,
0
)
if
False
:
if
False
:
if
False
and
self
.
out_mode
==
"full"
:
if
False
and
self
.
out_mode
==
"full"
:
img2d2
=
numpy
.
zeros
((
bsize
,
stacklen
,
img2d2
=
numpy
.
zeros
((
bsize
,
stacklen
,
imshp
[
1
]
+
2
*
kshp
[
0
]
-
2
,
imshp
[
1
]
+
2
*
kshp
[
0
]
-
2
,
imshp
[
2
]
+
2
*
kshp
[
1
]
-
2
))
imshp
[
2
]
+
2
*
kshp
[
1
]
-
2
))
img2d2
[:,
:,
kshp
[
0
]
-
1
:
kshp
[
0
]
-
1
+
imshp
[
1
],
img2d2
[:,
:,
kshp
[
0
]
-
1
:
kshp
[
0
]
-
1
+
imshp
[
1
],
kshp
[
1
]
-
1
:
kshp
[
1
]
-
1
+
imshp
[
2
]]
=
img2d
kshp
[
1
]
-
1
:
kshp
[
1
]
-
1
+
imshp
[
2
]]
=
img2d
img2d
=
img2d2
img2d
=
img2d2
#N_image_shape = image_data.shape
#N_image_shape = image_data.shape
for
b
in
xrange
(
bsize
):
for
b
in
xrange
(
bsize
):
for
n
in
xrange
(
nkern
):
for
n
in
xrange
(
nkern
):
zz
[
b
,
n
,
...
]
.
fill
(
0
)
zz
[
b
,
n
,
...
]
.
fill
(
0
)
for
im0
in
xrange
(
stacklen
):
for
im0
in
xrange
(
stacklen
):
for
row
in
xrange
(
0
,
zz
.
shape
[
2
],
self
.
dx
):
for
row
in
xrange
(
0
,
zz
.
shape
[
2
],
self
.
dx
):
for
col
in
xrange
(
0
,
zz
.
shape
[
3
],
self
.
dy
):
for
col
in
xrange
(
0
,
zz
.
shape
[
3
],
self
.
dy
):
zz
[
b
,
n
,
row
,
col
]
+=
(
img2d
[
b
,
im0
,
row
:
row
+
kshp
[
0
],
col
:
col
+
kshp
[
1
]]
*
\
zz
[
b
,
n
,
row
,
col
]
+=
(
img2d
[
b
,
im0
,
row
:
row
+
kshp
[
0
],
col
:
col
+
kshp
[
1
]]
*
filtersflipped
[
n
,
im0
,::
-
1
,
::
-
1
])
.
sum
()
filtersflipped
[
n
,
im0
,
::
-
1
,
::
-
1
])
.
sum
()
#We copy it to remove the Stride mismatch warning from DEBUG_MODE.
#We copy it to remove the Stride mismatch warning from DEBUG_MODE.
#The copy make that we return an object with the same stride as the c version.
#The copy make that we return an object with the same stride as the c version.
#The copy don't affect the performence during our experience as in that case we
#The copy don't affect the performence during our experience as in that case we
#execute the c version which is much faster.
#execute the c version which is much faster.
if
self
.
dx
>
1
or
self
.
dy
>
1
:
if
self
.
dx
>
1
or
self
.
dy
>
1
:
zz
=
zz
[:,:,
0
::
self
.
dx
,
0
::
self
.
dy
]
.
copy
()
zz
=
zz
[:,
:,
0
::
self
.
dx
,
0
::
self
.
dy
]
.
copy
()
z
[
0
]
=
zz
z
[
0
]
=
zz
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
inputs
,
kerns
=
inp
inputs
,
kerns
=
inp
...
@@ -726,32 +774,36 @@ class ConvOp(Op):
...
@@ -726,32 +774,36 @@ class ConvOp(Op):
tmp_node
=
theano
.
tensor
.
nnet
.
conv3D
(
tmp_node
=
theano
.
tensor
.
nnet
.
conv3D
(
V
=
inputs
.
dimshuffle
(
0
,
2
,
3
,
'x'
,
1
),
V
=
inputs
.
dimshuffle
(
0
,
2
,
3
,
'x'
,
1
),
W
=
kerns
[:,
:,
::
-
1
,
::
-
1
]
.
dimshuffle
(
0
,
2
,
3
,
'x'
,
1
),
W
=
kerns
[:,
:,
::
-
1
,
::
-
1
]
.
dimshuffle
(
0
,
2
,
3
,
'x'
,
1
),
b
=
theano
.
tensor
.
alloc
(
numpy
.
asarray
(
0
,
dtype
=
kerns
.
dtype
),
kerns
.
shape
[
0
]),
b
=
theano
.
tensor
.
alloc
(
numpy
.
asarray
(
0
,
dtype
=
kerns
.
dtype
),
kerns
.
shape
[
0
]),
d
=
(
self
.
dx
,
self
.
dy
,
1
))
d
=
(
self
.
dx
,
self
.
dy
,
1
))
node
=
theano
.
tensor
.
addbroadcast
(
tmp_node
,
3
)
.
dimshuffle
(
0
,
4
,
1
,
2
)
node
=
theano
.
tensor
.
addbroadcast
(
tmp_node
,
3
)
.
dimshuffle
(
0
,
4
,
1
,
2
)
# mimic what happens inside theano.grad: get the input gradient
# mimic what happens inside theano.grad: get the input gradient
# of the final cost wrt all variables involved.
# of the final cost wrt all variables involved.
tmp_gmap
=
theano
.
gradient
.
grad_sources_inputs
([(
node
,
gz
)],
[
inputs
,
kerns
])
tmp_gmap
=
theano
.
gradient
.
grad_sources_inputs
(
[(
node
,
gz
)],
[
inputs
,
kerns
])
return
[
tmp_gmap
[
inputs
],
tmp_gmap
[
kerns
]]
return
[
tmp_gmap
[
inputs
],
tmp_gmap
[
kerns
]]
if
self
.
dx
not
in
(
1
,
2
)
or
self
.
dy
not
in
(
1
,
2
):
if
self
.
dx
not
in
(
1
,
2
)
or
self
.
dy
not
in
(
1
,
2
):
raise
NotImplementedError
(
"ERROR: We disable ConvOp.grad now when dx or "
\
raise
NotImplementedError
(
"ERROR: We disable ConvOp.grad now when dx or "
"dy are different from 1 and 2, as there is a bug in it."
)
"dy are different from 1 and 2, as there is a bug in it."
)
all_shape
=
self
.
imshp
is
not
None
and
self
.
kshp
is
not
None
and
\
all_shape
=
(
self
.
imshp
is
not
None
and
self
.
kshp
is
not
None
and
self
.
nkern
is
not
None
and
self
.
bsize
is
not
None
self
.
nkern
is
not
None
and
self
.
bsize
is
not
None
)
if
not
all_shape
and
(
self
.
dx
!=
1
or
self
.
dy
!=
1
):
if
not
all_shape
and
(
self
.
dx
!=
1
or
self
.
dy
!=
1
):
raise
Exception
(
"ConvOp.grad when dx!=1 or dy!=1 we must have all "
\
raise
Exception
(
"ConvOp.grad when dx!=1 or dy!=1 we must have all "
"the optional shape information"
)
"the optional shape information"
)
####### Determine gradient on kernels ########
####### Determine gradient on kernels ########
assert
inputs
.
ndim
==
4
and
kerns
.
ndim
==
4
assert
inputs
.
ndim
==
4
and
kerns
.
ndim
==
4
newin
=
inputs
.
dimshuffle
((
1
,
0
,
2
,
3
))
newin
=
inputs
.
dimshuffle
((
1
,
0
,
2
,
3
))
newgz
=
gz
.
dimshuffle
((
1
,
0
,
2
,
3
))
newgz
=
gz
.
dimshuffle
((
1
,
0
,
2
,
3
))
(
bsize
,
nkern
)
=
None
,
None
(
bsize
,
nkern
)
=
None
,
None
imshp
=
None
imshp
=
None
...
@@ -762,7 +814,7 @@ class ConvOp(Op):
...
@@ -762,7 +814,7 @@ class ConvOp(Op):
if
self
.
out_mode
==
'valid'
:
if
self
.
out_mode
==
'valid'
:
(
img
,
filters
)
=
(
newin
,
newgz
)
(
img
,
filters
)
=
(
newin
,
newgz
)
kshp_logical
=
self
.
fulloutshp
kshp_logical
=
self
.
fulloutshp
kshp_logical_top_aligned
=
False
kshp_logical_top_aligned
=
False
if
all_shape
:
if
all_shape
:
(
bsize
,
nkern
)
=
(
self
.
imshp
[
0
],
self
.
nkern
)
(
bsize
,
nkern
)
=
(
self
.
imshp
[
0
],
self
.
nkern
)
imshp
=
(
self
.
bsize
,
self
.
imshp
[
1
],
self
.
imshp
[
2
])
imshp
=
(
self
.
bsize
,
self
.
imshp
[
1
],
self
.
imshp
[
2
])
...
@@ -772,38 +824,45 @@ class ConvOp(Op):
...
@@ -772,38 +824,45 @@ class ConvOp(Op):
elif
self
.
out_mode
==
'full'
:
elif
self
.
out_mode
==
'full'
:
(
img
,
filters
)
=
(
newgz
,
newin
)
(
img
,
filters
)
=
(
newgz
,
newin
)
kshp_logical
=
None
kshp_logical
=
None
kshp_logical_top_aligned
=
True
kshp_logical_top_aligned
=
True
if
all_shape
:
if
all_shape
:
imshp_logical
=
(
self
.
bsize
,
self
.
fulloutshp
[
0
],
self
.
fulloutshp
[
1
])
imshp_logical
=
(
self
.
bsize
,
self
.
fulloutshp
[
0
],
self
.
fulloutshp
[
1
])
(
bsize
,
nkern
)
=
(
self
.
nkern
,
self
.
imshp
[
0
])
(
bsize
,
nkern
)
=
(
self
.
nkern
,
self
.
imshp
[
0
])
imshp
=
(
self
.
bsize
,
self
.
outshp
[
0
],
self
.
outshp
[
1
])
imshp
=
(
self
.
bsize
,
self
.
outshp
[
0
],
self
.
outshp
[
1
])
kshp
=
self
.
imshp
[
1
:]
kshp
=
self
.
imshp
[
1
:]
un_b
=
self
.
unroll_kern
un_b
=
self
.
unroll_kern
un_k
=
self
.
unroll_batch
un_k
=
self
.
unroll_batch
else
:
else
:
raise
NotImplementedError
(
'Only [full,valid] modes are currently supported.'
)
raise
NotImplementedError
(
'Only [full,valid] modes are currently supported.'
)
filters
=
filters
[:,
:,::
-
1
,::
-
1
]
#
flip them
filters
=
filters
[:,
:,
::
-
1
,
::
-
1
]
#
flip them
if
0
:
#
find good value for the unroll
if
0
:
#
find good value for the unroll
if
all_shape
and
un_b
!=
0
and
bsize
%
un_b
!=
0
:
if
all_shape
and
un_b
!=
0
and
bsize
%
un_b
!=
0
:
if
bsize
<
un_b
:
if
bsize
<
un_b
:
un_b
=
bsize
un_b
=
bsize
else
:
else
:
un_b
=
1
un_b
=
1
_logger
.
warn
(
"Optimization Warning: in ConvOp.grad() we can't determine "
\
_logger
.
warn
(
"a good unroll value for the batch. Maybe you can optimize this!"
)
"Optimization Warning: in ConvOp.grad() we can't "
" determine a good unroll value for the batch."
" Maybe you can optimize this!"
)
if
all_shape
and
un_k
!=
0
and
nkern
%
un_k
!=
0
:
if
all_shape
and
un_k
!=
0
and
nkern
%
un_k
!=
0
:
if
nkern
<
un_k
:
if
nkern
<
un_k
:
un_k
=
nkern
un_k
=
nkern
else
:
else
:
un_k
=
1
un_k
=
1
_logger
.
warn
(
"Optimization Warning: in ConvOp.grad() we can't determine "
\
_logger
.
warn
(
"a good unroll value for the kernel. Maybe you can optimize this!"
)
"Optimization Warning: in ConvOp.grad() we can't"
" determine a good unroll value for the kernel. Maybe"
" you can optimize this!"
)
dw
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
output_mode
=
'valid'
,
dw
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
output_mode
=
'valid'
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
,
unroll_patch
=
un_p
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
,
unroll_patch
=
un_p
,
imshp_logical
=
imshp_logical
,
imshp_logical
=
imshp_logical
,
kshp_logical
=
kshp_logical
,
kshp_logical
=
kshp_logical
,
...
@@ -812,7 +871,7 @@ class ConvOp(Op):
...
@@ -812,7 +871,7 @@ class ConvOp(Op):
verbose
=
self
.
verbose
)
verbose
=
self
.
verbose
)
else
:
# let __init__ choose c params be chosen automatically from shapes
else
:
# let __init__ choose c params be chosen automatically from shapes
dw
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
output_mode
=
'valid'
,
dw
=
ConvOp
(
imshp
,
kshp
,
nkern
,
bsize
,
1
,
1
,
output_mode
=
'valid'
,
unroll_batch
=
None
,
unroll_kern
=
None
,
unroll_patch
=
None
,
unroll_batch
=
None
,
unroll_kern
=
None
,
unroll_patch
=
None
,
imshp_logical
=
imshp_logical
,
imshp_logical
=
imshp_logical
,
kshp_logical
=
kshp_logical
,
kshp_logical
=
kshp_logical
,
...
@@ -820,26 +879,25 @@ class ConvOp(Op):
...
@@ -820,26 +879,25 @@ class ConvOp(Op):
version
=
self
.
version
,
version
=
self
.
version
,
verbose
=
self
.
verbose
)
verbose
=
self
.
verbose
)
if
hasattr
(
self
,
'flops'
):
if
hasattr
(
self
,
'flops'
):
dw
.
set_flops
()
dw
.
set_flops
()
dw
=
dw
(
img
,
filters
)
dw
=
dw
(
img
,
filters
)
if
all_shape
:
if
all_shape
:
assert
(
dw
.
owner
.
op
.
outshp
==
self
.
kshp
)
.
all
()
assert
(
dw
.
owner
.
op
.
outshp
==
self
.
kshp
)
.
all
()
if
self
.
out_mode
==
'valid'
:
if
self
.
out_mode
==
'valid'
:
# before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1]
# before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1]
dw
=
dw
.
dimshuffle
((
1
,
0
,
2
,
3
))
dw
=
dw
.
dimshuffle
((
1
,
0
,
2
,
3
))
dw
=
dw
[:,
:,::
-
1
,
::
-
1
]
dw
=
dw
[:,
:,
::
-
1
,
::
-
1
]
####### Determine gradient on inputs ########
####### Determine gradient on inputs ########
mode
=
'valid'
mode
=
'valid'
if
not
self
.
out_mode
==
'full'
:
if
not
self
.
out_mode
==
'full'
:
mode
=
'full'
mode
=
'full'
filters
=
kerns
.
dimshuffle
((
1
,
0
,
2
,
3
))
filters
=
kerns
.
dimshuffle
((
1
,
0
,
2
,
3
))
filters
=
filters
[:,
:,::
-
1
,
::
-
1
]
filters
=
filters
[:,
:,
::
-
1
,
::
-
1
]
nkern
=
None
nkern
=
None
imshp
=
None
imshp
=
None
imshp_logical
=
None
imshp_logical
=
None
...
@@ -848,33 +906,36 @@ class ConvOp(Op):
...
@@ -848,33 +906,36 @@ class ConvOp(Op):
if
all_shape
:
if
all_shape
:
nkern
=
self
.
imshp
[
0
]
nkern
=
self
.
imshp
[
0
]
imshp
=
(
self
.
nkern
,
self
.
outshp
[
0
],
self
.
outshp
[
1
])
imshp
=
(
self
.
nkern
,
self
.
outshp
[
0
],
self
.
outshp
[
1
])
imshp_logical
=
(
self
.
nkern
,
self
.
fulloutshp
[
0
],
self
.
fulloutshp
[
1
])
imshp_logical
=
(
self
.
nkern
,
self
.
fulloutshp
[
0
],
self
.
fulloutshp
[
1
])
if
0
:
# hard-code c generation parameters
if
0
:
# hard-code c generation parameters
din
=
ConvOp
(
imshp
,
self
.
kshp
,
nkern
,
self
.
bsize
,
din
=
ConvOp
(
imshp
,
self
.
kshp
,
nkern
,
self
.
bsize
,
1
,
1
,
output_mode
=
mode
,
1
,
1
,
output_mode
=
mode
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
,
unroll_patch
=
un_p
,
unroll_batch
=
un_b
,
unroll_kern
=
un_k
,
unroll_patch
=
un_p
,
imshp_logical
=
imshp_logical
,
imshp_logical
=
imshp_logical
,
kshp_logical
=
None
,
kshp_logical
=
None
,
version
=-
1
,
#
we we change the mode, we don't forward the version.
version
=-
1
,
#
we we change the mode, we don't forward the version.
verbose
=
self
.
verbose
)
verbose
=
self
.
verbose
)
else
:
# let __init__ figure out the unrolling / patch sizes
else
:
# let __init__ figure out the unrolling / patch sizes
din
=
ConvOp
(
imshp
,
self
.
kshp
,
nkern
,
self
.
bsize
,
din
=
ConvOp
(
imshp
,
self
.
kshp
,
nkern
,
self
.
bsize
,
1
,
1
,
output_mode
=
mode
,
1
,
1
,
output_mode
=
mode
,
unroll_batch
=
None
,
unroll_kern
=
None
,
unroll_patch
=
None
,
unroll_batch
=
None
,
unroll_kern
=
None
,
unroll_patch
=
None
,
imshp_logical
=
imshp_logical
,
imshp_logical
=
imshp_logical
,
kshp_logical
=
None
,
kshp_logical
=
None
,
version
=-
1
,
#
we we change the mode, we don't forward the version.
version
=-
1
,
#
we we change the mode, we don't forward the version.
verbose
=
self
.
verbose
)
verbose
=
self
.
verbose
)
if
hasattr
(
self
,
'flops'
):
if
hasattr
(
self
,
'flops'
):
din
.
set_flops
()
din
.
set_flops
()
din
=
din
(
gz
,
filters
)
din
=
din
(
gz
,
filters
)
assert
(
din
.
owner
.
op
.
outshp
is
None
and
self
.
imshp
is
None
)
or
\
assert
(
din
.
owner
.
op
.
outshp
is
None
and
self
.
imshp
is
None
)
or
\
(
din
.
owner
.
op
.
outshp
is
None
)
or
\
(
din
.
owner
.
op
.
outshp
is
None
)
or
\
(
din
.
owner
.
op
.
outshp
==
self
.
imshp
[
1
:])
.
all
()
(
din
.
owner
.
op
.
outshp
==
self
.
imshp
[
1
:])
.
all
()
# din and dw should have the same broadcasting pattern as the
# din and dw should have the same broadcasting pattern as the
# parameters they are the gradient of (resp. inputs and kerns).
# parameters they are the gradient of (resp. inputs and kerns).
...
@@ -902,10 +963,14 @@ using namespace std;
...
@@ -902,10 +963,14 @@ using namespace std;
""" Return True if we will generate code that use gemm.
""" Return True if we will generate code that use gemm.
"""
"""
#the gemm version only support that case
#the gemm version only support that case
if
self
.
out_mode
==
'valid'
and
self
.
dx
==
0
and
self
.
dy
==
0
:
if
self
.
out_mode
==
'valid'
and
self
.
dx
==
0
and
self
.
dy
==
0
:
#We use a faster version in those case.
#We use a faster version in those case.
if
(
self
.
imshp
!=
self
.
imshp_logical
or
self
.
kshp
!=
self
.
kshp_logical
if
(
self
.
imshp
!=
self
.
imshp_logical
or
or
self
.
unroll_patch
or
self
.
unroll_batch
>
0
or
self
.
unroll_kern
>
0
):
self
.
kshp
!=
self
.
kshp_logical
or
self
.
unroll_patch
or
self
.
unroll_batch
>
0
or
self
.
unroll_kern
>
0
):
return
False
return
False
return
True
return
True
return
False
return
False
...
@@ -918,7 +983,9 @@ using namespace std;
...
@@ -918,7 +983,9 @@ using namespace std;
def
c_no_compile_args
(
self
):
def
c_no_compile_args
(
self
):
#when the ksph==(1,1) gcc 4.3.0 segfault during the
#when the ksph==(1,1) gcc 4.3.0 segfault during the
#compilation with -O3. This don't happen at -O2
#compilation with -O3. This don't happen at -O2
if
theano
.
gof
.
cmodule
.
gcc_version
()
in
[
'4.3.0'
]
and
self
.
kshp
==
(
1
,
1
):
if
(
theano
.
gof
.
cmodule
.
gcc_version
()
in
[
'4.3.0'
]
and
self
.
kshp
==
(
1
,
1
)):
return
[
'-O3'
]
return
[
'-O3'
]
else
:
else
:
return
[]
return
[]
...
@@ -928,7 +995,8 @@ using namespace std;
...
@@ -928,7 +995,8 @@ using namespace std;
if
self
.
use_blas
():
if
self
.
use_blas
():
ret
=
blas
.
ldflags
(
libs
=
False
,
flags
=
True
)
ret
=
blas
.
ldflags
(
libs
=
False
,
flags
=
True
)
if
theano
.
gof
.
cmodule
.
gcc_version
()
in
[
'4.3.0'
]
and
self
.
kshp
==
(
1
,
1
):
if
(
theano
.
gof
.
cmodule
.
gcc_version
()
in
[
'4.3.0'
]
and
self
.
kshp
==
(
1
,
1
)):
ret
+=
[
'-O2'
]
ret
+=
[
'-O2'
]
if
self
.
openmp
:
if
self
.
openmp
:
ret
+=
[
'-fopenmp'
]
ret
+=
[
'-fopenmp'
]
...
@@ -951,50 +1019,59 @@ using namespace std;
...
@@ -951,50 +1019,59 @@ using namespace std;
if
node
.
inputs
[
0
]
.
type
.
dtype
!=
node
.
inputs
[
1
]
.
type
.
dtype
:
if
node
.
inputs
[
0
]
.
type
.
dtype
!=
node
.
inputs
[
1
]
.
type
.
dtype
:
raise
NotImplementedError
()
raise
NotImplementedError
()
assert
node
.
inputs
[
0
]
.
type
.
dtype
==
node
.
inputs
[
1
]
.
type
.
dtype
assert
node
.
inputs
[
0
]
.
type
.
dtype
==
node
.
inputs
[
1
]
.
type
.
dtype
d
=
locals
()
d
=
locals
()
d
.
update
(
sub
)
d
.
update
(
sub
)
all_shape
=
self
.
imshp
is
not
None
and
self
.
kshp
is
not
None
and
\
all_shape
=
(
self
.
imshp
is
not
None
and
self
.
kshp
is
not
None
and
self
.
nkern
is
not
None
and
self
.
bsize
is
not
None
self
.
nkern
is
not
None
and
self
.
bsize
is
not
None
)
d
[
"self_out_mode"
]
=
self
.
out_mode
d
[
"self_out_mode"
]
=
self
.
out_mode
d
[
"self_dx"
]
=
self
.
dx
d
[
"self_dx"
]
=
self
.
dx
d
[
"self_dy"
]
=
self
.
dy
d
[
"self_dy"
]
=
self
.
dy
d
[
"mode"
]
=
self
.
out_mode
.
upper
()
d
[
"mode"
]
=
self
.
out_mode
.
upper
()
d
[
"affectation"
]
=
"="
d
[
"affectation"
]
=
"="
if
all_shape
:
if
all_shape
:
d
[
"self_bsize"
]
=
self
.
bsize
d
[
"self_bsize"
]
=
self
.
bsize
d
[
"self_nkern"
]
=
self
.
nkern
d
[
"self_nkern"
]
=
self
.
nkern
d
[
"self_outshp0"
]
=
self
.
outshp
[
0
]
d
[
"self_outshp0"
]
=
self
.
outshp
[
0
]
d
[
"self_outshp1"
]
=
self
.
outshp
[
1
]
d
[
"self_outshp1"
]
=
self
.
outshp
[
1
]
d
[
"self_imshp0"
]
=
self
.
imshp
[
0
]
d
[
"self_imshp0"
]
=
self
.
imshp
[
0
]
d
[
"self_imshp1"
]
=
self
.
imshp
[
1
]
d
[
"self_imshp1"
]
=
self
.
imshp
[
1
]
d
[
"self_imshp2"
]
=
self
.
imshp
[
2
]
d
[
"self_imshp2"
]
=
self
.
imshp
[
2
]
d
[
"self_kshp0"
]
=
self
.
kshp
[
0
]
d
[
"self_kshp0"
]
=
self
.
kshp
[
0
]
d
[
"self_kshp1"
]
=
self
.
kshp
[
1
]
d
[
"self_kshp1"
]
=
self
.
kshp
[
1
]
d
[
"self_kshp_logical_r"
]
=
self
.
kshp_logical
[
0
]
d
[
"self_kshp_logical_r"
]
=
self
.
kshp_logical
[
0
]
d
[
"self_kshp_logical_c"
]
=
self
.
kshp_logical
[
1
]
d
[
"self_kshp_logical_c"
]
=
self
.
kshp_logical
[
1
]
d
[
"self_kshp_logical_stride_r"
]
=
int
(
numpy
.
ceil
(
self
.
kshp_logical
[
0
]
/
float
(
self
.
kshp
[
0
])))
d
[
"self_kshp_logical_stride_r"
]
=
int
(
numpy
.
ceil
(
d
[
"self_kshp_logical_stride_c"
]
=
int
(
numpy
.
ceil
(
self
.
kshp_logical
[
1
]
/
float
(
self
.
kshp
[
1
])))
self
.
kshp_logical
[
0
]
/
float
(
self
.
kshp
[
0
])))
d
[
"self_imshp_logical_r"
]
=
self
.
imshp_logical
[
1
]
#numpy.B. 1 not 0
d
[
"self_kshp_logical_stride_c"
]
=
int
(
numpy
.
ceil
(
d
[
"self_imshp_logical_c"
]
=
self
.
imshp_logical
[
2
]
#numpy.B. 2 not 1
self
.
kshp_logical
[
1
]
/
float
(
self
.
kshp
[
1
])))
d
[
"self_imshp_logical_stride_r"
]
=
int
(
numpy
.
ceil
(
self
.
imshp_logical
[
1
]
/
float
(
self
.
imshp
[
1
])))
d
[
"self_imshp_logical_r"
]
=
self
.
imshp_logical
[
1
]
d
[
"self_imshp_logical_stride_c"
]
=
int
(
numpy
.
ceil
(
self
.
imshp_logical
[
2
]
/
float
(
self
.
imshp
[
2
])))
#numpy.B. 1 not 0
if
not
self
.
imshp
[
0
]
==
1
:
d
[
"affectation"
]
=
"+="
d
[
"self_imshp_logical_c"
]
=
self
.
imshp_logical
[
2
]
d
[
"all_shape"
]
=
"1"
# numpy.B. 2 not 1
d
[
"dim_zz_const"
]
=
"const"
d
[
"self_imshp_logical_stride_r"
]
=
int
(
numpy
.
ceil
(
d
[
"dim_zz_affect"
]
=
""
self
.
imshp_logical
[
1
]
/
float
(
self
.
imshp
[
1
])))
d
[
"assert_size"
]
=
"""
d
[
"self_imshp_logical_stride_c"
]
=
int
(
numpy
.
ceil
(
self
.
imshp_logical
[
2
]
/
float
(
self
.
imshp
[
2
])))
if
not
self
.
imshp
[
0
]
==
1
:
d
[
"affectation"
]
=
"+="
d
[
"all_shape"
]
=
"1"
d
[
"dim_zz_const"
]
=
"const"
d
[
"dim_zz_affect"
]
=
""
d
[
"assert_size"
]
=
"""
// Check the batch size and the number of kernels (sometimes constant in the graph)
// Check the batch size and the number of kernels (sometimes constant in the graph)
if(img2d_dim[0] !=
%(self_bsize)
s!=0){
if(img2d_dim[0] !=
%(self_bsize)
s!=0){
PyErr_Format(PyExc_ValueError,
PyErr_Format(PyExc_ValueError,
"the batch size in the image (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
"the batch size in the image (
%%
ld) at run time is different"
" than at build time (
%%
ld) for the ConvOp.",
(long)img2d_dim[0], (long)
%(self_bsize)
s);
(long)img2d_dim[0], (long)
%(self_bsize)
s);
%(fail)
s;
%(fail)
s;
}
}
if(kerns_dim[0] !=
%(self_nkern)
s!=0){
if(kerns_dim[0] !=
%(self_nkern)
s!=0){
PyErr_Format(PyExc_ValueError,
PyErr_Format(PyExc_ValueError,
"the number of kernels in the filter (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
"the number of kernels in the filter (
%%
ld) at run time is"
" different than at build time (
%%
ld) for the ConvOp.",
(long)kerns_dim[0], (long)
%(self_nkern)
s);
(long)kerns_dim[0], (long)
%(self_nkern)
s);
%(fail)
s;
%(fail)
s;
}
}
...
@@ -1002,19 +1079,22 @@ if(kerns_dim[0] != %(self_nkern)s!=0){
...
@@ -1002,19 +1079,22 @@ if(kerns_dim[0] != %(self_nkern)s!=0){
// Check the size of the image (sometimes constant in the graph)
// Check the size of the image (sometimes constant in the graph)
if(img2d_dim[1] !=
%(self_imshp0)
s){
if(img2d_dim[1] !=
%(self_imshp0)
s){
PyErr_Format(PyExc_ValueError,
PyErr_Format(PyExc_ValueError,
"the image stack size (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
"the image stack size (
%%
ld) at run time is different than"
" at build time (
%%
ld) for the ConvOp.",
(long)img2d_dim[1], (long)
%(self_imshp0)
s);
(long)img2d_dim[1], (long)
%(self_imshp0)
s);
%(fail)
s;
%(fail)
s;
}
}
if(img2d_dim[2] !=
%(self_imshp1)
s){
if(img2d_dim[2] !=
%(self_imshp1)
s){
PyErr_Format(PyExc_ValueError,
PyErr_Format(PyExc_ValueError,
"the number of rows in the image (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
"the number of rows in the image (
%%
ld) at run time is different"
" than at build time (
%%
ld) for the ConvOp.",
(long)img2d_dim[2], (long)
%(self_imshp1)
s);
(long)img2d_dim[2], (long)
%(self_imshp1)
s);
%(fail)
s;
%(fail)
s;
}
}
if(img2d_dim[3] !=
%(self_imshp2)
s){
if(img2d_dim[3] !=
%(self_imshp2)
s){
PyErr_Format(PyExc_ValueError,
PyErr_Format(PyExc_ValueError,
"the number of columns in the image (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
"the number of columns in the image (
%%
ld) at run time is"
" different than at build time (
%%
ld) for the ConvOp.",
(long)img2d_dim[3], (long)
%(self_imshp2)
s);
(long)img2d_dim[3], (long)
%(self_imshp2)
s);
%(fail)
s;
%(fail)
s;
}
}
...
@@ -1022,13 +1102,15 @@ if(img2d_dim[3] != %(self_imshp2)s){
...
@@ -1022,13 +1102,15 @@ if(img2d_dim[3] != %(self_imshp2)s){
// Check the size of the output (sometimes constant in the graph)
// Check the size of the output (sometimes constant in the graph)
if(dim_zz[0] !=
%(self_outshp0)
s!=0){
if(dim_zz[0] !=
%(self_outshp0)
s!=0){
PyErr_Format(PyExc_ValueError,
PyErr_Format(PyExc_ValueError,
"the precomputed number of rows in the output (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
"the precomputed number of rows in the output (
%%
ld) at run time"
" is different than at build time (
%%
ld) for the ConvOp.",
(long)dim_zz[0], (long)
%(self_outshp0)
s);
(long)dim_zz[0], (long)
%(self_outshp0)
s);
%(fail)
s;
%(fail)
s;
}
}
if(dim_zz[1] !=
%(self_outshp1)
s!=0){
if(dim_zz[1] !=
%(self_outshp1)
s!=0){
PyErr_Format(PyExc_ValueError,
PyErr_Format(PyExc_ValueError,
"the precomputed number of columns in the output (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
"the precomputed number of columns in the output (
%%
ld) at run"
" time is different than at build time (
%%
ld) for the ConvOp.",
(long)dim_zz[1], (long)
%(self_outshp1)
s);
(long)dim_zz[1], (long)
%(self_outshp1)
s);
%(fail)
s;
%(fail)
s;
}
}
...
@@ -1036,38 +1118,41 @@ if(dim_zz[1] != %(self_outshp1)s!=0){
...
@@ -1036,38 +1118,41 @@ if(dim_zz[1] != %(self_outshp1)s!=0){
// Check the size of the filter (sometimes constant in the graph)
// Check the size of the filter (sometimes constant in the graph)
if(kerns_dim[1]
%% %(self_imshp0)
s!=0){
if(kerns_dim[1]
%% %(self_imshp0)
s!=0){
PyErr_Format(PyExc_ValueError,
PyErr_Format(PyExc_ValueError,
"the filter stack size (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
"the filter stack size (
%%
ld) at run time is different than at"
" build time (
%%
ld) for the ConvOp.",
(long)kerns_dim[1], (long)
%(self_imshp0)
s);
(long)kerns_dim[1], (long)
%(self_imshp0)
s);
%(fail)
s;
%(fail)
s;
}
}
if(kerns_dim[2]
%% %(self_kshp0)
s!=0){
if(kerns_dim[2]
%% %(self_kshp0)
s!=0){
PyErr_Format(PyExc_ValueError,
PyErr_Format(PyExc_ValueError,
"the number of rows in the filter (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
"the number of rows in the filter (
%%
ld) at run time is different"
" than at build time (
%%
ld) for the ConvOp.",
(long)kerns_dim[2], (long)
%(self_kshp0)
s);
(long)kerns_dim[2], (long)
%(self_kshp0)
s);
%(fail)
s;
%(fail)
s;
}
}
if(kerns_dim[3]
%% %(self_kshp1)
s!=0){
if(kerns_dim[3]
%% %(self_kshp1)
s!=0){
PyErr_Format(PyExc_ValueError,
PyErr_Format(PyExc_ValueError,
"the number of columns in the filter (
%%
ld) at run time is different than at build time (
%%
ld) for the ConvOp.",
"the number of columns in the filter (
%%
ld) at run time is"
" different than at build time (
%%
ld) for the ConvOp.",
(long)kerns_dim[3], (long)
%(self_kshp1)
s);
(long)kerns_dim[3], (long)
%(self_kshp1)
s);
%(fail)
s;
%(fail)
s;
}
}
"""
%
(
locals
())
"""
%
(
locals
())
else
:
else
:
d
[
"self_bsize"
]
=
"
%(img2d)
s->dimensions[0]"
%
d
d
[
"self_bsize"
]
=
"
%(img2d)
s->dimensions[0]"
%
d
d
[
"self_nkern"
]
=
"
%(filtersflipped)
s->dimensions[0]"
%
d
d
[
"self_nkern"
]
=
"
%(filtersflipped)
s->dimensions[0]"
%
d
d
[
"self_outshp0"
]
=
"-1"
d
[
"self_outshp0"
]
=
"-1"
d
[
"self_outshp1"
]
=
"-1"
d
[
"self_outshp1"
]
=
"-1"
d
[
"self_imshp0"
]
=
"
%(img2d)
s->dimensions[1]"
%
d
d
[
"self_imshp0"
]
=
"
%(img2d)
s->dimensions[1]"
%
d
d
[
"self_imshp1"
]
=
"
%(img2d)
s->dimensions[2]"
%
d
d
[
"self_imshp1"
]
=
"
%(img2d)
s->dimensions[2]"
%
d
d
[
"self_imshp2"
]
=
"
%(img2d)
s->dimensions[3]"
%
d
d
[
"self_imshp2"
]
=
"
%(img2d)
s->dimensions[3]"
%
d
d
[
"self_kshp0"
]
=
"
%(filtersflipped)
s->dimensions[2]"
%
d
d
[
"self_kshp0"
]
=
"
%(filtersflipped)
s->dimensions[2]"
%
d
d
[
"self_kshp1"
]
=
"
%(filtersflipped)
s->dimensions[3]"
%
d
d
[
"self_kshp1"
]
=
"
%(filtersflipped)
s->dimensions[3]"
%
d
d
[
"affectation"
]
=
"+="
d
[
"affectation"
]
=
"+="
d
[
"all_shape"
]
=
"0"
d
[
"all_shape"
]
=
"0"
d
[
"dim_zz_const"
]
=
""
d
[
"dim_zz_const"
]
=
""
d
[
"dim_zz_affect"
]
=
"""
d
[
"dim_zz_affect"
]
=
"""
if (mode == FULL) {
if (mode == FULL) {
dim_zz[0] = (int)ceil((dim_im[0]+dim_ker0-1)/float(
%(self_dx)
s));
dim_zz[0] = (int)ceil((dim_im[0]+dim_ker0-1)/float(
%(self_dx)
s));
dim_zz[1] = (int)ceil((dim_im[1]+dim_ker1-1)/float(
%(self_dy)
s));
dim_zz[1] = (int)ceil((dim_im[1]+dim_ker1-1)/float(
%(self_dy)
s));
...
@@ -1075,8 +1160,8 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){
...
@@ -1075,8 +1160,8 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){
dim_zz[0] = (int)ceil((dim_im[0]-dim_ker0+1)/float(
%(self_dx)
s));
dim_zz[0] = (int)ceil((dim_im[0]-dim_ker0+1)/float(
%(self_dx)
s));
dim_zz[1] = (int)ceil((dim_im[1]-dim_ker1+1)/float(
%(self_dy)
s));
dim_zz[1] = (int)ceil((dim_im[1]-dim_ker1+1)/float(
%(self_dy)
s));
}
}
"""
%
d
"""
%
d
d
[
"assert_size"
]
=
""
d
[
"assert_size"
]
=
""
if
self
.
kshp_logical_top_aligned
:
if
self
.
kshp_logical_top_aligned
:
d
[
"self_kshp_logical_offset_r"
]
=
0
d
[
"self_kshp_logical_offset_r"
]
=
0
...
@@ -1084,28 +1169,39 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){
...
@@ -1084,28 +1169,39 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){
elif
all_shape
:
elif
all_shape
:
rstride
=
d
[
"self_kshp_logical_stride_r"
]
rstride
=
d
[
"self_kshp_logical_stride_r"
]
cstride
=
d
[
"self_kshp_logical_stride_c"
]
cstride
=
d
[
"self_kshp_logical_stride_c"
]
d
[
"self_kshp_logical_offset_r"
]
=
(
self
.
kshp_logical
[
0
]
-
(
self
.
kshp
[
0
]
*
rstride
)
-
1
+
rstride
)
%
rstride
d
[
"self_kshp_logical_offset_r"
]
=
(
self
.
kshp_logical
[
0
]
-
d
[
"self_kshp_logical_offset_c"
]
=
(
self
.
kshp_logical
[
1
]
-
(
self
.
kshp
[
1
]
*
cstride
)
-
1
+
cstride
)
%
cstride
(
self
.
kshp
[
0
]
*
rstride
)
-
1
+
rstride
)
%
rstride
d
[
"self_kshp_logical_offset_c"
]
=
(
self
.
kshp_logical
[
1
]
-
(
self
.
kshp
[
1
]
*
cstride
)
-
1
+
cstride
)
%
cstride
del
rstride
,
cstride
del
rstride
,
cstride
if
node
.
inputs
[
0
]
.
type
.
dtype
==
"float32"
:
d
[
"type"
]
=
"float"
if
node
.
inputs
[
0
]
.
type
.
dtype
==
"float32"
:
elif
node
.
inputs
[
0
]
.
type
.
dtype
==
"float64"
:
d
[
"type"
]
=
"double"
d
[
"type"
]
=
"float"
else
:
raise
Exception
(
"Type
%
s not implemented"
%
node
.
inputs
[
0
]
.
type
.
dtype
)
elif
node
.
inputs
[
0
]
.
type
.
dtype
==
"float64"
:
d
[
"gemm"
]
=
'dgemm_'
d
[
"type"
]
=
"double"
if
not
d
[
"type"
]
==
"double"
:
d
[
"gemm"
]
=
'sgemm_'
else
:
raise
Exception
(
"Type
%
s not implemented"
%
node
.
inputs
[
0
]
.
type
.
dtype
)
d
[
"gemm"
]
=
'dgemm_'
if
not
d
[
"type"
]
==
"double"
:
d
[
"gemm"
]
=
'sgemm_'
if
self
.
imshp
!=
self
.
imshp_logical
or
self
.
kshp
!=
self
.
kshp_logical
:
if
self
.
imshp
!=
self
.
imshp_logical
or
self
.
kshp
!=
self
.
kshp_logical
:
if
self
.
verbose
:
if
self
.
verbose
:
_logger
.
debug
(
"return imshp!=imshp_logical or self.kshp != self.kshp_logical shape version"
)
_logger
.
debug
(
"return imshp!=imshp_logical or"
" self.kshp != self.kshp_logical shape version"
)
return
_conv_op_code_a
%
d
return
_conv_op_code_a
%
d
if
self
.
unroll_patch
:
if
self
.
unroll_patch
:
if
self
.
verbose
:
if
self
.
verbose
:
_logger
.
debug
(
"return unroll patch version. all_shape=
%
s"
,
all_shape
)
_logger
.
debug
(
"return unroll patch version. all_shape=
%
s"
,
return
_conv_op_code_unroll_patch
%
d
all_shape
)
if
self
.
unroll_batch
>
0
or
self
.
unroll_kern
>
0
:
return
_conv_op_code_unroll_patch
%
d
assert
self
.
unroll_batch
>
0
if
self
.
unroll_batch
>
0
or
self
.
unroll_kern
>
0
:
assert
self
.
unroll_kern
>
0
assert
self
.
unroll_batch
>
0
assert
self
.
unroll_kern
>
0
if
self
.
verbose
:
if
self
.
verbose
:
_logger
.
debug
(
"return unrolled batch (
%
s) and kern code (
%
s)"
,
_logger
.
debug
(
"return unrolled batch (
%
s) and kern code (
%
s)"
,
str
(
self
.
unroll_batch
),
str
(
self
.
unroll_kern
))
str
(
self
.
unroll_batch
),
str
(
self
.
unroll_kern
))
...
@@ -1113,7 +1209,7 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){
...
@@ -1113,7 +1209,7 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){
self
.
unroll_kern
)
self
.
unroll_kern
)
#TODO: should we choose the unroll size automatically with the bigger divisor under 5?
#TODO: should we choose the unroll size automatically with the bigger divisor under 5?
if
self
.
out_mode
==
'valid'
and
self
.
dx
==
0
and
self
.
dy
==
0
:
if
self
.
out_mode
==
'valid'
and
self
.
dx
==
0
and
self
.
dy
==
0
:
if
self
.
verbose
:
if
self
.
verbose
:
_logger
.
debug
(
"return gemm version"
)
_logger
.
debug
(
"return gemm version"
)
return
_conv_op_code_valid_gemm
%
d
return
_conv_op_code_valid_gemm
%
d
...
@@ -1126,7 +1222,8 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){
...
@@ -1126,7 +1222,8 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){
_conv_op_code_a
=
"""
_conv_op_code_a
=
"""
const int mode=
%(mode)
s;
const int mode=
%(mode)
s;
int typenum=0, typenum_f=0;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
PyArrayObject *ain1=NULL, *ain2=NULL;
PyArrayObject *filtersflipped_arr=NULL, *img2d_arr=NULL;
const
%(type)
s fill_value = 0;
const
%(type)
s fill_value = 0;
int type_im=PyArray_TYPE(
%(img2d)
s);
int type_im=PyArray_TYPE(
%(img2d)
s);
...
@@ -1216,12 +1313,17 @@ if ((filtersflipped_arr->strides[3] != (npy_intp)sizeof(%(type)s))
...
@@ -1216,12 +1313,17 @@ if ((filtersflipped_arr->strides[3] != (npy_intp)sizeof(%(type)s))
filtersflipped_arr = (PyArrayObject*)filtersflipped;
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported");
%(fail)
s;
PyErr_SetString(PyExc_ValueError,
"invalid mode, only full and valid are supported");
%(fail)
s;
}
}
typenum = PyArray_ObjectType((PyObject*)
%(img2d)
s, 0);
typenum = PyArray_ObjectType((PyObject*)
%(img2d)
s, 0);
typenum_f = PyArray_ObjectType((PyObject*)
%(filtersflipped)
s, 0);
typenum_f = PyArray_ObjectType((PyObject*)
%(filtersflipped)
s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type");
%(fail)
s;}
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type");
%(fail)
s;}
if (typenum != typenum_f) {PyErr_SetString(PyExc_ValueError, "Input types must match");
%(fail)
s;}
if (typenum != typenum_f) {
PyErr_SetString(PyExc_ValueError, "Input types must match");
%(fail)
s;
}
if (!img2d)
%(fail)
s;
if (!img2d)
%(fail)
s;
if (!filtersflipped)
%(fail)
s;
if (!filtersflipped)
%(fail)
s;
...
@@ -1249,10 +1351,19 @@ Os[0]=%(self_outshp0)s;
...
@@ -1249,10 +1351,19 @@ Os[0]=%(self_outshp0)s;
Os[1]=
%(self_outshp1)
s;
Os[1]=
%(self_outshp1)
s;
//assertions
//assertions
if (
%(z)
s->strides[0] !=
%(z)
s->dimensions[1] *
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] * (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[0] !=
%(z)
s->dimensions[1] *
if (
%(z)
s->strides[1] !=
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] * (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
%(z)
s->dimensions[2] *
if (
%(z)
s->strides[2] !=
%(z)
s->dimensions[3] * (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
%(z)
s->dimensions[3] *
if (
%(z)
s->strides[3] != (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
(npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[1] !=
%(z)
s->dimensions[2] *
%(z)
s->dimensions[3] *
(npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[2] !=
%(z)
s->dimensions[3] * (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (
%(z)
s->strides[3] != (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
for(int b=0;b<
%(self_bsize)
s;b++){
for(int b=0;b<
%(self_bsize)
s;b++){
for(int n_kern=0;n_kern<
%(self_nkern)
s;n_kern++){
for(int n_kern=0;n_kern<
%(self_nkern)
s;n_kern++){
...
@@ -1267,34 +1378,41 @@ for(int b=0;b< %(self_bsize)s;b++){
...
@@ -1267,34 +1378,41 @@ for(int b=0;b< %(self_bsize)s;b++){
for (int iter_m=0; iter_m < Os[0]; iter_m++) {
for (int iter_m=0; iter_m < Os[0]; iter_m++) {
/// Reposition index into input image based on requested output size
// Reposition index into input image based on requested output size
int pos_m = iter_m*
%(self_dx)
s; //row position in logical output image
//row position in logical output image
int new_m; //row anchor in logical input image (we will loop upward from here)
int pos_m = iter_m*
%(self_dx)
s;
//row anchor in logical input image (we will loop upward from here)
int new_m;
if (mode == FULL) new_m = pos_m ;
if (mode == FULL) new_m = pos_m ;
else new_m = (pos_m+dim_ker_log[0]-1);
else new_m = (pos_m+dim_ker_log[0]-1);
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
int pos_n=iter_n*
%(self_dy)
s; // current col position in logical output image
// current col position in logical output image
int pos_n=iter_n*
%(self_dy)
s;
%(type)
s sum=0;
%(type)
s sum=0;
// Sum over kernel, if index into image is out of bounds
// Sum over kernel, if index into image is out of bounds
// fill with the value
// fill with the value
for (int j_log=0; j_log <
%(self_kshp_logical_r)
s; j_log++) { // loop over logical rows in kernel
// loop over logical rows in kernel
for (int j_log=0; j_log <
%(self_kshp_logical_r)
s; j_log++) {
int ind0_log = (new_m-j_log); // ind0_log: row position in logical input image
// ind0_log: row position in logical input image
int ind0_log = (new_m-j_log);
if ((j_log <
%(self_kshp_logical_offset_r)
s) || (j_log -
%(self_kshp_logical_offset_r)
s) MOD
%(self_kshp_logical_stride_r)
s)
if ((j_log <
%(self_kshp_logical_offset_r)
s) ||
(j_log -
%(self_kshp_logical_offset_r)
s) MOD
%(self_kshp_logical_stride_r)
s)
continue;
continue;
if (ind0_log MOD
%(self_imshp_logical_stride_r)
s)
if (ind0_log MOD
%(self_imshp_logical_stride_r)
s)
continue;
continue;
int j_phys = ((j_log-
%(self_kshp_logical_offset_r)
s) /
%(self_kshp_logical_stride_r)
s);
int j_phys = ((j_log-
%(self_kshp_logical_offset_r)
s) /
%(self_kshp_logical_stride_r)
s);
int ind0_phys = (ind0_log /
%(self_imshp_logical_stride_r)
s);
int ind0_phys = (ind0_log /
%(self_imshp_logical_stride_r)
s);
//std::cerr <<"j_log" << j_log << " j_phys " << j_phys << " " << ind0_phys << "
\\
n";
//std::cerr <<"j_log" << j_log << " j_phys " << j_phys << " " << ind0_phys << "
\\
n";
if(mode==FULL){
if(mode==FULL){
const
%(type)
s * idx_hvals=&hvals[j_phys*dim_ker_phys[1]]; //This is a pointer to the current row of the kernel
//This is a pointer to the current row of the kernel
const
%(type)
s * idx_hvals=&hvals[j_phys*dim_ker_phys[1]];
if(ind0_log < 0 || ind0_log >= dim_im_log[0]){
if(ind0_log < 0 || ind0_log >= dim_im_log[0]){
// the current row of the kernel is off the image
// the current row of the kernel is off the image
}else{
}else{
...
@@ -1304,30 +1422,40 @@ for(int b=0;b< %(self_bsize)s;b++){
...
@@ -1304,30 +1422,40 @@ for(int b=0;b< %(self_bsize)s;b++){
for (int ind1_log=pos_n-k; k<max_k; k++,ind1_log--) {
for (int ind1_log=pos_n-k; k<max_k; k++,ind1_log--) {
if (1)
if (1)
{
{
if ((k <
%(self_kshp_logical_offset_c)
s) || (k -
%(self_kshp_logical_offset_c)
s) MOD
%(self_kshp_logical_stride_c)
s)
if ((k <
%(self_kshp_logical_offset_c)
s) ||
(k -
%(self_kshp_logical_offset_c)
s) MOD
%(self_kshp_logical_stride_c)
s)
continue;
continue;
if (ind1_log MOD
%(self_imshp_logical_stride_c)
s)
if (ind1_log MOD
%(self_imshp_logical_stride_c)
s)
continue;
continue;
}
}
sum+= idx_hvals[(k-
%(self_kshp_logical_offset_c)
s) /
%(self_kshp_logical_stride_c)
s] * idx_in[ind1_log /
%(self_imshp_logical_stride_c)
s];
sum += idx_hvals[(k-
%(self_kshp_logical_offset_c)
s) /
%(self_kshp_logical_stride_c)
s] *
idx_in[ind1_log /
%(self_imshp_logical_stride_c)
s];
}
}
}
}
}else{
}else{ // mode==VALID
const
%(type)
s* idx_in=&in[ind0_phys*dim_im_phys[1]]; //JB: should be dim_im[1] right? (was dim_im[0])
//JB: should be dim_im[1] right? (was dim_im[0])
const
%(type)
s* idx_in=&in[ind0_phys*dim_im_phys[1]];
const
%(type)
s* idx_hvals=&hvals[j_phys*dim_ker_phys[1]];
const
%(type)
s* idx_hvals=&hvals[j_phys*dim_ker_phys[1]];
int new_n = (pos_n+dim_ker_log[1]-1);
int new_n = (pos_n+dim_ker_log[1]-1);
if (
%(self_imshp_logical_stride_c)
s != 1) // a general loop
if (
%(self_imshp_logical_stride_c)
s != 1) // a general loop
{
{
for (int k=0,last=new_n; k < dim_ker_log[1]; k++,last--) {
for (int k=0,last=new_n; k < dim_ker_log[1]; k++,last--) {
if ((k <
%(self_kshp_logical_offset_c)
s) || (k -
%(self_kshp_logical_offset_c)
s) MOD
%(self_kshp_logical_stride_c)
s)
if ((k <
%(self_kshp_logical_offset_c)
s) ||
(k -
%(self_kshp_logical_offset_c)
s) MOD
%(self_kshp_logical_stride_c)
s)
continue;
continue;
else if (last MOD
%(self_imshp_logical_stride_c)
s)
else if (last MOD
%(self_imshp_logical_stride_c)
s)
continue;
continue;
else
else
{
{
sum+=idx_hvals[(k-
%(self_kshp_logical_offset_c)
s) /
%(self_kshp_logical_stride_c)
s]*idx_in[last/
%(self_imshp_logical_stride_c)
s];
sum+=idx_hvals[(k-
%(self_kshp_logical_offset_c)
s) /
%(self_kshp_logical_stride_c)
s] *
idx_in[last/
%(self_imshp_logical_stride_c)
s];
}
}
}
}
}
}
...
@@ -1335,7 +1463,8 @@ for(int b=0;b< %(self_bsize)s;b++){
...
@@ -1335,7 +1463,8 @@ for(int b=0;b< %(self_bsize)s;b++){
{
{
int offset =
%(self_kshp_logical_offset_c)
s;
int offset =
%(self_kshp_logical_offset_c)
s;
int k_phys=0;
int k_phys=0;
for (int k_log=offset,last=new_n-offset; k_log < dim_ker_log[1]; ) {
for (int k_log=offset,last=new_n-offset;
k_log < dim_ker_log[1]; ) {
sum += idx_hvals[k_phys]*idx_in[last];
sum += idx_hvals[k_phys]*idx_in[last];
++k_phys;
++k_phys;
last -=
%(self_kshp_logical_stride_c)
s;
last -=
%(self_kshp_logical_stride_c)
s;
...
@@ -1343,10 +1472,10 @@ for(int b=0;b< %(self_bsize)s;b++){
...
@@ -1343,10 +1472,10 @@ for(int b=0;b< %(self_bsize)s;b++){
}
}
}
}
}
}
}//for j
}//for j
_log
out[iter_m*dim_zz[1]+iter_n]
%(affectation)
s sum;
out[iter_m*dim_zz[1]+iter_n]
%(affectation)
s sum;
}//for n
}//for
iter_
n
}//for m
}//for
iter_
m
}//for stack_size
}//for stack_size
if (0 && (mode==FULL)){
if (0 && (mode==FULL)){
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i)
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i)
...
@@ -1585,33 +1714,36 @@ free(kbuf);
...
@@ -1585,33 +1714,36 @@ free(kbuf);
Py_XDECREF(img2d);
Py_XDECREF(img2d);
"""
"""
def
gen_conv_code_unroll_batch_kern
(
d
,
unroll_bsize
=
1
,
unroll_ksize
=
1
):
def
gen_conv_code_unroll_batch_kern
(
d
,
unroll_bsize
=
1
,
unroll_ksize
=
1
):
""" c_code for ConvOp that unroll the batch size loop
""" c_code for ConvOp that unroll the batch size loop
"""
"""
assert
unroll_bsize
>
0
and
unroll_ksize
>
0
assert
unroll_bsize
>
0
and
unroll_ksize
>
0
if
d
.
has_key
(
"unroll_bsize"
)
or
d
.
has_key
(
"unroll_ksize"
)
or
d
.
has_key
(
"unroll_iter"
)
or
d
.
has_key
(
"unroll_biter"
)
or
d
.
has_key
(
"unroll_kiter"
)
:
if
"unroll_bsize"
in
d
or
"unroll_ksize"
in
d
or
"unroll_iter"
in
d
or
"unroll_biter"
in
d
or
"unroll_kiter"
in
d
:
raise
Exception
(
"We can't use this dictionnary as we will overwrite some of its containt"
)
raise
Exception
(
"We can't use this dictionnary as we will overwrite some of its containt"
)
d
=
d
.
copy
()
d
=
d
.
copy
()
d
[
"unroll_bsize"
]
=
unroll_bsize
d
[
"unroll_bsize"
]
=
unroll_bsize
d
[
"unroll_ksize"
]
=
unroll_ksize
d
[
"unroll_ksize"
]
=
unroll_ksize
def
my_dup
(
st
,
size
):
s
=
""
def
my_dup
(
st
,
size
):
s
=
""
for
i
in
xrange
(
size
):
for
i
in
xrange
(
size
):
d
[
"unroll_iter"
]
=
i
d
[
"unroll_iter"
]
=
i
s
+=
st
%
d
s
+=
st
%
d
return
s
+
"
\n
"
return
s
+
"
\n
"
def
my_dup2
(
st
):
def
my_dup2
(
st
):
s
=
""
s
=
""
iter
=
0
iter
=
0
for
i
in
xrange
(
unroll_bsize
):
for
i
in
xrange
(
unroll_bsize
):
d
[
"unroll_biter"
]
=
i
d
[
"unroll_biter"
]
=
i
for
j
in
xrange
(
unroll_ksize
):
for
j
in
xrange
(
unroll_ksize
):
d
[
"unroll_kiter"
]
=
j
d
[
"unroll_kiter"
]
=
j
d
[
"unroll_iter"
]
=
iter
d
[
"unroll_iter"
]
=
iter
iter
+=
1
iter
+=
1
s
+=
st
%
d
s
+=
st
%
d
return
s
+
"
\n
"
return
s
+
"
\n
"
ret
=
"""
ret
=
"""
const int mode=
%(mode)
s;
const int mode=
%(mode)
s;
int typenum=0, typenum_f=0;
int typenum=0, typenum_f=0;
...
@@ -1765,7 +1897,8 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
...
@@ -1765,7 +1897,8 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
int pos_n=iter_n*
%(self_dy)
s;
int pos_n=iter_n*
%(self_dy)
s;
"""
%
d
"""
%
d
ret
+=
my_dup
(
"
%(type)
s sum
%(unroll_iter)
s=0;"
,
unroll_bsize
*
unroll_ksize
)
ret
+=
my_dup
(
"
%(type)
s sum
%(unroll_iter)
s=0;"
,
unroll_bsize
*
unroll_ksize
)
ret
+=
"""
ret
+=
"""
// Sum over kernel, if index into image is out of bounds
// Sum over kernel, if index into image is out of bounds
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论