Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
4736c9b3
提交
4736c9b3
authored
10月 26, 2015
作者:
Pascal Lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2665 from ballasn/conv2d_interface
New conv2d interface (work in progress)
上级
8d3a67b7
10f87868
显示空白字符变更
内嵌
并排
正在显示
5 个修改的文件
包含
1167 行增加
和
3 行删除
+1167
-3
dnn.py
theano/sandbox/cuda/dnn.py
+105
-0
opt.py
theano/sandbox/cuda/opt.py
+182
-0
test_abstractconv.py
theano/sandbox/cuda/tests/test_abstractconv.py
+334
-0
Conv3D.py
theano/tensor/nnet/Conv3D.py
+3
-3
abstract_conv2d.py
theano/tensor/nnet/abstract_conv2d.py
+543
-0
没有找到文件。
theano/sandbox/cuda/dnn.py
浏览文件 @
4736c9b3
...
@@ -13,6 +13,9 @@ from theano.compile.ops import shape_i
...
@@ -13,6 +13,9 @@ from theano.compile.ops import shape_i
from
theano.tensor.nnet
import
SoftmaxGrad
from
theano.tensor.nnet
import
SoftmaxGrad
from
theano.tensor.signal.downsample
import
(
from
theano.tensor.signal.downsample
import
(
DownsampleFactorMax
,
MaxPoolGrad
,
AveragePoolGrad
)
DownsampleFactorMax
,
MaxPoolGrad
,
AveragePoolGrad
)
from
theano.tensor.opt
import
register_specialize_device
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda
import
GpuOp
from
theano.sandbox.cuda
import
GpuOp
from
theano.sandbox.cuda.basic_ops
import
(
as_cuda_ndarray_variable
,
from
theano.sandbox.cuda.basic_ops
import
(
as_cuda_ndarray_variable
,
host_from_gpu
,
host_from_gpu
,
...
@@ -27,6 +30,12 @@ from theano.sandbox.cuda import gpu_seqopt, register_opt
...
@@ -27,6 +30,12 @@ from theano.sandbox.cuda import gpu_seqopt, register_opt
from
theano.sandbox.cuda.nvcc_compiler
import
NVCC_compiler
from
theano.sandbox.cuda.nvcc_compiler
import
NVCC_compiler
from
theano.tensor.nnet.abstract_conv2d
import
(
AbstractConv2d
,
AbstractConv2d_gradWeights
,
AbstractConv2d_gradInputs
)
from
theano.tensor.opt
import
register_specialize_device
def
dnn_available
():
def
dnn_available
():
if
dnn_available
.
avail
is
None
:
if
dnn_available
.
avail
is
None
:
...
@@ -1276,6 +1285,58 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
...
@@ -1276,6 +1285,58 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
return
GpuDnnConv3d
(
algo
=
algo
)(
img
,
kerns
,
out
,
desc
)
return
GpuDnnConv3d
(
algo
=
algo
)(
img
,
kerns
,
out
,
desc
)
def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
                   subsample=(1, 1), conv_mode='conv'):
    """
    GPU convolution gradient with respect to weight using cuDNN from NVIDIA.

    The memory layout to use is 'bc01', that is 'batch', 'channel',
    'first dim', 'second dim' in that order.

    :param img: images of the forward convolution; passed through
        ``gpu_contiguous`` before use.
    :param topgrad: gradient with respect to the convolution output;
        passed through ``gpu_contiguous`` before use.
    :param kerns_shp: shape of the kernels. It is converted with
        ``as_tensor_variable`` and used both to build the convolution
        descriptor and to allocate the result buffer.
    :param border_mode: forwarded unchanged to ``GpuDnnConvDesc``.
    :param subsample: forwarded unchanged to ``GpuDnnConvDesc``.
    :param conv_mode: forwarded unchanged to ``GpuDnnConvDesc``.

    :returns: the variable produced by ``GpuDnnConvGradW``.

    :warning: The cuDNN library only works with GPU that have a compute
      capability of 3.0 or higher.  This means that older GPU will not
      work with this Op.
    """
    img = gpu_contiguous(img)
    topgrad = gpu_contiguous(topgrad)
    kerns_shp = theano.tensor.as_tensor_variable(kerns_shp)

    # Build the descriptor op first, then apply it to the two shapes.
    descriptor_op = GpuDnnConvDesc(border_mode=border_mode,
                                   subsample=subsample,
                                   conv_mode=conv_mode)
    conv_desc = descriptor_op(img.shape, kerns_shp)

    # Uninitialised buffer with the shape of the kernels; the gradient op
    # receives it as its output argument.
    grad_buffer = gpu_alloc_empty(*kerns_shp)
    return GpuDnnConvGradW()(img, topgrad, grad_buffer, conv_desc)
def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
                  subsample=(1, 1), conv_mode='conv'):
    """
    GPU convolution gradient with respect to input using cuDNN from NVIDIA.

    The memory layout to use is 'bc01', that is 'batch', 'channel',
    'first dim', 'second dim' in that order.

    :param kerns: kernels of the forward convolution; passed through
        ``gpu_contiguous`` before use.
    :param topgrad: gradient with respect to the convolution output;
        passed through ``gpu_contiguous`` before use.
    :param img_shp: shape of the images. It is converted with
        ``as_tensor_variable`` and used both to build the convolution
        descriptor and to allocate the result buffer.
    :param border_mode: forwarded unchanged to ``GpuDnnConvDesc``.
    :param subsample: forwarded unchanged to ``GpuDnnConvDesc``.
    :param conv_mode: forwarded unchanged to ``GpuDnnConvDesc``.

    :returns: the variable produced by ``GpuDnnConvGradI``.

    :warning: The cuDNN library only works with GPU that have a compute
      capability of 3.0 or higher.  This means that older GPU will not
      work with this Op.
    """
    kerns = gpu_contiguous(kerns)
    topgrad = gpu_contiguous(topgrad)
    img_shp = theano.tensor.as_tensor_variable(img_shp)

    # Build the descriptor op first, then apply it to the two shapes.
    descriptor_op = GpuDnnConvDesc(border_mode=border_mode,
                                   subsample=subsample,
                                   conv_mode=conv_mode)
    conv_desc = descriptor_op(img_shp, kerns.shape)

    # Uninitialised buffer with the shape of the images; the gradient op
    # receives it as its output argument.
    grad_buffer = gpu_alloc_empty(*img_shp)
    return GpuDnnConvGradI()(kerns, topgrad, grad_buffer, conv_desc)
class
GpuDnnPoolDesc
(
GpuOp
):
class
GpuDnnPoolDesc
(
GpuOp
):
"""
"""
This Op builds a pooling descriptor for use in the other pooling operations.
This Op builds a pooling descriptor for use in the other pooling operations.
...
@@ -2383,3 +2444,47 @@ if True:
...
@@ -2383,3 +2444,47 @@ if True:
gpu_contiguous
(
ins
[
1
])
gpu_contiguous
(
ins
[
1
])
)
)
return
[
out
.
dimshuffle
(
0
,
1
)]
return
[
out
.
dimshuffle
(
0
,
1
)]
### AbstractConv Optimizations
@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights,
                  AbstractConv2d_gradInputs])
def local_abstractconv_cudnn(node):
    """
    Replace an abstract 2D convolution node by its cuDNN implementation.

    The replacement is only proposed when the node's op is one of
    ``AbstractConv2d``, ``AbstractConv2d_gradWeights`` or
    ``AbstractConv2d_gradInputs``, when its first two inputs are both of
    type ``CudaNdarrayType`` and when ``dnn_available()`` is true.

    :param node: the Apply node under consideration.
    :returns: a one-element list holding the replacement variable, or
        ``None`` when the optimization does not apply.
    """
    # An op is an instance of at most one of these classes, so the three
    # classes must be tested together: rejecting the node as soon as *one*
    # isinstance check fails would reject every node.
    if not isinstance(node.op, (AbstractConv2d,
                                AbstractConv2d_gradWeights,
                                AbstractConv2d_gradInputs)):
        return None

    inp1 = node.inputs[0]
    inp2 = node.inputs[1]
    if (not isinstance(inp1.type, CudaNdarrayType) or
            not isinstance(inp2.type, CudaNdarrayType)):
        return None

    if not dnn_available():
        return None

    # The attribute is named ``filter_flip`` (singular), as used by the
    # other abstract-convolution optimizers and by the op constructors.
    if node.op.filter_flip:
        conv_mode = 'conv'
    else:
        conv_mode = 'cross'

    if isinstance(node.op, AbstractConv2d):
        rval = dnn_conv(inp1, inp2,
                        border_mode=node.op.border_mode,
                        subsample=node.op.subsample,
                        direction_hint='forward',
                        conv_mode=conv_mode)
        return [rval]

    if isinstance(node.op, AbstractConv2d_gradWeights):
        # The third input only carries the last two dimensions; the first
        # two are taken from the shapes of the two tensor inputs.
        shape = (inp2.shape[1], inp1.shape[1],
                 node.inputs[2][0], node.inputs[2][1])
        rval = dnn_gradweight(inp1, inp2, shape,
                              border_mode=node.op.border_mode,
                              subsample=node.op.subsample,
                              conv_mode=conv_mode)
        return [rval]

    if isinstance(node.op, AbstractConv2d_gradInputs):
        # The third input only carries the last two dimensions; the first
        # two are taken from the shapes of the two tensor inputs.
        shape = (inp2.shape[0], inp1.shape[1],
                 node.inputs[2][0], node.inputs[2][1])
        rval = dnn_gradinput(inp1, inp2, shape,
                             border_mode=node.op.border_mode,
                             subsample=node.op.subsample,
                             conv_mode=conv_mode)
        return [rval]
theano/sandbox/cuda/opt.py
浏览文件 @
4736c9b3
...
@@ -75,6 +75,12 @@ from theano.tensor import slinalg
...
@@ -75,6 +75,12 @@ from theano.tensor import slinalg
from
theano.tensor.nnet.Conv3D
import
Conv3D
from
theano.tensor.nnet.Conv3D
import
Conv3D
from
theano.tests.breakpoint
import
PdbBreakpoint
from
theano.tests.breakpoint
import
PdbBreakpoint
from
theano.tensor.nnet.abstract_conv2d
import
(
BaseAbstractConv2d
,
AbstractConv2d
,
AbstractConv2d_gradWeights
,
AbstractConv2d_gradInputs
)
from
theano.tensor.opt
import
register_specialize_device
try
:
try
:
# We need to be able to import this file even if cuda isn't avail.
# We need to be able to import this file even if cuda isn't avail.
from
theano.sandbox.cuda
import
device_properties
from
theano.sandbox.cuda
import
device_properties
...
@@ -2622,3 +2628,179 @@ optdb.register('local_inplace_gpu_sparse_block_outer',
...
@@ -2622,3 +2628,179 @@ optdb.register('local_inplace_gpu_sparse_block_outer',
import
theano.sandbox.cuda.extra_ops
import
theano.sandbox.cuda.extra_ops
### Move to Gpu optimization
@local_optimizer([gpu_from_host,
                  AbstractConv2d,
                  AbstractConv2d_gradWeights,
                  AbstractConv2d_gradInputs])
def local_conv2d_gpu_conv(node):
    """
    gpu_from_host(AbstractConv) -> AbstractConv(gpu_from_host)

    AbstractConv(host_from_gpu) -> host_from_gpu(AbstractConv)

    :param node: the Apply node under consideration.
    :returns: a one-element list holding the replacement variable, or
        ``None`` when nothing has to be changed.
    """
    def rebuild_on_gpu(conv_op, conv_inputs):
        # Re-apply `conv_op` with its first two inputs converted through
        # as_cuda_ndarray_variable; any further input is passed unchanged.
        gpu_inputs = list(conv_inputs)
        gpu_inputs[0] = as_cuda_ndarray_variable(gpu_inputs[0])
        gpu_inputs[1] = as_cuda_ndarray_variable(gpu_inputs[1])
        moved = conv_op(*gpu_inputs)
        # out is on the GPU because both inputs are.
        moved = theano.tensor.patternbroadcast(
            moved, node.outputs[0].broadcastable)
        moved.values_eq_approx = values_eq_approx_high_tol
        return moved

    if isinstance(node.op, GpuFromHost):
        producer = node.inputs[0].owner
        if producer and isinstance(producer.op, BaseAbstractConv2d):
            return [rebuild_on_gpu(producer.op, producer.inputs)]

    if isinstance(node.op, BaseAbstractConv2d):
        # conv(host_from_gpu) -> host_from_gpu(gpu_conv)
        first = node.inputs[0]
        second = node.inputs[1]
        first_is_cuda = isinstance(first.type, CudaNdarrayType)
        second_is_cuda = isinstance(second.type, CudaNdarrayType)
        if first_is_cuda and second_is_cuda:
            # Both inputs are already directly on the GPU, nothing to do
            return

        # An input also counts as being on the GPU when it was just
        # transferred back to the host.
        first_on_gpu = first_is_cuda or (
            first.owner and isinstance(first.owner.op, HostFromGpu))
        second_on_gpu = second_is_cuda or (
            second.owner and isinstance(second.owner.op, HostFromGpu))

        if first_on_gpu or second_on_gpu:
            out = rebuild_on_gpu(node.op, node.inputs)
            # If the original output was on CPU, we have to transfer it
            if isinstance(node.outputs[0].type, tensor.TensorType):
                return [tensor.as_tensor_variable(out)]
            else:
                return [out]
register_opt()(local_conv2d_gpu_conv)
### Corrmm opt
@local_optimizer([AbstractConv2d])
def local_abstractconv_gemm(node):
    """
    Replace an ``AbstractConv2d`` node by a GpuCorrMM-based implementation.

    The replacement is only proposed when both inputs are of type
    ``CudaNdarrayType``.  Depending on the border mode, the subsampling
    and the shape information stored on the op, one of ``GpuCorrMM``,
    ``GpuCorrMM_gradInputs`` or ``GpuCorrMM_gradWeights`` is used.

    :param node: the Apply node under consideration.
    :returns: a one-element list holding the replacement variable, or
        ``None`` when the optimization does not apply.
    """
    if not isinstance(node.op, AbstractConv2d):
        return None
    img, kern = node.inputs
    if (not isinstance(img.type, CudaNdarrayType) or
            not isinstance(kern.type, CudaNdarrayType)):
        return None

    border_mode = node.op.border_mode
    subsample = node.op.subsample
    if (border_mode == 'full') and (subsample == (1, 1)):
        if not node.op.filter_flip:
            kern = kern[:, :, ::-1, ::-1]
        # need to dimshuffle the kernel for full convolution
        kern = kern.dimshuffle(1, 0, 2, 3)
        # call GpuCorrMM_gradInputs
        rval = GpuCorrMM_gradInputs('valid', subsample)(
            gpu_contiguous(kern), gpu_contiguous(img))
    else:
        # need to flip the kernel if necessary
        if node.op.filter_flip:
            kern = kern[:, :, ::-1, ::-1]
        # By default use GpuCorrMM
        rval = GpuCorrMM(border_mode, subsample)(gpu_contiguous(img),
                                                 gpu_contiguous(kern))

        # call GpuCorrMM_gradWeights if good
        # (the latter is faster if batchsize * kernelHeight * kernelWidth
        # is larger than inputChannels * outputHeight * outputWidth.
        # The op does not always store information on the batchsize and
        # channels, though, so we only use what information we have.)
        imshp = node.op.imshp
        kshp = node.op.kshp
        if ((subsample == (1, 1)) and
                (imshp is not None) and
                (None not in imshp[-2:]) and
                (kshp is not None) and
                (None not in kshp)):
            # we know the kernel and output size.
            # The spatial dimensions are indexed from the end so that the
            # kernel rows/columns are used whether kshp holds only the two
            # spatial dimensions or the full 4-element filter shape.
            prod1 = kshp[-2] * kshp[-1]
            prod2 = ((imshp[-2] - kshp[-2] + 1) *
                     (imshp[-1] - kshp[-1] + 1))
            # Both the batch size and the number of input channels are
            # multiplied in below, so both must be known; checking only
            # the first entry would multiply by None when the channel
            # count is missing.
            if len(imshp) == 4 and None not in imshp[:2]:
                # we also know batchsize and input channels
                prod1 *= imshp[0]
                prod2 *= imshp[1]
            # compare to decide
            if prod1 > prod2:
                # (we need to wrap the result in as_cuda_ndarray_variable,
                # because we are not allowed to replace a CudaNdarray with
                # a DimShuffle instance in a graph optimization)
                rval = theano.sandbox.cuda.as_cuda_ndarray_variable(
                    GpuCorrMM_gradWeights(border_mode, subsample)(
                        gpu_contiguous(img.dimshuffle(1, 0, 2, 3)),
                        gpu_contiguous(kern.dimshuffle(1, 0, 2, 3))
                    ).dimshuffle(1, 0, 2, 3))
    return [rval]
@local_optimizer([AbstractConv2d_gradWeights])
def local_abstractconv_gradweight_gemm(node):
    """
    Replace an ``AbstractConv2d_gradWeights`` node by
    ``GpuCorrMM_gradWeights``.

    The replacement is only proposed when the image and the output
    gradient are both of type ``CudaNdarrayType``.

    :param node: the Apply node under consideration.
    :returns: a one-element list holding the replacement variable, or
        ``None`` when the optimization does not apply.
    """
    if not isinstance(node.op, AbstractConv2d_gradWeights):
        return None

    img, topgrad, shape = node.inputs
    both_on_gpu = (isinstance(img.type, CudaNdarrayType) and
                   isinstance(topgrad.type, CudaNdarrayType))
    if not both_on_gpu:
        return None

    corr_gradweights = GpuCorrMM_gradWeights(
        border_mode=node.op.border_mode,
        subsample=node.op.subsample)
    rval = corr_gradweights(gpu_contiguous(img),
                            gpu_contiguous(topgrad),
                            shape)
    if node.op.filter_flip:
        # Reverse the last two axes of the result when the op flips its
        # filters.
        rval = rval[:, :, ::-1, ::-1]
    rval = tensor.patternbroadcast(rval, node.outputs[0].broadcastable)
    return [as_cuda_ndarray_variable(rval)]
@local_optimizer([AbstractConv2d_gradInputs])
def local_abstractconv_gradinputs_gemm(node):
    """
    Replace an ``AbstractConv2d_gradInputs`` node by
    ``GpuCorrMM_gradInputs``.

    The replacement is only proposed when the kernels and the output
    gradient are both of type ``CudaNdarrayType``.

    :param node: the Apply node under consideration.
    :returns: a one-element list holding the replacement variable, or
        ``None`` when the optimization does not apply.
    """
    if not isinstance(node.op, AbstractConv2d_gradInputs):
        return None

    kern, topgrad, shape = node.inputs
    both_on_gpu = (isinstance(kern.type, CudaNdarrayType) and
                   isinstance(topgrad.type, CudaNdarrayType))
    if not both_on_gpu:
        return None

    if node.op.filter_flip:
        # Reverse the last two axes of the kernels when the op flips its
        # filters.
        kern = kern[:, :, ::-1, ::-1]

    corr_gradinputs = GpuCorrMM_gradInputs(
        border_mode=node.op.border_mode,
        subsample=node.op.subsample)
    return [corr_gradinputs(gpu_contiguous(kern),
                            gpu_contiguous(topgrad),
                            shape)]
# Register GPU convolution implementation
# They are tried in a specific order so we can control
# which ones take precedence over others.
# A dedicated LocalGroupDB is created, named, and registered through
# register_specialize_device with the 'gpu' and 'fast_compile' tags.
abstractconv_groupopt = theano.gof.optdb.LocalGroupDB()
abstractconv_groupopt.__name__ = "gpu_abstractconv_opts"
register_specialize_device(abstractconv_groupopt, 'gpu', 'fast_compile')

# NOTE(review): the optimizers below are registered on `conv_groupopt`
# (defined outside this view), not on `abstractconv_groupopt` created
# above, which receives no optimizer here -- confirm this is intended.

# cuDNN is first, but only registered if cuDNN is available.
conv_groupopt.register('local_abstractconv_dnn',
                       dnn.local_abstractconv_cudnn, 20,
                       'conv_dnn',
                       'gpu', 'fast_compile', 'fast_run', 'cudnn')
# The GEMM-based convolution comes last to catch all remaining cases.
# It can be disabled by excluding 'conv_gemm'.
conv_groupopt.register('local_abstractconv_gemm',
                       local_abstractconv_gemm, 30,
                       'conv_gemm',
                       'gpu', 'fast_compile', 'fast_run')
conv_groupopt.register('local_abstractconv_gradweight_gemm',
                       local_abstractconv_gradweight_gemm, 30,
                       'conv_gemm',
                       'gpu', 'fast_compile', 'fast_run')
conv_groupopt.register('local_abstractconv_gradinputs_gemm',
                       local_abstractconv_gradinputs_gemm, 30,
                       'conv_gemm',
                       'gpu', 'fast_compile', 'fast_run')
theano/sandbox/cuda/tests/test_abstractconv.py
0 → 100644
浏览文件 @
4736c9b3
import
unittest
import
numpy
import
itertools
import
theano
from
theano.tests
import
unittest_tools
as
utt
import
theano.tensor.nnet.abstract_conv2d
as
conv
from
theano.sandbox.cuda
import
float32_shared_constructor
as
gpu_shared
from
theano.compile
import
shared
as
cpu_shared
from
theano.sandbox.cuda.dnn
import
dnn_available
,
dnn_conv
,
dnn_gradweight
,
dnn_gradinput
from
nose.plugins.skip
import
SkipTest
import
theano.sandbox.cuda
as
cuda
# The whole module is skipped when cuda is not available.
if not cuda.cuda_available:
    raise SkipTest('Optional package cuda disabled')

# Compilation modes used by the tests: one including the 'gpu' tag and one
# excluding it.  When the configured mode is FAST_COMPILE, both are derived
# from FAST_RUN instead of from the default mode.
mode_with_gpu, mode_without_gpu = (
    (theano.compile.mode.get_mode('FAST_RUN').including('gpu'),
     theano.compile.mode.get_mode('FAST_RUN').excluding('gpu'))
    if theano.config.mode == 'FAST_COMPILE' else
    (theano.compile.mode.get_default_mode().including('gpu'),
     theano.compile.get_default_mode().excluding('gpu'))
)
class TestConv2d(unittest.TestCase):
    """
    Compare the abstract conv2d ops (forward, gradient w.r.t. weights,
    gradient w.r.t. inputs) against the cuDNN reference functions, under
    several compilation modes.
    """

    def setUp(self):
        super(TestConv2d, self).setUp()
        # inputs_shapes[k] is always paired with filters_shapes[k].
        self.inputs_shapes = [(8, 1, 12, 12), (8, 1, 18, 18), (2, 1, 4, 4),
                              (6, 1, 10, 11), (2, 1, 6, 5), (1, 5, 9, 9)]
        self.filters_shapes = [(5, 1, 2, 2), (4, 1, 3, 3), (2, 1, 3, 3),
                               (1, 1, 2, 5), (4, 1, 2, 2), (4, 5, 2, 2)]
        self.subsamples = [(1, 1), (2, 2), (2, 4)]
        self.border_modes = ["valid", "full", (0, 0), (1, 1), (5, 5), (5, 2)]
        self.filter_flip = [True, False]

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _as_shared(value, device):
        # Wrap `value` in a shared variable for the requested device.
        if device == 'gpu':
            return gpu_shared(value)
        return theano.tensor.as_tensor_variable(cpu_shared(value))

    @staticmethod
    def _static_shapes(inputs_shape, filters_shape, provide_shape):
        # Shapes handed to the ops: the real ones, or None when withheld.
        if provide_shape:
            return inputs_shape, filters_shape
        return None, None

    @staticmethod
    def _assert_same_result(f_ref, f):
        # The reference function is evaluated first, then the tested one.
        res_ref = numpy.array(f_ref())
        res = numpy.array(f())
        utt.assert_allclose(res_ref, res)

    def _run_all(self, **kwargs):
        # Run the forward check and both gradient checks with gradient
        # verification enabled.
        output_shape = kwargs.pop('output_shape')
        self.run_fwd(verify_grad=True, **kwargs)
        self.run_gradweight(output_shape=output_shape, verify_grad=True,
                            **kwargs)
        self.run_gradinput(output_shape=output_shape, verify_grad=True,
                           **kwargs)

    def _run_or_expect_failure(self, runner, supported, verify_grad,
                               **kwargs):
        # Run `runner` when the configuration is supported; otherwise
        # require it to raise NotImplementedError (without verify_grad).
        if supported:
            runner(verify_grad=verify_grad, **kwargs)
        else:
            self.assertRaises(NotImplementedError, runner,
                              verify_grad=False, **kwargs)

    # ------------------------------------------------------------------
    # Building blocks used by the tests
    # ------------------------------------------------------------------
    def get_output_shape(self, inputs_shape, filters_shape, subsample,
                         border_mode):
        """
        Return the 4-element output shape of a convolution.

        ``border_mode`` may be "valid", "full" or a pair of paddings.  A
        spatial dimension is ``None`` when the matching input or filter
        dimension is ``None``.
        """
        if border_mode == "valid":
            pads = (0, 0)
        elif border_mode == "full":
            pads = (filters_shape[2] - 1, filters_shape[3] - 1)
        else:
            pads = border_mode

        spatial = []
        for size, ksize, stride, pad in zip(inputs_shape[2:],
                                            filters_shape[2:],
                                            subsample, pads):
            if size is None or ksize is None:
                spatial.append(None)
            else:
                spatial.append((size + 2 * pad - ksize) // stride + 1)
        return (inputs_shape[0], filters_shape[0]) + tuple(spatial)

    def run_fwd(self, inputs_shape, filters_shape, ref=dnn_conv,
                subsample=(1, 1), verify_grad=True, mode=mode_without_gpu,
                border_mode='valid', filter_flip=True, device='cpu',
                provide_shape=False):
        """
        Check ``conv.conv2d`` against ``ref`` on random float32 data and
        optionally verify the gradient of ``AbstractConv2d``.
        """
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')
        inputs = self._as_shared(inputs_val, device)
        filters = self._as_shared(filters_val, device)
        imshp, kshp = self._static_shapes(inputs_shape, filters_shape,
                                          provide_shape)
        conv_mode = 'conv' if filter_flip else 'cross'

        c_ref = ref(inputs, filters,
                    border_mode=border_mode,
                    subsample=subsample,
                    conv_mode=conv_mode)
        c = conv.conv2d(inputs, filters,
                        border_mode=border_mode,
                        subsample=subsample,
                        filter_flip=filter_flip,
                        input_shape=imshp,
                        filter_shape=kshp)
        f_ref = theano.function([], c_ref, mode=mode)
        f = theano.function([], c, mode)
        self._assert_same_result(f_ref, f)

        if verify_grad:
            utt.verify_grad(conv.AbstractConv2d(border_mode="valid",
                                                imshp=imshp, kshp=kshp,
                                                subsample=subsample),
                            [inputs_val, filters_val],
                            mode=mode)

    def run_gradweight(self, inputs_shape, filters_shape, output_shape,
                       ref=dnn_gradweight, subsample=(1, 1),
                       filter_flip=True, verify_grad=True,
                       mode=mode_without_gpu, border_mode='valid',
                       device='cpu', provide_shape=False):
        """
        Check ``AbstractConv2d_gradWeights`` against ``ref`` on random
        float32 data and optionally verify its gradient.
        """
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        output_val = numpy.random.random(output_shape).astype('float32')
        inputs = self._as_shared(inputs_val, device)
        output = self._as_shared(output_val, device)
        imshp, kshp = self._static_shapes(inputs_shape, filters_shape,
                                          provide_shape)
        conv_mode = 'conv' if filter_flip else 'cross'

        gradweights_op = conv.AbstractConv2d_gradWeights(
            border_mode=border_mode,
            filter_flip=filter_flip,
            subsample=subsample,
            imshp=imshp, kshp=kshp)
        c = gradweights_op(inputs, output, filters_shape[-2:])
        c_ref = ref(inputs, output, filters_shape,
                    border_mode=border_mode,
                    subsample=subsample,
                    conv_mode=conv_mode)
        f = theano.function([], c, mode)
        f_ref = theano.function([], c_ref, mode)
        self._assert_same_result(f_ref, f)

        def abstract_conv2d_gradweight(inputs_val, output_val):
            conv_op = conv.AbstractConv2d_gradWeights(
                border_mode=border_mode, subsample=subsample)
            return conv_op(inputs_val, output_val, filters_shape[-2:])

        if verify_grad:
            utt.verify_grad(abstract_conv2d_gradweight,
                            [inputs_val, output_val],
                            mode=mode, eps=1)

    def run_gradinput(self, inputs_shape, filters_shape, output_shape,
                      ref=dnn_gradinput, subsample=(1, 1),
                      filter_flip=True, verify_grad=True,
                      mode=mode_without_gpu, border_mode='valid',
                      device='cpu', provide_shape=False):
        """
        Check ``AbstractConv2d_gradInputs`` against ``ref`` on random
        float32 data and optionally verify its gradient.
        """
        output_val = numpy.random.random(output_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')
        output = self._as_shared(output_val, device)
        filters = self._as_shared(filters_val, device)
        imshp, kshp = self._static_shapes(inputs_shape, filters_shape,
                                          provide_shape)
        conv_mode = 'conv' if filter_flip else 'cross'

        gradinputs_op = conv.AbstractConv2d_gradInputs(
            border_mode=border_mode,
            subsample=subsample,
            filter_flip=filter_flip,
            imshp=imshp, kshp=kshp)
        c = gradinputs_op(filters, output, inputs_shape[-2:])
        c_ref = ref(filters, output, inputs_shape,
                    border_mode=border_mode,
                    subsample=subsample,
                    conv_mode=conv_mode)
        f = theano.function([], c, mode)
        f_ref = theano.function([], c_ref, mode)
        self._assert_same_result(f_ref, f)

        def abstract_conv2d_gradinputs(filters_val, output_val):
            conv_op = conv.AbstractConv2d_gradInputs(
                border_mode=border_mode, subsample=subsample)
            return conv_op(filters_val, output_val, inputs_shape[-2:])

        if verify_grad:
            utt.verify_grad(abstract_conv2d_gradinputs,
                            [filters_val, output_val],
                            mode=mode, eps=1)

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------
    def test_dnn_conv(self):
        if not dnn_available():
            raise SkipTest(cuda.dnn.dnn_available.msg)
        mode = mode_with_gpu
        # provide_shape is not used by the CuDNN implementation
        provide_shape = False

        configurations = itertools.product(
            zip(self.inputs_shapes, self.filters_shapes),
            self.subsamples,
            self.border_modes,
            self.filter_flip)
        for (i, f), s, b, flip in configurations:
            o = self.get_output_shape(i, f, s, b)
            self._run_all(inputs_shape=i, filters_shape=f, output_shape=o,
                          subsample=s, mode=mode, device='gpu',
                          provide_shape=provide_shape, border_mode=b,
                          filter_flip=flip)

    def test_cormm_conv(self):
        if not dnn_available():
            raise SkipTest(cuda.dnn.dnn_available.msg)
        mode = mode_with_gpu.excluding('cudnn')

        configurations = itertools.product(
            zip(self.inputs_shapes, self.filters_shapes),
            self.subsamples,
            self.border_modes,
            self.filter_flip,
            [False, True])
        for (i, f), s, b, flip, provide_shape in configurations:
            o = self.get_output_shape(i, f, s, b)
            self._run_all(inputs_shape=i, filters_shape=f, output_shape=o,
                          subsample=s, mode=mode, device='gpu',
                          provide_shape=provide_shape, border_mode=b,
                          filter_flip=flip)

    def test_cpu_conv(self):
        if not dnn_available():
            raise SkipTest(cuda.dnn.dnn_available.msg)
        mode = mode_without_gpu

        configurations = itertools.product(
            zip(self.inputs_shapes, self.filters_shapes),
            self.subsamples,
            self.border_modes,
            self.filter_flip,
            [False, True])
        for (i, f), s, b, flip, provide_shape in configurations:
            o = self.get_output_shape(i, f, s, b)

            # Configurations without filter flipping, or with a border
            # mode other than 'valid'/'full', are expected to raise
            # NotImplementedError for all three checks.
            fwd_ok = bool(flip) and b in ('valid', 'full')
            # The two gradient checks always share the same expectation.
            grad_ok = fwd_ok
            if b == 'full':
                if (not provide_shape) and (s != (1, 1)):
                    grad_ok = False
                if (s[0] not in (1, 2)) or (s[1] not in (1, 2)):
                    grad_ok = False

            common = dict(inputs_shape=i, filters_shape=f, subsample=s,
                          mode=mode, device='cpu',
                          provide_shape=provide_shape,
                          border_mode=b, filter_flip=flip)
            self._run_or_expect_failure(self.run_fwd, fwd_ok, True,
                                        **common)
            self._run_or_expect_failure(self.run_gradweight, grad_ok, False,
                                        output_shape=o, **common)
            self._run_or_expect_failure(self.run_gradinput, grad_ok, False,
                                        output_shape=o, **common)
theano/tensor/nnet/Conv3D.py
浏览文件 @
4736c9b3
...
@@ -158,9 +158,9 @@ class Conv3D(theano.Op):
...
@@ -158,9 +158,9 @@ class Conv3D(theano.Op):
vidDur
=
V_shape
[
3
]
vidDur
=
V_shape
[
3
]
filterDur
=
W_shape
[
3
]
filterDur
=
W_shape
[
3
]
output_height
=
T
.
floor
((
vidHeight
-
filterHeight
)
//
dr
)
+
1
output_height
=
((
vidHeight
-
filterHeight
)
//
dr
)
+
1
output_width
=
T
.
floor
((
vidWidth
-
filterWidth
)
//
dc
)
+
1
output_width
=
((
vidWidth
-
filterWidth
)
//
dc
)
+
1
output_dur
=
T
.
floor
((
vidDur
-
filterDur
)
//
dt
)
+
1
output_dur
=
((
vidDur
-
filterDur
)
//
dt
)
+
1
rval
=
(
batch_size
,
output_height
,
output_width
,
output_dur
,
output_channels
)
rval
=
(
batch_size
,
output_height
,
output_width
,
output_dur
,
output_channels
)
...
...
theano/tensor/nnet/abstract_conv2d.py
0 → 100644
浏览文件 @
4736c9b3
"""
Define abstract conv2d interface
"""
import
logging
import
theano
from
theano.tensor
import
(
as_tensor_variable
,
patternbroadcast
)
from
theano.tensor
import
TensorType
from
theano.gof
import
Apply
,
Op
from
theano.gof
import
local_optimizer
from
theano.tensor.opt
import
register_specialize_device
# Cpu implementation
from
theano.tensor.nnet
import
conv2d
as
cpu_conv2d
,
ConvOp
from
theano.tensor.nnet.ConvGrad3D
import
convGrad3D
from
theano.tensor.nnet.ConvTransp3D
import
convTransp3D
# Markup convention used by the docstrings in this module.
__docformat__ = "restructuredtext en"
# Module-level logger. NOTE(review): the logger is named
# "theano.tensor.nnet.conv2d" rather than after this module
# (abstract_conv2d) -- confirm whether that is intentional.
_logger = logging.getLogger("theano.tensor.nnet.conv2d")
def conv2d(input,
           filters,
           input_shape=None,
           filter_shape=None,
           border_mode='valid',
           subsample=(1, 1),
           filter_flip=True):
    """
    Build the symbolic graph that convolves a mini-batch of stacks of 2D
    inputs with a set of 2D filters, in the style used by Convolutional
    Neural Networks (CNN).

    The graph contains a single :class:`AbstractConv2d` node configured from
    the arguments below and applied to ``(input, filters)``.

    :type input: symbolic 4D tensor
    :param input: mini-batch of feature map stacks, of shape
        (batch size, input channels, input rows, input columns).
        See the optional parameter ``input_shape``.

    :type filters: symbolic 4D tensor
    :param filters: set of filters used in CNN layer of shape
        (output channels, input channels, filter rows, filter columns).
        See the optional parameter ``filter_shape``.

    :type input_shape: None, tuple/list of len 4 of int or Constant variable
    :param input_shape: The shape of the input parameter.
        Optional, possibly used to choose an optimal implementation.
        ``None`` may be given for any element to say that it is not known
        at compile time.

    :type filter_shape: None, tuple/list of len 4 of int or Constant variable
    :param filter_shape: The shape of the filters parameter.
        Optional, possibly used to choose an optimal implementation.
        ``None`` may be given for any element to say that it is not known
        at compile time.

    :type border_mode: str, int or tuple of two int
    :param border_mode: Either of the following:

        * ``'valid'``: apply filter wherever it completely overlaps with the
          input. Generates output of shape: input shape - filter shape + 1
        * ``'full'``: apply filter wherever it partly overlaps with the input.
          Generates output of shape: input shape + filter shape - 1
        * ``'half'``: pad input with a symmetric border of
          ``filter rows // 2`` rows and ``filter columns // 2`` columns, then
          perform a valid convolution. For filters with an odd number of rows
          and columns, the output shape equals the input shape.
        * ``int``: pad input with a symmetric border of zeros of the given
          width, then perform a valid convolution.
        * ``(int1, int2)``: pad input with a symmetric border of ``int1``
          rows and ``int2`` columns, then perform a valid convolution.

    :type subsample: tuple of len 2
    :param subsample: factor by which to subsample the output.
        Also called strides elsewhere.

    :type filter_flip: bool
    :param filter_flip: If ``True`` (the default), flip the filter rows and
        columns before sliding them over the input; this is what is normally
        called a convolution. If ``False``, the filters are not flipped and
        the operation is a cross-correlation.

    :rtype: symbolic 4D tensor
    :return: set of feature maps generated by the convolutional layer, of
        shape (batch size, output channels, output rows, output columns).
    """
    op_settings = dict(imshp=input_shape,
                       kshp=filter_shape,
                       border_mode=border_mode,
                       subsample=subsample,
                       filter_flip=filter_flip)
    return AbstractConv2d(**op_settings)(input, filters)
class BaseAbstractConv2d(Op):
    """
    Base class for AbstractConv

    Define an abstract convolution op that will be replaced with the
    appropriate implementation.

    :type imshp: None, tuple/list of len 4 of int or Constant variable
    :param imshp: The shape of the input parameter.
        Optional, possibly used to choose an optimal implementation.
        You can give ``None`` for any element of the list to specify that this
        element is not known at compile time.
        imshp is defined w.r.t the forward conv.

    :type kshp: None, tuple/list of len 4 of int or Constant variable
    :param kshp: The shape of the filters parameter.
        Optional, possibly used to choose an optimal implementation.
        You can give ``None`` for any element of the list to specify that this
        element is not known at compile time.
        kshp is defined w.r.t the forward conv.

    :type border_mode: str, int or tuple of two int
    :param border_mode: Either of the following:

        * ``'valid'``: apply filter wherever it completely overlaps with the
          input. Generates output of shape: input shape - filter shape + 1
        * ``'full'``: apply filter wherever it partly overlaps with the input.
          Generates output of shape: input shape + filter shape - 1
        * ``'half'``: pad input with a symmetric border of
          ``filter rows // 2`` rows and ``filter columns // 2`` columns, then
          perform a valid convolution. For filters with an odd number of rows
          and columns, this leads to the output shape being equal to the
          input shape.
        * ``int``: pad input with a symmetric border of zeros of the given
          width, then perform a valid convolution.
        * ``(int1, int2)``: pad input with a symmetric border of ``int1``
          rows and ``int2`` columns, then perform a valid convolution.

    :type subsample: tuple of len 2
    :param subsample: factor by which to subsample the output.
        Also called strides elsewhere. Any length-2 sequence is accepted; it
        is stored as a tuple.

    :type filter_flip: bool
    :param filter_flip: If ``True``, will flip the filter rows and columns
        before sliding them over the input. This operation is normally
        referred to as a convolution, and this is the default. If ``False``,
        the filters are not flipped and the operation is referred to as a
        cross-correlation.
    """
    check_broadcast = False
    __props__ = ('border_mode', 'subsample', 'filter_flip', 'imshp', 'kshp')

    def __init__(self,
                 imshp=None, kshp=None,
                 border_mode="valid", subsample=(1, 1),
                 filter_flip=True):
        """
        Validate and store the convolution parameters.

        :raises ValueError: if ``border_mode`` is not one of ``'valid'``,
            ``'full'``, ``'half'``, a non-negative integer or a pair of
            non-negative integers, or if ``subsample`` does not have exactly
            two elements.
        """
        # A single integer means the same padding on rows and columns.
        if isinstance(border_mode, int):
            border_mode = (border_mode, border_mode)
        # Normalise a padding pair to plain ints.
        if isinstance(border_mode, tuple):
            pad_h, pad_w = map(int, border_mode)
            border_mode = (pad_h, pad_w)
        if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or
                border_mode in ('valid', 'full', 'half')):
            raise ValueError(
                'invalid border_mode {}, which must be either '
                '"valid", "full", "half", an integer or a pair of'
                ' integers'.format(border_mode))

        self.imshp = imshp
        self.kshp = kshp
        self.border_mode = border_mode
        self.filter_flip = filter_flip

        if len(subsample) != 2:
            raise ValueError("subsample must have two elements")
        # Store as a tuple: 'subsample' is listed in __props__ and the CPU
        # optimizers in this module compare it against the tuple (1, 1); a
        # list would never compare equal to that tuple and is not hashable.
        self.subsample = tuple(subsample)

    def flops(self, inp, outp):
        """ Useful with the hack in profilemode to print the MFlops"""
        # if the output shape is correct, then this gives the correct
        # flops for any direction, sampling, padding, and border mode
        inputs, filters = inp
        outputs, = outp
        assert inputs[1] == filters[1]
        # nb mul and add by output pixel
        flops = filters[2] * filters[3] * 2
        # nb flops by output image
        flops *= outputs[2] * outputs[3]
        # nb patch multiplied
        flops *= inputs[1] * filters[0] * inputs[0]
        return flops
class AbstractConv2d(BaseAbstractConv2d):
    """
    Abstract Op standing for the forward convolution.

    It has no implementation of its own: ``perform`` always raises, so a
    graph optimization is expected to swap it for a concrete convolution.
    """

    def __init__(self,
                 imshp=None,
                 kshp=None,
                 border_mode="valid",
                 subsample=(1, 1),
                 filter_flip=True):
        super(AbstractConv2d, self).__init__(
            imshp, kshp, border_mode, subsample, filter_flip)

    def make_node(self, img, kern):
        """
        Build the Apply node for ``(img, kern)``.

        :raises TypeError: if either ``img`` or ``kern`` is not 4D.
        """
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')

        # Output pattern: first axis follows img's first axis, second axis
        # follows kern's first axis; the last two are never broadcastable.
        out_pattern = [img.broadcastable[0],
                       kern.broadcastable[0],
                       False, False]
        out_var = img.type.clone(broadcastable=out_pattern)()
        return Apply(self, [img, kern], [out_var])

    def perform(self, node, inp, out_):
        # Reaching this means no optimization replaced the abstract Op.
        raise NotImplementedError('AbstractConv2d theano optimization failed')

    def grad(self, inp, grads):
        """
        Return the gradients wrt. the image and the filters, expressed with
        the abstract gradient Ops configured like this Op.
        """
        bottom, weights = inp
        top, = grads
        op_params = (self.imshp, self.kshp,
                     self.border_mode,
                     self.subsample,
                     self.filter_flip)

        grad_inputs_op = AbstractConv2d_gradInputs(*op_params)
        d_bottom = grad_inputs_op(weights, top, bottom.shape[-2:])

        grad_weights_op = AbstractConv2d_gradWeights(*op_params)
        d_weights = grad_weights_op(bottom, top, weights.shape[-2:])

        return d_bottom, d_weights
class AbstractConv2d_gradWeights(BaseAbstractConv2d):
    """Abstract Op for the gradient of `AbstractConv2d` wrt. its filters.

    :note: You will not want to use this directly, but rely on
           Theano's automatic differentiation or graph optimization to
           use it as needed.
    """

    def __init__(self,
                 imshp=None,
                 kshp=None,
                 border_mode="valid",
                 subsample=(1, 1),
                 filter_flip=True):
        super(AbstractConv2d_gradWeights, self).__init__(
            imshp, kshp, border_mode, subsample, filter_flip)

    # Update shape/height_width
    def make_node(self, img, topgrad, shape):
        """
        Build the Apply node for ``(img, topgrad, shape)``.

        ``shape`` is converted with ``as_tensor_variable`` and passed as the
        third input.

        :raises TypeError: if ``img`` or ``topgrad`` is not 4D.
        """
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if topgrad.type.ndim != 4:
            raise TypeError('topgrad must be 4D tensor')

        shape_var = as_tensor_variable(shape)
        # Output pattern: axis 0 follows topgrad's axis 1, axis 1 follows
        # img's axis 1; the last two axes are never broadcastable.
        out_pattern = [topgrad.broadcastable[1],
                       img.broadcastable[1],
                       False, False]
        out_var = img.type.clone(broadcastable=out_pattern)()
        return Apply(self, [img, topgrad, shape_var], [out_var])

    def perform(self, node, inp, out_):
        # Reaching this means no optimization replaced the abstract Op.
        raise NotImplementedError(
            'AbstractConv2d_gradWeight theano optimization failed')

    def grad(self, inp, grads):
        """
        Return gradients wrt. the first two inputs; the shape input gets a
        disconnected gradient.
        """
        bottom, top = inp[:2]
        weights, = grads
        op_params = (self.imshp, self.kshp,
                     self.border_mode,
                     self.subsample,
                     self.filter_flip)

        grad_inputs_op = AbstractConv2d_gradInputs(*op_params)
        d_bottom = grad_inputs_op(weights, top, bottom.shape[-2:])

        forward_op = AbstractConv2d(*op_params)
        d_top = forward_op(bottom, weights)

        disconnected = (theano.gradient.DisconnectedType()(),)
        return (d_bottom, d_top) + disconnected

    def connection_pattern(self, node):
        # One row per input: the third input (height, width) is not
        # connected to the output.
        return [[1], [1], [0]]
class AbstractConv2d_gradInputs(BaseAbstractConv2d):
    """Gradient wrt. inputs for `AbstractConv2d`.

    :note: You will not want to use this directly, but rely on
           Theano's automatic differentiation or graph optimization to
           use it as needed.
    """

    def __init__(self,
                 imshp=None,
                 kshp=None,
                 border_mode="valid",
                 subsample=(1, 1),
                 filter_flip=True):
        super(AbstractConv2d_gradInputs, self).__init__(imshp, kshp,
                                                        border_mode,
                                                        subsample,
                                                        filter_flip)

    # Update shape/height_width
    def make_node(self, kern, topgrad, shape):
        """
        Build the Apply node for ``(kern, topgrad, shape)``.

        ``shape`` is converted with ``as_tensor_variable`` and passed as the
        third input.

        :raises TypeError: if ``kern`` or ``topgrad`` is not 4D.
        """
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')
        if topgrad.type.ndim != 4:
            raise TypeError('topgrad must be 4D tensor')

        shape = as_tensor_variable(shape)
        # Output pattern: axis 0 follows topgrad's axis 0, axis 1 follows
        # kern's axis 1; the last two axes are never broadcastable.
        broadcastable = [topgrad.type.broadcastable[0],
                         kern.type.broadcastable[1],
                         False, False]
        output = kern.type.clone(broadcastable=broadcastable)()
        return Apply(self, [kern, topgrad, shape], [output])

    def perform(self, node, inp, out_):
        # Reaching this means no optimization replaced the abstract Op.
        # The message names this Op (it previously named the gradWeights Op).
        raise NotImplementedError(
            'AbstractConv2d_gradInputs theano optimization failed')

    def grad(self, inp, grads):
        """
        Return gradients wrt. the first two inputs; the shape input gets a
        disconnected gradient.

        ``filter_flip`` is forwarded to the Ops built here so that they use
        the same setting as this Op instead of falling back to the default.
        """
        weights, top = inp[:2]
        bottom, = grads
        d_weights = AbstractConv2d_gradWeights(self.imshp, self.kshp,
                                               self.border_mode,
                                               self.subsample,
                                               self.filter_flip)(
            bottom, top, weights.shape[-2:])
        d_top = AbstractConv2d(self.imshp, self.kshp,
                               self.border_mode,
                               self.subsample,
                               self.filter_flip)(bottom, weights)
        d_height_width = (theano.gradient.DisconnectedType()(),)
        return (d_weights, d_top) + d_height_width

    def connection_pattern(self, node):
        return [[1], [1], [0]]  # no connection to height, width
# Cpu Optimization
@local_optimizer([AbstractConv2d])
def local_conv2d_cpu(node):
    """
    Replace an `AbstractConv2d` node with ``cpu_conv2d``.

    Returns ``None`` (no replacement) unless the node's op is an
    `AbstractConv2d`, both inputs have a `TensorType` type, the border mode
    is ``'full'`` or ``'valid'`` and ``filter_flip`` is true. Otherwise
    returns a one-element list holding the replacement output.
    """
    conv_op = node.op
    if not isinstance(conv_op, AbstractConv2d):
        return None

    img, kern = node.inputs
    if not (isinstance(img.type, TensorType) and
            isinstance(kern.type, TensorType)):
        return None

    # The filter_flip=False case is not tested yet, so it is not handled.
    if conv_op.border_mode not in ['full', 'valid'] or \
            not conv_op.filter_flip:
        return None

    replacement = cpu_conv2d(img, kern,
                             conv_op.imshp, conv_op.kshp,
                             border_mode=conv_op.border_mode,
                             subsample=conv_op.subsample)
    return [replacement]
register_specialize_device(local_conv2d_cpu, 'fast_compile')
@local_optimizer([AbstractConv2d_gradWeights])
def local_conv2d_gradweight_cpu(node):
    """
    Replace an `AbstractConv2d_gradWeights` node with a CPU implementation.

    Two paths are used: ``convGrad3D`` for ``'valid'`` border mode with a
    subsample other than ``(1, 1)``, and ``ConvOp`` otherwise. Returns
    ``None`` (or falls out with a bare ``return``) when the configuration is
    not handled; otherwise returns a one-element list with the replacement.
    """

    img, topgrad, shape = node.inputs

    # Only handle inputs whose type is a TensorType.
    if ((not isinstance(img.type, TensorType) or
         not isinstance(topgrad.type, TensorType))):
        return None
    # Tuple (explicit padding) and 'half' border modes are not handled here.
    if node.op.border_mode not in ['full', 'valid']:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return

    if node.op.border_mode == 'valid' and \
            (node.op.subsample != (1, 1)):
        # Use the gradient as defined in conv3D, because the implementation
        # by Conv is slow (about 3x slower than conv3D, and probably 10x
        # slower than it could be), and incorrect when subsample > 2.
        # build a "node", that should be equivalent to the one given by
        # self.make_node, but using convGrad3D instead.
        # Reorder axes to (0, 2, 3, new axis, 1) for the 3D routine.
        shuffled_img = img.dimshuffle(0, 2, 3, 'x', 1)
        shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
        rval = convGrad3D(V=shuffled_img,
                          d=(node.op.subsample[0], node.op.subsample[1], 1),
                          WShape=(shuffled_topgrad.shape[4],
                                  shape[0], shape[1], 1,
                                  shuffled_img.shape[4]),
                          dCdH=shuffled_topgrad)

        # Mark the added axis (3) broadcastable, then drop it while moving
        # axis 4 back to position 1.
        rval = theano.tensor.addbroadcast(rval, 3)
        rval = rval.dimshuffle(0, 4, 1, 2)
        # Reverse the last two axes of the result.
        rval = rval[:, :, ::-1, ::-1]
        # Give the replacement the same broadcast pattern as the original
        # output.
        rval = patternbroadcast(rval, node.outputs[0].broadcastable)
        return [rval]

    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    # Treat a missing shape as four unknown entries.
    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp

    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp

    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            # We cannot infer the shapes
            return None

    # Determine gradient on kernels
    assert len(op_imshp) == 4 and len(op_kshp) == 4

    # Output shape with the op's subsample, and with no subsampling.
    outshp = ConvOp.getOutputShape(op_imshp[2:],
                                   op_kshp[2:], node.op.subsample,
                                   node.op.border_mode)
    fulloutshp = ConvOp.getOutputShape(op_imshp[2:],
                                       op_kshp[2:], (1, 1),
                                       node.op.border_mode)

    # Swap the first two axes of both inputs.
    newimg = img.dimshuffle((1, 0, 2, 3))
    newtopgrad = topgrad.dimshuffle((1, 0, 2, 3))

    if node.op.border_mode == 'valid':
        # The swapped image is convolved with the swapped top gradient.
        (img, filters) = (newimg, newtopgrad)
        kshp_logical = fulloutshp
        kshp_logical_top_aligned = False
        imshp_logical = None
        (bsize, nkern) = (op_imshp[1], op_kshp[0])
        imshp = (op_imshp[0], op_imshp[2], op_imshp[3])
        kshp = outshp
    elif node.op.border_mode == 'full':
        # Roles are reversed: the swapped top gradient is the image.
        (img, filters) = (newtopgrad, newimg)
        kshp_logical = None
        kshp_logical_top_aligned = True
        imshp_logical = (op_imshp[0],
                         fulloutshp[0],
                         fulloutshp[1])
        (bsize, nkern) = (op_kshp[0], op_imshp[1])
        imshp = (op_imshp[0], outshp[0], outshp[1])
        kshp = op_imshp[2:]
    else:
        # Not reachable given the border_mode check at the top.
        raise NotImplementedError(
            'Only [full,valid] modes are currently supported.')

    # Flip the kernels
    filters = filters[:, :, ::-1, ::-1]

    dw = ConvOp(imshp, kshp, nkern, bsize, 1, 1, output_mode='valid',
                unroll_batch=None, unroll_kern=None, unroll_patch=None,
                imshp_logical=imshp_logical,
                kshp_logical=kshp_logical,
                kshp_logical_top_aligned=kshp_logical_top_aligned,
                direction_hint='bprop weights')
    res = dw(img, filters)
    # NOTE(review): nesting reconstructed from flattened source -- both
    # statements below are assumed to belong to the 'valid' branch; confirm
    # against the upstream file.
    if node.op.border_mode == 'valid':
        res = res.dimshuffle((1, 0, 2, 3))
        res = res[:, :, ::-1, ::-1]

    # Give the replacement the same broadcast pattern as the original output.
    res = patternbroadcast(res, node.outputs[0].broadcastable)
    return [res]
register_specialize_device(local_conv2d_gradweight_cpu, 'fast_compile')
@local_optimizer([AbstractConv2d_gradInputs])
def local_conv2d_gradinputs_cpu(node):
    """
    Replace an `AbstractConv2d_gradInputs` node with a CPU implementation.

    Two paths are used: ``convTransp3D`` for ``'valid'`` border mode with a
    subsample other than ``(1, 1)``, and ``ConvOp`` otherwise. Returns
    ``None`` when the configuration is not handled; otherwise returns a
    one-element list with the replacement.
    """
    kern, topgrad, shape = node.inputs

    # Only handle inputs whose type is a TensorType.
    if ((not isinstance(kern.type, TensorType) or
         not isinstance(topgrad.type, TensorType))):
        return None
    # Tuple (explicit padding) and 'half' border modes are not handled here.
    if node.op.border_mode not in ['full', 'valid']:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return None

    # Conv 3d implementation, needed when subsample > 2
    if node.op.border_mode == 'valid' and node.op.subsample != (1, 1):
        # Reverse the last two axes of the kernel, then reorder axes to
        # (0, 2, 3, new axis, 1) for the 3D routine.
        kern = kern[:, :, ::-1, ::-1]
        shuffled_kern = kern.dimshuffle(0, 2, 3, 'x', 1)
        shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
        # Zero vector with the length of the kernel's last (shuffled) axis,
        # passed as the `b` argument.
        b = theano.tensor.zeros_like(shuffled_kern[0, 0, 0, 0, :])
        rval = convTransp3D(W=shuffled_kern, b=b,
                            d=(node.op.subsample[0], node.op.subsample[1], 1),
                            H=shuffled_topgrad,
                            RShape=(shape[0], shape[1], 1))
        # Mark the added axis (3) broadcastable, then drop it while moving
        # axis 4 back to position 1.
        rval = theano.tensor.addbroadcast(rval, 3)
        rval = rval.dimshuffle(0, 4, 1, 2)
        # Give the replacement the same broadcast pattern as the original
        # output.
        rval = patternbroadcast(rval, node.outputs[0].broadcastable)
        return [rval]

    # Conv2d Implementation
    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    # Treat a missing shape as four unknown entries.
    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp

    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp

    # With unknown shapes only the unsubsampled case is handled.
    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            return None

    # The ConvOp below runs in the opposite mode of the forward op:
    # 'valid' when the forward border mode is 'full', 'full' otherwise.
    mode = 'valid'
    if not node.op.border_mode == 'full':
        mode = 'full'
    # Swap the first two kernel axes and reverse the last two.
    filters = kern.dimshuffle((1, 0, 2, 3))
    filters = filters[:, :, ::-1, ::-1]

    # Output shape with the op's subsample, and with no subsampling.
    outshp = ConvOp.getOutputShape(op_imshp[2:],
                                   op_kshp[2:], node.op.subsample,
                                   node.op.border_mode)
    fulloutshp = ConvOp.getOutputShape(op_imshp[2:],
                                       op_kshp[2:], (1, 1),
                                       node.op.border_mode)
    nkern = op_imshp[1]
    imshp = (op_kshp[0], outshp[0], outshp[1])
    imshp_logical = (op_kshp[0], fulloutshp[0], fulloutshp[1])
    din = ConvOp(imshp,
                 op_kshp[2:],
                 nkern,
                 op_imshp[0],
                 1, 1, output_mode=mode,
                 unroll_batch=None, unroll_kern=None,
                 unroll_patch=None,
                 imshp_logical=imshp_logical,
                 kshp_logical=None,
                 version=-1,
                 direction_hint='bprop inputs')
    # `din` is rebound from the op to its output variable.
    din = din(topgrad, filters)
    # Give the replacement the same broadcast pattern as the original output.
    din = patternbroadcast(din, node.outputs[0].broadcastable)
    return [din]
register_specialize_device(local_conv2d_gradinputs_cpu, 'fast_compile')
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论