Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
cea45e8b
提交
cea45e8b
authored
11月 08, 2016
作者:
Frédéric Bastien
提交者:
GitHub
11月 08, 2016
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5174 from abergeron/cormm_f16
Make corrMM work in float16/64
上级
29af0e5b
d79d38c1
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
10 个修改的文件
包含
107 行增加
和
54 行删除
+107
-54
basic_ops.py
theano/gpuarray/basic_ops.py
+31
-14
blas.py
theano/gpuarray/blas.py
+21
-8
corr3d_gemm.c
theano/gpuarray/corr3d_gemm.c
+0
-0
corr_gemm.c
theano/gpuarray/corr_gemm.c
+0
-0
elemwise.py
theano/gpuarray/elemwise.py
+9
-0
neighbours.py
theano/gpuarray/neighbours.py
+9
-0
config.py
theano/gpuarray/tests/config.py
+8
-0
test_dnn.py
theano/gpuarray/tests/test_dnn.py
+1
-8
test_gemmcorr.py
theano/gpuarray/tests/test_gemmcorr.py
+15
-12
test_gemmcorr3d.py
theano/gpuarray/tests/test_gemmcorr3d.py
+13
-12
没有找到文件。
theano/gpuarray/basic_ops.py
浏览文件 @
cea45e8b
...
@@ -173,11 +173,15 @@ class Kernel(object):
...
@@ -173,11 +173,15 @@ class Kernel(object):
fname: str
fname: str
the name of the function wrapper.
the name of the function wrapper.
(defaults to name + `_call`)
(defaults to name + `_call`)
sname: str
the name of the scheduled call function
(defaults to name _ `_scall`)
"""
"""
def
__init__
(
self
,
code
,
params
,
name
,
flags
,
def
__init__
(
self
,
code
,
params
,
name
,
flags
,
codevar
=
None
,
binvar
=
None
,
objvar
=
None
,
fname
=
None
):
codevar
=
None
,
binvar
=
None
,
objvar
=
None
,
fname
=
None
,
sname
=
None
):
self
.
code
=
code
self
.
code
=
code
self
.
params
=
params
self
.
params
=
params
self
.
name
=
name
self
.
name
=
name
...
@@ -194,6 +198,9 @@ class Kernel(object):
...
@@ -194,6 +198,9 @@ class Kernel(object):
if
fname
is
None
:
if
fname
is
None
:
fname
=
name
+
'_call'
fname
=
name
+
'_call'
self
.
fname
=
fname
self
.
fname
=
fname
if
sname
is
None
:
sname
=
name
+
'_scall'
self
.
sname
=
sname
@staticmethod
@staticmethod
def
get_flags
(
*
types
):
def
get_flags
(
*
types
):
...
@@ -338,22 +345,30 @@ class GpuKernelBase(object):
...
@@ -338,22 +345,30 @@ class GpuKernelBase(object):
setargs
=
'
\n
'
.
join
(
setargs
)
setargs
=
'
\n
'
.
join
(
setargs
)
return
"""
return
"""
int {fname}(unsigned int
nd, size_t *gdim, size_t *ldim, size_t
shared,
int {fname}(unsigned int
_nd, size_t *_gdim, size_t *_ldim, size_t _
shared,
{args}) {{
{args}) {{
{setargs}
{setargs}
return GpuKernel_call(&{kname},
nd, ldim, gdim,
shared, NULL);
return GpuKernel_call(&{kname},
_nd, _ldim, _gdim, _
shared, NULL);
}}
}}
"""
.
format
(
args
=
args
,
fname
=
k
.
fname
,
setargs
=
setargs
,
kname
=
k
.
objvar
)
def
c_support_code
(
self
):
int {sname}(unsigned int _nd, size_t *_n, size_t _shared, {args}) {{
return
"""
size_t _ls = 0;
template <typename T>
size_t _gs = 0;
static T ceil_intdiv(T a, T b)
int _err;
{
return (a/b) + ((a
%
b) ? 1: 0);
if (_nd != 1) return GA_UNSUPPORTED_ERROR;
}
"""
_err = GpuKernel_sched(&{kname}, _n[0], &_ls, &_gs);
if (_err != GA_NO_ERROR)
return _err;
{setargs}
return GpuKernel_call(&{kname}, 1, &_ls, &_gs, _shared, NULL);
}}
"""
.
format
(
args
=
args
,
fname
=
k
.
fname
,
setargs
=
setargs
,
sname
=
k
.
sname
,
kname
=
k
.
objvar
)
def
c_support_code_apply
(
self
,
node
,
name
):
def
c_support_code_apply
(
self
,
node
,
name
):
kernels
=
self
.
gpu_kernels
(
node
,
name
)
kernels
=
self
.
gpu_kernels
(
node
,
name
)
...
@@ -428,7 +443,7 @@ int {fname}(unsigned int nd, size_t *gdim, size_t *ldim, size_t shared,
...
@@ -428,7 +443,7 @@ int {fname}(unsigned int nd, size_t *gdim, size_t *ldim, size_t shared,
The node that we need the cache version for.
The node that we need the cache version for.
"""
"""
return
(
6
,
self
.
get_params
(
node
)
.
bin_id
)
return
(
7
,
self
.
get_params
(
node
)
.
bin_id
)
def
forward_string_meth
(
name
):
def
forward_string_meth
(
name
):
...
@@ -466,12 +481,14 @@ class CGpuKernelBase(COp, GpuKernelBase):
...
@@ -466,12 +481,14 @@ class CGpuKernelBase(COp, GpuKernelBase):
kernel_re
=
re
.
compile
(
r'^#kernel ([a-zA-Z_].*?)$'
,
re
.
MULTILINE
)
kernel_re
=
re
.
compile
(
r'^#kernel ([a-zA-Z_].*?)$'
,
re
.
MULTILINE
)
c_support_code
=
forward_string_meth
(
'c_support_code'
)
c_support_code_apply
=
forward_string_meth
(
'c_support_code_apply'
)
c_support_code_apply
=
forward_string_meth
(
'c_support_code_apply'
)
c_support_code_struct
=
forward_string_meth
(
'c_support_code_struct'
)
c_support_code_struct
=
forward_string_meth
(
'c_support_code_struct'
)
c_init_code_struct
=
forward_string_meth
(
'c_init_code_struct'
)
c_init_code_struct
=
forward_string_meth
(
'c_init_code_struct'
)
c_cleanup_code_struct
=
forward_string_meth
(
'c_cleanup_code_struct'
)
c_cleanup_code_struct
=
forward_string_meth
(
'c_cleanup_code_struct'
)
def
c_code_cache_version_apply
(
self
,
node
):
return
GpuKernelBase
.
c_code_cache_version_apply
(
self
,
node
)
def
_type_macros
(
self
,
node
):
def
_type_macros
(
self
,
node
):
define_template
=
"#define
%
s
%
s
\n
"
define_template
=
"#define
%
s
%
s
\n
"
undef_template
=
"#undef
%
s
\n
"
undef_template
=
"#undef
%
s
\n
"
...
...
theano/gpuarray/blas.py
浏览文件 @
cea45e8b
...
@@ -414,7 +414,7 @@ gpugemmbatch_no_inplace = GpuGemmBatch(inplace=False)
...
@@ -414,7 +414,7 @@ gpugemmbatch_no_inplace = GpuGemmBatch(inplace=False)
gpugemmbatch_inplace
=
GpuGemmBatch
(
inplace
=
True
)
gpugemmbatch_inplace
=
GpuGemmBatch
(
inplace
=
True
)
class
BaseGpuCorrMM
(
CGpuKernelBase
,
BlasOp
):
class
BaseGpuCorrMM
(
CGpuKernelBase
):
"""
"""
Base class for `GpuCorrMM`, `GpuCorrMM_gradWeights` and
Base class for `GpuCorrMM`, `GpuCorrMM_gradWeights` and
`GpuCorrMM_gradInputs`. Cannot be used directly.
`GpuCorrMM_gradInputs`. Cannot be used directly.
...
@@ -429,9 +429,9 @@ class BaseGpuCorrMM(CGpuKernelBase, BlasOp):
...
@@ -429,9 +429,9 @@ class BaseGpuCorrMM(CGpuKernelBase, BlasOp):
filter_dilation
filter_dilation
Perform subsampling of the input, also known as dilation (default: (1, 1)).
Perform subsampling of the input, also known as dilation (default: (1, 1)).
"""
"""
check_broadcast
=
False
check_broadcast
=
False
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
)
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
)
_f16_ok
=
True
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
)):
filter_dilation
=
(
1
,
1
)):
...
@@ -489,9 +489,15 @@ class BaseGpuCorrMM(CGpuKernelBase, BlasOp):
...
@@ -489,9 +489,15 @@ class BaseGpuCorrMM(CGpuKernelBase, BlasOp):
def
get_params
(
self
,
node
):
def
get_params
(
self
,
node
):
return
node
.
inputs
[
0
]
.
type
.
context
return
node
.
inputs
[
0
]
.
type
.
context
def
c_headers
(
self
):
return
[
"<gpuarray/array.h>"
,
"<gpuarray/blas.h>"
,
"gpuarray_helper.h"
]
def
c_header_dirs
(
self
):
return
[
os
.
path
.
dirname
(
__file__
)]
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
#
raise this whenever modifying any of the support_code_files
#
Raise this whenever modifying the code below.
return
(
0
,
2
)
return
(
2
,
)
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
):
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
):
"""
"""
...
@@ -953,7 +959,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
...
@@ -953,7 +959,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
return
[[
1
],
[
1
],
[
0
],
[
0
]]
# no connection to height, width
return
[[
1
],
[
1
],
[
0
],
[
0
]]
# no connection to height, width
class
BaseGpuCorr3dMM
(
CGpuKernelBase
,
BlasOp
):
class
BaseGpuCorr3dMM
(
CGpuKernelBase
):
"""
"""
Base class for `GpuCorr3dMM`, `GpuCorr3dMM_gradWeights` and
Base class for `GpuCorr3dMM`, `GpuCorr3dMM_gradWeights` and
`GpuCorr3dMM_gradInputs`. Cannot be used directly.
`GpuCorr3dMM_gradInputs`. Cannot be used directly.
...
@@ -967,10 +973,11 @@ class BaseGpuCorr3dMM(CGpuKernelBase, BlasOp):
...
@@ -967,10 +973,11 @@ class BaseGpuCorr3dMM(CGpuKernelBase, BlasOp):
Perform subsampling of the output (default: (1, 1, 1)).
Perform subsampling of the output (default: (1, 1, 1)).
filter_dilation
filter_dilation
Perform subsampling of the input, also known as dilation (default: (1, 1, 1)).
Perform subsampling of the input, also known as dilation (default: (1, 1, 1)).
"""
"""
check_broadcast
=
False
check_broadcast
=
False
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
)
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
)
_f16_ok
=
True
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
,
1
),
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
,
1
),
filter_dilation
=
(
1
,
1
,
1
)):
filter_dilation
=
(
1
,
1
,
1
)):
...
@@ -1028,9 +1035,15 @@ class BaseGpuCorr3dMM(CGpuKernelBase, BlasOp):
...
@@ -1028,9 +1035,15 @@ class BaseGpuCorr3dMM(CGpuKernelBase, BlasOp):
def
get_params
(
self
,
node
):
def
get_params
(
self
,
node
):
return
node
.
inputs
[
0
]
.
type
.
context
return
node
.
inputs
[
0
]
.
type
.
context
def
c_headers
(
self
):
return
[
"<gpuarray/array.h>"
,
"<gpuarray/blas.h>"
,
"gpuarray_helper.h"
]
def
c_header_dirs
(
self
):
return
[
os
.
path
.
dirname
(
__file__
)]
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# raise this whenever modifying
any of the support_code_files
# raise this whenever modifying
the code below.
return
(
0
,
2
)
return
(
2
,
)
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
,
depth
=
None
):
height
=
None
,
width
=
None
,
depth
=
None
):
...
...
theano/gpuarray/corr3d_gemm.c
浏览文件 @
cea45e8b
差异被折叠。
点击展开。
theano/gpuarray/corr_gemm.c
浏览文件 @
cea45e8b
差异被折叠。
点击展开。
theano/gpuarray/elemwise.py
浏览文件 @
cea45e8b
...
@@ -613,6 +613,15 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
...
@@ -613,6 +613,15 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
def
c_headers
(
self
):
def
c_headers
(
self
):
return
[
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
]
return
[
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
]
def
c_support_code
(
self
):
return
"""
template <typename T>
static T ceil_intdiv(T a, T b)
{
return (a/b) + ((a
%
b) ? 1: 0);
}
"""
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
x
,
=
inp
x
,
=
inp
z
,
=
out
z
,
=
out
...
...
theano/gpuarray/neighbours.py
浏览文件 @
cea45e8b
...
@@ -242,6 +242,15 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
...
@@ -242,6 +242,15 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
flags
=
flags
,
objvar
=
k_var
))
flags
=
flags
,
objvar
=
k_var
))
return
kernels
return
kernels
def
c_support_code
(
self
):
return
"""
template <typename T>
static T ceil_intdiv(T a, T b)
{
return (a/b) + ((a
%
b) ? 1: 0);
}
"""
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
dtype_ten4
=
node
.
inputs
[
0
]
.
dtype
dtype_ten4
=
node
.
inputs
[
0
]
.
dtype
dtype_neib_shape
=
node
.
inputs
[
1
]
.
dtype
dtype_neib_shape
=
node
.
inputs
[
1
]
.
dtype
...
...
theano/gpuarray/tests/config.py
浏览文件 @
cea45e8b
from
__future__
import
absolute_import
,
print_function
,
division
from
__future__
import
absolute_import
,
print_function
,
division
from
nose.plugins.skip
import
SkipTest
from
nose.plugins.skip
import
SkipTest
import
theano.tensor
import
theano.gpuarray
import
theano.gpuarray
if
theano
.
gpuarray
.
pygpu
is
None
:
if
theano
.
gpuarray
.
pygpu
is
None
:
...
@@ -21,3 +22,10 @@ if theano.config.mode == 'FAST_COMPILE':
...
@@ -21,3 +22,10 @@ if theano.config.mode == 'FAST_COMPILE':
else
:
else
:
mode_with_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'gpuarray'
)
.
excluding
(
'gpu'
)
mode_with_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'gpuarray'
)
.
excluding
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpuarray'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpuarray'
)
# If using float16, cast reference input to float32
def
ref_cast
(
x
):
if
x
.
type
.
dtype
==
'float16'
:
x
=
theano
.
tensor
.
cast
(
x
,
'float32'
)
return
x
theano/gpuarray/tests/test_dnn.py
浏览文件 @
cea45e8b
...
@@ -17,7 +17,7 @@ from .. import dnn
...
@@ -17,7 +17,7 @@ from .. import dnn
from
..basic_ops
import
GpuAllocEmpty
from
..basic_ops
import
GpuAllocEmpty
from
..type
import
gpuarray_shared_constructor
from
..type
import
gpuarray_shared_constructor
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
test_ctx_name
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
test_ctx_name
,
ref_cast
from
.
import
test_nnet
from
.
import
test_nnet
from
.rnn_support
import
Model
,
GRU
,
LSTM
,
WrapperLayer
from
.rnn_support
import
Model
,
GRU
,
LSTM
,
WrapperLayer
...
@@ -33,13 +33,6 @@ def set_precision(floatX):
...
@@ -33,13 +33,6 @@ def set_precision(floatX):
return
precision
return
precision
# If using float16, cast reference input to float32
def
ref_cast
(
x
):
if
theano
.
config
.
floatX
==
'float16'
:
x
=
T
.
cast
(
x
,
'float32'
)
return
x
def
test_dnn_conv_desc_merge
():
def
test_dnn_conv_desc_merge
():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
...
...
theano/gpuarray/tests/test_gemmcorr.py
浏览文件 @
cea45e8b
...
@@ -3,13 +3,14 @@ import unittest
...
@@ -3,13 +3,14 @@ import unittest
import
numpy
import
numpy
import
theano
import
theano
from
theano
import
config
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.nnet.corr
import
CorrMM
,
CorrMM_gradWeights
,
CorrMM_gradInputs
from
theano.tensor.nnet.corr
import
CorrMM
,
CorrMM_gradWeights
,
CorrMM_gradInputs
from
..type
import
gpuarray_shared_constructor
from
..type
import
gpuarray_shared_constructor
from
..blas
import
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
from
..blas
import
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
from
.config
import
mode_with_gpu
,
mode_without_gpu
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
ref_cast
class
TestCorrMM
(
unittest
.
TestCase
):
class
TestCorrMM
(
unittest
.
TestCase
):
...
@@ -22,15 +23,16 @@ class TestCorrMM(unittest.TestCase):
...
@@ -22,15 +23,16 @@ class TestCorrMM(unittest.TestCase):
inputs_shape
=
[
inputs_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
inputs_shape
=
[
inputs_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
config
.
floatX
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
'float32'
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
config
.
floatX
)
inputs
=
gpuarray_shared_constructor
(
inputs_val
)
inputs
=
gpuarray_shared_constructor
(
inputs_val
)
filters
=
gpuarray_shared_constructor
(
filters_val
)
filters
=
gpuarray_shared_constructor
(
filters_val
)
conv_ref
=
CorrMM
(
border_mode
=
border_mode
,
conv_ref
=
CorrMM
(
border_mode
=
border_mode
,
filter_dilation
=
filter_dilation
,
filter_dilation
=
filter_dilation
,
subsample
=
subsample
)(
inputs
,
filters
)
subsample
=
subsample
)(
ref_cast
(
inputs
),
ref_cast
(
filters
))
f_ref
=
theano
.
function
([],
conv_ref
,
mode
=
mode_without_gpu
)
f_ref
=
theano
.
function
([],
conv_ref
,
mode
=
mode_without_gpu
)
conv
=
GpuCorrMM
(
border_mode
=
border_mode
,
conv
=
GpuCorrMM
(
border_mode
=
border_mode
,
...
@@ -120,20 +122,20 @@ class TestCorrMM(unittest.TestCase):
...
@@ -120,20 +122,20 @@ class TestCorrMM(unittest.TestCase):
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
dCdH_shape
=
[
dCdH_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
dCdH_shape
=
[
dCdH_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
config
.
floatX
)
dCdH_val
=
numpy
.
random
.
random
(
dCdH_shape
)
.
astype
(
'float32'
)
dCdH_val
=
numpy
.
random
.
random
(
dCdH_shape
)
.
astype
(
config
.
floatX
)
inputs
=
gpuarray_shared_constructor
(
inputs_val
)
inputs
=
gpuarray_shared_constructor
(
inputs_val
)
dCdH
=
gpuarray_shared_constructor
(
dCdH_val
)
dCdH
=
gpuarray_shared_constructor
(
dCdH_val
)
shape
=
gpuarray_shared_constructor
(
numpy
.
array
(
filters_shape
[
2
:]))
shape
=
gpuarray_shared_constructor
(
numpy
.
array
(
filters_shape
[
2
:]))
if
(
subsample
==
(
1
,
1
)):
if
(
subsample
==
(
1
,
1
)):
conv_ref
=
CorrMM_gradWeights
(
subsample
=
subsample
)(
conv_ref
=
CorrMM_gradWeights
(
subsample
=
subsample
)(
inputs
,
dCdH
)
ref_cast
(
inputs
),
ref_cast
(
dCdH
)
)
conv_gemm
=
GpuCorrMM_gradWeights
(
subsample
=
subsample
)(
conv_gemm
=
GpuCorrMM_gradWeights
(
subsample
=
subsample
)(
inputs
,
dCdH
)
inputs
,
dCdH
)
else
:
else
:
conv_ref
=
CorrMM_gradWeights
(
subsample
=
subsample
)(
conv_ref
=
CorrMM_gradWeights
(
subsample
=
subsample
)(
inputs
,
dCdH
,
shape
=
shape
)
ref_cast
(
inputs
),
ref_cast
(
dCdH
)
,
shape
=
shape
)
conv_gemm
=
GpuCorrMM_gradWeights
(
subsample
=
subsample
)(
conv_gemm
=
GpuCorrMM_gradWeights
(
subsample
=
subsample
)(
inputs
,
dCdH
,
shape
=
shape
)
inputs
,
dCdH
,
shape
=
shape
)
...
@@ -167,8 +169,8 @@ class TestCorrMM(unittest.TestCase):
...
@@ -167,8 +169,8 @@ class TestCorrMM(unittest.TestCase):
inputs_shape
=
[
inputs_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
inputs_shape
=
[
inputs_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
config
.
floatX
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
'float32'
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
config
.
floatX
)
inputs
=
gpuarray_shared_constructor
(
inputs_val
)
inputs
=
gpuarray_shared_constructor
(
inputs_val
)
filters
=
gpuarray_shared_constructor
(
filters_val
)
filters
=
gpuarray_shared_constructor
(
filters_val
)
...
@@ -178,12 +180,13 @@ class TestCorrMM(unittest.TestCase):
...
@@ -178,12 +180,13 @@ class TestCorrMM(unittest.TestCase):
if
(
subsample
==
(
1
,
1
)):
if
(
subsample
==
(
1
,
1
)):
conv_ref
=
CorrMM_gradInputs
(
subsample
=
subsample
)(
conv_ref
=
CorrMM_gradInputs
(
subsample
=
subsample
)(
kern
=
filters
,
topgrad
=
inputs
)
kern
=
ref_cast
(
filters
),
topgrad
=
ref_cast
(
inputs
)
)
conv_gemm
=
GpuCorrMM_gradInputs
(
subsample
=
subsample
)(
conv_gemm
=
GpuCorrMM_gradInputs
(
subsample
=
subsample
)(
kern
=
filters
,
topgrad
=
inputs
)
kern
=
filters
,
topgrad
=
inputs
)
else
:
else
:
conv_ref
=
CorrMM_gradInputs
(
subsample
=
subsample
)(
conv_ref
=
CorrMM_gradInputs
(
subsample
=
subsample
)(
kern
=
filters
,
topgrad
=
inputs
,
shape
=
bottom_shape
)
kern
=
ref_cast
(
filters
),
topgrad
=
ref_cast
(
inputs
),
shape
=
bottom_shape
)
conv_gemm
=
GpuCorrMM_gradInputs
(
subsample
=
subsample
)(
conv_gemm
=
GpuCorrMM_gradInputs
(
subsample
=
subsample
)(
kern
=
filters
,
topgrad
=
inputs
,
shape
=
bottom_shape
)
kern
=
filters
,
topgrad
=
inputs
,
shape
=
bottom_shape
)
...
...
theano/gpuarray/tests/test_gemmcorr3d.py
浏览文件 @
cea45e8b
...
@@ -3,13 +3,14 @@ import unittest
...
@@ -3,13 +3,14 @@ import unittest
import
numpy
import
numpy
import
theano
import
theano
from
theano
import
config
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.nnet.corr3d
import
Corr3dMM
,
Corr3dMM_gradWeights
,
Corr3dMM_gradInputs
from
theano.tensor.nnet.corr3d
import
Corr3dMM
,
Corr3dMM_gradWeights
,
Corr3dMM_gradInputs
from
..type
import
gpuarray_shared_constructor
from
..type
import
gpuarray_shared_constructor
from
..blas
import
GpuCorr3dMM
,
GpuCorr3dMM_gradWeights
,
GpuCorr3dMM_gradInputs
from
..blas
import
GpuCorr3dMM
,
GpuCorr3dMM_gradWeights
,
GpuCorr3dMM_gradInputs
from
.config
import
mode_with_gpu
,
mode_without_gpu
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
ref_cast
class
TestCorr3dMM
(
unittest
.
TestCase
):
class
TestCorr3dMM
(
unittest
.
TestCase
):
...
@@ -22,15 +23,15 @@ class TestCorr3dMM(unittest.TestCase):
...
@@ -22,15 +23,15 @@ class TestCorr3dMM(unittest.TestCase):
inputs_shape
=
[
inputs_shape
[
i
]
for
i
in
(
0
,
4
,
1
,
2
,
3
)]
inputs_shape
=
[
inputs_shape
[
i
]
for
i
in
(
0
,
4
,
1
,
2
,
3
)]
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
4
,
1
,
2
,
3
)]
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
4
,
1
,
2
,
3
)]
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
config
.
floatX
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
'float32'
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
config
.
floatX
)
inputs
=
gpuarray_shared_constructor
(
inputs_val
)
inputs
=
gpuarray_shared_constructor
(
inputs_val
)
filters
=
gpuarray_shared_constructor
(
filters_val
)
filters
=
gpuarray_shared_constructor
(
filters_val
)
conv_ref
=
Corr3dMM
(
border_mode
=
border_mode
,
conv_ref
=
Corr3dMM
(
border_mode
=
border_mode
,
filter_dilation
=
filter_dilation
,
filter_dilation
=
filter_dilation
,
subsample
=
subsample
)(
inputs
,
filters
)
subsample
=
subsample
)(
ref_cast
(
inputs
),
ref_cast
(
filters
)
)
f_ref
=
theano
.
function
([],
conv_ref
,
mode
=
mode_without_gpu
)
f_ref
=
theano
.
function
([],
conv_ref
,
mode
=
mode_without_gpu
)
conv
=
GpuCorr3dMM
(
border_mode
=
border_mode
,
conv
=
GpuCorr3dMM
(
border_mode
=
border_mode
,
...
@@ -120,20 +121,20 @@ class TestCorr3dMM(unittest.TestCase):
...
@@ -120,20 +121,20 @@ class TestCorr3dMM(unittest.TestCase):
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
4
,
1
,
2
,
3
)]
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
4
,
1
,
2
,
3
)]
dCdH_shape
=
[
dCdH_shape
[
i
]
for
i
in
(
0
,
4
,
1
,
2
,
3
)]
dCdH_shape
=
[
dCdH_shape
[
i
]
for
i
in
(
0
,
4
,
1
,
2
,
3
)]
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
config
.
floatX
)
dCdH_val
=
numpy
.
random
.
random
(
dCdH_shape
)
.
astype
(
'float32'
)
dCdH_val
=
numpy
.
random
.
random
(
dCdH_shape
)
.
astype
(
config
.
floatX
)
inputs
=
gpuarray_shared_constructor
(
inputs_val
)
inputs
=
gpuarray_shared_constructor
(
inputs_val
)
dCdH
=
gpuarray_shared_constructor
(
dCdH_val
)
dCdH
=
gpuarray_shared_constructor
(
dCdH_val
)
shape
=
gpuarray_shared_constructor
(
numpy
.
array
(
filters_shape
[
2
:]))
shape
=
gpuarray_shared_constructor
(
numpy
.
array
(
filters_shape
[
2
:]))
if
(
subsample
==
(
1
,
1
,
1
)):
if
(
subsample
==
(
1
,
1
,
1
)):
conv_ref
=
Corr3dMM_gradWeights
(
subsample
=
subsample
)(
conv_ref
=
Corr3dMM_gradWeights
(
subsample
=
subsample
)(
inputs
,
dCdH
)
ref_cast
(
inputs
),
ref_cast
(
dCdH
)
)
conv_gemm
=
GpuCorr3dMM_gradWeights
(
subsample
=
subsample
)(
conv_gemm
=
GpuCorr3dMM_gradWeights
(
subsample
=
subsample
)(
inputs
,
dCdH
)
inputs
,
dCdH
)
else
:
else
:
conv_ref
=
Corr3dMM_gradWeights
(
subsample
=
subsample
)(
conv_ref
=
Corr3dMM_gradWeights
(
subsample
=
subsample
)(
inputs
,
dCdH
,
shape
=
shape
)
ref_cast
(
inputs
),
ref_cast
(
dCdH
)
,
shape
=
shape
)
conv_gemm
=
GpuCorr3dMM_gradWeights
(
subsample
=
subsample
)(
conv_gemm
=
GpuCorr3dMM_gradWeights
(
subsample
=
subsample
)(
inputs
,
dCdH
,
shape
=
shape
)
inputs
,
dCdH
,
shape
=
shape
)
...
@@ -167,8 +168,8 @@ class TestCorr3dMM(unittest.TestCase):
...
@@ -167,8 +168,8 @@ class TestCorr3dMM(unittest.TestCase):
inputs_shape
=
[
inputs_shape
[
i
]
for
i
in
(
0
,
4
,
1
,
2
,
3
)]
inputs_shape
=
[
inputs_shape
[
i
]
for
i
in
(
0
,
4
,
1
,
2
,
3
)]
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
4
,
1
,
2
,
3
)]
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
4
,
1
,
2
,
3
)]
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
config
.
floatX
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
'float32'
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
config
.
floatX
)
inputs
=
gpuarray_shared_constructor
(
inputs_val
)
inputs
=
gpuarray_shared_constructor
(
inputs_val
)
filters
=
gpuarray_shared_constructor
(
filters_val
)
filters
=
gpuarray_shared_constructor
(
filters_val
)
...
@@ -179,12 +180,12 @@ class TestCorr3dMM(unittest.TestCase):
...
@@ -179,12 +180,12 @@ class TestCorr3dMM(unittest.TestCase):
if
(
subsample
==
(
1
,
1
,
1
)):
if
(
subsample
==
(
1
,
1
,
1
)):
conv_ref
=
Corr3dMM_gradInputs
(
subsample
=
subsample
)(
conv_ref
=
Corr3dMM_gradInputs
(
subsample
=
subsample
)(
kern
=
filters
,
topgrad
=
inputs
)
kern
=
ref_cast
(
filters
),
topgrad
=
ref_cast
(
inputs
)
)
conv_gemm
=
GpuCorr3dMM_gradInputs
(
subsample
=
subsample
)(
conv_gemm
=
GpuCorr3dMM_gradInputs
(
subsample
=
subsample
)(
kern
=
filters
,
topgrad
=
inputs
)
kern
=
filters
,
topgrad
=
inputs
)
else
:
else
:
conv_ref
=
Corr3dMM_gradInputs
(
subsample
=
subsample
)(
conv_ref
=
Corr3dMM_gradInputs
(
subsample
=
subsample
)(
kern
=
filters
,
topgrad
=
inputs
,
shape
=
bottom_shape
)
kern
=
ref_cast
(
filters
),
topgrad
=
ref_cast
(
inputs
)
,
shape
=
bottom_shape
)
conv_gemm
=
GpuCorr3dMM_gradInputs
(
subsample
=
subsample
)(
conv_gemm
=
GpuCorr3dMM_gradInputs
(
subsample
=
subsample
)(
kern
=
filters
,
topgrad
=
inputs
,
shape
=
bottom_shape
)
kern
=
filters
,
topgrad
=
inputs
,
shape
=
bottom_shape
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论