Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
645557f9
提交
645557f9
authored
10月 02, 2015
作者:
Pascal Lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3476 from abergeron/move_config
Multiple fixes preparing for multi-gpu
上级
1ec1cd9b
71dea2cf
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
29 个修改的文件
包含
294 行增加
和
467 行删除
+294
-467
setup.cfg
setup.cfg
+3
-0
__init__.py
theano/__init__.py
+4
-2
configdefaults.py
theano/configdefaults.py
+112
-19
configparser.py
theano/configparser.py
+11
-7
op.py
theano/gof/op.py
+8
-4
__init__.py
theano/sandbox/cuda/__init__.py
+1
-1
dnn.py
theano/sandbox/cuda/dnn.py
+6
-6
nvcc_compiler.py
theano/sandbox/cuda/nvcc_compiler.py
+4
-71
dnn_flags.py
theano/sandbox/dnn_flags.py
+0
-42
__init__.py
theano/sandbox/gpuarray/__init__.py
+7
-12
basic_ops.py
theano/sandbox/gpuarray/basic_ops.py
+0
-0
conv.py
theano/sandbox/gpuarray/conv.py
+19
-39
dnn.py
theano/sandbox/gpuarray/dnn.py
+21
-46
elemwise.py
theano/sandbox/gpuarray/elemwise.py
+0
-0
kernel_codegen.py
theano/sandbox/gpuarray/kernel_codegen.py
+11
-12
neighbours.py
theano/sandbox/gpuarray/neighbours.py
+4
-26
nerv.py
theano/sandbox/gpuarray/nerv.py
+2
-2
nnet.py
theano/sandbox/gpuarray/nnet.py
+0
-0
opt.py
theano/sandbox/gpuarray/opt.py
+2
-2
opt_util.py
theano/sandbox/gpuarray/opt_util.py
+0
-0
subtensor.py
theano/sandbox/gpuarray/subtensor.py
+1
-11
test_basic_ops.py
theano/sandbox/gpuarray/tests/test_basic_ops.py
+23
-73
test_blas.py
theano/sandbox/gpuarray/tests/test_blas.py
+41
-38
test_conv_cuda_ndarray.py
theano/sandbox/gpuarray/tests/test_conv_cuda_ndarray.py
+0
-0
test_neighbours.py
theano/sandbox/gpuarray/tests/test_neighbours.py
+1
-3
test_nnet.py
theano/sandbox/gpuarray/tests/test_nnet.py
+7
-30
test_type.py
theano/sandbox/gpuarray/tests/test_type.py
+0
-3
type.py
theano/sandbox/gpuarray/type.py
+6
-5
test_flake8.py
theano/tests/test_flake8.py
+0
-13
没有找到文件。
setup.cfg
浏览文件 @
645557f9
[nosetest]
[nosetest]
match=^test
match=^test
nocapture=1
nocapture=1
[flake8]
ignore=E501,E123,E133
theano/__init__.py
浏览文件 @
645557f9
...
@@ -109,8 +109,10 @@ if config.device.startswith('gpu') or config.init_gpu_device.startswith('gpu'):
...
@@ -109,8 +109,10 @@ if config.device.startswith('gpu') or config.init_gpu_device.startswith('gpu'):
theano
.
sandbox
.
cuda
.
tests
.
test_driver
.
test_nvidia_driver1
()
theano
.
sandbox
.
cuda
.
tests
.
test_driver
.
test_nvidia_driver1
()
if
config
.
device
.
startswith
(
'cuda'
)
or
config
.
device
.
startswith
(
'opencl'
)
or
\
if
(
config
.
device
.
startswith
(
'cuda'
)
or
config
.
gpuarray
.
init_device
!=
''
:
config
.
device
.
startswith
(
'opencl'
)
or
config
.
init_gpu_device
.
startswith
(
'cuda'
)
or
config
.
init_gpu_device
.
startswith
(
'opencl'
)):
import
theano.sandbox.gpuarray
import
theano.sandbox.gpuarray
# Use config.numpy to call numpy.seterr
# Use config.numpy to call numpy.seterr
...
...
theano/configdefaults.py
浏览文件 @
645557f9
...
@@ -73,19 +73,19 @@ class DeviceParam(ConfigParam):
...
@@ -73,19 +73,19 @@ class DeviceParam(ConfigParam):
self
.
default
=
default
self
.
default
=
default
def
filter
(
val
):
def
filter
(
val
):
if
val
.
startswith
(
'cpu'
)
or
val
.
startswith
(
'gpu'
)
\
if
val
==
self
.
default
or
val
.
startswith
(
'gpu'
)
\
or
val
.
startswith
(
'opencl'
)
or
val
.
startswith
(
'cuda'
):
or
val
.
startswith
(
'opencl'
)
or
val
.
startswith
(
'cuda'
):
return
val
return
val
else
:
else
:
raise
ValueError
((
'Invalid value ("
%
s") for configuration '
raise
ValueError
((
'Invalid value ("
%
s") for configuration '
'variable "
%
s". Valid options start with '
'variable "
%
s". Valid options start with '
'one of "
cpu
", "gpu", "opencl", "cuda"'
'one of "
%
s
", "gpu", "opencl", "cuda"'
%
(
val
,
self
.
fullname
)))
%
(
self
.
default
,
val
,
self
.
fullname
)))
over
=
kwargs
.
get
(
"allow_override"
,
True
)
over
=
kwargs
.
get
(
"allow_override"
,
True
)
super
(
DeviceParam
,
self
)
.
__init__
(
default
,
filter
,
over
)
super
(
DeviceParam
,
self
)
.
__init__
(
default
,
filter
,
over
)
def
__str__
(
self
):
def
__str__
(
self
):
return
'
%
s (
cpu, gpu*, opencl*, cuda*) '
%
(
self
.
fullname
,
)
return
'
%
s (
%
s, gpu*, opencl*, cuda*) '
%
(
self
.
fullname
,
self
.
default
)
AddConfigVar
(
AddConfigVar
(
'device'
,
'device'
,
...
@@ -94,15 +94,7 @@ AddConfigVar(
...
@@ -94,15 +94,7 @@ AddConfigVar(
"on it. Do not use upper case letters, only lower case even if "
"on it. Do not use upper case letters, only lower case even if "
"NVIDIA use capital letters."
),
"NVIDIA use capital letters."
),
DeviceParam
(
'cpu'
,
allow_override
=
False
),
DeviceParam
(
'cpu'
,
allow_override
=
False
),
in_c_key
=
False
,)
in_c_key
=
False
)
AddConfigVar
(
'gpuarray.init_device'
,
"""
Device to initialize for gpuarray use without moving
computations automatically.
"""
,
StrParam
(
''
),
in_c_key
=
False
)
AddConfigVar
(
AddConfigVar
(
'init_gpu_device'
,
'init_gpu_device'
,
...
@@ -110,12 +102,7 @@ AddConfigVar(
...
@@ -110,12 +102,7 @@ AddConfigVar(
"Unlike 'device', setting this option will NOT move computations, "
"Unlike 'device', setting this option will NOT move computations, "
"nor shared variables, to the specified GPU. "
"nor shared variables, to the specified GPU. "
"It can be used to run GPU-specific tests on a particular GPU."
),
"It can be used to run GPU-specific tests on a particular GPU."
),
EnumStr
(
''
,
'gpu'
,
DeviceParam
(
''
,
allow_override
=
False
),
'gpu0'
,
'gpu1'
,
'gpu2'
,
'gpu3'
,
'gpu4'
,
'gpu5'
,
'gpu6'
,
'gpu7'
,
'gpu8'
,
'gpu9'
,
'gpu10'
,
'gpu11'
,
'gpu12'
,
'gpu13'
,
'gpu14'
,
'gpu15'
,
allow_override
=
False
),
in_c_key
=
False
)
in_c_key
=
False
)
AddConfigVar
(
AddConfigVar
(
...
@@ -131,6 +118,112 @@ AddConfigVar(
...
@@ -131,6 +118,112 @@ AddConfigVar(
in_c_key
=
False
)
in_c_key
=
False
)
def default_cuda_root():
    """Guess the CUDA installation root used as the default of ``cuda.root``.

    The ``CUDA_ROOT`` environment variable wins when it is set and non-empty.
    Otherwise each directory listed in ``PATH`` is probed for an ``nvcc``
    executable, and the parent of the first directory containing one is
    returned (``/usr/local/cuda/bin`` gives ``/usr/local/cuda``).

    Returns
    -------
    str
        The detected root directory, or ``''`` when nothing was found.
    """
    env_root = os.getenv('CUDA_ROOT', "")
    if env_root:
        return env_root
    search_path = os.getenv("PATH")
    if not search_path:
        return ''
    for entry in search_path.split(os.path.pathsep):
        if not entry:
            # An empty entry would be probed relative to the current
            # directory and has no parent to report; keep searching.
            continue
        # Also accept the Windows executable name.
        if any(os.path.exists(os.path.join(entry, exe))
               for exe in ("nvcc", "nvcc.exe")):
            # normpath drops a trailing separator; without it the "parent"
            # of '/opt/cuda/bin/' would be '/opt/cuda/bin' itself.
            return os.path.dirname(os.path.normpath(entry))
    return ''
AddConfigVar
(
'cuda.root'
,
"""directory with bin/, lib/, include/ for cuda utilities.
This directory is included via -L and -rpath when linking
dynamically compiled modules. If AUTO and nvcc is in the
path, it will use one of nvcc parent directory. Otherwise
/usr/local/cuda will be used. Leave empty to prevent extra
linker directives. Default: environment variable "CUDA_ROOT"
or else "AUTO".
"""
,
StrParam
(
default_cuda_root
),
in_c_key
=
False
)
def filter_nvcc_flags(s):
    """Validate and normalise the value of the ``nvcc.flags`` option.

    The value is split on any run of whitespace (spaces, tabs, newlines),
    so multi-line configuration values are handled like space-separated
    ones. Every resulting token must start with ``-``; a token that does
    not is taken to be the detached value of a ``--flag value`` pair,
    which is rejected.

    Parameters
    ----------
    s : str
        Raw flag string as given by the user.

    Returns
    -------
    str
        The flags joined by single spaces.

    Raises
    ------
    ValueError
        If any token does not start with ``-``.
    """
    assert isinstance(s, str)
    # split() without an argument also discards empty tokens.
    flags = s.split()
    if any(not flag.startswith("-") for flag in flags):
        raise ValueError(
            "Theano nvcc.flags support only parameter/value pairs without"
            " space between them. e.g.: '--machine 64' is not supported,"
            " but '--machine=64' is supported. Please add the '=' symbol."
            " nvcc.flags value is '%s'" % s)
    return ' '.join(flags)
AddConfigVar
(
'nvcc.flags'
,
"Extra compiler flags for nvcc"
,
ConfigParam
(
""
,
filter_nvcc_flags
),
# Not needed in c key as it is already added.
# We remove it as we don't make the md5 of config to change
# if theano.sandbox.cuda is loaded or not.
in_c_key
=
False
)
AddConfigVar
(
'nvcc.compiler_bindir'
,
"If defined, nvcc compiler driver will seek g++ and gcc"
" in this directory"
,
StrParam
(
""
),
in_c_key
=
False
)
AddConfigVar
(
'nvcc.fastmath'
,
""
,
BoolParam
(
False
),
# Not needed in c key as it is already added.
# We remove it as we don't make the md5 of config to change
# if theano.sandbox.cuda is loaded or not.
in_c_key
=
False
)
AddConfigVar
(
'gpuarray.sync'
,
"""If True, every op will make sure its work is done before
returning. Setting this to True will slow down execution,
but give much more accurate results in profiling."""
,
BoolParam
(
False
),
in_c_key
=
True
)
AddConfigVar
(
'dnn.conv.workmem'
,
"This flag is deprecated; use dnn.conv.algo_fwd."
,
EnumStr
(
''
),
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.workmem_bwd'
,
"This flag is deprecated; use dnn.conv.algo_bwd."
,
EnumStr
(
''
),
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.algo_fwd'
,
"Default implementation to use for CuDNN forward convolution."
,
EnumStr
(
'small'
,
'none'
,
'large'
,
'fft'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
),
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.algo_bwd'
,
"Default implementation to use for CuDNN backward convolution."
,
EnumStr
(
'none'
,
'deterministic'
,
'fft'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
),
in_c_key
=
False
)
def default_dnn_path(suffix):
    """Build a zero-argument callable giving a cuDNN path under ``cuda.root``.

    The returned callable reads ``config.cuda.root`` when it is called, not
    when ``default_dnn_path`` runs. It returns ``''`` if the root is empty,
    otherwise the root joined with ``suffix``.
    """
    def f(suffix=suffix):
        cuda_root = config.cuda.root
        return os.path.join(cuda_root, suffix) if cuda_root != '' else ''
    return f
# cuDNN search locations. Both defaults are callables, so they are resolved
# from cuda.root only when the option is first read.
AddConfigVar('dnn.include_path',
             "Location of the cudnn header (defaults to the cuda root)",
             StrParam(default_dnn_path('include')))

AddConfigVar('dnn.library_path',
             "Location of the cudnn link library (defaults to the cuda root)",
             StrParam(default_dnn_path('lib64')))
# This flag determines whether or not to raise error/warning message if
# This flag determines whether or not to raise error/warning message if
# there is a CPU Op in the computational graph.
# there is a CPU Op in the computational graph.
AddConfigVar
(
AddConfigVar
(
...
...
theano/configparser.py
浏览文件 @
645557f9
...
@@ -102,7 +102,7 @@ def change_flags(**kwargs):
...
@@ -102,7 +102,7 @@ def change_flags(**kwargs):
l
=
[
v
for
v
in
theano
.
configparser
.
_config_var_list
l
=
[
v
for
v
in
theano
.
configparser
.
_config_var_list
if
v
.
fullname
==
k
]
if
v
.
fullname
==
k
]
assert
len
(
l
)
==
1
assert
len
(
l
)
==
1
old_val
[
k
]
=
l
[
0
]
.
__get__
()
old_val
[
k
]
=
l
[
0
]
.
__get__
(
True
,
None
)
try
:
try
:
for
k
in
kwargs
:
for
k
in
kwargs
:
l
=
[
v
for
v
in
theano
.
configparser
.
_config_var_list
l
=
[
v
for
v
in
theano
.
configparser
.
_config_var_list
...
@@ -167,7 +167,7 @@ def _config_print(thing, buf):
...
@@ -167,7 +167,7 @@ def _config_print(thing, buf):
for
cv
in
_config_var_list
:
for
cv
in
_config_var_list
:
print
(
cv
,
file
=
buf
)
print
(
cv
,
file
=
buf
)
print
(
" Doc: "
,
cv
.
doc
,
file
=
buf
)
print
(
" Doc: "
,
cv
.
doc
,
file
=
buf
)
print
(
" Value: "
,
cv
.
__get__
(),
file
=
buf
)
print
(
" Value: "
,
cv
.
__get__
(
True
,
None
),
file
=
buf
)
print
(
""
,
file
=
buf
)
print
(
""
,
file
=
buf
)
...
@@ -182,7 +182,7 @@ def get_config_md5():
...
@@ -182,7 +182,7 @@ def get_config_md5():
all_opts
=
sorted
([
c
for
c
in
_config_var_list
if
c
.
in_c_key
],
all_opts
=
sorted
([
c
for
c
in
_config_var_list
if
c
.
in_c_key
],
key
=
lambda
cv
:
cv
.
fullname
)
key
=
lambda
cv
:
cv
.
fullname
)
return
theano
.
gof
.
utils
.
hash_from_code
(
'
\n
'
.
join
(
return
theano
.
gof
.
utils
.
hash_from_code
(
'
\n
'
.
join
(
[
'
%
s =
%
s'
%
(
cv
.
fullname
,
cv
.
__get__
())
for
cv
in
all_opts
]))
[
'
%
s =
%
s'
%
(
cv
.
fullname
,
cv
.
__get__
(
True
,
None
))
for
cv
in
all_opts
]))
class
TheanoConfigParser
(
object
):
class
TheanoConfigParser
(
object
):
...
@@ -270,14 +270,14 @@ def AddConfigVar(name, doc, configparam, root=config, in_c_key=True):
...
@@ -270,14 +270,14 @@ def AddConfigVar(name, doc, configparam, root=config, in_c_key=True):
# Trigger a read of the value from config files and env vars
# Trigger a read of the value from config files and env vars
# This allows invalid values supplied by the user to be filtered out early.
# This allows invalid values supplied by the user to be filtered out early.
if
not
callable
(
configparam
.
default
):
if
not
callable
(
configparam
.
default
):
configparam
.
__get__
()
configparam
.
__get__
(
root
,
type
(
root
)
)
else
:
else
:
# We do not want to evaluate now the default value
# We do not want to evaluate now the default value
# when it is a callable.
# when it is a callable.
try
:
try
:
fetch_val_for_key
(
configparam
.
fullname
)
fetch_val_for_key
(
configparam
.
fullname
)
# The user provided a value, filter it now.
# The user provided a value, filter it now.
configparam
.
__get__
()
configparam
.
__get__
(
root
,
type
(
root
)
)
except
KeyError
:
except
KeyError
:
pass
pass
setattr
(
root
.
__class__
,
sections
[
0
],
configparam
)
setattr
(
root
.
__class__
,
sections
[
0
],
configparam
)
...
@@ -294,6 +294,7 @@ class ConfigParam(object):
...
@@ -294,6 +294,7 @@ class ConfigParam(object):
self
.
default
=
default
self
.
default
=
default
self
.
filter
=
filter
self
.
filter
=
filter
self
.
allow_override
=
allow_override
self
.
allow_override
=
allow_override
self
.
is_default
=
True
# N.B. --
# N.B. --
# self.fullname # set by AddConfigVar
# self.fullname # set by AddConfigVar
# self.doc # set by AddConfigVar
# self.doc # set by AddConfigVar
...
@@ -304,16 +305,19 @@ class ConfigParam(object):
...
@@ -304,16 +305,19 @@ class ConfigParam(object):
# Calling `filter` here may actually be harmful if the default value is
# Calling `filter` here may actually be harmful if the default value is
# invalid and causes a crash or has unwanted side effects.
# invalid and causes a crash or has unwanted side effects.
def
__get__
(
self
,
*
args
):
def
__get__
(
self
,
cls
,
type_
):
if
cls
is
None
:
return
self
if
not
hasattr
(
self
,
'val'
):
if
not
hasattr
(
self
,
'val'
):
try
:
try
:
val_str
=
fetch_val_for_key
(
self
.
fullname
)
val_str
=
fetch_val_for_key
(
self
.
fullname
)
self
.
is_default
=
False
except
KeyError
:
except
KeyError
:
if
callable
(
self
.
default
):
if
callable
(
self
.
default
):
val_str
=
self
.
default
()
val_str
=
self
.
default
()
else
:
else
:
val_str
=
self
.
default
val_str
=
self
.
default
self
.
__set__
(
None
,
val_str
)
self
.
__set__
(
cls
,
val_str
)
# print "RVAL", self.val
# print "RVAL", self.val
return
self
.
val
return
self
.
val
...
...
theano/gof/op.py
浏览文件 @
645557f9
...
@@ -1171,7 +1171,7 @@ def apply_meth(tag):
...
@@ -1171,7 +1171,7 @@ def apply_meth(tag):
code
=
self
.
code_sections
[
tag
]
code
=
self
.
code_sections
[
tag
]
define_macros
,
undef_macros
=
self
.
get_c_macros
(
node
,
name
)
define_macros
,
undef_macros
=
self
.
get_c_macros
(
node
,
name
)
return
os
.
linesep
.
join
([
define_macros
,
code
,
return
os
.
linesep
.
join
([
''
,
define_macros
,
code
,
undef_macros
])
undef_macros
])
else
:
else
:
raise
utils
.
MethodNotDefined
(
raise
utils
.
MethodNotDefined
(
...
@@ -1428,7 +1428,7 @@ class COp(Op):
...
@@ -1428,7 +1428,7 @@ class COp(Op):
def_macros
,
undef_macros
=
self
.
get_c_macros
(
node
,
name
)
def_macros
,
undef_macros
=
self
.
get_c_macros
(
node
,
name
)
def_sub
,
undef_sub
=
self
.
get_sub_macros
(
sub
)
def_sub
,
undef_sub
=
self
.
get_sub_macros
(
sub
)
return
os
.
linesep
.
join
([
def_macros
,
def_sub
,
return
os
.
linesep
.
join
([
''
,
def_macros
,
def_sub
,
op_code
,
op_code
,
undef_sub
,
undef_macros
])
undef_sub
,
undef_macros
])
else
:
else
:
...
@@ -1442,17 +1442,21 @@ class COp(Op):
...
@@ -1442,17 +1442,21 @@ class COp(Op):
define_macros
,
undef_macros
=
self
.
get_c_macros
(
node
,
name
,
define_macros
,
undef_macros
=
self
.
get_c_macros
(
node
,
name
,
check_input
=
False
)
check_input
=
False
)
ctx
=
""
if
'context'
in
sub
:
ctx
=
",
%
s"
%
(
sub
[
'context'
],)
# Generate the C code
# Generate the C code
return
"""
return
"""
%(define_macros)
s
%(define_macros)
s
{
{
if (
%(func_name)
s(
%(func_args)
s) != 0) {
if (
%(func_name)
s(
%(func_args)
s
%(ctx)
s
) != 0) {
%(fail)
s
%(fail)
s
}
}
}
}
%(undef_macros)
s
%(undef_macros)
s
"""
%
dict
(
func_name
=
self
.
func_name
,
"""
%
dict
(
func_name
=
self
.
func_name
,
fail
=
sub
[
'fail'
],
fail
=
sub
[
'fail'
],
ctx
=
ctx
,
func_args
=
self
.
format_c_function_args
(
inp
,
out
),
func_args
=
self
.
format_c_function_args
(
inp
,
out
),
define_macros
=
define_macros
,
define_macros
=
define_macros
,
undef_macros
=
undef_macros
)
undef_macros
=
undef_macros
)
...
...
theano/sandbox/cuda/__init__.py
浏览文件 @
645557f9
...
@@ -535,7 +535,7 @@ def handle_shared_float32(tf):
...
@@ -535,7 +535,7 @@ def handle_shared_float32(tf):
# import dependency. So we also test it in the file theano/__init__.py
# import dependency. So we also test it in the file theano/__init__.py
if
config
.
device
.
startswith
(
'gpu'
):
if
config
.
device
.
startswith
(
'gpu'
):
use
(
device
=
config
.
device
,
force
=
config
.
force_device
,
test_driver
=
False
)
use
(
device
=
config
.
device
,
force
=
config
.
force_device
,
test_driver
=
False
)
elif
config
.
init_gpu_device
:
elif
config
.
init_gpu_device
.
startswith
(
'gpu'
)
:
assert
config
.
device
==
"cpu"
,
(
assert
config
.
device
==
"cpu"
,
(
"We can use the Theano flag init_gpu_device"
"We can use the Theano flag init_gpu_device"
" only when the Theano flag device=='cpu'"
)
" only when the Theano flag device=='cpu'"
)
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
645557f9
...
@@ -27,8 +27,6 @@ from theano.sandbox.cuda import gpu_seqopt, register_opt
...
@@ -27,8 +27,6 @@ from theano.sandbox.cuda import gpu_seqopt, register_opt
from
theano.sandbox.cuda.nvcc_compiler
import
NVCC_compiler
from
theano.sandbox.cuda.nvcc_compiler
import
NVCC_compiler
import
theano.sandbox.dnn_flags
def
dnn_available
():
def
dnn_available
():
if
dnn_available
.
avail
is
None
:
if
dnn_available
.
avail
is
None
:
...
@@ -57,15 +55,17 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
...
@@ -57,15 +55,17 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
return 1;
return 1;
}
}
"""
"""
params
=
[
"-l"
,
"cudnn"
,
"-I"
+
os
.
path
.
dirname
(
__file__
)]
if
config
.
dnn
.
include_path
:
params
.
append
(
"-I"
+
config
.
dnn
.
include_path
)
if
config
.
dnn
.
library_path
:
params
.
append
(
"-L"
+
config
.
dnn
.
library_path
)
# Do not run here the test program. It would run on the
# Do not run here the test program. It would run on the
# default gpu, not the one selected by the user. If mixed
# default gpu, not the one selected by the user. If mixed
# GPU are installed or if the GPUs are configured in
# GPU are installed or if the GPUs are configured in
# exclusive mode, this cause bad detection.
# exclusive mode, this cause bad detection.
comp
,
out
,
err
=
NVCC_compiler
.
try_flags
(
comp
,
out
,
err
=
NVCC_compiler
.
try_flags
(
[
"-l"
,
"cudnn"
,
"-I"
+
os
.
path
.
dirname
(
__file__
),
params
=
params
,
preambule
=
preambule
,
body
=
body
,
"-I"
+
config
.
dnn
.
include_path
,
"-L"
+
config
.
dnn
.
library_path
],
preambule
=
preambule
,
body
=
body
,
try_run
=
False
,
output
=
True
)
try_run
=
False
,
output
=
True
)
dnn_available
.
avail
=
comp
dnn_available
.
avail
=
comp
...
...
theano/sandbox/cuda/nvcc_compiler.py
浏览文件 @
645557f9
...
@@ -8,6 +8,7 @@ import warnings
...
@@ -8,6 +8,7 @@ import warnings
import
numpy
import
numpy
from
theano
import
config
from
theano.compat
import
decode
,
decode_iter
from
theano.compat
import
decode
,
decode_iter
from
theano.gof
import
local_bitwidth
from
theano.gof
import
local_bitwidth
from
theano.gof.utils
import
hash_from_file
from
theano.gof.utils
import
hash_from_file
...
@@ -19,67 +20,6 @@ from theano.misc.windows import output_subprocess_Popen
...
@@ -19,67 +20,6 @@ from theano.misc.windows import output_subprocess_Popen
_logger
=
logging
.
getLogger
(
"theano.sandbox.cuda.nvcc_compiler"
)
_logger
=
logging
.
getLogger
(
"theano.sandbox.cuda.nvcc_compiler"
)
from
theano.configparser
import
(
config
,
AddConfigVar
,
StrParam
,
BoolParam
,
ConfigParam
)
AddConfigVar
(
'nvcc.compiler_bindir'
,
"If defined, nvcc compiler driver will seek g++ and gcc"
" in this directory"
,
StrParam
(
""
),
in_c_key
=
False
)
user_provided_cuda_root
=
True
def
default_cuda_root
():
global
user_provided_cuda_root
v
=
os
.
getenv
(
'CUDA_ROOT'
,
""
)
user_provided_cuda_root
=
False
if
v
:
return
v
return
find_cuda_root
()
AddConfigVar
(
'cuda.root'
,
"""directory with bin/, lib/, include/ for cuda utilities.
This directory is included via -L and -rpath when linking
dynamically compiled modules. If AUTO and nvcc is in the
path, it will use one of nvcc parent directory. Otherwise
/usr/local/cuda will be used. Leave empty to prevent extra
linker directives. Default: environment variable "CUDA_ROOT"
or else "AUTO".
"""
,
StrParam
(
default_cuda_root
),
in_c_key
=
False
)
def
filter_nvcc_flags
(
s
):
assert
isinstance
(
s
,
str
)
flags
=
[
flag
for
flag
in
s
.
split
(
' '
)
if
flag
]
if
any
([
f
for
f
in
flags
if
not
f
.
startswith
(
"-"
)]):
raise
ValueError
(
"Theano nvcc.flags support only parameter/value pairs without"
" space between them. e.g.: '--machine 64' is not supported,"
" but '--machine=64' is supported. Please add the '=' symbol."
" nvcc.flags value is '
%
s'"
%
s
)
return
' '
.
join
(
flags
)
AddConfigVar
(
'nvcc.flags'
,
"Extra compiler flags for nvcc"
,
ConfigParam
(
""
,
filter_nvcc_flags
),
# Not needed in c key as it is already added.
# We remove it as we don't make the md5 of config to change
# if theano.sandbox.cuda is loaded or not.
in_c_key
=
False
)
AddConfigVar
(
'nvcc.fastmath'
,
""
,
BoolParam
(
False
),
# Not needed in c key as it is already added.
# We remove it as we don't make the md5 of config to change
# if theano.sandbox.cuda is loaded or not.
in_c_key
=
False
)
nvcc_path
=
'nvcc'
nvcc_path
=
'nvcc'
nvcc_version
=
None
nvcc_version
=
None
...
@@ -115,14 +55,6 @@ def is_nvcc_available():
...
@@ -115,14 +55,6 @@ def is_nvcc_available():
return
False
return
False
def
find_cuda_root
():
s
=
os
.
getenv
(
"PATH"
)
if
not
s
:
return
for
dir
in
s
.
split
(
os
.
path
.
pathsep
):
if
os
.
path
.
exists
(
os
.
path
.
join
(
dir
,
"nvcc"
)):
return
os
.
path
.
split
(
dir
)[
0
]
rpath_defaults
=
[]
rpath_defaults
=
[]
...
@@ -229,7 +161,7 @@ class NVCC_compiler(Compiler):
...
@@ -229,7 +161,7 @@ class NVCC_compiler(Compiler):
include_dirs
include_dirs
A list of include directory names (each gets prefixed with -I).
A list of include directory names (each gets prefixed with -I).
lib_dirs
lib_dirs
A list of library search path directory names (each gets
A list of library search path directory names (each gets
prefixed with -L).
prefixed with -L).
libs
libs
A list of libraries to link with (each gets prefixed with -l).
A list of libraries to link with (each gets prefixed with -l).
...
@@ -359,7 +291,8 @@ class NVCC_compiler(Compiler):
...
@@ -359,7 +291,8 @@ class NVCC_compiler(Compiler):
# provided an cuda.root flag, we need to add one, but
# provided an cuda.root flag, we need to add one, but
# otherwise, we don't add it. See gh-1540 and
# otherwise, we don't add it. See gh-1540 and
# https://wiki.debian.org/RpathIssue for details.
# https://wiki.debian.org/RpathIssue for details.
if
(
user_provided_cuda_root
and
if
(
not
type
(
config
.
cuda
)
.
root
.
is_default
and
os
.
path
.
exists
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
))):
os
.
path
.
exists
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
))):
rpaths
.
append
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
))
rpaths
.
append
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
))
...
...
theano/sandbox/dnn_flags.py
deleted
100644 → 0
浏览文件 @
1ec1cd9b
"""
This module contains the configuration flags for cudnn support.
Those are shared between the cuda and gpuarray backend which is why
they are in this file.
"""
import
os.path
from
theano.configparser
import
AddConfigVar
,
EnumStr
,
StrParam
from
theano
import
config
AddConfigVar
(
'dnn.conv.workmem'
,
"This flag is deprecated; use dnn.conv.algo_fwd."
,
EnumStr
(
''
),
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.workmem_bwd'
,
"This flag is deprecated; use dnn.conv.algo_bwd."
,
EnumStr
(
''
),
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.algo_fwd'
,
"Default implementation to use for CuDNN forward convolution."
,
EnumStr
(
'small'
,
'none'
,
'large'
,
'fft'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
),
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.algo_bwd'
,
"Default implementation to use for CuDNN backward convolution."
,
EnumStr
(
'none'
,
'deterministic'
,
'fft'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
),
in_c_key
=
False
)
AddConfigVar
(
'dnn.include_path'
,
"Location of the cudnn header (defaults to the cuda root)"
,
StrParam
(
lambda
:
os
.
path
.
join
(
config
.
cuda
.
root
,
'include'
)))
AddConfigVar
(
'dnn.library_path'
,
"Location of the cudnn header (defaults to the cuda root)"
,
StrParam
(
lambda
:
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib64'
)))
theano/sandbox/gpuarray/__init__.py
浏览文件 @
645557f9
...
@@ -19,13 +19,6 @@ try:
...
@@ -19,13 +19,6 @@ try:
except
ImportError
:
except
ImportError
:
pygpu
=
None
pygpu
=
None
AddConfigVar
(
'gpuarray.sync'
,
"""If True, every op will make sure its work is done before
returning. Setting this to True will slow down execution,
but give much more accurate results in profiling."""
,
BoolParam
(
False
),
in_c_key
=
True
)
# This is for documentation not to depend on the availability of pygpu
# This is for documentation not to depend on the availability of pygpu
from
.type
import
(
GpuArrayType
,
GpuArrayVariable
,
GpuArrayConstant
,
from
.type
import
(
GpuArrayType
,
GpuArrayVariable
,
GpuArrayConstant
,
GpuArraySharedVariable
,
gpuarray_shared_constructor
)
GpuArraySharedVariable
,
gpuarray_shared_constructor
)
...
@@ -57,8 +50,9 @@ if pygpu:
...
@@ -57,8 +50,9 @@ if pygpu:
import
theano.compile
import
theano.compile
theano
.
compile
.
shared_constructor
(
gpuarray_shared_constructor
)
theano
.
compile
.
shared_constructor
(
gpuarray_shared_constructor
)
optdb
.
add_tags
(
'gpuarray_opt'
,
'fast_run'
,
'fast_compile'
)
optdb
.
add_tags
(
'gpuarray_opt'
,
'fast_run'
,
'fast_compile'
)
elif
config
.
gpuarray
.
init_device
!=
''
:
elif
(
config
.
init_gpu_device
.
startswith
(
'cuda'
)
or
init_dev
(
config
.
gpuarray
.
init_device
)
config
.
init_gpu_device
.
startswith
(
'opencl'
)):
init_dev
(
config
.
init_gpu_device
)
from
.basic_ops
import
(
GpuAlloc
,
GpuContiguous
,
GpuEye
,
GpuFromHost
,
from
.basic_ops
import
(
GpuAlloc
,
GpuContiguous
,
GpuEye
,
GpuFromHost
,
GpuJoin
,
GpuReshape
,
GpuSplit
,
HostFromGpu
)
GpuJoin
,
GpuReshape
,
GpuSplit
,
HostFromGpu
)
...
@@ -70,7 +64,8 @@ if pygpu:
...
@@ -70,7 +64,8 @@ if pygpu:
except
Exception
:
except
Exception
:
error
(
"Could not initialize pygpu, support disabled"
,
exc_info
=
True
)
error
(
"Could not initialize pygpu, support disabled"
,
exc_info
=
True
)
else
:
else
:
if
(
config
.
gpuarray
.
init_device
!=
''
or
if
(
config
.
init_gpu_device
.
startswith
(
'cuda'
)
or
config
.
device
.
startswith
(
'opencl'
)
or
config
.
init_gpu_device
.
startswith
(
'opencl'
)
or
config
.
device
.
startswith
(
'cuda'
)):
config
.
device
.
startswith
(
'opencl'
)
or
config
.
device
.
startswith
(
'cuda'
)):
error
(
"pygpu was configured but could not be imported"
,
exc_info
=
True
)
error
(
"pygpu was configured but could not be imported"
,
exc_info
=
True
)
theano/sandbox/gpuarray/basic_ops.py
浏览文件 @
645557f9
差异被折叠。
点击展开。
theano/sandbox/gpuarray/conv.py
浏览文件 @
645557f9
...
@@ -5,17 +5,15 @@ import theano
...
@@ -5,17 +5,15 @@ import theano
from
theano
import
config
,
gof
from
theano
import
config
,
gof
try
:
try
:
import
pygpu
from
pygpu
import
gpuarray
from
pygpu
import
gpuarray
except
ImportError
:
except
ImportError
:
pass
pass
from
six.moves
import
reduce
from
.comp
import
NVCC_compiler
from
.type
import
GpuArrayType
from
.type
import
GpuArrayType
from
.basic_ops
import
(
as_gpuarray_variable
,
GpuKernelBase
,
Kernel
)
from
.basic_ops
import
as_gpuarray_variable
,
GpuKernelBase
,
Kernel
from
theano.gof
import
utils
from
theano.gof
import
utils
class
GpuConv
(
GpuKernelBase
,
gof
.
Op
):
class
GpuConv
(
GpuKernelBase
,
gof
.
Op
):
"""
"""
Implement the batched and stacked 2d convolution on the gpu.
Implement the batched and stacked 2d convolution on the gpu.
...
@@ -70,19 +68,19 @@ class GpuConv(GpuKernelBase, gof.Op):
...
@@ -70,19 +68,19 @@ class GpuConv(GpuKernelBase, gof.Op):
raise
ValueError
(
mode
)
raise
ValueError
(
mode
)
def
__init__
(
self
,
border_mode
,
def
__init__
(
self
,
border_mode
,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
logical_img_hw
=
None
,
logical_img_hw
=
None
,
logical_kern_hw
=
None
,
logical_kern_hw
=
None
,
logical_kern_align_top
=
True
,
logical_kern_align_top
=
True
,
version
=-
1
,
version
=-
1
,
direction_hint
=
None
,
direction_hint
=
None
,
verbose
=
0
,
verbose
=
0
,
kshp
=
None
,
kshp
=
None
,
imshp
=
None
,
imshp
=
None
,
max_threads_dim0
=
None
,
max_threads_dim0
=
None
,
nkern
=
None
,
nkern
=
None
,
bsize
=
None
,
bsize
=
None
,
fft_opt
=
True
):
fft_opt
=
True
):
self
.
border_mode
=
border_mode
self
.
border_mode
=
border_mode
self
.
subsample
=
subsample
self
.
subsample
=
subsample
if
logical_img_hw
is
not
None
:
if
logical_img_hw
is
not
None
:
...
@@ -182,7 +180,7 @@ class GpuConv(GpuKernelBase, gof.Op):
...
@@ -182,7 +180,7 @@ class GpuConv(GpuKernelBase, gof.Op):
def
flops
(
self
,
inputs
,
outputs
):
def
flops
(
self
,
inputs
,
outputs
):
"""
"""
Useful with the hack in profilemode to print the MFlops.
Useful with the hack in profilemode to print the MFlops.
"""
"""
images
,
kerns
=
inputs
images
,
kerns
=
inputs
out
,
=
outputs
out
,
=
outputs
...
@@ -227,32 +225,14 @@ class GpuConv(GpuKernelBase, gof.Op):
...
@@ -227,32 +225,14 @@ class GpuConv(GpuKernelBase, gof.Op):
nb
=
0
nb
=
0
if
self
.
kshp
is
not
None
:
if
self
.
kshp
is
not
None
:
nb
=
self
.
kshp
[
1
]
nb
=
self
.
kshp
[
1
]
return
[
'-DTHEANO_KERN_WID='
+
str
(
nb
)]
# ,'-g','-G']
return
[
'-DTHEANO_KERN_WID='
+
str
(
nb
)]
def
c_headers
(
self
):
def
c_headers
(
self
):
if
pygpu
.
get_default_context
()
.
kind
==
'opencl'
:
return
[
'<stdio.h>'
,
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
]
raise
MethodNotDefined
(
'cuda only'
)
return
[
'<stdint.h>'
,
'<stdio.h>'
,
'cuda.h'
,
'<gpuarray/extension.h>'
,
'<numpy_compat.h>'
,
'<gpuarray/ext_cuda.h>'
,
'<gpuarray/types.h>'
]
def
c_header_dirs
(
self
):
if
pygpu
.
get_default_context
()
.
kind
==
'opencl'
:
raise
MethodNotDefined
(
'cuda only'
)
cuda_root
=
config
.
cuda
.
root
if
cuda_root
:
return
[
os
.
path
.
join
(
cuda_root
,
'include'
)]
else
:
return
[]
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
# raise this whenever modifying any of the support_code_files
return
(
0
,
21
)
return
(
0
,
22
)
def
c_init_code
(
self
):
if
pygpu
.
get_default_context
()
.
kind
==
'opencl'
:
raise
MethodNotDefined
(
'cuda only'
)
return
[
'setup_ext_cuda();'
]
def
c_code
(
self
,
node
,
nodename
,
inp
,
out_
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out_
,
sub
):
img
,
kern
=
inp
img
,
kern
=
inp
...
...
theano/sandbox/gpuarray/dnn.py
浏览文件 @
645557f9
...
@@ -26,10 +26,7 @@ from .conv import GpuConv
...
@@ -26,10 +26,7 @@ from .conv import GpuConv
# GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
# GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
from
.nnet
import
GpuSoftmax
from
.nnet
import
GpuSoftmax
from
.opt
import
gpu_seqopt
,
register_opt
,
conv_groupopt
,
op_lifter
from
.opt
import
gpu_seqopt
,
register_opt
,
conv_groupopt
,
op_lifter
from
.opt_util
import
alpha_merge
,
output_merge
from
.opt_util
import
alpha_merge
,
output_merge
,
inplace_allocempty
# We need to import this to define the flags.
from
theano.sandbox
import
dnn_flags
# noqa
def
dnn_available
():
def
dnn_available
():
...
@@ -50,7 +47,6 @@ def dnn_available():
...
@@ -50,7 +47,6 @@ def dnn_available():
dnn_available
.
avail
=
False
dnn_available
.
avail
=
False
preambule
=
"""
preambule
=
"""
#include <stdio.h>
#include <stdio.h>
#include <cuda.h>
#include <cudnn.h>
#include <cudnn.h>
#include <cudnn_helper.h>
#include <cudnn_helper.h>
"""
"""
...
@@ -64,15 +60,18 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
...
@@ -64,15 +60,18 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
return 1;
return 1;
}
}
"""
"""
params
=
[
"-l"
,
"cudnn"
,
"-I"
+
os
.
path
.
dirname
(
__file__
)]
if
config
.
dnn
.
include_path
:
params
.
append
(
"-I"
+
config
.
dnn
.
include_path
)
if
config
.
dnn
.
library_path
:
params
.
append
(
"-L"
+
config
.
dnn
.
library_path
)
# Do not run here the test program. It would run on the
# Do not run here the test program. It would run on the
# default gpu, not the one selected by the user. If mixed
# default gpu, not the one selected by the user. If mixed
# GPU are installed or if the GPUs are configured in
# GPU are installed or if the GPUs are configured in
# exclusive mode, this cause bad detection.
# exclusive mode, this cause bad detection.
comp
,
out
,
err
=
GCC_compiler
.
try_flags
(
comp
,
out
,
err
=
GCC_compiler
.
try_flags
(
[
"-l"
,
"cudnn"
,
"-I"
+
os
.
path
.
dirname
(
__file__
),
params
,
preambule
=
preambule
,
body
=
body
,
"-I"
+
config
.
dnn
.
include_path
,
"-L"
+
config
.
dnn
.
library_path
],
preambule
=
preambule
,
body
=
body
,
try_run
=
False
,
output
=
True
)
try_run
=
False
,
output
=
True
)
dnn_available
.
avail
=
comp
dnn_available
.
avail
=
comp
...
@@ -1242,86 +1241,62 @@ conv_groupopt.register('local_conv_dnn', local_conv_dnn, 20,
...
@@ -1242,86 +1241,62 @@ conv_groupopt.register('local_conv_dnn', local_conv_dnn, 20,
'conv_dnn'
,
'fast_compile'
,
'fast_run'
,
'cudnn'
)
'conv_dnn'
,
'fast_compile'
,
'fast_run'
,
'cudnn'
)
@local_optimizer
([
GpuDnnConv
],
inplace
=
True
)
@inplace_allocempty
(
GpuDnnConv
,
2
)
def
local_dnn_conv_inplace
(
node
):
def
local_dnn_conv_inplace
(
node
,
inputs
):
if
type
(
node
.
op
)
!=
GpuDnnConv
or
node
.
op
.
inplace
:
return
inputs
=
list
(
node
.
inputs
)
dest
=
inputs
[
2
]
if
(
dest
.
owner
and
isinstance
(
dest
.
owner
.
op
,
GpuAllocEmpty
)
and
len
(
dest
.
clients
)
>
1
):
inputs
[
2
]
=
GpuAllocEmpty
(
dest
.
owner
.
op
.
dtype
)(
*
dest
.
owner
.
inputs
)
return
[
GpuDnnConv
(
algo
=
node
.
op
.
algo
,
inplace
=
True
)(
*
inputs
)]
return
[
GpuDnnConv
(
algo
=
node
.
op
.
algo
,
inplace
=
True
)(
*
inputs
)]
@local_optimizer
([
GpuDnnConvGradW
],
inplace
=
True
)
@inplace_allocempty
(
GpuDnnConvGradW
,
2
)
def
local_dnn_convgw_inplace
(
node
):
def
local_dnn_convgw_inplace
(
node
,
inputs
):
if
type
(
node
.
op
)
!=
GpuDnnConvGradW
or
node
.
op
.
inplace
:
return
inputs
=
list
(
node
.
inputs
)
dest
=
inputs
[
2
]
if
(
dest
.
owner
and
isinstance
(
dest
.
owner
.
op
,
GpuAllocEmpty
)
and
len
(
dest
.
clients
)
>
1
):
inputs
[
2
]
=
GpuAllocEmpty
(
dest
.
owner
.
op
.
dtype
)(
*
dest
.
owner
.
inputs
)
return
[
GpuDnnConvGradW
(
algo
=
node
.
op
.
algo
,
inplace
=
True
)(
*
inputs
)]
return
[
GpuDnnConvGradW
(
algo
=
node
.
op
.
algo
,
inplace
=
True
)(
*
inputs
)]
@local_optimizer
([
GpuDnnConvGradI
],
inplace
=
True
)
@inplace_allocempty
(
GpuDnnConvGradI
,
2
)
def
local_dnn_convgi_inplace
(
node
):
def
local_dnn_convgi_inplace
(
node
,
inputs
):
if
type
(
node
.
op
)
!=
GpuDnnConvGradI
or
node
.
op
.
inplace
:
return
inputs
=
list
(
node
.
inputs
)
dest
=
inputs
[
2
]
if
(
dest
.
owner
and
isinstance
(
dest
.
owner
.
op
,
GpuAllocEmpty
)
and
len
(
dest
.
clients
)
>
1
):
inputs
[
2
]
=
GpuAllocEmpty
(
dest
.
owner
.
op
.
dtype
)(
*
dest
.
owner
.
inputs
)
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
,
inplace
=
True
)(
*
inputs
)]
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
,
inplace
=
True
)(
*
inputs
)]
optdb
.
register
(
'local_dnna_conv_inplace'
,
optdb
.
register
(
'local_dnna_conv_inplace'
,
tensor
.
opt
.
in2out
(
local_dnn_conv_inplace
,
tensor
.
opt
.
in2out
(
local_dnn_conv_inplace
,
local_dnn_convgw_inplace
,
local_dnn_convgw_inplace
,
local_dnn_convgi_inplace
,
local_dnn_convgi_inplace
,
name
=
"local_dnn_conv_inplace"
),
name
=
"local_dnn
a
_conv_inplace"
),
70.0
,
'fast_run'
,
'inplace'
,
'gpuarray'
,
'cudnn'
)
70.0
,
'fast_run'
,
'inplace'
,
'gpuarray'
,
'cudnn'
)
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
@alpha_merge
(
GpuDnnConv
,
alpha_in
=
4
,
beta_in
=
5
,
nd
=
4
)
@alpha_merge
(
GpuDnnConv
,
alpha_in
=
4
,
beta_in
=
5
)
def
local_dnn_conv_alpha_merge
(
node
,
*
inputs
):
def
local_dnn_conv_alpha_merge
(
node
,
*
inputs
):
return
[
GpuDnnConv
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
return
[
GpuDnnConv
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
@alpha_merge
(
GpuDnnConvGradW
,
alpha_in
=
4
,
beta_in
=
5
,
nd
=
4
)
@alpha_merge
(
GpuDnnConvGradW
,
alpha_in
=
4
,
beta_in
=
5
)
def
local_dnn_convw_alpha_merge
(
node
,
*
inputs
):
def
local_dnn_convw_alpha_merge
(
node
,
*
inputs
):
return
[
GpuDnnConvGradW
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
return
[
GpuDnnConvGradW
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
@alpha_merge
(
GpuDnnConvGradI
,
alpha_in
=
4
,
beta_in
=
5
,
nd
=
4
)
@alpha_merge
(
GpuDnnConvGradI
,
alpha_in
=
4
,
beta_in
=
5
)
def
local_dnn_convi_alpha_merge
(
node
,
*
inputs
):
def
local_dnn_convi_alpha_merge
(
node
,
*
inputs
):
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
@output_merge
(
GpuDnnConv
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
,
nd
=
4
)
@output_merge
(
GpuDnnConv
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
)
def
local_dnn_conv_output_merge
(
node
,
*
inputs
):
def
local_dnn_conv_output_merge
(
node
,
*
inputs
):
inputs
=
inputs
[
0
:
2
]
+
(
gpu_contiguous
(
inputs
[
2
]),)
+
inputs
[
3
:]
inputs
=
inputs
[
0
:
2
]
+
(
gpu_contiguous
(
inputs
[
2
]),)
+
inputs
[
3
:]
return
[
GpuDnnConv
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
return
[
GpuDnnConv
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
@output_merge
(
GpuDnnConvGradW
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
,
nd
=
4
)
@output_merge
(
GpuDnnConvGradW
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
)
def
local_dnn_convw_output_merge
(
node
,
*
inputs
):
def
local_dnn_convw_output_merge
(
node
,
*
inputs
):
inputs
=
inputs
[
0
:
2
]
+
(
gpu_contiguous
(
inputs
[
2
]),)
+
inputs
[
3
:]
inputs
=
inputs
[
0
:
2
]
+
(
gpu_contiguous
(
inputs
[
2
]),)
+
inputs
[
3
:]
return
[
GpuDnnConvGradW
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
return
[
GpuDnnConvGradW
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
@output_merge
(
GpuDnnConvGradI
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
,
nd
=
4
)
@output_merge
(
GpuDnnConvGradI
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
)
def
local_dnn_convi_output_merge
(
node
,
*
inputs
):
def
local_dnn_convi_output_merge
(
node
,
*
inputs
):
inputs
=
inputs
[
0
:
2
]
+
(
gpu_contiguous
(
inputs
[
2
]),)
+
inputs
[
3
:]
inputs
=
inputs
[
0
:
2
]
+
(
gpu_contiguous
(
inputs
[
2
]),)
+
inputs
[
3
:]
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
...
...
theano/sandbox/gpuarray/elemwise.py
浏览文件 @
645557f9
差异被折叠。
点击展开。
theano/sandbox/gpuarray/kernel_codegen.py
浏览文件 @
645557f9
...
@@ -4,11 +4,11 @@ Helper routines for generating gpu kernels for nvcc.
...
@@ -4,11 +4,11 @@ Helper routines for generating gpu kernels for nvcc.
"""
"""
try
:
try
:
import
pygpu
from
pygpu
import
gpuarray
from
pygpu
import
gpuarray
except
ImportError
:
except
ImportError
:
pass
pass
def
nvcc_kernel
(
name
,
params
,
body
):
def
nvcc_kernel
(
name
,
params
,
body
):
"""
"""
Return the c code of a kernel function.
Return the c code of a kernel function.
...
@@ -174,16 +174,15 @@ def inline_softmax(N, buf, buf2, threadPos, threadCount, dtype="float32"):
...
@@ -174,16 +174,15 @@ def inline_softmax(N, buf, buf2, threadPos, threadCount, dtype="float32"):
"""
"""
ctype
=
gpuarray
.
dtype_to_ctype
(
dtype
)
ctype
=
gpuarray
.
dtype_to_ctype
(
dtype
)
return
[
# get max of buf (trashing all but buf[0])
# get max of buf (trashing all but buf[0])
return
[
inline_reduce_max
(
N
,
buf
,
threadPos
,
threadCount
),
inline_reduce_max
(
N
,
buf
,
threadPos
,
threadCount
),
'__syncthreads()'
,
'__syncthreads()'
,
(
'
%
s row_max = '
+
buf
+
'[0]'
)
%
ctype
,
(
'
%
s row_max = '
+
buf
+
'[0]'
)
%
ctype
,
'__syncthreads()'
,
'__syncthreads()'
,
'for(int __i='
+
threadPos
+
'; __i<'
+
N
+
'for(int __i='
+
threadPos
+
'; __i<'
+
N
+
'; __i+='
+
threadCount
+
'){'
,
'; __i+='
+
threadCount
+
'){'
,
buf
+
'[__i] = exp('
+
buf2
+
'[__i] - row_max)'
,
buf
+
'[__i] = exp('
+
buf2
+
'[__i] - row_max)'
,
buf2
+
'[__i] = '
+
buf
+
'[__i]'
,
buf2
+
'[__i] = '
+
buf
+
'[__i]'
,
'}'
,
'}'
,
'__syncthreads()'
,
'__syncthreads()'
,
inline_reduce_sum
(
N
,
buf
,
threadPos
,
threadCount
),
inline_reduce_sum
(
N
,
buf
,
threadPos
,
threadCount
),
...
@@ -192,8 +191,8 @@ def inline_softmax(N, buf, buf2, threadPos, threadCount, dtype="float32"):
...
@@ -192,8 +191,8 @@ def inline_softmax(N, buf, buf2, threadPos, threadCount, dtype="float32"):
'__syncthreads()'
,
'__syncthreads()'
,
# divide each exp() result by the sum to complete the job.
# divide each exp() result by the sum to complete the job.
'for(int __i='
+
threadPos
+
'; __i<'
+
N
+
'for(int __i='
+
threadPos
+
'; __i<'
+
N
+
'; __i+='
+
threadCount
+
'){'
,
'; __i+='
+
threadCount
+
'){'
,
buf
+
'[__i] = '
+
buf2
+
'[__i] / row_sum'
,
buf
+
'[__i] = '
+
buf2
+
'[__i] / row_sum'
,
'}'
,
'}'
,
'__syncthreads()'
,
'__syncthreads()'
,
]
]
...
@@ -232,7 +231,7 @@ def inline_reduce_fixed_shared(N, buf, x, stride_x, load_x, pos, count,
...
@@ -232,7 +231,7 @@ def inline_reduce_fixed_shared(N, buf, x, stride_x, load_x, pos, count,
Optional, the dtype of the output.
Optional, the dtype of the output.
manner_fn
manner_fn
A function that accepts strings of arguments a and b, and returns c code
A function that accepts strings of arguments a and b, and returns c code
for their reduction.
for their reduction.
Example: return "
%(a)
s +
%(b)
s" for a sum reduction.
Example: return "
%(a)
s +
%(b)
s" for a sum reduction.
manner_init
manner_init
A function that accepts strings of arguments a and return c code for its
A function that accepts strings of arguments a and return c code for its
...
@@ -259,7 +258,7 @@ def inline_reduce_fixed_shared(N, buf, x, stride_x, load_x, pos, count,
...
@@ -259,7 +258,7 @@ def inline_reduce_fixed_shared(N, buf, x, stride_x, load_x, pos, count,
loop_line
=
manner_fn
(
"red"
,
manner_init
(
"
%(load_x)
s(
%(x)
s[i *
%(stride_x)
s])"
%
loop_line
=
manner_fn
(
"red"
,
manner_init
(
"
%(load_x)
s(
%(x)
s[i *
%(stride_x)
s])"
%
locals
()))
locals
()))
loop_line2
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
loop_line2
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[i]"
%
buf
)
"
%
s[i]"
%
buf
)
r_16
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[
%
s+16]"
%
(
buf
,
pos
))
r_16
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[
%
s+16]"
%
(
buf
,
pos
))
r_8
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[
%
s+8]"
%
(
buf
,
pos
))
r_8
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[
%
s+8]"
%
(
buf
,
pos
))
r_4
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[
%
s+4]"
%
(
buf
,
pos
))
r_4
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[
%
s+4]"
%
(
buf
,
pos
))
...
@@ -324,7 +323,7 @@ def inline_softmax_fixed_shared(N, buf, x, stride_x, load_x,
...
@@ -324,7 +323,7 @@ def inline_softmax_fixed_shared(N, buf, x, stride_x, load_x,
Parameters
Parameters
----------
----------
N
N
Length of the buffer, atleast waprSize(32).
Length of the buffer, atleast waprSize(32).
buf
buf
A shared memory buffer of size warpSize * sizeof(dtype).
A shared memory buffer of size warpSize * sizeof(dtype).
...
...
theano/sandbox/gpuarray/neighbours.py
浏览文件 @
645557f9
import
os
import
numpy
import
numpy
from
theano
import
Op
,
Apply
,
config
from
theano
import
Op
,
Apply
,
config
from
theano.gof
import
local_optimizer
from
theano.tensor.nnet.neighbours
import
Images2Neibs
from
theano.tensor.nnet.neighbours
import
Images2Neibs
import
theano.tensor
as
T
import
theano.tensor
as
T
try
:
try
:
import
pygpu
import
pygpu
from
pygpu
import
gpuarray
,
elemwise
from
pygpu
import
gpuarray
except
ImportError
:
except
ImportError
:
pass
pass
from
.basic_ops
import
(
as_gpuarray_variable
,
from
.basic_ops
import
as_gpuarray_variable
,
GpuKernelBase
,
Kernel
host_from_gpu
,
gpu_from_host
,
GpuKernelBase
,
Kernel
)
from
.opt
import
register_opt
as
register_gpu_opt
,
op_lifter
from
.opt
import
register_opt
as
register_gpu_opt
,
op_lifter
from
.type
import
GpuArrayType
from
.type
import
GpuArrayType
from
.comp
import
NVCC_compiler
class
GpuImages2Neibs
(
GpuKernelBase
,
Images2Neibs
,
Op
):
class
GpuImages2Neibs
(
GpuKernelBase
,
Images2Neibs
,
Op
):
...
@@ -45,27 +40,10 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
...
@@ -45,27 +40,10 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
dtype
=
ten4
.
type
.
dtype
)()])
dtype
=
ten4
.
type
.
dtype
)()])
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
1
0
,
1
)
return
(
1
1
,
)
def
c_headers
(
self
):
def
c_headers
(
self
):
if
pygpu
.
get_default_context
()
.
kind
==
'opencl'
:
return
[
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
]
raise
MethodNotDefined
(
'cuda only'
)
return
[
'cuda.h'
,
'<gpuarray/extension.h>'
,
'<numpy_compat.h>'
,
'<gpuarray/ext_cuda.h>'
,
'<gpuarray/types.h>'
]
def
c_header_dirs
(
self
):
if
pygpu
.
get_default_context
()
.
kind
==
'opencl'
:
raise
MethodNotDefined
(
'cuda only'
)
cuda_root
=
config
.
cuda
.
root
if
cuda_root
:
return
[
os
.
path
.
join
(
cuda_root
,
'include'
)]
else
:
return
[]
def
c_init_code
(
self
):
if
pygpu
.
get_default_context
()
.
kind
==
'opencl'
:
raise
MethodNotDefined
(
'cuda only'
)
return
[
'setup_ext_cuda();'
]
def
gpu_kernels
(
self
,
node
,
nodename
):
def
gpu_kernels
(
self
,
node
,
nodename
):
dtype_ten4
=
node
.
inputs
[
0
]
.
dtype
dtype_ten4
=
node
.
inputs
[
0
]
.
dtype
...
...
theano/sandbox/gpuarray/nerv.py
浏览文件 @
645557f9
...
@@ -176,13 +176,13 @@ def local_dot_to_gemm16(node):
...
@@ -176,13 +176,13 @@ def local_dot_to_gemm16(node):
@opt.register_opt
()
@opt.register_opt
()
@alpha_merge
(
Gemm16
,
alpha_in
=
1
,
beta_in
=
4
,
nd
=
2
)
@alpha_merge
(
Gemm16
,
alpha_in
=
1
,
beta_in
=
4
)
def
local_gemm16_alpha_merge
(
node
,
*
inputs
):
def
local_gemm16_alpha_merge
(
node
,
*
inputs
):
return
[
Gemm16
(
relu
=
node
.
op
.
relu
)(
*
inputs
)]
return
[
Gemm16
(
relu
=
node
.
op
.
relu
)(
*
inputs
)]
@opt.register_opt
()
@opt.register_opt
()
@output_merge
(
Gemm16
,
alpha_in
=
1
,
beta_in
=
4
,
out_in
=
0
,
nd
=
2
)
@output_merge
(
Gemm16
,
alpha_in
=
1
,
beta_in
=
4
,
out_in
=
0
)
def
local_gemm16_output_merge
(
node
,
*
inputs
):
def
local_gemm16_output_merge
(
node
,
*
inputs
):
return
[
Gemm16
(
relu
=
node
.
op
.
relu
)(
*
inputs
)]
return
[
Gemm16
(
relu
=
node
.
op
.
relu
)(
*
inputs
)]
...
...
theano/sandbox/gpuarray/nnet.py
浏览文件 @
645557f9
差异被折叠。
点击展开。
theano/sandbox/gpuarray/opt.py
浏览文件 @
645557f9
...
@@ -645,13 +645,13 @@ def local_gpua_hgemm(node):
...
@@ -645,13 +645,13 @@ def local_gpua_hgemm(node):
@register_opt
()
@register_opt
()
@alpha_merge
(
GpuGemm
,
alpha_in
=
1
,
beta_in
=
4
,
nd
=
2
)
@alpha_merge
(
GpuGemm
,
alpha_in
=
1
,
beta_in
=
4
)
def
local_gpuagemm_alpha_merge
(
node
,
*
inputs
):
def
local_gpuagemm_alpha_merge
(
node
,
*
inputs
):
return
[
gpugemm_no_inplace
(
*
inputs
)]
return
[
gpugemm_no_inplace
(
*
inputs
)]
@register_opt
()
@register_opt
()
@output_merge
(
GpuGemm
,
alpha_in
=
1
,
beta_in
=
4
,
out_in
=
0
,
nd
=
2
)
@output_merge
(
GpuGemm
,
alpha_in
=
1
,
beta_in
=
4
,
out_in
=
0
)
def
local_gpuagemm_output_merge
(
node
,
*
inputs
):
def
local_gpuagemm_output_merge
(
node
,
*
inputs
):
return
[
gpugemm_no_inplace
(
*
inputs
)]
return
[
gpugemm_no_inplace
(
*
inputs
)]
...
...
theano/sandbox/gpuarray/opt_util.py
浏览文件 @
645557f9
差异被折叠。
点击展开。
theano/sandbox/gpuarray/subtensor.py
浏览文件 @
645557f9
...
@@ -180,19 +180,9 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
...
@@ -180,19 +180,9 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
def
_f16_ok
(
self
):
def
_f16_ok
(
self
):
return
self
.
iadd_node
.
op
.
_f16_ok
return
self
.
iadd_node
.
op
.
_f16_ok
def
c_header_dirs
(
self
):
cuda_root
=
config
.
cuda
.
root
if
cuda_root
:
return
[
os
.
path
.
join
(
cuda_root
,
'include'
)]
else
:
return
[]
def
c_headers
(
self
):
def
c_headers
(
self
):
return
self
.
iadd_node
.
op
.
c_headers
()
return
self
.
iadd_node
.
op
.
c_headers
()
def
c_compiler
(
self
):
return
self
.
iadd_node
.
op
.
c_compiler
()
def
c_init_code
(
self
):
def
c_init_code
(
self
):
return
self
.
iadd_node
.
op
.
c_init_code
()
return
self
.
iadd_node
.
op
.
c_init_code
()
...
@@ -404,7 +394,7 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
...
@@ -404,7 +394,7 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
elemwise_version
=
self
.
iadd_node
.
c_code_cache_version
()
elemwise_version
=
self
.
iadd_node
.
c_code_cache_version
()
if
not
parent_version
or
not
elemwise_version
:
if
not
parent_version
or
not
elemwise_version
:
return
return
return
parent_version
+
elemwise_version
+
(
2
,)
return
parent_version
+
elemwise_version
+
(
3
,)
class
GpuAdvancedSubtensor1
(
HideC
,
tensor
.
AdvancedSubtensor1
):
class
GpuAdvancedSubtensor1
(
HideC
,
tensor
.
AdvancedSubtensor1
):
...
...
theano/sandbox/gpuarray/tests/test_basic_ops.py
浏览文件 @
645557f9
import
unittest
import
unittest
from
theano.compat
import
izip
from
theano.compat
import
izip
from
copy
import
copy
,
deepcopy
from
six
import
iteritems
from
six
import
iteritems
...
@@ -13,16 +12,31 @@ from theano.tensor.basic import alloc
...
@@ -13,16 +12,31 @@ from theano.tensor.basic import alloc
# Don't import test classes otherwise they get tested as part of the file
# Don't import test classes otherwise they get tested as part of the file
from
theano.tensor.tests
import
test_basic
from
theano.tensor.tests
import
test_basic
from
theano.tensor.tests.test_basic
import
rand
,
safe_make_node
from
theano.tensor.tests.test_basic
import
rand
,
safe_make_node
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests.unittest_tools
import
SkipTest
from
theano.tests.unittest_tools
import
SkipTest
import
theano.sandbox.gpuarray
import
theano.sandbox.gpuarray
from
..type
import
(
GpuArrayType
,
gpuarray_shared_constructor
)
from
..basic_ops
import
(
host_from_gpu
,
gpu_from_host
,
HostFromGpu
,
GpuFromHost
,
GpuReshape
,
gpu_alloc
,
GpuAlloc
,
GpuAllocEmpty
,
GpuContiguous
,
gpu_join
,
GpuJoin
,
GpuSplit
,
GpuEye
,
gpu_contiguous
)
from
..subtensor
import
GpuSubtensor
import
theano.sandbox.cuda
as
cuda_ndarray
try
:
from
pygpu
import
gpuarray
except
:
pass
if
theano
.
sandbox
.
gpuarray
.
pygpu
is
None
:
if
theano
.
sandbox
.
gpuarray
.
pygpu
is
None
:
raise
SkipTest
(
"pygpu not installed"
)
raise
SkipTest
(
"pygpu not installed"
)
# If you are writing a new test file, don't copy this code, but rather
# If you are writing a new test file, don't copy this code, but rather
# import stuff from this file (like mode_with_gpu) to reuse it.
# import stuff from this file (like mode_with_gpu) to reuse it.
import
theano.sandbox.cuda
as
cuda_ndarray
if
cuda_ndarray
.
cuda_available
and
not
theano
.
sandbox
.
gpuarray
.
pygpu_activated
:
if
cuda_ndarray
.
cuda_available
and
not
theano
.
sandbox
.
gpuarray
.
pygpu_activated
:
if
not
cuda_ndarray
.
use
.
device_number
:
if
not
cuda_ndarray
.
use
.
device_number
:
# We should not enable all the use like the flag device=gpu,
# We should not enable all the use like the flag device=gpu,
...
@@ -36,25 +50,9 @@ if cuda_ndarray.cuda_available and not theano.sandbox.gpuarray.pygpu_activated:
...
@@ -36,25 +50,9 @@ if cuda_ndarray.cuda_available and not theano.sandbox.gpuarray.pygpu_activated:
if
not
theano
.
sandbox
.
gpuarray
.
pygpu_activated
:
if
not
theano
.
sandbox
.
gpuarray
.
pygpu_activated
:
raise
SkipTest
(
"pygpu disabled"
)
raise
SkipTest
(
"pygpu disabled"
)
from
..type
import
(
GpuArrayType
,
gpuarray_shared_constructor
)
from
..basic_ops
import
(
host_from_gpu
,
gpu_from_host
,
gpu_alloc
,
GpuAlloc
,
GpuAllocEmpty
,
gpu_from_cuda
,
cuda_from_gpu
,
HostFromGpu
,
GpuContiguous
,
GpuFromHost
,
GpuReshape
,
gpu_join
,
GpuJoin
,
GpuSplit
,
GpuEye
,
gpu_contiguous
)
from
..subtensor
import
GpuSubtensor
from
theano.tests
import
unittest_tools
as
utt
utt
.
seed_rng
()
utt
.
seed_rng
()
rng
=
numpy
.
random
.
RandomState
(
seed
=
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
seed
=
utt
.
fetch_seed
())
from
pygpu
import
gpuarray
if
theano
.
config
.
mode
==
'FAST_COMPILE'
:
if
theano
.
config
.
mode
==
'FAST_COMPILE'
:
mode_with_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
including
(
'gpuarray'
)
.
excluding
(
'gpu'
)
mode_with_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
including
(
'gpuarray'
)
.
excluding
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
excluding
(
'gpuarray'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
excluding
(
'gpuarray'
)
...
@@ -63,22 +61,6 @@ else:
...
@@ -63,22 +61,6 @@ else:
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpuarray'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpuarray'
)
def
may_fail
(
msg
,
EClass
):
"""Mark a test that requires very specific conditions to work to
mask a specific exception class."""
def
test_decorator
(
f
):
def
wrapper
():
try
:
f
()
except
Exception
as
e
:
if
isinstance
(
e
,
EClass
):
raise
SkipTest
(
msg
,
e
)
raise
wrapper
.
__name__
=
f
.
__name__
return
wrapper
return
test_decorator
def
inplace_func
(
inputs
,
outputs
,
mode
=
None
,
allow_input_downcast
=
False
,
def
inplace_func
(
inputs
,
outputs
,
mode
=
None
,
allow_input_downcast
=
False
,
on_unused_input
=
'raise'
,
name
=
None
):
on_unused_input
=
'raise'
,
name
=
None
):
if
mode
is
None
:
if
mode
is
None
:
...
@@ -183,9 +165,9 @@ def makeTester(name, op, gpu_op, cases, checks=None, mode_gpu=mode_with_gpu,
...
@@ -183,9 +165,9 @@ def makeTester(name, op, gpu_op, cases, checks=None, mode_gpu=mode_with_gpu,
else
:
else
:
err_msg
=
(
"Test
%
s::
%
s: exception raised during test "
err_msg
=
(
"Test
%
s::
%
s: exception raised during test "
"call was not the same as the reference "
"call was not the same as the reference "
"call (got:
%
s, expected
%
s)"
)
%
\
"call (got:
%
s, expected
%
s)"
%
(
self
.
gpu_op
,
testname
,
type
(
exc
),
(
self
.
gpu_op
,
testname
,
type
(
exc
),
type
(
ref_e
))
type
(
ref_e
))
)
exc
.
args
+=
(
err_msg
,)
exc
.
args
+=
(
err_msg
,)
raise
raise
...
@@ -197,9 +179,9 @@ def makeTester(name, op, gpu_op, cases, checks=None, mode_gpu=mode_with_gpu,
...
@@ -197,9 +179,9 @@ def makeTester(name, op, gpu_op, cases, checks=None, mode_gpu=mode_with_gpu,
expected
):
expected
):
self
.
fail
((
"Test
%
s::
%
s: Output
%
s gave the wrong "
self
.
fail
((
"Test
%
s::
%
s: Output
%
s gave the wrong "
"value. With inputs
%
s, expected
%
s "
"value. With inputs
%
s, expected
%
s "
"(dtype
%
s), got
%
s (dtype
%
s)."
)
%
(
"(dtype
%
s), got
%
s (dtype
%
s)."
%
self
.
op
,
testname
,
i
,
inputs
,
expected
,
(
self
.
op
,
testname
,
i
,
inputs
,
expected
,
expected
.
dtype
,
variable
,
variable
.
dtype
))
expected
.
dtype
,
variable
,
variable
.
dtype
)
))
for
description
,
check
in
iteritems
(
self
.
checks
):
for
description
,
check
in
iteritems
(
self
.
checks
):
if
not
check
(
inputs
,
variables
):
if
not
check
(
inputs
,
variables
):
...
@@ -250,36 +232,6 @@ def test_transfer_strided():
...
@@ -250,36 +232,6 @@ def test_transfer_strided():
assert
numpy
.
all
(
fv
==
av
)
assert
numpy
.
all
(
fv
==
av
)
@may_fail
(
"Op fails if both contexts are not the same and it's rare "
"that the tests will be run this way"
,
ValueError
)
def
test_transfer_cuda_gpu
():
import
theano.sandbox.cuda
as
cuda_ndarray
if
cuda_ndarray
.
cuda_available
is
False
:
raise
SkipTest
(
"Can't test interaction with cuda if cuda not present"
)
g
=
GpuArrayType
(
dtype
=
'float32'
,
broadcastable
=
(
False
,
False
))(
'g'
)
c
=
cuda_ndarray
.
CudaNdarrayType
((
False
,
False
))(
'c'
)
av
=
theano
.
_asarray
(
rng
.
rand
(
5
,
4
),
dtype
=
'float32'
)
gv
=
gpuarray
.
array
(
av
)
cv
=
cuda_ndarray
.
CudaNdarray
(
av
)
gvs
=
gv
[:,
::
-
2
]
cvs
=
cv
[:,
::
-
2
]
f
=
theano
.
function
([
c
],
gpu_from_cuda
(
c
))
fv
=
f
(
cv
)
assert
GpuArrayType
.
values_eq_approx
(
fv
,
gv
)
fvs
=
f
(
cvs
)
assert
GpuArrayType
.
values_eq_approx
(
fvs
,
gvs
)
f
=
theano
.
function
([
g
],
cuda_from_gpu
(
g
))
fv
=
f
(
gv
)
assert
cuda_ndarray
.
CudaNdarrayType
.
values_eq_approx
(
fv
,
cv
)
fvs
=
f
(
gvs
)
assert
cuda_ndarray
.
CudaNdarrayType
.
values_eq_approx
(
fvs
,
cvs
)
def
gpu_alloc_expected
(
x
,
*
shp
):
def
gpu_alloc_expected
(
x
,
*
shp
):
g
=
gpuarray
.
empty
(
shp
,
dtype
=
x
.
dtype
)
g
=
gpuarray
.
empty
(
shp
,
dtype
=
x
.
dtype
)
g
[:]
=
x
g
[:]
=
x
...
@@ -291,8 +243,8 @@ GpuAllocTester = makeTester(
...
@@ -291,8 +243,8 @@ GpuAllocTester = makeTester(
gpu_op
=
gpu_alloc
,
gpu_op
=
gpu_alloc
,
cases
=
dict
(
cases
=
dict
(
correct01
=
(
rand
(),
numpy
.
int32
(
7
)),
correct01
=
(
rand
(),
numpy
.
int32
(
7
)),
# just gives a DeepCopyOp with possibly wrong results on the CPU
# just gives a DeepCopyOp with possibly wrong results on the CPU
#
correct01_bcast=(rand(1), numpy.int32(7)),
#
correct01_bcast=(rand(1), numpy.int32(7)),
correct02
=
(
rand
(),
numpy
.
int32
(
4
),
numpy
.
int32
(
7
)),
correct02
=
(
rand
(),
numpy
.
int32
(
4
),
numpy
.
int32
(
7
)),
correct12
=
(
rand
(
7
),
numpy
.
int32
(
4
),
numpy
.
int32
(
7
)),
correct12
=
(
rand
(
7
),
numpy
.
int32
(
4
),
numpy
.
int32
(
7
)),
correct13
=
(
rand
(
7
),
numpy
.
int32
(
2
),
numpy
.
int32
(
4
),
correct13
=
(
rand
(
7
),
numpy
.
int32
(
2
),
numpy
.
int32
(
4
),
...
@@ -486,8 +438,6 @@ def test_hostfromgpu_shape_i():
...
@@ -486,8 +438,6 @@ def test_hostfromgpu_shape_i():
cv
=
gpuarray
.
asarray
(
numpy
.
random
.
rand
(
5
,
4
),
cv
=
gpuarray
.
asarray
(
numpy
.
random
.
rand
(
5
,
4
),
dtype
=
'float32'
)
dtype
=
'float32'
)
gpu_from_host
=
theano
.
sandbox
.
gpuarray
.
basic_ops
.
gpu_from_host
host_from_gpu
=
theano
.
sandbox
.
gpuarray
.
basic_ops
.
host_from_gpu
f
=
theano
.
function
([
a
],
gpu_from_host
(
a
),
mode
=
m
)
f
=
theano
.
function
([
a
],
gpu_from_host
(
a
),
mode
=
m
)
assert
gpu_from_host
in
[
x
.
op
assert
gpu_from_host
in
[
x
.
op
for
x
in
f
.
maker
.
fgraph
.
toposort
()]
for
x
in
f
.
maker
.
fgraph
.
toposort
()]
...
...
theano/sandbox/gpuarray/tests/test_blas.py
浏览文件 @
645557f9
...
@@ -6,8 +6,7 @@ import numpy
...
@@ -6,8 +6,7 @@ import numpy
import
theano
import
theano
from
theano
import
tensor
from
theano
import
tensor
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.blas
import
(
gemv_inplace
,
gemm_inplace
,
ger_destructive
,
from
theano.tensor.blas
import
gemv_inplace
,
gemm_inplace
,
_dot22
_dot22
)
from
theano.tensor.tests.test_blas
import
TestGer
,
BaseGemv
from
theano.tensor.tests.test_blas
import
TestGer
,
BaseGemv
from
..
import
gpuarray_shared_constructor
from
..
import
gpuarray_shared_constructor
...
@@ -15,22 +14,22 @@ from .test_basic_ops import (makeTester, rand,
...
@@ -15,22 +14,22 @@ from .test_basic_ops import (makeTester, rand,
mode_with_gpu
)
mode_with_gpu
)
from
..blas
import
(
gpugemv_inplace
,
gpugemv_no_inplace
,
from
..blas
import
(
gpugemv_inplace
,
gpugemv_no_inplace
,
gpugemm_inplace
,
gpugemm_no_inplace
,
gpugemm_inplace
,
gpuger_inplace
,
gpuger_no_inplace
,
gpuger_inplace
,
gpuger_no_inplace
,
GpuGer
,
gpu_dot22
,
GpuGemm
)
GpuGer
,
gpu_dot22
,
GpuGemm
)
GpuGemvTester
=
makeTester
(
'GpuGemvTester'
,
GpuGemvTester
=
makeTester
(
op
=
gemv_inplace
,
gpu_op
=
gpugemv_inplace
,
'GpuGemvTester'
,
cases
=
dict
(
op
=
gemv_inplace
,
gpu_op
=
gpugemv_inplace
,
dot_vv
=
[
rand
(
1
),
1
,
rand
(
1
,
2
),
rand
(
2
),
0
],
cases
=
dict
(
dot_vv
=
[
rand
(
1
),
1
,
rand
(
1
,
2
),
rand
(
2
),
0
],
dot_vm
=
[
rand
(
3
),
1
,
rand
(
3
,
2
),
rand
(
2
),
0
],
dot_vm
=
[
rand
(
3
),
1
,
rand
(
3
,
2
),
rand
(
2
),
0
],
#
test_02=[rand(0), 1, rand(0, 2), rand(2), 0],
#
test_02=[rand(0), 1, rand(0, 2), rand(2), 0],
#
test_30=[rand(3), 1, rand(3, 0), rand(0), 0],
#
test_30=[rand(3), 1, rand(3, 0), rand(0), 0],
#
test_00=[rand(0), 1, rand(0, 0), rand(0), 0],
#
test_00=[rand(0), 1, rand(0, 0), rand(0), 0],
test_stride
=
[
rand
(
3
)[::
-
1
],
1
,
rand
(
3
,
2
)[::
-
1
],
rand
(
2
)[::
-
1
],
0
],
test_stride
=
[
rand
(
3
)[::
-
1
],
1
,
rand
(
3
,
2
)[::
-
1
],
rand
(
2
)[::
-
1
],
0
],
)
)
)
)
class
TestGpuSgemv
(
TestCase
,
BaseGemv
,
utt
.
TestOptimizationMixin
):
class
TestGpuSgemv
(
TestCase
,
BaseGemv
,
utt
.
TestOptimizationMixin
):
...
@@ -48,24 +47,24 @@ class TestGpuSgemv(TestCase, BaseGemv, utt.TestOptimizationMixin):
...
@@ -48,24 +47,24 @@ class TestGpuSgemv(TestCase, BaseGemv, utt.TestOptimizationMixin):
return
theano
.
shared
(
val
)
return
theano
.
shared
(
val
)
GpuGemmTester
=
makeTester
(
'GpuGemmTester'
,
GpuGemmTester
=
makeTester
(
op
=
gemm_inplace
,
gpu_op
=
gpugemm_inplace
,
'GpuGemmTester'
,
cases
=
dict
(
op
=
gemm_inplace
,
gpu_op
=
gpugemm_inplace
,
test1
=
[
rand
(
3
,
4
),
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.0
],
cases
=
dict
(
test1
=
[
rand
(
3
,
4
),
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.0
],
test2
=
[
rand
(
3
,
4
),
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
1.0
],
test2
=
[
rand
(
3
,
4
),
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
1.0
],
test3
=
[
rand
(
3
,
4
),
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
-
1.0
],
test3
=
[
rand
(
3
,
4
),
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
-
1.0
],
test4
=
[
rand
(
3
,
4
),
0.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.0
],
test4
=
[
rand
(
3
,
4
),
0.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.0
],
test5
=
[
rand
(
3
,
4
),
0.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.6
],
test5
=
[
rand
(
3
,
4
),
0.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.6
],
test6
=
[
rand
(
3
,
4
),
0.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
-
1.0
],
test6
=
[
rand
(
3
,
4
),
0.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
-
1.0
],
test7
=
[
rand
(
3
,
4
),
-
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.0
],
test7
=
[
rand
(
3
,
4
),
-
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.0
],
test8
=
[
rand
(
3
,
4
),
-
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
1.1
],
test8
=
[
rand
(
3
,
4
),
-
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
1.1
],
test9
=
[
rand
(
3
,
4
),
-
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
-
1.1
],
test9
=
[
rand
(
3
,
4
),
-
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
-
1.1
],
#
test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0],
#
test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0],
#
test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1],
#
test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1],
#
test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1],
#
test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1],
#
test13=[rand(0, 0), -1.0, rand(0, 0), rand(0, 0), -1.1],
#
test13=[rand(0, 0), -1.0, rand(0, 0), rand(0, 0), -1.1],
)
)
)
)
class
TestGpuSger
(
TestGer
):
class
TestGpuSger
(
TestGer
):
...
@@ -84,8 +83,10 @@ class TestGpuSger(TestGer):
...
@@ -84,8 +83,10 @@ class TestGpuSger(TestGer):
def
test_f32_0_0
(
self
):
def
test_f32_0_0
(
self
):
raise
SkipTest
(
'0-sized objects not supported'
)
raise
SkipTest
(
'0-sized objects not supported'
)
def
test_f32_1_0
(
self
):
def
test_f32_1_0
(
self
):
raise
SkipTest
(
'0-sized objects not supported'
)
raise
SkipTest
(
'0-sized objects not supported'
)
def
test_f32_0_1
(
self
):
def
test_f32_0_1
(
self
):
raise
SkipTest
(
'0-sized objects not supported'
)
raise
SkipTest
(
'0-sized objects not supported'
)
...
@@ -103,21 +104,22 @@ class TestGpuGer_OpContract(TestCase, utt.T_OpContractMixin):
...
@@ -103,21 +104,22 @@ class TestGpuGer_OpContract(TestCase, utt.T_OpContractMixin):
GpuDot22Tester
=
makeTester
(
GpuDot22Tester
=
makeTester
(
'Gpu
Gemm
Tester'
,
'Gpu
Dot22
Tester'
,
op
=
_dot22
,
gpu_op
=
gpu_dot22
,
op
=
_dot22
,
gpu_op
=
gpu_dot22
,
cases
=
dict
(
cases
=
dict
(
test1
=
[
rand
(
3
,
4
),
rand
(
4
,
5
)],
test1
=
[
rand
(
3
,
4
),
rand
(
4
,
5
)],
test2
=
[
rand
(
1
,
4
),
rand
(
4
,
5
)],
test2
=
[
rand
(
1
,
4
),
rand
(
4
,
5
)],
test3
=
[
rand
(
3
,
1
),
rand
(
1
,
5
)],
test3
=
[
rand
(
3
,
1
),
rand
(
1
,
5
)],
test4
=
[
rand
(
3
,
4
),
rand
(
4
,
1
)],
test4
=
[
rand
(
3
,
4
),
rand
(
4
,
1
)],
#
test5=[rand(0, 4), rand(4, 5)],
#
test5=[rand(0, 4), rand(4, 5)],
#
test6=[rand(3, 0), rand(0, 5)],
#
test6=[rand(3, 0), rand(0, 5)],
#
test7=[rand(3, 4), rand(4, 0)],
#
test7=[rand(3, 4), rand(4, 0)],
#
test8=[rand(0, 4), rand(4, 0)],
#
test8=[rand(0, 4), rand(4, 0)],
#
test9=[rand(0, 0), rand(0, 0)],
#
test9=[rand(0, 0), rand(0, 0)],
)
)
)
)
def
test_hgemm_swap
():
def
test_hgemm_swap
():
from
theano.sandbox.cuda
import
nvcc_compiler
from
theano.sandbox.cuda
import
nvcc_compiler
if
nvcc_compiler
.
nvcc_version
<
'7.5'
:
if
nvcc_compiler
.
nvcc_version
<
'7.5'
:
...
@@ -149,6 +151,7 @@ def test_hgemm_swap():
...
@@ -149,6 +151,7 @@ def test_hgemm_swap():
utt
.
assert_allclose
(
of
,
on
)
utt
.
assert_allclose
(
of
,
on
)
def
test_hgemm_alpha_output_merge
():
def
test_hgemm_alpha_output_merge
():
from
theano.sandbox.cuda
import
nvcc_compiler
from
theano.sandbox.cuda
import
nvcc_compiler
if
nvcc_compiler
.
nvcc_version
<
'7.5'
:
if
nvcc_compiler
.
nvcc_version
<
'7.5'
:
...
...
theano/sandbox/gpuarray/tests/test_conv_cuda_ndarray.py
浏览文件 @
645557f9
差异被折叠。
点击展开。
theano/sandbox/gpuarray/tests/test_neighbours.py
浏览文件 @
645557f9
import
unittest
from
theano.tensor.nnet.tests
import
test_neighbours
from
theano.tensor.nnet.tests
import
test_neighbours
# We let that import do the init of the back-end if needed.
# We let that import do the init of the back-end if needed.
from
.test_basic_ops
import
(
mode_with_gpu
,
from
.test_basic_ops
import
mode_with_gpu
mode_without_gpu
)
from
..neighbours
import
GpuImages2Neibs
from
..neighbours
import
GpuImages2Neibs
...
...
theano/sandbox/gpuarray/tests/test_nnet.py
浏览文件 @
645557f9
from
__future__
import
print_function
from
__future__
import
print_function
from
nose.plugins.skip
import
SkipTest
import
numpy
import
numpy
import
unittest
import
unittest
...
@@ -7,8 +7,6 @@ import theano
...
@@ -7,8 +7,6 @@ import theano
import
theano.tensor
as
T
import
theano.tensor
as
T
import
theano.tests.unittest_tools
as
utt
import
theano.tests.unittest_tools
as
utt
from
theano.sandbox
import
gpuarray
# We let that import do the init of the back-end if needed.
# We let that import do the init of the back-end if needed.
from
.test_basic_ops
import
(
mode_with_gpu
,
from
.test_basic_ops
import
(
mode_with_gpu
,
mode_without_gpu
)
mode_without_gpu
)
...
@@ -36,15 +34,13 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
...
@@ -36,15 +34,13 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
n_in
=
4098
n_in
=
4098
n_out
=
4099
n_out
=
4099
x
=
T
.
fmatrix
(
'x'
)
y
=
T
.
lvector
(
'y'
)
y
=
T
.
lvector
(
'y'
)
b
=
T
.
fvector
(
'b'
)
b
=
T
.
fvector
(
'b'
)
#W = T.fmatrix('W')
# we precompute the dot with big shape before to allow the test of
# we precompute the dot with big shape before to allow the test of
# GpuCrossentropySoftmax1HotWithBiasDx to don't fail with the error
# GpuCrossentropySoftmax1HotWithBiasDx to don't fail with the error
#(the launch timed out and was terminated) on GPU card not
#
(the launch timed out and was terminated) on GPU card not
# powerful enough. We need the big shape to check for corner
# powerful enough. We need the big shape to check for corner
# case.
# case.
dot_result
=
T
.
fmatrix
(
'dot_result'
)
dot_result
=
T
.
fmatrix
(
'dot_result'
)
...
@@ -54,7 +50,6 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
...
@@ -54,7 +50,6 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
xx
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
batch_size
,
n_in
),
xx
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
batch_size
,
n_in
),
dtype
=
numpy
.
float32
)
dtype
=
numpy
.
float32
)
#?????yy = numpy.ones((batch_size,),dtype='float32')
yy
=
numpy
.
ones
((
batch_size
,),
dtype
=
'int32'
)
yy
=
numpy
.
ones
((
batch_size
,),
dtype
=
'int32'
)
b_values
=
numpy
.
zeros
((
n_out
,),
dtype
=
'float32'
)
b_values
=
numpy
.
zeros
((
n_out
,),
dtype
=
'float32'
)
W_values
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
n_in
,
n_out
),
dtype
=
'float32'
)
W_values
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
n_in
,
n_out
),
dtype
=
'float32'
)
...
@@ -71,8 +66,6 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
...
@@ -71,8 +66,6 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
classify_gpu
=
theano
.
function
(
inputs
=
[
y
,
b
,
dot_result
],
classify_gpu
=
theano
.
function
(
inputs
=
[
y
,
b
,
dot_result
],
outputs
=
[
loss
,
y_pred
,
dW
],
outputs
=
[
loss
,
y_pred
,
dW
],
mode
=
mode_with_gpu
)
mode
=
mode_with_gpu
)
# theano.printing.debugprint(classify)
# theano.printing.debugprint(classify_gpu)
assert
any
([
isinstance
(
node
.
op
,
assert
any
([
isinstance
(
node
.
op
,
T
.
nnet
.
CrossentropySoftmaxArgmax1HotWithBias
)
T
.
nnet
.
CrossentropySoftmaxArgmax1HotWithBias
)
...
@@ -97,12 +90,10 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
...
@@ -97,12 +90,10 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
We check that we loop when their is too much threads
We check that we loop when their is too much threads
"""
"""
n_in
=
1000
batch_size
=
4097
batch_size
=
4097
n_out
=
1250
n_out
=
1250
if
not
isinstance
(
mode_with_gpu
,
theano
.
compile
.
DebugMode
):
if
not
isinstance
(
mode_with_gpu
,
theano
.
compile
.
DebugMode
):
n_in
=
4098
n_out
=
4099
n_out
=
4099
# Seed numpy.random with config.unittests.rseed
# Seed numpy.random with config.unittests.rseed
...
@@ -137,25 +128,7 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
...
@@ -137,25 +128,7 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
rtol
=
1e-5
rtol
=
1e-5
atol
=
1e-6
atol
=
1e-6
if
not
numpy
.
allclose
(
cpu_out
,
gpu_out
,
rtol
=
rtol
,
atol
=
atol
):
utt
.
assert_allclose
(
cpu_out
,
gpu_out
,
rtol
=
rtol
,
atol
=
atol
)
abs_err
,
rel_err
=
T
.
numeric_grad
.
abs_rel_err
(
cpu_out
,
gpu_out
)
scaled_err
=
numpy
.
minimum
(
abs_err
/
atol
,
rel_err
/
rtol
)
max_i
=
scaled_err
.
argmax
()
print
(
'max err index:'
,
max_i
,
max_i
/
batch_size
,
end
=
' '
)
print
(
max_i
%
batch_size
,
max_i
/
n_out
,
max_i
&
n_out
)
print
(
'At that index:'
)
print
(
'err:'
,
scaled_err
.
flatten
()[
max_i
])
print
(
'absolute error:'
,
abs_err
.
flatten
()[
max_i
])
print
(
'relative error:'
,
rel_err
.
flatten
()[
max_i
])
print
(
'cpu_out:'
,
cpu_out
.
flatten
()[
max_i
])
print
(
'gpu_out:'
,
gpu_out
.
flatten
()[
max_i
])
print
(
'softmax_output_value:'
,
softmax_output_value
.
flatten
()[
max_i
])
print
(
'dnll_value:'
,
dnll_value
[
max_i
/
n_out
])
print
(
'y_idx_value:'
,
y_idx_value
[
max_i
/
n_out
])
assert
False
,
"numpy.allclose(cpu_out, gpu_out, rtol=
%
s, atol=
%
s)"
%
(
rtol
,
atol
)
def
test_softmax_with_bias_float16
():
def
test_softmax_with_bias_float16
():
...
@@ -166,6 +139,7 @@ def test_softmax_with_bias_float16():
...
@@ -166,6 +139,7 @@ def test_softmax_with_bias_float16():
softmax_with_bias_unittest_template
(
dtypeInput
=
'float32'
,
softmax_with_bias_unittest_template
(
dtypeInput
=
'float32'
,
dtypeBias
=
'float16'
)
dtypeBias
=
'float16'
)
def
test_softmax_with_bias_float32
():
def
test_softmax_with_bias_float32
():
softmax_with_bias_unittest_template
(
dtypeInput
=
'float32'
,
softmax_with_bias_unittest_template
(
dtypeInput
=
'float32'
,
dtypeBias
=
'float32'
)
dtypeBias
=
'float32'
)
...
@@ -188,6 +162,7 @@ def softmax_with_bias_unittest_template(dtypeInput, dtypeBias):
...
@@ -188,6 +162,7 @@ def softmax_with_bias_unittest_template(dtypeInput, dtypeBias):
TODO: check that we loop when there are too many threads. (THIS IS
TODO: check that we loop when there are too many threads. (THIS IS
NOT IMPLEMENTED)
NOT IMPLEMENTED)
"""
"""
x
=
T
.
matrix
(
'x'
,
dtype
=
dtypeInput
)
x
=
T
.
matrix
(
'x'
,
dtype
=
dtypeInput
)
b
=
T
.
vector
(
'b'
,
dtype
=
dtypeBias
)
b
=
T
.
vector
(
'b'
,
dtype
=
dtypeBias
)
...
@@ -228,9 +203,11 @@ def softmax_with_bias_unittest_template(dtypeInput, dtypeBias):
...
@@ -228,9 +203,11 @@ def softmax_with_bias_unittest_template(dtypeInput, dtypeBias):
def
test_softmax_float16
():
def
test_softmax_float16
():
softmax_unittest_template
(
'float16'
)
softmax_unittest_template
(
'float16'
)
def
test_softmax_float32
():
def
test_softmax_float32
():
softmax_unittest_template
(
'float32'
)
softmax_unittest_template
(
'float32'
)
def
test_softmax_float64
():
def
test_softmax_float64
():
softmax_unittest_template
(
'float64'
)
softmax_unittest_template
(
'float64'
)
...
...
theano/sandbox/gpuarray/tests/test_type.py
浏览文件 @
645557f9
import
operator
import
numpy
import
numpy
import
theano
import
theano
...
@@ -25,7 +23,6 @@ def test_deep_copy():
...
@@ -25,7 +23,6 @@ def test_deep_copy():
def
test_values_eq_approx
():
def
test_values_eq_approx
():
a
=
rand_gpuarray
(
20
,
dtype
=
'float32'
)
a
=
rand_gpuarray
(
20
,
dtype
=
'float32'
)
g
=
GpuArrayType
(
dtype
=
'float32'
,
broadcastable
=
(
False
,))(
'g'
)
assert
GpuArrayType
.
values_eq_approx
(
a
,
a
)
assert
GpuArrayType
.
values_eq_approx
(
a
,
a
)
b
=
a
.
copy
()
b
=
a
.
copy
()
b
[
0
]
=
numpy
.
asarray
(
b
[
0
])
+
1.
b
[
0
]
=
numpy
.
asarray
(
b
[
0
])
+
1.
...
...
theano/sandbox/gpuarray/type.py
浏览文件 @
645557f9
...
@@ -184,7 +184,7 @@ class GpuArrayType(Type):
...
@@ -184,7 +184,7 @@ class GpuArrayType(Type):
@staticmethod
@staticmethod
def
may_share_memory
(
a
,
b
):
def
may_share_memory
(
a
,
b
):
if
(
not
isinstance
(
a
,
gpuarray
.
GpuArray
)
or
if
(
not
isinstance
(
a
,
gpuarray
.
GpuArray
)
or
not
isinstance
(
b
,
gpuarray
.
GpuArray
)):
not
isinstance
(
b
,
gpuarray
.
GpuArray
)):
return
False
return
False
return
pygpu
.
gpuarray
.
may_share_memory
(
a
,
b
)
return
pygpu
.
gpuarray
.
may_share_memory
(
a
,
b
)
...
@@ -200,11 +200,12 @@ class GpuArrayType(Type):
...
@@ -200,11 +200,12 @@ class GpuArrayType(Type):
self
.
broadcastable
==
other
.
broadcastable
)
self
.
broadcastable
==
other
.
broadcastable
)
def
convert_variable
(
self
,
var
):
def
convert_variable
(
self
,
var
):
if
(
type
(
self
)
==
type
(
var
.
type
)
and
vt
=
var
.
type
self
.
typecode
==
var
.
type
.
typecode
and
if
(
type
(
self
)
==
type
(
vt
)
and
self
.
ndim
==
var
.
type
.
ndim
and
self
.
typecode
==
vt
.
typecode
and
self
.
ndim
==
vt
.
ndim
and
all
(
sb
==
ob
or
ob
for
sb
,
ob
in
zip
(
self
.
broadcastable
,
all
(
sb
==
ob
or
ob
for
sb
,
ob
in
zip
(
self
.
broadcastable
,
v
ar
.
type
.
broadcastable
))):
v
t
.
broadcastable
))):
return
theano
.
tensor
.
patternbroadcast
(
var
,
self
.
broadcastable
)
return
theano
.
tensor
.
patternbroadcast
(
var
,
self
.
broadcastable
)
def
__hash__
(
self
):
def
__hash__
(
self
):
...
...
theano/tests/test_flake8.py
浏览文件 @
645557f9
...
@@ -157,24 +157,11 @@ whitelist_flake8 = [
...
@@ -157,24 +157,11 @@ whitelist_flake8 = [
"sandbox/linalg/ops.py"
,
"sandbox/linalg/ops.py"
,
"sandbox/linalg/__init__.py"
,
"sandbox/linalg/__init__.py"
,
"sandbox/linalg/tests/test_linalg.py"
,
"sandbox/linalg/tests/test_linalg.py"
,
"sandbox/gpuarray/basic_ops.py"
,
"sandbox/gpuarray/nnet.py"
,
"sandbox/gpuarray/elemwise.py"
,
"sandbox/gpuarray/type.py"
,
"sandbox/gpuarray/__init__.py"
,
"sandbox/gpuarray/__init__.py"
,
"sandbox/gpuarray/kernel_codegen.py"
,
"sandbox/gpuarray/conv.py"
,
"sandbox/gpuarray/neighbours.py"
,
"sandbox/gpuarray/tests/test_subtensor.py"
,
"sandbox/gpuarray/tests/test_subtensor.py"
,
"sandbox/gpuarray/tests/test_scan.py"
,
"sandbox/gpuarray/tests/test_scan.py"
,
"sandbox/gpuarray/tests/test_neighbours.py"
,
"sandbox/gpuarray/tests/test_conv_cuda_ndarray.py"
,
"sandbox/gpuarray/tests/test_type.py"
,
"sandbox/gpuarray/tests/test_opt.py"
,
"sandbox/gpuarray/tests/test_opt.py"
,
"sandbox/gpuarray/tests/test_blas.py"
,
"sandbox/gpuarray/tests/test_elemwise.py"
,
"sandbox/gpuarray/tests/test_elemwise.py"
,
"sandbox/gpuarray/tests/test_nnet.py"
,
"sandbox/gpuarray/tests/test_basic_ops.py"
,
"scan_module/scan_utils.py"
,
"scan_module/scan_utils.py"
,
"scan_module/scan_views.py"
,
"scan_module/scan_views.py"
,
"scan_module/scan.py"
,
"scan_module/scan.py"
,
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论