Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
645557f9
提交
645557f9
authored
10月 02, 2015
作者:
Pascal Lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3476 from abergeron/move_config
Multiple fixes preparing for multi-gpu
上级
1ec1cd9b
71dea2cf
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
29 个修改的文件
包含
294 行增加
和
467 行删除
+294
-467
setup.cfg
setup.cfg
+3
-0
__init__.py
theano/__init__.py
+4
-2
configdefaults.py
theano/configdefaults.py
+112
-19
configparser.py
theano/configparser.py
+11
-7
op.py
theano/gof/op.py
+8
-4
__init__.py
theano/sandbox/cuda/__init__.py
+1
-1
dnn.py
theano/sandbox/cuda/dnn.py
+6
-6
nvcc_compiler.py
theano/sandbox/cuda/nvcc_compiler.py
+4
-71
dnn_flags.py
theano/sandbox/dnn_flags.py
+0
-42
__init__.py
theano/sandbox/gpuarray/__init__.py
+7
-12
basic_ops.py
theano/sandbox/gpuarray/basic_ops.py
+0
-0
conv.py
theano/sandbox/gpuarray/conv.py
+19
-39
dnn.py
theano/sandbox/gpuarray/dnn.py
+21
-46
elemwise.py
theano/sandbox/gpuarray/elemwise.py
+0
-0
kernel_codegen.py
theano/sandbox/gpuarray/kernel_codegen.py
+11
-12
neighbours.py
theano/sandbox/gpuarray/neighbours.py
+4
-26
nerv.py
theano/sandbox/gpuarray/nerv.py
+2
-2
nnet.py
theano/sandbox/gpuarray/nnet.py
+0
-0
opt.py
theano/sandbox/gpuarray/opt.py
+2
-2
opt_util.py
theano/sandbox/gpuarray/opt_util.py
+0
-0
subtensor.py
theano/sandbox/gpuarray/subtensor.py
+1
-11
test_basic_ops.py
theano/sandbox/gpuarray/tests/test_basic_ops.py
+23
-73
test_blas.py
theano/sandbox/gpuarray/tests/test_blas.py
+41
-38
test_conv_cuda_ndarray.py
theano/sandbox/gpuarray/tests/test_conv_cuda_ndarray.py
+0
-0
test_neighbours.py
theano/sandbox/gpuarray/tests/test_neighbours.py
+1
-3
test_nnet.py
theano/sandbox/gpuarray/tests/test_nnet.py
+7
-30
test_type.py
theano/sandbox/gpuarray/tests/test_type.py
+0
-3
type.py
theano/sandbox/gpuarray/type.py
+6
-5
test_flake8.py
theano/tests/test_flake8.py
+0
-13
没有找到文件。
setup.cfg
浏览文件 @
645557f9
[nosetest]
match=^test
nocapture=1
[flake8]
ignore=E501,E123,E133
theano/__init__.py
浏览文件 @
645557f9
...
...
@@ -109,8 +109,10 @@ if config.device.startswith('gpu') or config.init_gpu_device.startswith('gpu'):
theano
.
sandbox
.
cuda
.
tests
.
test_driver
.
test_nvidia_driver1
()
if
config
.
device
.
startswith
(
'cuda'
)
or
config
.
device
.
startswith
(
'opencl'
)
or
\
config
.
gpuarray
.
init_device
!=
''
:
if
(
config
.
device
.
startswith
(
'cuda'
)
or
config
.
device
.
startswith
(
'opencl'
)
or
config
.
init_gpu_device
.
startswith
(
'cuda'
)
or
config
.
init_gpu_device
.
startswith
(
'opencl'
)):
import
theano.sandbox.gpuarray
# Use config.numpy to call numpy.seterr
...
...
theano/configdefaults.py
浏览文件 @
645557f9
...
...
@@ -73,19 +73,19 @@ class DeviceParam(ConfigParam):
self
.
default
=
default
def
filter
(
val
):
if
val
.
startswith
(
'cpu'
)
or
val
.
startswith
(
'gpu'
)
\
if
val
==
self
.
default
or
val
.
startswith
(
'gpu'
)
\
or
val
.
startswith
(
'opencl'
)
or
val
.
startswith
(
'cuda'
):
return
val
else
:
raise
ValueError
((
'Invalid value ("
%
s") for configuration '
'variable "
%
s". Valid options start with '
'one of "
cpu
", "gpu", "opencl", "cuda"'
%
(
val
,
self
.
fullname
)))
'one of "
%
s
", "gpu", "opencl", "cuda"'
%
(
self
.
default
,
val
,
self
.
fullname
)))
over
=
kwargs
.
get
(
"allow_override"
,
True
)
super
(
DeviceParam
,
self
)
.
__init__
(
default
,
filter
,
over
)
def
__str__
(
self
):
return
'
%
s (
cpu, gpu*, opencl*, cuda*) '
%
(
self
.
fullname
,
)
return
'
%
s (
%
s, gpu*, opencl*, cuda*) '
%
(
self
.
fullname
,
self
.
default
)
AddConfigVar
(
'device'
,
...
...
@@ -94,15 +94,7 @@ AddConfigVar(
"on it. Do not use upper case letters, only lower case even if "
"NVIDIA use capital letters."
),
DeviceParam
(
'cpu'
,
allow_override
=
False
),
in_c_key
=
False
,)
AddConfigVar
(
'gpuarray.init_device'
,
"""
Device to initialize for gpuarray use without moving
computations automatically.
"""
,
StrParam
(
''
),
in_c_key
=
False
)
in_c_key
=
False
)
AddConfigVar
(
'init_gpu_device'
,
...
...
@@ -110,12 +102,7 @@ AddConfigVar(
"Unlike 'device', setting this option will NOT move computations, "
"nor shared variables, to the specified GPU. "
"It can be used to run GPU-specific tests on a particular GPU."
),
EnumStr
(
''
,
'gpu'
,
'gpu0'
,
'gpu1'
,
'gpu2'
,
'gpu3'
,
'gpu4'
,
'gpu5'
,
'gpu6'
,
'gpu7'
,
'gpu8'
,
'gpu9'
,
'gpu10'
,
'gpu11'
,
'gpu12'
,
'gpu13'
,
'gpu14'
,
'gpu15'
,
allow_override
=
False
),
DeviceParam
(
''
,
allow_override
=
False
),
in_c_key
=
False
)
AddConfigVar
(
...
...
@@ -131,6 +118,112 @@ AddConfigVar(
in_c_key
=
False
)
def
default_cuda_root
():
v
=
os
.
getenv
(
'CUDA_ROOT'
,
""
)
if
v
:
return
v
s
=
os
.
getenv
(
"PATH"
)
if
not
s
:
return
''
for
dir
in
s
.
split
(
os
.
path
.
pathsep
):
if
os
.
path
.
exists
(
os
.
path
.
join
(
dir
,
"nvcc"
)):
return
os
.
path
.
split
(
dir
)[
0
]
return
''
AddConfigVar
(
'cuda.root'
,
"""directory with bin/, lib/, include/ for cuda utilities.
This directory is included via -L and -rpath when linking
dynamically compiled modules. If AUTO and nvcc is in the
path, it will use one of nvcc parent directory. Otherwise
/usr/local/cuda will be used. Leave empty to prevent extra
linker directives. Default: environment variable "CUDA_ROOT"
or else "AUTO".
"""
,
StrParam
(
default_cuda_root
),
in_c_key
=
False
)
def
filter_nvcc_flags
(
s
):
assert
isinstance
(
s
,
str
)
flags
=
[
flag
for
flag
in
s
.
split
(
' '
)
if
flag
]
if
any
([
f
for
f
in
flags
if
not
f
.
startswith
(
"-"
)]):
raise
ValueError
(
"Theano nvcc.flags support only parameter/value pairs without"
" space between them. e.g.: '--machine 64' is not supported,"
" but '--machine=64' is supported. Please add the '=' symbol."
" nvcc.flags value is '
%
s'"
%
s
)
return
' '
.
join
(
flags
)
AddConfigVar
(
'nvcc.flags'
,
"Extra compiler flags for nvcc"
,
ConfigParam
(
""
,
filter_nvcc_flags
),
# Not needed in c key as it is already added.
# We remove it as we don't make the md5 of config to change
# if theano.sandbox.cuda is loaded or not.
in_c_key
=
False
)
AddConfigVar
(
'nvcc.compiler_bindir'
,
"If defined, nvcc compiler driver will seek g++ and gcc"
" in this directory"
,
StrParam
(
""
),
in_c_key
=
False
)
AddConfigVar
(
'nvcc.fastmath'
,
""
,
BoolParam
(
False
),
# Not needed in c key as it is already added.
# We remove it as we don't make the md5 of config to change
# if theano.sandbox.cuda is loaded or not.
in_c_key
=
False
)
AddConfigVar
(
'gpuarray.sync'
,
"""If True, every op will make sure its work is done before
returning. Setting this to True will slow down execution,
but give much more accurate results in profiling."""
,
BoolParam
(
False
),
in_c_key
=
True
)
AddConfigVar
(
'dnn.conv.workmem'
,
"This flag is deprecated; use dnn.conv.algo_fwd."
,
EnumStr
(
''
),
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.workmem_bwd'
,
"This flag is deprecated; use dnn.conv.algo_bwd."
,
EnumStr
(
''
),
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.algo_fwd'
,
"Default implementation to use for CuDNN forward convolution."
,
EnumStr
(
'small'
,
'none'
,
'large'
,
'fft'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
),
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.algo_bwd'
,
"Default implementation to use for CuDNN backward convolution."
,
EnumStr
(
'none'
,
'deterministic'
,
'fft'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
),
in_c_key
=
False
)
def
default_dnn_path
(
suffix
):
def
f
(
suffix
=
suffix
):
if
config
.
cuda
.
root
==
''
:
return
''
return
os
.
path
.
join
(
config
.
cuda
.
root
,
suffix
)
return
f
AddConfigVar
(
'dnn.include_path'
,
"Location of the cudnn header (defaults to the cuda root)"
,
StrParam
(
default_dnn_path
(
'include'
)))
AddConfigVar
(
'dnn.library_path'
,
"Location of the cudnn header (defaults to the cuda root)"
,
StrParam
(
default_dnn_path
(
'lib64'
)))
# This flag determines whether or not to raise error/warning message if
# there is a CPU Op in the computational graph.
AddConfigVar
(
...
...
theano/configparser.py
浏览文件 @
645557f9
...
...
@@ -102,7 +102,7 @@ def change_flags(**kwargs):
l
=
[
v
for
v
in
theano
.
configparser
.
_config_var_list
if
v
.
fullname
==
k
]
assert
len
(
l
)
==
1
old_val
[
k
]
=
l
[
0
]
.
__get__
()
old_val
[
k
]
=
l
[
0
]
.
__get__
(
True
,
None
)
try
:
for
k
in
kwargs
:
l
=
[
v
for
v
in
theano
.
configparser
.
_config_var_list
...
...
@@ -167,7 +167,7 @@ def _config_print(thing, buf):
for
cv
in
_config_var_list
:
print
(
cv
,
file
=
buf
)
print
(
" Doc: "
,
cv
.
doc
,
file
=
buf
)
print
(
" Value: "
,
cv
.
__get__
(),
file
=
buf
)
print
(
" Value: "
,
cv
.
__get__
(
True
,
None
),
file
=
buf
)
print
(
""
,
file
=
buf
)
...
...
@@ -182,7 +182,7 @@ def get_config_md5():
all_opts
=
sorted
([
c
for
c
in
_config_var_list
if
c
.
in_c_key
],
key
=
lambda
cv
:
cv
.
fullname
)
return
theano
.
gof
.
utils
.
hash_from_code
(
'
\n
'
.
join
(
[
'
%
s =
%
s'
%
(
cv
.
fullname
,
cv
.
__get__
())
for
cv
in
all_opts
]))
[
'
%
s =
%
s'
%
(
cv
.
fullname
,
cv
.
__get__
(
True
,
None
))
for
cv
in
all_opts
]))
class
TheanoConfigParser
(
object
):
...
...
@@ -270,14 +270,14 @@ def AddConfigVar(name, doc, configparam, root=config, in_c_key=True):
# Trigger a read of the value from config files and env vars
# This allow to filter wrong value from the user.
if
not
callable
(
configparam
.
default
):
configparam
.
__get__
()
configparam
.
__get__
(
root
,
type
(
root
)
)
else
:
# We do not want to evaluate now the default value
# when it is a callable.
try
:
fetch_val_for_key
(
configparam
.
fullname
)
# The user provided a value, filter it now.
configparam
.
__get__
()
configparam
.
__get__
(
root
,
type
(
root
)
)
except
KeyError
:
pass
setattr
(
root
.
__class__
,
sections
[
0
],
configparam
)
...
...
@@ -294,6 +294,7 @@ class ConfigParam(object):
self
.
default
=
default
self
.
filter
=
filter
self
.
allow_override
=
allow_override
self
.
is_default
=
True
# N.B. --
# self.fullname # set by AddConfigVar
# self.doc # set by AddConfigVar
...
...
@@ -304,16 +305,19 @@ class ConfigParam(object):
# Calling `filter` here may actually be harmful if the default value is
# invalid and causes a crash or has unwanted side effects.
def
__get__
(
self
,
*
args
):
def
__get__
(
self
,
cls
,
type_
):
if
cls
is
None
:
return
self
if
not
hasattr
(
self
,
'val'
):
try
:
val_str
=
fetch_val_for_key
(
self
.
fullname
)
self
.
is_default
=
False
except
KeyError
:
if
callable
(
self
.
default
):
val_str
=
self
.
default
()
else
:
val_str
=
self
.
default
self
.
__set__
(
None
,
val_str
)
self
.
__set__
(
cls
,
val_str
)
# print "RVAL", self.val
return
self
.
val
...
...
theano/gof/op.py
浏览文件 @
645557f9
...
...
@@ -1171,7 +1171,7 @@ def apply_meth(tag):
code
=
self
.
code_sections
[
tag
]
define_macros
,
undef_macros
=
self
.
get_c_macros
(
node
,
name
)
return
os
.
linesep
.
join
([
define_macros
,
code
,
return
os
.
linesep
.
join
([
''
,
define_macros
,
code
,
undef_macros
])
else
:
raise
utils
.
MethodNotDefined
(
...
...
@@ -1428,7 +1428,7 @@ class COp(Op):
def_macros
,
undef_macros
=
self
.
get_c_macros
(
node
,
name
)
def_sub
,
undef_sub
=
self
.
get_sub_macros
(
sub
)
return
os
.
linesep
.
join
([
def_macros
,
def_sub
,
return
os
.
linesep
.
join
([
''
,
def_macros
,
def_sub
,
op_code
,
undef_sub
,
undef_macros
])
else
:
...
...
@@ -1442,17 +1442,21 @@ class COp(Op):
define_macros
,
undef_macros
=
self
.
get_c_macros
(
node
,
name
,
check_input
=
False
)
ctx
=
""
if
'context'
in
sub
:
ctx
=
",
%
s"
%
(
sub
[
'context'
],)
# Generate the C code
return
"""
%(define_macros)
s
{
if (
%(func_name)
s(
%(func_args)
s) != 0) {
if (
%(func_name)
s(
%(func_args)
s
%(ctx)
s
) != 0) {
%(fail)
s
}
}
%(undef_macros)
s
"""
%
dict
(
func_name
=
self
.
func_name
,
fail
=
sub
[
'fail'
],
fail
=
sub
[
'fail'
],
ctx
=
ctx
,
func_args
=
self
.
format_c_function_args
(
inp
,
out
),
define_macros
=
define_macros
,
undef_macros
=
undef_macros
)
...
...
theano/sandbox/cuda/__init__.py
浏览文件 @
645557f9
...
...
@@ -535,7 +535,7 @@ def handle_shared_float32(tf):
# import dependency. So we also test it in the file theano/__init__.py
if
config
.
device
.
startswith
(
'gpu'
):
use
(
device
=
config
.
device
,
force
=
config
.
force_device
,
test_driver
=
False
)
elif
config
.
init_gpu_device
:
elif
config
.
init_gpu_device
.
startswith
(
'gpu'
)
:
assert
config
.
device
==
"cpu"
,
(
"We can use the Theano flag init_gpu_device"
" only when the Theano flag device=='cpu'"
)
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
645557f9
...
...
@@ -27,8 +27,6 @@ from theano.sandbox.cuda import gpu_seqopt, register_opt
from
theano.sandbox.cuda.nvcc_compiler
import
NVCC_compiler
import
theano.sandbox.dnn_flags
def
dnn_available
():
if
dnn_available
.
avail
is
None
:
...
...
@@ -57,15 +55,17 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
return 1;
}
"""
params
=
[
"-l"
,
"cudnn"
,
"-I"
+
os
.
path
.
dirname
(
__file__
)]
if
config
.
dnn
.
include_path
:
params
.
append
(
"-I"
+
config
.
dnn
.
include_path
)
if
config
.
dnn
.
library_path
:
params
.
append
(
"-L"
+
config
.
dnn
.
library_path
)
# Do not run here the test program. It would run on the
# default gpu, not the one selected by the user. If mixed
# GPU are installed or if the GPUs are configured in
# exclusive mode, this cause bad detection.
comp
,
out
,
err
=
NVCC_compiler
.
try_flags
(
[
"-l"
,
"cudnn"
,
"-I"
+
os
.
path
.
dirname
(
__file__
),
"-I"
+
config
.
dnn
.
include_path
,
"-L"
+
config
.
dnn
.
library_path
],
preambule
=
preambule
,
body
=
body
,
params
=
params
,
preambule
=
preambule
,
body
=
body
,
try_run
=
False
,
output
=
True
)
dnn_available
.
avail
=
comp
...
...
theano/sandbox/cuda/nvcc_compiler.py
浏览文件 @
645557f9
...
...
@@ -8,6 +8,7 @@ import warnings
import
numpy
from
theano
import
config
from
theano.compat
import
decode
,
decode_iter
from
theano.gof
import
local_bitwidth
from
theano.gof.utils
import
hash_from_file
...
...
@@ -19,67 +20,6 @@ from theano.misc.windows import output_subprocess_Popen
_logger
=
logging
.
getLogger
(
"theano.sandbox.cuda.nvcc_compiler"
)
from
theano.configparser
import
(
config
,
AddConfigVar
,
StrParam
,
BoolParam
,
ConfigParam
)
AddConfigVar
(
'nvcc.compiler_bindir'
,
"If defined, nvcc compiler driver will seek g++ and gcc"
" in this directory"
,
StrParam
(
""
),
in_c_key
=
False
)
user_provided_cuda_root
=
True
def
default_cuda_root
():
global
user_provided_cuda_root
v
=
os
.
getenv
(
'CUDA_ROOT'
,
""
)
user_provided_cuda_root
=
False
if
v
:
return
v
return
find_cuda_root
()
AddConfigVar
(
'cuda.root'
,
"""directory with bin/, lib/, include/ for cuda utilities.
This directory is included via -L and -rpath when linking
dynamically compiled modules. If AUTO and nvcc is in the
path, it will use one of nvcc parent directory. Otherwise
/usr/local/cuda will be used. Leave empty to prevent extra
linker directives. Default: environment variable "CUDA_ROOT"
or else "AUTO".
"""
,
StrParam
(
default_cuda_root
),
in_c_key
=
False
)
def
filter_nvcc_flags
(
s
):
assert
isinstance
(
s
,
str
)
flags
=
[
flag
for
flag
in
s
.
split
(
' '
)
if
flag
]
if
any
([
f
for
f
in
flags
if
not
f
.
startswith
(
"-"
)]):
raise
ValueError
(
"Theano nvcc.flags support only parameter/value pairs without"
" space between them. e.g.: '--machine 64' is not supported,"
" but '--machine=64' is supported. Please add the '=' symbol."
" nvcc.flags value is '
%
s'"
%
s
)
return
' '
.
join
(
flags
)
AddConfigVar
(
'nvcc.flags'
,
"Extra compiler flags for nvcc"
,
ConfigParam
(
""
,
filter_nvcc_flags
),
# Not needed in c key as it is already added.
# We remove it as we don't make the md5 of config to change
# if theano.sandbox.cuda is loaded or not.
in_c_key
=
False
)
AddConfigVar
(
'nvcc.fastmath'
,
""
,
BoolParam
(
False
),
# Not needed in c key as it is already added.
# We remove it as we don't make the md5 of config to change
# if theano.sandbox.cuda is loaded or not.
in_c_key
=
False
)
nvcc_path
=
'nvcc'
nvcc_version
=
None
...
...
@@ -115,14 +55,6 @@ def is_nvcc_available():
return
False
def
find_cuda_root
():
s
=
os
.
getenv
(
"PATH"
)
if
not
s
:
return
for
dir
in
s
.
split
(
os
.
path
.
pathsep
):
if
os
.
path
.
exists
(
os
.
path
.
join
(
dir
,
"nvcc"
)):
return
os
.
path
.
split
(
dir
)[
0
]
rpath_defaults
=
[]
...
...
@@ -229,7 +161,7 @@ class NVCC_compiler(Compiler):
include_dirs
A list of include directory names (each gets prefixed with -I).
lib_dirs
A list of library search path directory names (each gets
A list of library search path directory names (each gets
prefixed with -L).
libs
A list of libraries to link with (each gets prefixed with -l).
...
...
@@ -359,7 +291,8 @@ class NVCC_compiler(Compiler):
# provided an cuda.root flag, we need to add one, but
# otherwise, we don't add it. See gh-1540 and
# https://wiki.debian.org/RpathIssue for details.
if
(
user_provided_cuda_root
and
if
(
not
type
(
config
.
cuda
)
.
root
.
is_default
and
os
.
path
.
exists
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
))):
rpaths
.
append
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
))
...
...
theano/sandbox/dnn_flags.py
deleted
100644 → 0
浏览文件 @
1ec1cd9b
"""
This module contains the configuration flags for cudnn support.
Those are shared between the cuda and gpuarray backend which is why
they are in this file.
"""
import
os.path
from
theano.configparser
import
AddConfigVar
,
EnumStr
,
StrParam
from
theano
import
config
AddConfigVar
(
'dnn.conv.workmem'
,
"This flag is deprecated; use dnn.conv.algo_fwd."
,
EnumStr
(
''
),
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.workmem_bwd'
,
"This flag is deprecated; use dnn.conv.algo_bwd."
,
EnumStr
(
''
),
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.algo_fwd'
,
"Default implementation to use for CuDNN forward convolution."
,
EnumStr
(
'small'
,
'none'
,
'large'
,
'fft'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
),
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.algo_bwd'
,
"Default implementation to use for CuDNN backward convolution."
,
EnumStr
(
'none'
,
'deterministic'
,
'fft'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
),
in_c_key
=
False
)
AddConfigVar
(
'dnn.include_path'
,
"Location of the cudnn header (defaults to the cuda root)"
,
StrParam
(
lambda
:
os
.
path
.
join
(
config
.
cuda
.
root
,
'include'
)))
AddConfigVar
(
'dnn.library_path'
,
"Location of the cudnn header (defaults to the cuda root)"
,
StrParam
(
lambda
:
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib64'
)))
theano/sandbox/gpuarray/__init__.py
浏览文件 @
645557f9
...
...
@@ -19,13 +19,6 @@ try:
except
ImportError
:
pygpu
=
None
AddConfigVar
(
'gpuarray.sync'
,
"""If True, every op will make sure its work is done before
returning. Setting this to True will slow down execution,
but give much more accurate results in profiling."""
,
BoolParam
(
False
),
in_c_key
=
True
)
# This is for documentation not to depend on the availability of pygpu
from
.type
import
(
GpuArrayType
,
GpuArrayVariable
,
GpuArrayConstant
,
GpuArraySharedVariable
,
gpuarray_shared_constructor
)
...
...
@@ -57,8 +50,9 @@ if pygpu:
import
theano.compile
theano
.
compile
.
shared_constructor
(
gpuarray_shared_constructor
)
optdb
.
add_tags
(
'gpuarray_opt'
,
'fast_run'
,
'fast_compile'
)
elif
config
.
gpuarray
.
init_device
!=
''
:
init_dev
(
config
.
gpuarray
.
init_device
)
elif
(
config
.
init_gpu_device
.
startswith
(
'cuda'
)
or
config
.
init_gpu_device
.
startswith
(
'opencl'
)):
init_dev
(
config
.
init_gpu_device
)
from
.basic_ops
import
(
GpuAlloc
,
GpuContiguous
,
GpuEye
,
GpuFromHost
,
GpuJoin
,
GpuReshape
,
GpuSplit
,
HostFromGpu
)
...
...
@@ -70,7 +64,8 @@ if pygpu:
except
Exception
:
error
(
"Could not initialize pygpu, support disabled"
,
exc_info
=
True
)
else
:
if
(
config
.
gpuarray
.
init_device
!=
''
or
config
.
device
.
startswith
(
'opencl'
)
or
config
.
device
.
startswith
(
'cuda'
)):
if
(
config
.
init_gpu_device
.
startswith
(
'cuda'
)
or
config
.
init_gpu_device
.
startswith
(
'opencl'
)
or
config
.
device
.
startswith
(
'opencl'
)
or
config
.
device
.
startswith
(
'cuda'
)):
error
(
"pygpu was configured but could not be imported"
,
exc_info
=
True
)
theano/sandbox/gpuarray/basic_ops.py
浏览文件 @
645557f9
差异被折叠。
点击展开。
theano/sandbox/gpuarray/conv.py
浏览文件 @
645557f9
...
...
@@ -5,17 +5,15 @@ import theano
from
theano
import
config
,
gof
try
:
import
pygpu
from
pygpu
import
gpuarray
except
ImportError
:
pass
from
six.moves
import
reduce
from
.comp
import
NVCC_compiler
from
.type
import
GpuArrayType
from
.basic_ops
import
(
as_gpuarray_variable
,
GpuKernelBase
,
Kernel
)
from
.basic_ops
import
as_gpuarray_variable
,
GpuKernelBase
,
Kernel
from
theano.gof
import
utils
class
GpuConv
(
GpuKernelBase
,
gof
.
Op
):
"""
Implement the batched and stacked 2d convolution on the gpu.
...
...
@@ -70,19 +68,19 @@ class GpuConv(GpuKernelBase, gof.Op):
raise
ValueError
(
mode
)
def
__init__
(
self
,
border_mode
,
subsample
=
(
1
,
1
),
logical_img_hw
=
None
,
logical_kern_hw
=
None
,
logical_kern_align_top
=
True
,
version
=-
1
,
direction_hint
=
None
,
verbose
=
0
,
kshp
=
None
,
imshp
=
None
,
max_threads_dim0
=
None
,
nkern
=
None
,
bsize
=
None
,
fft_opt
=
True
):
subsample
=
(
1
,
1
),
logical_img_hw
=
None
,
logical_kern_hw
=
None
,
logical_kern_align_top
=
True
,
version
=-
1
,
direction_hint
=
None
,
verbose
=
0
,
kshp
=
None
,
imshp
=
None
,
max_threads_dim0
=
None
,
nkern
=
None
,
bsize
=
None
,
fft_opt
=
True
):
self
.
border_mode
=
border_mode
self
.
subsample
=
subsample
if
logical_img_hw
is
not
None
:
...
...
@@ -182,7 +180,7 @@ class GpuConv(GpuKernelBase, gof.Op):
def
flops
(
self
,
inputs
,
outputs
):
"""
Useful with the hack in profilemode to print the MFlops.
"""
images
,
kerns
=
inputs
out
,
=
outputs
...
...
@@ -227,32 +225,14 @@ class GpuConv(GpuKernelBase, gof.Op):
nb
=
0
if
self
.
kshp
is
not
None
:
nb
=
self
.
kshp
[
1
]
return
[
'-DTHEANO_KERN_WID='
+
str
(
nb
)]
# ,'-g','-G']
return
[
'-DTHEANO_KERN_WID='
+
str
(
nb
)]
def
c_headers
(
self
):
if
pygpu
.
get_default_context
()
.
kind
==
'opencl'
:
raise
MethodNotDefined
(
'cuda only'
)
return
[
'<stdint.h>'
,
'<stdio.h>'
,
'cuda.h'
,
'<gpuarray/extension.h>'
,
'<numpy_compat.h>'
,
'<gpuarray/ext_cuda.h>'
,
'<gpuarray/types.h>'
]
def
c_header_dirs
(
self
):
if
pygpu
.
get_default_context
()
.
kind
==
'opencl'
:
raise
MethodNotDefined
(
'cuda only'
)
cuda_root
=
config
.
cuda
.
root
if
cuda_root
:
return
[
os
.
path
.
join
(
cuda_root
,
'include'
)]
else
:
return
[]
return
[
'<stdio.h>'
,
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
]
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
return
(
0
,
21
)
def
c_init_code
(
self
):
if
pygpu
.
get_default_context
()
.
kind
==
'opencl'
:
raise
MethodNotDefined
(
'cuda only'
)
return
[
'setup_ext_cuda();'
]
return
(
0
,
22
)
def
c_code
(
self
,
node
,
nodename
,
inp
,
out_
,
sub
):
img
,
kern
=
inp
...
...
theano/sandbox/gpuarray/dnn.py
浏览文件 @
645557f9
...
...
@@ -26,10 +26,7 @@ from .conv import GpuConv
# GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
from
.nnet
import
GpuSoftmax
from
.opt
import
gpu_seqopt
,
register_opt
,
conv_groupopt
,
op_lifter
from
.opt_util
import
alpha_merge
,
output_merge
# We need to import this to define the flags.
from
theano.sandbox
import
dnn_flags
# noqa
from
.opt_util
import
alpha_merge
,
output_merge
,
inplace_allocempty
def
dnn_available
():
...
...
@@ -50,7 +47,6 @@ def dnn_available():
dnn_available
.
avail
=
False
preambule
=
"""
#include <stdio.h>
#include <cuda.h>
#include <cudnn.h>
#include <cudnn_helper.h>
"""
...
...
@@ -64,15 +60,18 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
return 1;
}
"""
params
=
[
"-l"
,
"cudnn"
,
"-I"
+
os
.
path
.
dirname
(
__file__
)]
if
config
.
dnn
.
include_path
:
params
.
append
(
"-I"
+
config
.
dnn
.
include_path
)
if
config
.
dnn
.
library_path
:
params
.
append
(
"-L"
+
config
.
dnn
.
library_path
)
# Do not run here the test program. It would run on the
# default gpu, not the one selected by the user. If mixed
# GPU are installed or if the GPUs are configured in
# exclusive mode, this cause bad detection.
comp
,
out
,
err
=
GCC_compiler
.
try_flags
(
[
"-l"
,
"cudnn"
,
"-I"
+
os
.
path
.
dirname
(
__file__
),
"-I"
+
config
.
dnn
.
include_path
,
"-L"
+
config
.
dnn
.
library_path
],
preambule
=
preambule
,
body
=
body
,
params
,
preambule
=
preambule
,
body
=
body
,
try_run
=
False
,
output
=
True
)
dnn_available
.
avail
=
comp
...
...
@@ -1242,86 +1241,62 @@ conv_groupopt.register('local_conv_dnn', local_conv_dnn, 20,
'conv_dnn'
,
'fast_compile'
,
'fast_run'
,
'cudnn'
)
@local_optimizer
([
GpuDnnConv
],
inplace
=
True
)
def
local_dnn_conv_inplace
(
node
):
if
type
(
node
.
op
)
!=
GpuDnnConv
or
node
.
op
.
inplace
:
return
inputs
=
list
(
node
.
inputs
)
dest
=
inputs
[
2
]
if
(
dest
.
owner
and
isinstance
(
dest
.
owner
.
op
,
GpuAllocEmpty
)
and
len
(
dest
.
clients
)
>
1
):
inputs
[
2
]
=
GpuAllocEmpty
(
dest
.
owner
.
op
.
dtype
)(
*
dest
.
owner
.
inputs
)
@inplace_allocempty
(
GpuDnnConv
,
2
)
def
local_dnn_conv_inplace
(
node
,
inputs
):
return
[
GpuDnnConv
(
algo
=
node
.
op
.
algo
,
inplace
=
True
)(
*
inputs
)]
@local_optimizer
([
GpuDnnConvGradW
],
inplace
=
True
)
def
local_dnn_convgw_inplace
(
node
):
if
type
(
node
.
op
)
!=
GpuDnnConvGradW
or
node
.
op
.
inplace
:
return
inputs
=
list
(
node
.
inputs
)
dest
=
inputs
[
2
]
if
(
dest
.
owner
and
isinstance
(
dest
.
owner
.
op
,
GpuAllocEmpty
)
and
len
(
dest
.
clients
)
>
1
):
inputs
[
2
]
=
GpuAllocEmpty
(
dest
.
owner
.
op
.
dtype
)(
*
dest
.
owner
.
inputs
)
@inplace_allocempty
(
GpuDnnConvGradW
,
2
)
def
local_dnn_convgw_inplace
(
node
,
inputs
):
return
[
GpuDnnConvGradW
(
algo
=
node
.
op
.
algo
,
inplace
=
True
)(
*
inputs
)]
@local_optimizer
([
GpuDnnConvGradI
],
inplace
=
True
)
def
local_dnn_convgi_inplace
(
node
):
if
type
(
node
.
op
)
!=
GpuDnnConvGradI
or
node
.
op
.
inplace
:
return
inputs
=
list
(
node
.
inputs
)
dest
=
inputs
[
2
]
if
(
dest
.
owner
and
isinstance
(
dest
.
owner
.
op
,
GpuAllocEmpty
)
and
len
(
dest
.
clients
)
>
1
):
inputs
[
2
]
=
GpuAllocEmpty
(
dest
.
owner
.
op
.
dtype
)(
*
dest
.
owner
.
inputs
)
@inplace_allocempty
(
GpuDnnConvGradI
,
2
)
def
local_dnn_convgi_inplace
(
node
,
inputs
):
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
,
inplace
=
True
)(
*
inputs
)]
optdb
.
register
(
'local_dnna_conv_inplace'
,
tensor
.
opt
.
in2out
(
local_dnn_conv_inplace
,
local_dnn_convgw_inplace
,
local_dnn_convgi_inplace
,
name
=
"local_dnn_conv_inplace"
),
name
=
"local_dnn
a
_conv_inplace"
),
70.0
,
'fast_run'
,
'inplace'
,
'gpuarray'
,
'cudnn'
)
@register_opt
(
'cudnn'
)
@alpha_merge
(
GpuDnnConv
,
alpha_in
=
4
,
beta_in
=
5
,
nd
=
4
)
@alpha_merge
(
GpuDnnConv
,
alpha_in
=
4
,
beta_in
=
5
)
def
local_dnn_conv_alpha_merge
(
node
,
*
inputs
):
return
[
GpuDnnConv
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@alpha_merge
(
GpuDnnConvGradW
,
alpha_in
=
4
,
beta_in
=
5
,
nd
=
4
)
@alpha_merge
(
GpuDnnConvGradW
,
alpha_in
=
4
,
beta_in
=
5
)
def
local_dnn_convw_alpha_merge
(
node
,
*
inputs
):
return
[
GpuDnnConvGradW
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@alpha_merge
(
GpuDnnConvGradI
,
alpha_in
=
4
,
beta_in
=
5
,
nd
=
4
)
@alpha_merge
(
GpuDnnConvGradI
,
alpha_in
=
4
,
beta_in
=
5
)
def
local_dnn_convi_alpha_merge
(
node
,
*
inputs
):
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@output_merge
(
GpuDnnConv
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
,
nd
=
4
)
@output_merge
(
GpuDnnConv
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
)
def
local_dnn_conv_output_merge
(
node
,
*
inputs
):
inputs
=
inputs
[
0
:
2
]
+
(
gpu_contiguous
(
inputs
[
2
]),)
+
inputs
[
3
:]
return
[
GpuDnnConv
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@output_merge
(
GpuDnnConvGradW
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
,
nd
=
4
)
@output_merge
(
GpuDnnConvGradW
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
)
def
local_dnn_convw_output_merge
(
node
,
*
inputs
):
inputs
=
inputs
[
0
:
2
]
+
(
gpu_contiguous
(
inputs
[
2
]),)
+
inputs
[
3
:]
return
[
GpuDnnConvGradW
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@output_merge
(
GpuDnnConvGradI
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
,
nd
=
4
)
@output_merge
(
GpuDnnConvGradI
,
alpha_in
=
4
,
beta_in
=
5
,
out_in
=
2
)
def
local_dnn_convi_output_merge
(
node
,
*
inputs
):
inputs
=
inputs
[
0
:
2
]
+
(
gpu_contiguous
(
inputs
[
2
]),)
+
inputs
[
3
:]
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
...
...
theano/sandbox/gpuarray/elemwise.py
浏览文件 @
645557f9
差异被折叠。
点击展开。
theano/sandbox/gpuarray/kernel_codegen.py
浏览文件 @
645557f9
...
...
@@ -4,11 +4,11 @@ Helper routines for generating gpu kernels for nvcc.
"""
try
:
import
pygpu
from
pygpu
import
gpuarray
except
ImportError
:
pass
def
nvcc_kernel
(
name
,
params
,
body
):
"""
Return the c code of a kernel function.
...
...
@@ -174,16 +174,15 @@ def inline_softmax(N, buf, buf2, threadPos, threadCount, dtype="float32"):
"""
ctype
=
gpuarray
.
dtype_to_ctype
(
dtype
)
return
[
# get max of buf (trashing all but buf[0])
inline_reduce_max
(
N
,
buf
,
threadPos
,
threadCount
),
# get max of buf (trashing all but buf[0])
return
[
inline_reduce_max
(
N
,
buf
,
threadPos
,
threadCount
),
'__syncthreads()'
,
(
'
%
s row_max = '
+
buf
+
'[0]'
)
%
ctype
,
'__syncthreads()'
,
'for(int __i='
+
threadPos
+
'; __i<'
+
N
+
'; __i+='
+
threadCount
+
'){'
,
buf
+
'[__i] = exp('
+
buf2
+
'[__i] - row_max)'
,
buf2
+
'[__i] = '
+
buf
+
'[__i]'
,
'; __i+='
+
threadCount
+
'){'
,
buf
+
'[__i] = exp('
+
buf2
+
'[__i] - row_max)'
,
buf2
+
'[__i] = '
+
buf
+
'[__i]'
,
'}'
,
'__syncthreads()'
,
inline_reduce_sum
(
N
,
buf
,
threadPos
,
threadCount
),
...
...
@@ -192,8 +191,8 @@ def inline_softmax(N, buf, buf2, threadPos, threadCount, dtype="float32"):
'__syncthreads()'
,
# divide each exp() result by the sum to complete the job.
'for(int __i='
+
threadPos
+
'; __i<'
+
N
+
'; __i+='
+
threadCount
+
'){'
,
buf
+
'[__i] = '
+
buf2
+
'[__i] / row_sum'
,
'; __i+='
+
threadCount
+
'){'
,
buf
+
'[__i] = '
+
buf2
+
'[__i] / row_sum'
,
'}'
,
'__syncthreads()'
,
]
...
...
@@ -232,7 +231,7 @@ def inline_reduce_fixed_shared(N, buf, x, stride_x, load_x, pos, count,
Optional, the dtype of the output.
manner_fn
A function that accepts strings of arguments a and b, and returns c code
for their reduction.
for their reduction.
Example: return "
%(a)
s +
%(b)
s" for a sum reduction.
manner_init
A function that accepts strings of arguments a and return c code for its
...
...
@@ -259,7 +258,7 @@ def inline_reduce_fixed_shared(N, buf, x, stride_x, load_x, pos, count,
loop_line
=
manner_fn
(
"red"
,
manner_init
(
"
%(load_x)
s(
%(x)
s[i *
%(stride_x)
s])"
%
locals
()))
loop_line2
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[i]"
%
buf
)
"
%
s[i]"
%
buf
)
r_16
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[
%
s+16]"
%
(
buf
,
pos
))
r_8
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[
%
s+8]"
%
(
buf
,
pos
))
r_4
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[
%
s+4]"
%
(
buf
,
pos
))
...
...
@@ -324,7 +323,7 @@ def inline_softmax_fixed_shared(N, buf, x, stride_x, load_x,
Parameters
----------
N
N
Length of the buffer, atleast waprSize(32).
buf
A shared memory buffer of size warpSize * sizeof(dtype).
...
...
theano/sandbox/gpuarray/neighbours.py
浏览文件 @
645557f9
import
os
import
numpy
from
theano
import
Op
,
Apply
,
config
from
theano.gof
import
local_optimizer
from
theano.tensor.nnet.neighbours
import
Images2Neibs
import
theano.tensor
as
T
try
:
import
pygpu
from
pygpu
import
gpuarray
,
elemwise
from
pygpu
import
gpuarray
except
ImportError
:
pass
from
.basic_ops
import
(
as_gpuarray_variable
,
host_from_gpu
,
gpu_from_host
,
GpuKernelBase
,
Kernel
)
from
.basic_ops
import
as_gpuarray_variable
,
GpuKernelBase
,
Kernel
from
.opt
import
register_opt
as
register_gpu_opt
,
op_lifter
from
.type
import
GpuArrayType
from
.comp
import
NVCC_compiler
class
GpuImages2Neibs
(
GpuKernelBase
,
Images2Neibs
,
Op
):
...
...
@@ -45,27 +40,10 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
dtype
=
ten4
.
type
.
dtype
)()])
def
c_code_cache_version
(
self
):
return
(
1
0
,
1
)
return
(
1
1
,
)
def
c_headers
(
self
):
if
pygpu
.
get_default_context
()
.
kind
==
'opencl'
:
raise
MethodNotDefined
(
'cuda only'
)
return
[
'cuda.h'
,
'<gpuarray/extension.h>'
,
'<numpy_compat.h>'
,
'<gpuarray/ext_cuda.h>'
,
'<gpuarray/types.h>'
]
def
c_header_dirs
(
self
):
if
pygpu
.
get_default_context
()
.
kind
==
'opencl'
:
raise
MethodNotDefined
(
'cuda only'
)
cuda_root
=
config
.
cuda
.
root
if
cuda_root
:
return
[
os
.
path
.
join
(
cuda_root
,
'include'
)]
else
:
return
[]
def
c_init_code
(
self
):
if
pygpu
.
get_default_context
()
.
kind
==
'opencl'
:
raise
MethodNotDefined
(
'cuda only'
)
return
[
'setup_ext_cuda();'
]
return
[
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
]
def
gpu_kernels
(
self
,
node
,
nodename
):
dtype_ten4
=
node
.
inputs
[
0
]
.
dtype
...
...
theano/sandbox/gpuarray/nerv.py
浏览文件 @
645557f9
...
...
@@ -176,13 +176,13 @@ def local_dot_to_gemm16(node):
@opt.register_opt
()
@alpha_merge
(
Gemm16
,
alpha_in
=
1
,
beta_in
=
4
,
nd
=
2
)
@alpha_merge
(
Gemm16
,
alpha_in
=
1
,
beta_in
=
4
)
def
local_gemm16_alpha_merge
(
node
,
*
inputs
):
return
[
Gemm16
(
relu
=
node
.
op
.
relu
)(
*
inputs
)]
@opt.register_opt
()
@output_merge
(
Gemm16
,
alpha_in
=
1
,
beta_in
=
4
,
out_in
=
0
,
nd
=
2
)
@output_merge
(
Gemm16
,
alpha_in
=
1
,
beta_in
=
4
,
out_in
=
0
)
def
local_gemm16_output_merge
(
node
,
*
inputs
):
return
[
Gemm16
(
relu
=
node
.
op
.
relu
)(
*
inputs
)]
...
...
theano/sandbox/gpuarray/nnet.py
浏览文件 @
645557f9
差异被折叠。
点击展开。
theano/sandbox/gpuarray/opt.py
浏览文件 @
645557f9
...
...
@@ -645,13 +645,13 @@ def local_gpua_hgemm(node):
@register_opt
()
@alpha_merge
(
GpuGemm
,
alpha_in
=
1
,
beta_in
=
4
,
nd
=
2
)
@alpha_merge
(
GpuGemm
,
alpha_in
=
1
,
beta_in
=
4
)
def
local_gpuagemm_alpha_merge
(
node
,
*
inputs
):
return
[
gpugemm_no_inplace
(
*
inputs
)]
@register_opt
()
@output_merge
(
GpuGemm
,
alpha_in
=
1
,
beta_in
=
4
,
out_in
=
0
,
nd
=
2
)
@output_merge
(
GpuGemm
,
alpha_in
=
1
,
beta_in
=
4
,
out_in
=
0
)
def
local_gpuagemm_output_merge
(
node
,
*
inputs
):
return
[
gpugemm_no_inplace
(
*
inputs
)]
...
...
theano/sandbox/gpuarray/opt_util.py
浏览文件 @
645557f9
差异被折叠。
点击展开。
theano/sandbox/gpuarray/subtensor.py
浏览文件 @
645557f9
...
...
@@ -180,19 +180,9 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
def
_f16_ok
(
self
):
return
self
.
iadd_node
.
op
.
_f16_ok
def
c_header_dirs
(
self
):
cuda_root
=
config
.
cuda
.
root
if
cuda_root
:
return
[
os
.
path
.
join
(
cuda_root
,
'include'
)]
else
:
return
[]
def
c_headers
(
self
):
return
self
.
iadd_node
.
op
.
c_headers
()
def
c_compiler
(
self
):
return
self
.
iadd_node
.
op
.
c_compiler
()
def
c_init_code
(
self
):
return
self
.
iadd_node
.
op
.
c_init_code
()
...
...
@@ -404,7 +394,7 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
elemwise_version
=
self
.
iadd_node
.
c_code_cache_version
()
if
not
parent_version
or
not
elemwise_version
:
return
return
parent_version
+
elemwise_version
+
(
2
,)
return
parent_version
+
elemwise_version
+
(
3
,)
class
GpuAdvancedSubtensor1
(
HideC
,
tensor
.
AdvancedSubtensor1
):
...
...
theano/sandbox/gpuarray/tests/test_basic_ops.py
浏览文件 @
645557f9
import
unittest
from
theano.compat
import
izip
from
copy
import
copy
,
deepcopy
from
six
import
iteritems
...
...
@@ -13,16 +12,31 @@ from theano.tensor.basic import alloc
# Don't import test classes otherwise they get tested as part of the file
from
theano.tensor.tests
import
test_basic
from
theano.tensor.tests.test_basic
import
rand
,
safe_make_node
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests.unittest_tools
import
SkipTest
import
theano.sandbox.gpuarray
from
..type
import
(
GpuArrayType
,
gpuarray_shared_constructor
)
from
..basic_ops
import
(
host_from_gpu
,
gpu_from_host
,
HostFromGpu
,
GpuFromHost
,
GpuReshape
,
gpu_alloc
,
GpuAlloc
,
GpuAllocEmpty
,
GpuContiguous
,
gpu_join
,
GpuJoin
,
GpuSplit
,
GpuEye
,
gpu_contiguous
)
from
..subtensor
import
GpuSubtensor
import
theano.sandbox.cuda
as
cuda_ndarray
try
:
from
pygpu
import
gpuarray
except
:
pass
if
theano
.
sandbox
.
gpuarray
.
pygpu
is
None
:
raise
SkipTest
(
"pygpu not installed"
)
# If you are writing a new test file, don't copy this code, but rather
# import stuff from this file (like mode_with_gpu) to reuse it.
import
theano.sandbox.cuda
as
cuda_ndarray
if
cuda_ndarray
.
cuda_available
and
not
theano
.
sandbox
.
gpuarray
.
pygpu_activated
:
if
not
cuda_ndarray
.
use
.
device_number
:
# We should not enable all the use like the flag device=gpu,
...
...
@@ -36,25 +50,9 @@ if cuda_ndarray.cuda_available and not theano.sandbox.gpuarray.pygpu_activated:
if
not
theano
.
sandbox
.
gpuarray
.
pygpu_activated
:
raise
SkipTest
(
"pygpu disabled"
)
from
..type
import
(
GpuArrayType
,
gpuarray_shared_constructor
)
from
..basic_ops
import
(
host_from_gpu
,
gpu_from_host
,
gpu_alloc
,
GpuAlloc
,
GpuAllocEmpty
,
gpu_from_cuda
,
cuda_from_gpu
,
HostFromGpu
,
GpuContiguous
,
GpuFromHost
,
GpuReshape
,
gpu_join
,
GpuJoin
,
GpuSplit
,
GpuEye
,
gpu_contiguous
)
from
..subtensor
import
GpuSubtensor
from
theano.tests
import
unittest_tools
as
utt
utt
.
seed_rng
()
rng
=
numpy
.
random
.
RandomState
(
seed
=
utt
.
fetch_seed
())
from
pygpu
import
gpuarray
if
theano
.
config
.
mode
==
'FAST_COMPILE'
:
mode_with_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
including
(
'gpuarray'
)
.
excluding
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
excluding
(
'gpuarray'
)
...
...
@@ -63,22 +61,6 @@ else:
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpuarray'
)
def
may_fail
(
msg
,
EClass
):
"""Mark a test that requires very specific conditions to work to
mask a specific exception class."""
def
test_decorator
(
f
):
def
wrapper
():
try
:
f
()
except
Exception
as
e
:
if
isinstance
(
e
,
EClass
):
raise
SkipTest
(
msg
,
e
)
raise
wrapper
.
__name__
=
f
.
__name__
return
wrapper
return
test_decorator
def
inplace_func
(
inputs
,
outputs
,
mode
=
None
,
allow_input_downcast
=
False
,
on_unused_input
=
'raise'
,
name
=
None
):
if
mode
is
None
:
...
...
@@ -183,9 +165,9 @@ def makeTester(name, op, gpu_op, cases, checks=None, mode_gpu=mode_with_gpu,
else
:
err_msg
=
(
"Test
%
s::
%
s: exception raised during test "
"call was not the same as the reference "
"call (got:
%
s, expected
%
s)"
)
%
\
"call (got:
%
s, expected
%
s)"
%
(
self
.
gpu_op
,
testname
,
type
(
exc
),
type
(
ref_e
))
type
(
ref_e
))
)
exc
.
args
+=
(
err_msg
,)
raise
...
...
@@ -197,9 +179,9 @@ def makeTester(name, op, gpu_op, cases, checks=None, mode_gpu=mode_with_gpu,
expected
):
self
.
fail
((
"Test
%
s::
%
s: Output
%
s gave the wrong "
"value. With inputs
%
s, expected
%
s "
"(dtype
%
s), got
%
s (dtype
%
s)."
)
%
(
self
.
op
,
testname
,
i
,
inputs
,
expected
,
expected
.
dtype
,
variable
,
variable
.
dtype
))
"(dtype
%
s), got
%
s (dtype
%
s)."
%
(
self
.
op
,
testname
,
i
,
inputs
,
expected
,
expected
.
dtype
,
variable
,
variable
.
dtype
)
))
for
description
,
check
in
iteritems
(
self
.
checks
):
if
not
check
(
inputs
,
variables
):
...
...
@@ -250,36 +232,6 @@ def test_transfer_strided():
assert
numpy
.
all
(
fv
==
av
)
@may_fail
(
"Op fails if both contexts are not the same and it's rare "
"that the tests will be run this way"
,
ValueError
)
def
test_transfer_cuda_gpu
():
import
theano.sandbox.cuda
as
cuda_ndarray
if
cuda_ndarray
.
cuda_available
is
False
:
raise
SkipTest
(
"Can't test interaction with cuda if cuda not present"
)
g
=
GpuArrayType
(
dtype
=
'float32'
,
broadcastable
=
(
False
,
False
))(
'g'
)
c
=
cuda_ndarray
.
CudaNdarrayType
((
False
,
False
))(
'c'
)
av
=
theano
.
_asarray
(
rng
.
rand
(
5
,
4
),
dtype
=
'float32'
)
gv
=
gpuarray
.
array
(
av
)
cv
=
cuda_ndarray
.
CudaNdarray
(
av
)
gvs
=
gv
[:,
::
-
2
]
cvs
=
cv
[:,
::
-
2
]
f
=
theano
.
function
([
c
],
gpu_from_cuda
(
c
))
fv
=
f
(
cv
)
assert
GpuArrayType
.
values_eq_approx
(
fv
,
gv
)
fvs
=
f
(
cvs
)
assert
GpuArrayType
.
values_eq_approx
(
fvs
,
gvs
)
f
=
theano
.
function
([
g
],
cuda_from_gpu
(
g
))
fv
=
f
(
gv
)
assert
cuda_ndarray
.
CudaNdarrayType
.
values_eq_approx
(
fv
,
cv
)
fvs
=
f
(
gvs
)
assert
cuda_ndarray
.
CudaNdarrayType
.
values_eq_approx
(
fvs
,
cvs
)
def
gpu_alloc_expected
(
x
,
*
shp
):
g
=
gpuarray
.
empty
(
shp
,
dtype
=
x
.
dtype
)
g
[:]
=
x
...
...
@@ -291,8 +243,8 @@ GpuAllocTester = makeTester(
gpu_op
=
gpu_alloc
,
cases
=
dict
(
correct01
=
(
rand
(),
numpy
.
int32
(
7
)),
# just gives a DeepCopyOp with possibly wrong results on the CPU
#
correct01_bcast=(rand(1), numpy.int32(7)),
# just gives a DeepCopyOp with possibly wrong results on the CPU
#
correct01_bcast=(rand(1), numpy.int32(7)),
correct02
=
(
rand
(),
numpy
.
int32
(
4
),
numpy
.
int32
(
7
)),
correct12
=
(
rand
(
7
),
numpy
.
int32
(
4
),
numpy
.
int32
(
7
)),
correct13
=
(
rand
(
7
),
numpy
.
int32
(
2
),
numpy
.
int32
(
4
),
...
...
@@ -486,8 +438,6 @@ def test_hostfromgpu_shape_i():
cv
=
gpuarray
.
asarray
(
numpy
.
random
.
rand
(
5
,
4
),
dtype
=
'float32'
)
gpu_from_host
=
theano
.
sandbox
.
gpuarray
.
basic_ops
.
gpu_from_host
host_from_gpu
=
theano
.
sandbox
.
gpuarray
.
basic_ops
.
host_from_gpu
f
=
theano
.
function
([
a
],
gpu_from_host
(
a
),
mode
=
m
)
assert
gpu_from_host
in
[
x
.
op
for
x
in
f
.
maker
.
fgraph
.
toposort
()]
...
...
theano/sandbox/gpuarray/tests/test_blas.py
浏览文件 @
645557f9
...
...
@@ -6,8 +6,7 @@ import numpy
import
theano
from
theano
import
tensor
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.blas
import
(
gemv_inplace
,
gemm_inplace
,
ger_destructive
,
_dot22
)
from
theano.tensor.blas
import
gemv_inplace
,
gemm_inplace
,
_dot22
from
theano.tensor.tests.test_blas
import
TestGer
,
BaseGemv
from
..
import
gpuarray_shared_constructor
...
...
@@ -15,22 +14,22 @@ from .test_basic_ops import (makeTester, rand,
mode_with_gpu
)
from
..blas
import
(
gpugemv_inplace
,
gpugemv_no_inplace
,
gpugemm_inplace
,
gpugemm_no_inplace
,
gpugemm_inplace
,
gpuger_inplace
,
gpuger_no_inplace
,
GpuGer
,
gpu_dot22
,
GpuGemm
)
GpuGemvTester
=
makeTester
(
'GpuGemvTester'
,
op
=
gemv_inplace
,
gpu_op
=
gpugemv_inplace
,
cases
=
dict
(
dot_vv
=
[
rand
(
1
),
1
,
rand
(
1
,
2
),
rand
(
2
),
0
],
dot_vm
=
[
rand
(
3
),
1
,
rand
(
3
,
2
),
rand
(
2
),
0
],
#
test_02=[rand(0), 1, rand(0, 2), rand(2), 0],
#
test_30=[rand(3), 1, rand(3, 0), rand(0), 0],
#
test_00=[rand(0), 1, rand(0, 0), rand(0), 0],
test_stride
=
[
rand
(
3
)[::
-
1
],
1
,
rand
(
3
,
2
)[::
-
1
],
rand
(
2
)[::
-
1
],
0
],
)
)
GpuGemvTester
=
makeTester
(
'GpuGemvTester'
,
op
=
gemv_inplace
,
gpu_op
=
gpugemv_inplace
,
cases
=
dict
(
dot_vv
=
[
rand
(
1
),
1
,
rand
(
1
,
2
),
rand
(
2
),
0
],
dot_vm
=
[
rand
(
3
),
1
,
rand
(
3
,
2
),
rand
(
2
),
0
],
#
test_02=[rand(0), 1, rand(0, 2), rand(2), 0],
#
test_30=[rand(3), 1, rand(3, 0), rand(0), 0],
#
test_00=[rand(0), 1, rand(0, 0), rand(0), 0],
test_stride
=
[
rand
(
3
)[::
-
1
],
1
,
rand
(
3
,
2
)[::
-
1
],
rand
(
2
)[::
-
1
],
0
],
)
)
class
TestGpuSgemv
(
TestCase
,
BaseGemv
,
utt
.
TestOptimizationMixin
):
...
...
@@ -48,24 +47,24 @@ class TestGpuSgemv(TestCase, BaseGemv, utt.TestOptimizationMixin):
return
theano
.
shared
(
val
)
GpuGemmTester
=
makeTester
(
'GpuGemmTester'
,
op
=
gemm_inplace
,
gpu_op
=
gpugemm_inplace
,
cases
=
dict
(
test1
=
[
rand
(
3
,
4
),
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.0
],
test2
=
[
rand
(
3
,
4
),
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
1.0
],
test3
=
[
rand
(
3
,
4
),
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
-
1.0
],
test4
=
[
rand
(
3
,
4
),
0.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.0
],
test5
=
[
rand
(
3
,
4
),
0.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.6
],
test6
=
[
rand
(
3
,
4
),
0.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
-
1.0
],
test7
=
[
rand
(
3
,
4
),
-
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.0
],
test8
=
[
rand
(
3
,
4
),
-
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
1.1
],
test9
=
[
rand
(
3
,
4
),
-
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
-
1.1
],
#
test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0],
#
test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1],
#
test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1],
#
test13=[rand(0, 0), -1.0, rand(0, 0), rand(0, 0), -1.1],
)
)
GpuGemmTester
=
makeTester
(
'GpuGemmTester'
,
op
=
gemm_inplace
,
gpu_op
=
gpugemm_inplace
,
cases
=
dict
(
test1
=
[
rand
(
3
,
4
),
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.0
],
test2
=
[
rand
(
3
,
4
),
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
1.0
],
test3
=
[
rand
(
3
,
4
),
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
-
1.0
],
test4
=
[
rand
(
3
,
4
),
0.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.0
],
test5
=
[
rand
(
3
,
4
),
0.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.6
],
test6
=
[
rand
(
3
,
4
),
0.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
-
1.0
],
test7
=
[
rand
(
3
,
4
),
-
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
0.0
],
test8
=
[
rand
(
3
,
4
),
-
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
1.1
],
test9
=
[
rand
(
3
,
4
),
-
1.0
,
rand
(
3
,
5
),
rand
(
5
,
4
),
-
1.1
],
#
test10=[rand(0, 4), -1.0, rand(0, 5), rand(5, 4), 0.0],
#
test11=[rand(3, 0), -1.0, rand(3, 5), rand(5, 0), 1.1],
#
test12=[rand(3, 4), -1.0, rand(3, 0), rand(0, 4), -1.1],
#
test13=[rand(0, 0), -1.0, rand(0, 0), rand(0, 0), -1.1],
)
)
class
TestGpuSger
(
TestGer
):
...
...
@@ -84,8 +83,10 @@ class TestGpuSger(TestGer):
def
test_f32_0_0
(
self
):
raise
SkipTest
(
'0-sized objects not supported'
)
def
test_f32_1_0
(
self
):
raise
SkipTest
(
'0-sized objects not supported'
)
def
test_f32_0_1
(
self
):
raise
SkipTest
(
'0-sized objects not supported'
)
...
...
@@ -103,21 +104,22 @@ class TestGpuGer_OpContract(TestCase, utt.T_OpContractMixin):
GpuDot22Tester
=
makeTester
(
'Gpu
Gemm
Tester'
,
'Gpu
Dot22
Tester'
,
op
=
_dot22
,
gpu_op
=
gpu_dot22
,
cases
=
dict
(
test1
=
[
rand
(
3
,
4
),
rand
(
4
,
5
)],
test2
=
[
rand
(
1
,
4
),
rand
(
4
,
5
)],
test3
=
[
rand
(
3
,
1
),
rand
(
1
,
5
)],
test4
=
[
rand
(
3
,
4
),
rand
(
4
,
1
)],
#
test5=[rand(0, 4), rand(4, 5)],
#
test6=[rand(3, 0), rand(0, 5)],
#
test7=[rand(3, 4), rand(4, 0)],
#
test8=[rand(0, 4), rand(4, 0)],
#
test9=[rand(0, 0), rand(0, 0)],
#
test5=[rand(0, 4), rand(4, 5)],
#
test6=[rand(3, 0), rand(0, 5)],
#
test7=[rand(3, 4), rand(4, 0)],
#
test8=[rand(0, 4), rand(4, 0)],
#
test9=[rand(0, 0), rand(0, 0)],
)
)
def
test_hgemm_swap
():
from
theano.sandbox.cuda
import
nvcc_compiler
if
nvcc_compiler
.
nvcc_version
<
'7.5'
:
...
...
@@ -149,6 +151,7 @@ def test_hgemm_swap():
utt
.
assert_allclose
(
of
,
on
)
def
test_hgemm_alpha_output_merge
():
from
theano.sandbox.cuda
import
nvcc_compiler
if
nvcc_compiler
.
nvcc_version
<
'7.5'
:
...
...
theano/sandbox/gpuarray/tests/test_conv_cuda_ndarray.py
浏览文件 @
645557f9
差异被折叠。
点击展开。
theano/sandbox/gpuarray/tests/test_neighbours.py
浏览文件 @
645557f9
import
unittest
from
theano.tensor.nnet.tests
import
test_neighbours
# We let that import do the init of the back-end if needed.
from
.test_basic_ops
import
(
mode_with_gpu
,
mode_without_gpu
)
from
.test_basic_ops
import
mode_with_gpu
from
..neighbours
import
GpuImages2Neibs
...
...
theano/sandbox/gpuarray/tests/test_nnet.py
浏览文件 @
645557f9
from
__future__
import
print_function
from
nose.plugins.skip
import
SkipTest
import
numpy
import
unittest
...
...
@@ -7,8 +7,6 @@ import theano
import
theano.tensor
as
T
import
theano.tests.unittest_tools
as
utt
from
theano.sandbox
import
gpuarray
# We let that import do the init of the back-end if needed.
from
.test_basic_ops
import
(
mode_with_gpu
,
mode_without_gpu
)
...
...
@@ -36,15 +34,13 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
n_in
=
4098
n_out
=
4099
x
=
T
.
fmatrix
(
'x'
)
y
=
T
.
lvector
(
'y'
)
b
=
T
.
fvector
(
'b'
)
#W = T.fmatrix('W')
# we precompute the dot with big shape before to allow the test of
# GpuCrossentropySoftmax1HotWithBiasDx to not fail with the error
#(the launch timed out and was terminated) on GPU card not
#
(the launch timed out and was terminated) on GPU card not
# powerful enough. We need the big shape to check for corner
# case.
dot_result
=
T
.
fmatrix
(
'dot_result'
)
...
...
@@ -54,7 +50,6 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
xx
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
batch_size
,
n_in
),
dtype
=
numpy
.
float32
)
#?????yy = numpy.ones((batch_size,),dtype='float32')
yy
=
numpy
.
ones
((
batch_size
,),
dtype
=
'int32'
)
b_values
=
numpy
.
zeros
((
n_out
,),
dtype
=
'float32'
)
W_values
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
n_in
,
n_out
),
dtype
=
'float32'
)
...
...
@@ -71,8 +66,6 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
classify_gpu
=
theano
.
function
(
inputs
=
[
y
,
b
,
dot_result
],
outputs
=
[
loss
,
y_pred
,
dW
],
mode
=
mode_with_gpu
)
# theano.printing.debugprint(classify)
# theano.printing.debugprint(classify_gpu)
assert
any
([
isinstance
(
node
.
op
,
T
.
nnet
.
CrossentropySoftmaxArgmax1HotWithBias
)
...
...
@@ -97,12 +90,10 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
We check that we loop when there are too many threads
"""
n_in
=
1000
batch_size
=
4097
n_out
=
1250
if
not
isinstance
(
mode_with_gpu
,
theano
.
compile
.
DebugMode
):
n_in
=
4098
n_out
=
4099
# Seed numpy.random with config.unittests.rseed
...
...
@@ -137,25 +128,7 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
rtol
=
1e-5
atol
=
1e-6
if
not
numpy
.
allclose
(
cpu_out
,
gpu_out
,
rtol
=
rtol
,
atol
=
atol
):
abs_err
,
rel_err
=
T
.
numeric_grad
.
abs_rel_err
(
cpu_out
,
gpu_out
)
scaled_err
=
numpy
.
minimum
(
abs_err
/
atol
,
rel_err
/
rtol
)
max_i
=
scaled_err
.
argmax
()
print
(
'max err index:'
,
max_i
,
max_i
/
batch_size
,
end
=
' '
)
print
(
max_i
%
batch_size
,
max_i
/
n_out
,
max_i
&
n_out
)
print
(
'At that index:'
)
print
(
'err:'
,
scaled_err
.
flatten
()[
max_i
])
print
(
'absolute error:'
,
abs_err
.
flatten
()[
max_i
])
print
(
'relative error:'
,
rel_err
.
flatten
()[
max_i
])
print
(
'cpu_out:'
,
cpu_out
.
flatten
()[
max_i
])
print
(
'gpu_out:'
,
gpu_out
.
flatten
()[
max_i
])
print
(
'softmax_output_value:'
,
softmax_output_value
.
flatten
()[
max_i
])
print
(
'dnll_value:'
,
dnll_value
[
max_i
/
n_out
])
print
(
'y_idx_value:'
,
y_idx_value
[
max_i
/
n_out
])
assert
False
,
"numpy.allclose(cpu_out, gpu_out, rtol=
%
s, atol=
%
s)"
%
(
rtol
,
atol
)
utt
.
assert_allclose
(
cpu_out
,
gpu_out
,
rtol
=
rtol
,
atol
=
atol
)
def
test_softmax_with_bias_float16
():
...
...
@@ -166,6 +139,7 @@ def test_softmax_with_bias_float16():
softmax_with_bias_unittest_template
(
dtypeInput
=
'float32'
,
dtypeBias
=
'float16'
)
def
test_softmax_with_bias_float32
():
softmax_with_bias_unittest_template
(
dtypeInput
=
'float32'
,
dtypeBias
=
'float32'
)
...
...
@@ -188,6 +162,7 @@ def softmax_with_bias_unittest_template(dtypeInput, dtypeBias):
TODO: check that we loop when there are too many threads. (THIS IS
NOT IMPLEMENTED)
"""
x
=
T
.
matrix
(
'x'
,
dtype
=
dtypeInput
)
b
=
T
.
vector
(
'b'
,
dtype
=
dtypeBias
)
...
...
@@ -228,9 +203,11 @@ def softmax_with_bias_unittest_template(dtypeInput, dtypeBias):
def
test_softmax_float16
():
softmax_unittest_template
(
'float16'
)
def
test_softmax_float32
():
softmax_unittest_template
(
'float32'
)
def
test_softmax_float64
():
softmax_unittest_template
(
'float64'
)
...
...
theano/sandbox/gpuarray/tests/test_type.py
浏览文件 @
645557f9
import
operator
import
numpy
import
theano
...
...
@@ -25,7 +23,6 @@ def test_deep_copy():
def
test_values_eq_approx
():
a
=
rand_gpuarray
(
20
,
dtype
=
'float32'
)
g
=
GpuArrayType
(
dtype
=
'float32'
,
broadcastable
=
(
False
,))(
'g'
)
assert
GpuArrayType
.
values_eq_approx
(
a
,
a
)
b
=
a
.
copy
()
b
[
0
]
=
numpy
.
asarray
(
b
[
0
])
+
1.
...
...
theano/sandbox/gpuarray/type.py
浏览文件 @
645557f9
...
...
@@ -184,7 +184,7 @@ class GpuArrayType(Type):
@staticmethod
def
may_share_memory
(
a
,
b
):
if
(
not
isinstance
(
a
,
gpuarray
.
GpuArray
)
or
not
isinstance
(
b
,
gpuarray
.
GpuArray
)):
not
isinstance
(
b
,
gpuarray
.
GpuArray
)):
return
False
return
pygpu
.
gpuarray
.
may_share_memory
(
a
,
b
)
...
...
@@ -200,11 +200,12 @@ class GpuArrayType(Type):
self
.
broadcastable
==
other
.
broadcastable
)
def
convert_variable
(
self
,
var
):
if
(
type
(
self
)
==
type
(
var
.
type
)
and
self
.
typecode
==
var
.
type
.
typecode
and
self
.
ndim
==
var
.
type
.
ndim
and
vt
=
var
.
type
if
(
type
(
self
)
==
type
(
vt
)
and
self
.
typecode
==
vt
.
typecode
and
self
.
ndim
==
vt
.
ndim
and
all
(
sb
==
ob
or
ob
for
sb
,
ob
in
zip
(
self
.
broadcastable
,
v
ar
.
type
.
broadcastable
))):
v
t
.
broadcastable
))):
return
theano
.
tensor
.
patternbroadcast
(
var
,
self
.
broadcastable
)
def
__hash__
(
self
):
...
...
theano/tests/test_flake8.py
浏览文件 @
645557f9
...
...
@@ -157,24 +157,11 @@ whitelist_flake8 = [
"sandbox/linalg/ops.py"
,
"sandbox/linalg/__init__.py"
,
"sandbox/linalg/tests/test_linalg.py"
,
"sandbox/gpuarray/basic_ops.py"
,
"sandbox/gpuarray/nnet.py"
,
"sandbox/gpuarray/elemwise.py"
,
"sandbox/gpuarray/type.py"
,
"sandbox/gpuarray/__init__.py"
,
"sandbox/gpuarray/kernel_codegen.py"
,
"sandbox/gpuarray/conv.py"
,
"sandbox/gpuarray/neighbours.py"
,
"sandbox/gpuarray/tests/test_subtensor.py"
,
"sandbox/gpuarray/tests/test_scan.py"
,
"sandbox/gpuarray/tests/test_neighbours.py"
,
"sandbox/gpuarray/tests/test_conv_cuda_ndarray.py"
,
"sandbox/gpuarray/tests/test_type.py"
,
"sandbox/gpuarray/tests/test_opt.py"
,
"sandbox/gpuarray/tests/test_blas.py"
,
"sandbox/gpuarray/tests/test_elemwise.py"
,
"sandbox/gpuarray/tests/test_nnet.py"
,
"sandbox/gpuarray/tests/test_basic_ops.py"
,
"scan_module/scan_utils.py"
,
"scan_module/scan_views.py"
,
"scan_module/scan.py"
,
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论