Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
bcccce7a
提交
bcccce7a
authored
10月 21, 2020
作者:
Brandon T. Willard
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Apply pyupgrade to theano.gpuarray
上级
2e3f17cb
隐藏空白字符变更
内嵌
并排
正在显示
15 个修改的文件
包含
100 行增加
和
116 行删除
+100
-116
__init__.py
theano/gpuarray/__init__.py
+1
-1
basic_ops.py
theano/gpuarray/basic_ops.py
+13
-14
blas.py
theano/gpuarray/blas.py
+17
-35
cudnn_defs.py
theano/gpuarray/cudnn_defs.py
+4
-4
dnn.py
theano/gpuarray/dnn.py
+12
-11
elemwise.py
theano/gpuarray/elemwise.py
+8
-8
extra_ops.py
theano/gpuarray/extra_ops.py
+2
-2
kernel_codegen.py
theano/gpuarray/kernel_codegen.py
+11
-12
linalg.py
theano/gpuarray/linalg.py
+4
-4
multinomial.py
theano/gpuarray/multinomial.py
+1
-1
opt.py
theano/gpuarray/opt.py
+2
-2
rng_mrg.py
theano/gpuarray/rng_mrg.py
+1
-1
sort.py
theano/gpuarray/sort.py
+2
-2
subtensor.py
theano/gpuarray/subtensor.py
+7
-10
type.py
theano/gpuarray/type.py
+15
-9
没有找到文件。
theano/gpuarray/__init__.py
浏览文件 @
bcccce7a
...
...
@@ -152,7 +152,7 @@ def init_dev(dev, name=None, preallocate=None):
file
=
sys
.
stderr
,
)
if
preallocate
<
0
:
print
(
"Disabling allocation cache on
%
s"
%
(
dev
,
))
print
(
"Disabling allocation cache on
{}"
.
format
(
dev
))
elif
preallocate
>
0
:
if
preallocate
<=
1
:
gmem
=
min
(
preallocate
,
0.95
)
*
context
.
total_gmem
...
...
theano/gpuarray/basic_ops.py
浏览文件 @
bcccce7a
...
...
@@ -4,7 +4,6 @@ import re
from
collections
import
deque
import
numpy
as
np
from
six
import
string_types
import
theano
from
theano
import
Apply
,
Op
,
Type
,
Variable
,
config
,
tensor
...
...
@@ -129,7 +128,7 @@ def gpuarray_helper_inc_dir():
return
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"c_code"
)
class
Kernel
(
object
)
:
class
Kernel
:
"""
This class groups together all the attributes of a gpu kernel.
...
...
@@ -214,14 +213,14 @@ class Kernel(object):
@staticmethod
def
get_flags
(
*
types
):
def
get_dtype
(
t
):
if
isinstance
(
t
,
str
ing_types
):
if
isinstance
(
t
,
str
):
return
np
.
dtype
(
t
)
elif
isinstance
(
t
,
Type
):
return
t
.
dtype
elif
isinstance
(
t
,
Variable
):
return
t
.
type
.
dtype
else
:
raise
TypeError
(
"can't get a dtype from
%
s"
%
(
type
(
t
),
))
raise
TypeError
(
"can't get a dtype from
{}"
.
format
(
type
(
t
)
))
dtypes
=
[
get_dtype
(
t
)
for
t
in
types
]
flags
=
dict
()
...
...
@@ -291,7 +290,7 @@ def get_ctype(dtype):
return
"npy_"
+
dtype
.
name
class
GpuKernelBase
(
object
)
:
class
GpuKernelBase
:
"""
Base class for operations that need to compile kernels.
...
...
@@ -334,7 +333,7 @@ class GpuKernelBase(object):
if
isinstance
(
self
.
params_type
,
ParamsType
)
and
self
.
params_type
.
has_type
(
gpu_context_type
):
return
"(
%
s->
%
s)"
%
(
return
"(
{}->{})"
.
format
(
params_c_name
,
self
.
params_type
.
get_field
(
gpu_context_type
),
)
...
...
@@ -351,14 +350,14 @@ class GpuKernelBase(object):
def
c_headers
(
self
):
try
:
o
=
super
(
GpuKernelBase
,
self
)
.
c_headers
()
o
=
super
()
.
c_headers
()
except
MethodNotDefined
:
o
=
[]
return
o
+
[
"gpuarray/types.h"
,
"numpy/npy_common.h"
]
def
c_header_dirs
(
self
):
try
:
o
=
super
(
GpuKernelBase
,
self
)
.
c_header_dirs
()
o
=
super
()
.
c_header_dirs
()
except
MethodNotDefined
:
o
=
[]
# We rely on the input types for the directory to gpuarray includes
...
...
@@ -577,7 +576,7 @@ class CGpuKernelBase(COp, GpuKernelBase):
kcode
=
split
[
n
+
1
]
splt2
=
kspec
.
split
(
":"
)
if
len
(
splt2
)
!=
3
:
raise
ValueError
(
"Bad kernel spec:
%
s"
%
(
kspec
,
))
raise
ValueError
(
"Bad kernel spec:
{}"
.
format
(
kspec
))
kname
=
splt2
[
0
]
.
strip
()
ktypes
=
[
get_dtype
(
s
.
strip
())
for
s
in
splt2
[
1
]
.
split
(
","
)]
kflags
=
splt2
[
2
]
.
strip
()
...
...
@@ -697,7 +696,7 @@ class GpuFromHost(Op):
self
.
context_name
=
context_name
def
__str__
(
self
):
return
"GpuFromHost<
%
s>"
%
(
self
.
context_name
,
)
return
"GpuFromHost<
{}>"
.
format
(
self
.
context_name
)
def
make_node
(
self
,
x
):
if
not
isinstance
(
x
.
type
,
tensor
.
TensorType
):
...
...
@@ -800,7 +799,7 @@ class GpuToGpu(Op):
self
.
context_name
=
context_name
def
__str__
(
self
):
return
"GpuToGpu<
%
s>"
%
(
self
.
context_name
,
)
return
"GpuToGpu<
{}>"
.
format
(
self
.
context_name
)
def
make_node
(
self
,
x
):
if
not
isinstance
(
x
.
type
,
GpuArrayType
):
...
...
@@ -893,7 +892,7 @@ class GpuAlloc(HideC, Alloc):
m
=
"{memset_0=True}"
else
:
m
=
""
return
"
%
s<
%
s>
%
s"
%
(
self
.
__class__
.
__name__
,
self
.
context_name
,
m
)
return
"
{}<{}>{}"
.
format
(
self
.
__class__
.
__name__
,
self
.
context_name
,
m
)
def
make_node
(
self
,
value
,
*
shape
):
value
=
as_gpuarray_variable
(
value
,
context_name
=
self
.
context_name
)
...
...
@@ -1436,7 +1435,7 @@ class GpuJoin(HideC, Join):
view
=
self
.
view
non_empty_tensor
=
tensors
[
view
]
for
i
,
inp
in
enumerate
(
tensors
):
copy_to_list
.
append
(
"als[
%
s] = &
%
s->ga;"
%
(
i
,
inp
))
copy_to_list
.
append
(
"als[
{}] = &{}->ga;"
.
format
(
i
,
inp
))
n
=
len
(
tensors
)
fail
=
sub
[
"fail"
]
...
...
@@ -1507,7 +1506,7 @@ class GpuSplit(HideC, Split):
_f16_ok
=
True
def
__init__
(
self
,
len_splits
):
super
(
GpuSplit
,
self
)
.
__init__
(
len_splits
)
super
()
.
__init__
(
len_splits
)
# The GPU version of Split returns splits as views of the input.
self
.
view_map
=
{}
for
i
in
range
(
self
.
len_splits
):
...
...
theano/gpuarray/blas.py
浏览文件 @
bcccce7a
from
six
import
integer_types
import
theano
from
theano
import
Apply
,
Op
from
theano.compile
import
optdb
...
...
@@ -446,7 +444,7 @@ class GpuGemmBatch(BlasOp):
return
Apply
(
self
,
[
C
,
alpha
,
A
,
B
,
beta
],
[
C
.
type
()])
def
c_headers
(
self
):
return
super
(
GpuGemmBatch
,
self
)
.
c_headers
()
+
[
"<gpuarray/blas.h>"
]
return
super
()
.
c_headers
()
+
[
"<gpuarray/blas.h>"
]
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
vars
=
dict
(
...
...
@@ -544,7 +542,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
num_groups
=
1
,
unshared
=
False
,
):
if
isinstance
(
border_mode
,
int
eger_types
):
if
isinstance
(
border_mode
,
int
):
if
border_mode
<
0
:
raise
ValueError
(
"invalid border_mode {}, which must be a "
...
...
@@ -595,7 +593,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
return
((
0
,
0
),)
*
2
def
__str__
(
self
):
return
"
%
s{
%
s,
%
s,
%
s,
%
s,
%
s}"
%
(
return
"
{}{{{}, {}, {}, {}, {}}}"
.
format
(
self
.
__class__
.
__name__
,
self
.
border_mode
,
str
(
self
.
subsample
),
...
...
@@ -1071,9 +1069,7 @@ class GpuCorrMM(BaseGpuCorrMM):
num_groups
=
1
,
unshared
=
False
,
):
super
(
GpuCorrMM
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
,
unshared
)
super
()
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
,
unshared
)
def
make_node
(
self
,
img
,
kern
):
ctx_name
=
infer_context_name
(
img
,
kern
)
...
...
@@ -1108,9 +1104,7 @@ class GpuCorrMM(BaseGpuCorrMM):
bottom
,
weights
=
inp
(
top
,)
=
out_
direction
=
"forward"
return
super
(
GpuCorrMM
,
self
)
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
)
return
super
()
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
)
def
grad
(
self
,
inp
,
grads
):
bottom
,
weights
=
inp
...
...
@@ -1152,9 +1146,7 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
num_groups
=
1
,
unshared
=
False
,
):
super
(
GpuCorrMM_gradWeights
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
,
unshared
)
super
()
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
,
unshared
)
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
ctx_name
=
infer_context_name
(
img
,
topgrad
)
...
...
@@ -1207,7 +1199,7 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
height
,
width
=
inp
[
2
:]
or
(
None
,
None
)
(
weights
,)
=
out_
direction
=
"backprop weights"
return
super
(
GpuCorrMM_gradWeights
,
self
)
.
c_code_helper
(
return
super
()
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
,
height
,
width
)
...
...
@@ -1260,9 +1252,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
num_groups
=
1
,
unshared
=
False
,
):
super
(
GpuCorrMM_gradInputs
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
,
unshared
)
super
()
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
,
unshared
)
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
ctx_name
=
infer_context_name
(
kern
,
topgrad
)
...
...
@@ -1311,7 +1301,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
height
,
width
=
inp
[
2
:]
or
(
None
,
None
)
(
bottom
,)
=
out_
direction
=
"backprop inputs"
return
super
(
GpuCorrMM_gradInputs
,
self
)
.
c_code_helper
(
return
super
()
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
,
height
,
width
)
...
...
@@ -1376,7 +1366,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
filter_dilation
=
(
1
,
1
,
1
),
num_groups
=
1
,
):
if
isinstance
(
border_mode
,
int
eger_types
):
if
isinstance
(
border_mode
,
int
):
border_mode
=
(
border_mode
,
border_mode
,
border_mode
)
if
isinstance
(
border_mode
,
tuple
):
pad_h
,
pad_w
,
pad_d
=
map
(
int
,
border_mode
)
...
...
@@ -1409,7 +1399,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
return
(
0
,
0
,
0
)
def
__str__
(
self
):
return
"
%
s{
%
s,
%
s,
%
s,
%
s}"
%
(
return
"
{}{{{}, {}, {}, {}}}"
.
format
(
self
.
__class__
.
__name__
,
self
.
border_mode
,
str
(
self
.
subsample
),
...
...
@@ -1842,9 +1832,7 @@ class GpuCorr3dMM(BaseGpuCorr3dMM):
filter_dilation
=
(
1
,
1
,
1
),
num_groups
=
1
,
):
super
(
GpuCorr3dMM
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
super
()
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
def
make_node
(
self
,
img
,
kern
):
ctx_name
=
infer_context_name
(
img
,
kern
)
...
...
@@ -1876,9 +1864,7 @@ class GpuCorr3dMM(BaseGpuCorr3dMM):
bottom
,
weights
=
inp
(
top
,)
=
out_
direction
=
"forward"
return
super
(
GpuCorr3dMM
,
self
)
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
)
return
super
()
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
)
def
grad
(
self
,
inp
,
grads
):
bottom
,
weights
=
inp
...
...
@@ -1911,9 +1897,7 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
filter_dilation
=
(
1
,
1
,
1
),
num_groups
=
1
,
):
super
(
GpuCorr3dMM_gradWeights
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
super
()
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
ctx_name
=
infer_context_name
(
img
,
topgrad
)
...
...
@@ -1958,7 +1942,7 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
height
,
width
,
depth
=
inp
[
2
:]
or
(
None
,
None
,
None
)
(
weights
,)
=
out_
direction
=
"backprop weights"
return
super
(
GpuCorr3dMM_gradWeights
,
self
)
.
c_code_helper
(
return
super
()
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
,
height
,
width
,
depth
)
...
...
@@ -2002,9 +1986,7 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
filter_dilation
=
(
1
,
1
,
1
),
num_groups
=
1
,
):
super
(
GpuCorr3dMM_gradInputs
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
super
()
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
ctx_name
=
infer_context_name
(
kern
,
topgrad
)
...
...
@@ -2051,7 +2033,7 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
height
,
width
,
depth
=
inp
[
2
:]
or
(
None
,
None
,
None
)
(
bottom
,)
=
out_
direction
=
"backprop inputs"
return
super
(
GpuCorr3dMM_gradInputs
,
self
)
.
c_code_helper
(
return
super
()
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
,
height
,
width
,
depth
)
...
...
theano/gpuarray/cudnn_defs.py
浏览文件 @
bcccce7a
...
...
@@ -48,7 +48,7 @@ def is_double_config(dtype, precision):
# exclude them from lists of supported algorithms.
class
CuDNNV51
(
object
)
:
class
CuDNNV51
:
version
=
5
cudnnConvolutionMode_t
=
CEnumType
(
...
...
@@ -319,7 +319,7 @@ class CuDNNV6(CuDNNV51):
)
def
fwd_algo_supports_dtype_config
(
self
,
algo
,
dtype
,
precision
,
ndim
):
is_supported
=
super
(
CuDNNV6
,
self
)
.
fwd_algo_supports_dtype_config
(
is_supported
=
super
()
.
fwd_algo_supports_dtype_config
(
algo
,
dtype
,
precision
,
ndim
)
if
not
is_supported
:
...
...
@@ -339,7 +339,7 @@ class CuDNNV6(CuDNNV51):
return
is_supported
def
bwd_filter_algo_supports_dtype_config
(
self
,
algo
,
dtype
,
precision
,
ndim
):
is_supported
=
super
(
CuDNNV6
,
self
)
.
bwd_filter_algo_supports_dtype_config
(
is_supported
=
super
()
.
bwd_filter_algo_supports_dtype_config
(
algo
,
dtype
,
precision
,
ndim
)
if
not
is_supported
:
...
...
@@ -354,7 +354,7 @@ class CuDNNV6(CuDNNV51):
return
is_supported
def
bwd_data_algo_supports_dtype_config
(
self
,
algo
,
dtype
,
precision
,
ndim
):
is_supported
=
super
(
CuDNNV6
,
self
)
.
bwd_data_algo_supports_dtype_config
(
is_supported
=
super
()
.
bwd_data_algo_supports_dtype_config
(
algo
,
dtype
,
precision
,
ndim
)
if
not
is_supported
:
...
...
theano/gpuarray/dnn.py
浏览文件 @
bcccce7a
...
...
@@ -5,7 +5,6 @@ import warnings
from
functools
import
reduce
import
numpy
as
np
from
six
import
integer_types
import
theano
import
theano.pathparse
...
...
@@ -155,19 +154,19 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
path_wrapper
=
'"'
if
os
.
name
==
"nt"
else
""
params
=
[
"-l"
,
"cudnn"
]
params
.
extend
(
[
"-I
%
s
%
s
%
s"
%
(
path_wrapper
,
gpuarray_helper_inc_dir
(),
path_wrapper
)]
[
"-I
{}{}{}"
.
format
(
path_wrapper
,
gpuarray_helper_inc_dir
(),
path_wrapper
)]
)
if
config
.
dnn
.
include_path
:
params
.
extend
(
[
"-I
%
s
%
s
%
s"
%
(
path_wrapper
,
config
.
dnn
.
include_path
,
path_wrapper
)]
[
"-I
{}{}{}"
.
format
(
path_wrapper
,
config
.
dnn
.
include_path
,
path_wrapper
)]
)
if
config
.
cuda
.
include_path
:
params
.
extend
(
[
"-I
%
s
%
s
%
s"
%
(
path_wrapper
,
config
.
cuda
.
include_path
,
path_wrapper
)]
[
"-I
{}{}{}"
.
format
(
path_wrapper
,
config
.
cuda
.
include_path
,
path_wrapper
)]
)
if
config
.
dnn
.
library_path
:
params
.
extend
(
[
"-L
%
s
%
s
%
s"
%
(
path_wrapper
,
config
.
dnn
.
library_path
,
path_wrapper
)]
[
"-L
{}{}{}"
.
format
(
path_wrapper
,
config
.
dnn
.
library_path
,
path_wrapper
)]
)
# Do not run here the test program. It would run on the
# default gpu, not the one selected by the user. If mixed
...
...
@@ -462,7 +461,7 @@ class DnnBase(COp):
return
[]
def
c_code_cache_version
(
self
):
return
(
super
(
DnnBase
,
self
)
.
c_code_cache_version
(),
version
(),
4
)
return
(
super
()
.
c_code_cache_version
(),
version
(),
4
)
class
GpuDnnConvDesc
(
COp
):
...
...
@@ -542,7 +541,7 @@ class GpuDnnConvDesc(COp):
if
version
()
<
6000
and
any
([
d
!=
1
for
d
in
dilation
]):
raise
RuntimeError
(
"Dilation > 1 not supported for cuDNN version < 6."
)
if
isinstance
(
border_mode
,
int
eger_types
):
if
isinstance
(
border_mode
,
int
):
border_mode
=
(
border_mode
,)
*
len
(
subsample
)
if
isinstance
(
border_mode
,
tuple
):
assert
len
(
border_mode
)
==
len
(
subsample
)
...
...
@@ -621,7 +620,7 @@ class GpuDnnConvDesc(COp):
nb_dims
=
property
(
lambda
self
:
len
(
self
.
subsample
))
def
c_code_cache_version
(
self
):
return
(
super
(
GpuDnnConvDesc
,
self
)
.
c_code_cache_version
(),
version
())
return
(
super
()
.
c_code_cache_version
(),
version
())
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
...
...
@@ -646,7 +645,7 @@ def ensure_dt(val, default, name, dtype):
if
hasattr
(
val
,
"ndim"
)
and
val
.
ndim
==
0
:
val
=
as_scalar
(
val
)
if
not
isinstance
(
val
.
type
,
theano
.
scalar
.
Scalar
):
raise
TypeError
(
"
%
s: expected a scalar value"
%
(
name
,
))
raise
TypeError
(
"
{}: expected a scalar value"
.
format
(
name
))
if
not
val
.
type
.
dtype
==
dtype
:
val
=
val
.
astype
(
dtype
)
return
val
...
...
@@ -2971,7 +2970,9 @@ class GpuDnnRNNOp(DnnBase):
elif
direction_mode
==
"unidirectional"
:
self
.
num_dirs
=
1
else
:
raise
ValueError
(
"direction_mode is invalid (got
%
s)"
%
(
direction_mode
,))
raise
ValueError
(
"direction_mode is invalid (got {})"
.
format
(
direction_mode
)
)
def
dnn_context
(
self
,
node
):
return
node
.
outputs
[
1
]
.
type
.
context_name
...
...
@@ -3114,7 +3115,7 @@ class GpuDnnRNNGradWeights(DnnBase):
return
Apply
(
self
,
inputs
,
outputs
)
class
RNNBlock
(
object
)
:
class
RNNBlock
:
"""
An object that allow us to use CuDNN RNN implementation.
TODO: make an example how to use. You can check Theano tests
...
...
theano/gpuarray/elemwise.py
浏览文件 @
bcccce7a
...
...
@@ -103,7 +103,7 @@ class GpuElemwise(HideC, Elemwise):
if
self
.
name
is
not
None
:
return
self
.
name
items
=
str
(
sorted
(
self
.
inplace_pattern
.
items
()))
return
"GpuElemwise{
%
s}
%
s<gpuarray>"
%
(
self
.
scalar_op
,
items
)
return
"GpuElemwise{
{{}}}{}<gpuarray>"
.
format
(
self
.
scalar_op
,
items
)
def
max_inputs
(
self
,
node_or_outputs
):
return
max_inputs_to_GpuElemwise
(
node_or_outputs
)
...
...
@@ -233,7 +233,7 @@ class GpuElemwise(HideC, Elemwise):
args[
%(n)
s].typecode =
%(typecode)
s;
args[
%(n)
s].flags = GE_READ;
"""
%
dict
(
n
=
n
,
name
=
'"
%
s"'
%
(
name
,
),
typecode
=
i
.
type
.
typecode
n
=
n
,
name
=
'"
{}"'
.
format
(
name
),
typecode
=
i
.
type
.
typecode
)
p
=
len
(
inps
)
...
...
@@ -249,7 +249,7 @@ class GpuElemwise(HideC, Elemwise):
args[
%(n)
s].typecode =
%(typecode)
s;
args[
%(n)
s].flags = GE_WRITE;
"""
%
dict
(
n
=
p
,
name
=
'"
%
s"'
%
(
outs
[
n
],
),
typecode
=
o
.
type
.
typecode
n
=
p
,
name
=
'"
{}"'
.
format
(
outs
[
n
]
),
typecode
=
o
.
type
.
typecode
)
p
+=
1
...
...
@@ -572,8 +572,8 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
pre
=
"pre=
%
s,red="
%
str
(
self
.
pre_scalar_op
)
ax
=
""
if
self
.
axis
is
not
None
:
ax
=
"{
%
s}"
%
(
", "
.
join
(
str
(
x
)
for
x
in
self
.
axis
),
)
return
"GpuCAReduceCuda{
%
s
%
s}
%
s"
%
(
pre
,
str
(
self
.
scalar_op
),
ax
)
ax
=
"{
{{}}}"
.
format
(
", "
.
join
(
str
(
x
)
for
x
in
self
.
axis
)
)
return
"GpuCAReduceCuda{
{{}{}}}{}"
.
format
(
pre
,
str
(
self
.
scalar_op
),
ax
)
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
...
...
@@ -585,7 +585,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
x
=
as_gpuarray_variable
(
x
,
infer_context_name
(
x
))
if
x
.
type
.
context
.
kind
!=
b
"cuda"
:
raise
TypeError
(
"GpuCAReduceCuda doesn't work for non-cuda devices"
)
ret
=
super
(
GpuCAReduceCuda
,
self
)
.
make_node
(
x
)
ret
=
super
()
.
make_node
(
x
)
self
=
copy
.
copy
(
self
)
self
.
axis
=
ret
.
op
.
axis
if
self
.
pre_scalar_op
:
...
...
@@ -3056,8 +3056,8 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
def
__str__
(
self
):
ax
=
""
if
self
.
axis
is
not
None
:
ax
=
"{
%
s}"
%
(
", "
.
join
(
str
(
x
)
for
x
in
self
.
axis
),
)
return
"GpuReduce{
%
s}
%
s"
%
(
self
.
scalar_op
,
ax
)
ax
=
"{
{{}}}"
.
format
(
", "
.
join
(
str
(
x
)
for
x
in
self
.
axis
)
)
return
"GpuReduce{
{{}}}{}"
.
format
(
self
.
scalar_op
,
ax
)
def
make_node
(
self
,
input
):
ctx_name
=
infer_context_name
(
input
)
...
...
theano/gpuarray/extra_ops.py
浏览文件 @
bcccce7a
...
...
@@ -73,7 +73,7 @@ class GpuCumOp(GpuKernelBase, Op):
)
if
self
.
axis
>=
x
.
ndim
or
self
.
axis
<
-
x
.
ndim
:
raise
ValueError
(
"axis(={
0
}) out of bounds"
.
format
(
self
.
axis
))
raise
ValueError
(
"axis(={}) out of bounds"
.
format
(
self
.
axis
))
return
Apply
(
self
,
[
x
],
[
x
.
type
()])
def
gpu_kernels
(
self
,
node
,
nodename
):
...
...
@@ -500,7 +500,7 @@ class GpuCumOp(GpuKernelBase, Op):
"""
%
locals
()
)
return
super
(
GpuCumOp
,
self
)
.
c_support_code_struct
(
node
,
nodename
)
+
code
return
super
()
.
c_support_code_struct
(
node
,
nodename
)
+
code
# GpuCumsumOp exists only to serve backward compatibility.
...
...
theano/gpuarray/kernel_codegen.py
浏览文件 @
bcccce7a
...
...
@@ -27,8 +27,7 @@ def nvcc_kernel(name, params, body):
def
flatbody
():
for
b
in
body
:
if
isinstance
(
b
,
(
list
,
tuple
)):
for
bb
in
b
:
yield
bb
yield
from
b
else
:
yield
b
...
...
@@ -94,8 +93,8 @@ def inline_reduce(N, buf, pos, count, manner_fn):
rest of the buffer is trashed by this function.
"""
loop_line
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[i]"
%
(
buf
))
r_n
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[
%
s+_n]"
%
(
buf
,
pos
))
loop_line
=
manner_fn
(
"
{}[{}]"
.
format
(
buf
,
pos
),
"
%
s[i]"
%
(
buf
))
r_n
=
manner_fn
(
"
{}[{}]"
.
format
(
buf
,
pos
),
"{}[{}+_n]"
.
format
(
buf
,
pos
))
return
(
"""
...
...
@@ -124,22 +123,22 @@ def inline_reduce(N, buf, pos, count, manner_fn):
@code_version
(
inline_reduce
.
code_version
)
def
inline_reduce_max
(
N
,
buf
,
pos
,
count
):
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"max(
%
s,
%
s)"
%
(
a
,
b
))
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"max(
{}, {})"
.
format
(
a
,
b
))
@code_version
(
inline_reduce
.
code_version
)
def
inline_reduce_sum
(
N
,
buf
,
pos
,
count
):
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"
%
s +
%
s"
%
(
a
,
b
))
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"
{} + {}"
.
format
(
a
,
b
))
@code_version
(
inline_reduce
.
code_version
)
def
inline_reduce_min
(
N
,
buf
,
pos
,
count
):
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"min(
%
s,
%
s)"
%
(
a
,
b
))
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"min(
{}, {})"
.
format
(
a
,
b
))
@code_version
(
inline_reduce
.
code_version
)
def
inline_reduce_prod
(
N
,
buf
,
pos
,
count
):
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"
%
s *
%
s"
%
(
a
,
b
))
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"
{} * {}"
.
format
(
a
,
b
))
@code_version
((
2
,)
+
inline_reduce_max
.
code_version
+
inline_reduce_sum
.
code_version
)
...
...
@@ -275,8 +274,8 @@ def inline_reduce_fixed_shared(
loop_line
=
manner_fn
(
"red"
,
manner_init
(
"
%(load_x)
s(
%(x)
s[i *
%(stride_x)
s])"
%
locals
())
)
loop_line2
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[i]"
%
buf
)
r_n
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[
%
s+_n]"
%
(
buf
,
pos
))
loop_line2
=
manner_fn
(
"
{}[{}]"
.
format
(
buf
,
pos
),
"
%
s[i]"
%
buf
)
r_n
=
manner_fn
(
"
{}[{}]"
.
format
(
buf
,
pos
),
"{}[{}+_n]"
.
format
(
buf
,
pos
))
ctype
=
gpuarray
.
dtype_to_ctype
(
dtype
)
return
(
...
...
@@ -331,7 +330,7 @@ def inline_reduce_fixed_shared_max(
load_x
,
pos
,
count
,
lambda
a
,
b
:
"max(
%
s,
%
s)"
%
(
a
,
b
),
lambda
a
,
b
:
"max(
{}, {})"
.
format
(
a
,
b
),
lambda
a
:
a
,
b
,
stride_b
,
...
...
@@ -431,7 +430,7 @@ def inline_softmax_fixed_shared(
load_x
,
threadPos
,
threadCount
,
lambda
a
,
b
:
"
%
s +
%
s"
%
(
a
,
b
),
lambda
a
,
b
:
"
{} + {}"
.
format
(
a
,
b
),
lambda
a
:
"exp(
%
s - row_max)"
%
a
,
b
,
stride_b
,
...
...
theano/gpuarray/linalg.py
浏览文件 @
bcccce7a
...
...
@@ -137,7 +137,7 @@ class GpuCusolverSolve(Op):
if
self
.
inplace
:
self
.
destroy_map
=
{
0
:
[
0
]}
assert
A_structure
in
MATRIX_STRUCTURES_SOLVE
super
(
GpuCusolverSolve
,
self
)
.
__init__
()
super
()
.
__init__
()
def
make_node
(
self
,
inp1
,
inp2
):
if
not
cusolver_available
:
...
...
@@ -358,7 +358,7 @@ class GpuCublasTriangularSolve(Op):
def
__init__
(
self
,
lower
=
True
,
trans
=
"N"
):
self
.
trans
=
trans
self
.
lower
=
lower
super
(
GpuCublasTriangularSolve
,
self
)
.
__init__
()
super
()
.
__init__
()
def
make_node
(
self
,
inp1
,
inp2
):
if
not
cublas_available
:
...
...
@@ -541,7 +541,7 @@ class GpuCholesky(Op):
self
.
inplace
=
inplace
if
self
.
inplace
:
self
.
destroy_map
=
{
0
:
[
0
]}
super
(
GpuCholesky
,
self
)
.
__init__
()
super
()
.
__init__
()
def
clone_inplace
(
self
):
return
self
.
__class__
(
lower
=
self
.
lower
,
inplace
=
True
)
...
...
@@ -788,7 +788,7 @@ class GpuMagmaSVD(GpuMagmaBase):
)
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
super
(
GpuMagmaSVD
,
self
)
.
prepare_node
(
node
,
storage_map
,
compute_map
,
impl
)
super
()
.
prepare_node
(
node
,
storage_map
,
compute_map
,
impl
)
# Check node to prevent eventual errors with old pickled nodes.
if
self
.
compute_uv
:
A
,
B
,
C
=
node
.
outputs
...
...
theano/gpuarray/multinomial.py
浏览文件 @
bcccce7a
...
...
@@ -534,4 +534,4 @@ class GPUAMultinomialWOReplacementFromUniform(GPUAChoiceFromUniform):
DeprecationWarning
,
stacklevel
=
2
,
)
super
(
GPUAMultinomialWOReplacementFromUniform
,
self
)
.
__init__
(
*
args
,
**
kwargs
)
super
()
.
__init__
(
*
args
,
**
kwargs
)
theano/gpuarray/opt.py
浏览文件 @
bcccce7a
...
...
@@ -484,7 +484,7 @@ class GraphToGPU(Optimizer):
for
(
t
,
o
)
in
not_used
[::
-
1
]:
if
t
>
0
:
# Skip opt that have 0 times, they probably wasn't even tried.
print
(
blanc
+
" "
,
"
%.3
fs -
%
s"
%
(
t
,
o
),
file
=
stream
)
print
(
blanc
+
" "
,
"
{:.3f}s - {}"
.
format
(
t
,
o
),
file
=
stream
)
print
(
file
=
stream
)
@staticmethod
...
...
@@ -2182,7 +2182,7 @@ def local_abstractconv3d_gradinputs_gemm_alt(node):
class
ConvMetaOptimizer
(
LocalMetaOptimizer
):
def
__init__
(
self
):
super
(
ConvMetaOptimizer
,
self
)
.
__init__
()
super
()
.
__init__
()
def
time_call
(
self
,
fn
):
start
=
time
.
time
()
...
...
theano/gpuarray/rng_mrg.py
浏览文件 @
bcccce7a
...
...
@@ -59,7 +59,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
return
op
(
rstate
,
v_size
)
def
c_headers
(
self
):
return
super
(
GPUA_mrg_uniform
,
self
)
.
c_headers
()
+
[
"numpy_compat.h"
]
return
super
()
.
c_headers
()
+
[
"numpy_compat.h"
]
def
gpu_kernels
(
self
,
node
,
name
):
write
=
write_w
(
self
.
output_type
.
dtype
)
...
...
theano/gpuarray/sort.py
浏览文件 @
bcccce7a
...
...
@@ -225,7 +225,7 @@ class GpuTopKOp(GpuKernelBase, TopKOp):
prep_output
=
""
if
self
.
return_values
:
def_dvstrides
=
"const ssize_t *dvstrides = PyGpuArray_STRIDES(
%
s)"
%
yv
params_dv
=
"
%
s->ga.data,
%
s->ga.offset,
\n
"
%
(
yv
,
yv
)
params_dv
=
"
{}->ga.data, {}->ga.offset,
\n
"
.
format
(
yv
,
yv
)
params_dv
+=
""
.
join
(
"dvstrides[
%
d], "
%
i
for
i
in
reordered_axes
)
prep_output
+=
(
"""
...
...
@@ -241,7 +241,7 @@ class GpuTopKOp(GpuKernelBase, TopKOp):
if
self
.
return_indices
:
def_distrides
=
"const ssize_t *distrides = PyGpuArray_STRIDES(
%
s)"
%
yi
params_di
=
"
%
s->ga.data,
%
s->ga.offset,
\n
"
%
(
yi
,
yi
)
params_di
=
"
{}->ga.data, {}->ga.offset,
\n
"
.
format
(
yi
,
yi
)
params_di
+=
""
.
join
(
"distrides[
%
d], "
%
i
for
i
in
reordered_axes
)
prep_output
+=
(
"""
...
...
theano/gpuarray/subtensor.py
浏览文件 @
bcccce7a
import
numpy
as
np
from
six
import
integer_types
from
six.moves
import
StringIO
import
theano.tensor
as
tt
...
...
@@ -157,7 +156,7 @@ class GpuSubtensor(HideC, Subtensor):
def
fix_idx
(
idx
):
if
idx
is
None
:
return
"0"
,
1
elif
isinstance
(
idx
,
(
np
.
integer
,
int
eger_types
)):
elif
isinstance
(
idx
,
(
np
.
integer
,
int
)):
return
str
(
idx
),
0
elif
isinstance
(
idx
,
gof
.
Type
):
return
indices
.
pop
(
0
),
0
...
...
@@ -196,7 +195,7 @@ class GpuSubtensor(HideC, Subtensor):
else
:
if
isinstance
(
idx
,
gof
.
Type
):
start
=
indices
.
pop
(
0
)
elif
isinstance
(
idx
,
(
np
.
integer
,
int
eger_types
)):
elif
isinstance
(
idx
,
(
np
.
integer
,
int
)):
start
=
idx
else
:
assert
0
,
idx
...
...
@@ -454,7 +453,7 @@ int sub_setarray(GpuArray *dst, GpuArray *src) {
)
def
c_code_cache_version
(
self
):
parent_version
=
super
(
GpuIncSubtensor
,
self
)
.
c_code_cache_version
()
parent_version
=
super
()
.
c_code_cache_version
()
if
not
parent_version
:
return
return
parent_version
+
(
10
,)
...
...
@@ -576,7 +575,7 @@ def check_and_convert_boolean_masks(input, idx_list):
return
out_idx_list
class
BaseGpuAdvancedSubtensor
(
object
)
:
class
BaseGpuAdvancedSubtensor
:
def
perform
(
self
,
node
,
inputs
,
out_
):
(
out
,)
=
out_
x
=
inputs
[
0
]
...
...
@@ -703,7 +702,7 @@ class GpuAdvancedSubtensor(HideC, BaseGpuAdvancedSubtensor, AdvancedSubtensor):
return
gof
.
Apply
(
self
,
[
x
]
+
rval
.
inputs
[
1
:],
[
otype
()])
class
BaseGpuAdvancedIncSubtensor
(
object
)
:
class
BaseGpuAdvancedIncSubtensor
:
def
perform
(
self
,
node
,
inp
,
out_
):
(
out
,)
=
out_
x
=
inp
[
0
]
...
...
@@ -1133,7 +1132,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC, GpuAdvancedIncSubtens
return
gof
.
Apply
(
self
,
[
x_
,
y_
,
ilist_
],
[
x_
.
type
()])
def
perform
(
self
,
node
,
inp
,
out
,
params
):
return
super
(
GpuAdvancedIncSubtensor1_dev20
,
self
)
.
perform
(
node
,
inp
,
out
)
return
super
()
.
perform
(
node
,
inp
,
out
)
def
c_code_cache_version
(
self
):
return
(
14
,)
...
...
@@ -1269,9 +1268,7 @@ if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(params)s->set_instead_of
def
c_support_code_struct
(
self
,
node
,
nodename
):
return
(
super
(
GpuAdvancedIncSubtensor1_dev20
,
self
)
.
c_support_code_struct
(
node
,
nodename
)
super
()
.
c_support_code_struct
(
node
,
nodename
)
+
"""
int GpuArray_vector_add_fast(PyGpuArrayObject* py_self,
PyGpuArrayObject* py_other,
...
...
theano/gpuarray/type.py
浏览文件 @
bcccce7a
...
...
@@ -80,7 +80,7 @@ def reg_context(name, ctx):
"""
if
name
in
_context_reg
:
raise
ValueError
(
"context name
%
s is already defined"
%
(
name
,
))
raise
ValueError
(
"context name
{} is already defined"
.
format
(
name
))
if
not
isinstance
(
ctx
,
gpuarray
.
GpuContext
):
raise
TypeError
(
"context is not GpuContext"
)
_context_reg
[
name
]
=
ctx
...
...
@@ -101,7 +101,7 @@ def get_context(name):
"""
if
name
not
in
_context_reg
:
raise
ContextNotDefined
(
"context name
%
s not defined"
%
(
name
,
))
raise
ContextNotDefined
(
"context name
{} not defined"
.
format
(
name
))
return
_context_reg
[
name
]
...
...
@@ -189,7 +189,9 @@ class GpuArrayType(Type):
self
.
typecode
=
gpuarray
.
dtype_to_typecode
(
self
.
dtype
)
except
gpuarray
.
GpuArrayException
:
raise
TypeError
(
"Unsupported dtype for
%
s:
%
s"
%
(
self
.
__class__
.
__name__
,
self
.
dtype
)
"Unsupported dtype for {}: {}"
.
format
(
self
.
__class__
.
__name__
,
self
.
dtype
)
)
def
clone
(
self
,
dtype
=
None
,
broadcastable
=
None
):
...
...
@@ -233,7 +235,9 @@ class GpuArrayType(Type):
bcast
=
str
(
b
)
else
:
bcast
=
"
%
iD"
%
len
(
b
)
return
"GpuArrayType<
%
s>(
%
s,
%
s)"
%
(
self
.
context_name
,
self
.
dtype
,
bcast
)
return
"GpuArrayType<{}>({}, {})"
.
format
(
self
.
context_name
,
self
.
dtype
,
bcast
)
def
filter
(
self
,
data
,
strict
=
False
,
allow_downcast
=
None
):
return
self
.
filter_inplace
(
...
...
@@ -450,7 +454,9 @@ class GpuArrayType(Type):
}[
self
.
dtype
]
except
KeyError
:
raise
TypeError
(
"Unsupported dtype for
%
s:
%
s"
%
(
self
.
__class__
.
__name__
,
self
.
dtype
)
"Unsupported dtype for {}: {}"
.
format
(
self
.
__class__
.
__name__
,
self
.
dtype
)
)
def
get_shape_info
(
self
,
obj
):
...
...
@@ -474,7 +480,7 @@ class GpuArrayType(Type):
)
def
c_init
(
self
,
name
,
sub
):
return
"
%
s = NULL;"
%
(
name
,
)
return
"
{} = NULL;"
.
format
(
name
)
def
c_extract
(
self
,
name
,
sub
,
check_input
=
True
):
# TODO I don't check broadcast stuff for now.
...
...
@@ -499,7 +505,7 @@ class GpuArrayType(Type):
}
def
c_cleanup
(
self
,
name
,
sub
):
return
"Py_XDECREF(
%(name)
s);
%(name)
s = NULL;"
%
{
"name"
:
name
}
return
"Py_XDECREF(
{name}); {name} = NULL;"
.
format
(
name
=
name
)
def
c_sync
(
self
,
name
,
sub
):
return
"""
...
...
@@ -914,10 +920,10 @@ class GpuContextType(Type):
return
a
==
b
def
c_declare
(
self
,
name
,
sub
,
check_input
=
True
):
return
"PyGpuContextObject *
%
s;"
%
(
name
,
)
return
"PyGpuContextObject *
{};"
.
format
(
name
)
def
c_init
(
self
,
name
,
sub
):
return
"
%
s = NULL;"
%
(
name
,
)
return
"
{} = NULL;"
.
format
(
name
)
def
c_extract
(
self
,
name
,
sub
,
check_input
=
True
):
if
check_input
:
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论