Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
bcccce7a
提交
bcccce7a
authored
10月 21, 2020
作者:
Brandon T. Willard
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Apply pyupgrade to theano.gpuarray
上级
2e3f17cb
隐藏空白字符变更
内嵌
并排
正在显示
15 个修改的文件
包含
100 行增加
和
116 行删除
+100
-116
__init__.py
theano/gpuarray/__init__.py
+1
-1
basic_ops.py
theano/gpuarray/basic_ops.py
+13
-14
blas.py
theano/gpuarray/blas.py
+17
-35
cudnn_defs.py
theano/gpuarray/cudnn_defs.py
+4
-4
dnn.py
theano/gpuarray/dnn.py
+12
-11
elemwise.py
theano/gpuarray/elemwise.py
+8
-8
extra_ops.py
theano/gpuarray/extra_ops.py
+2
-2
kernel_codegen.py
theano/gpuarray/kernel_codegen.py
+11
-12
linalg.py
theano/gpuarray/linalg.py
+4
-4
multinomial.py
theano/gpuarray/multinomial.py
+1
-1
opt.py
theano/gpuarray/opt.py
+2
-2
rng_mrg.py
theano/gpuarray/rng_mrg.py
+1
-1
sort.py
theano/gpuarray/sort.py
+2
-2
subtensor.py
theano/gpuarray/subtensor.py
+7
-10
type.py
theano/gpuarray/type.py
+15
-9
没有找到文件。
theano/gpuarray/__init__.py
浏览文件 @
bcccce7a
...
@@ -152,7 +152,7 @@ def init_dev(dev, name=None, preallocate=None):
...
@@ -152,7 +152,7 @@ def init_dev(dev, name=None, preallocate=None):
file
=
sys
.
stderr
,
file
=
sys
.
stderr
,
)
)
if
preallocate
<
0
:
if
preallocate
<
0
:
print
(
"Disabling allocation cache on
%
s"
%
(
dev
,
))
print
(
"Disabling allocation cache on
{}"
.
format
(
dev
))
elif
preallocate
>
0
:
elif
preallocate
>
0
:
if
preallocate
<=
1
:
if
preallocate
<=
1
:
gmem
=
min
(
preallocate
,
0.95
)
*
context
.
total_gmem
gmem
=
min
(
preallocate
,
0.95
)
*
context
.
total_gmem
...
...
theano/gpuarray/basic_ops.py
浏览文件 @
bcccce7a
...
@@ -4,7 +4,6 @@ import re
...
@@ -4,7 +4,6 @@ import re
from
collections
import
deque
from
collections
import
deque
import
numpy
as
np
import
numpy
as
np
from
six
import
string_types
import
theano
import
theano
from
theano
import
Apply
,
Op
,
Type
,
Variable
,
config
,
tensor
from
theano
import
Apply
,
Op
,
Type
,
Variable
,
config
,
tensor
...
@@ -129,7 +128,7 @@ def gpuarray_helper_inc_dir():
...
@@ -129,7 +128,7 @@ def gpuarray_helper_inc_dir():
return
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"c_code"
)
return
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"c_code"
)
class
Kernel
(
object
)
:
class
Kernel
:
"""
"""
This class groups together all the attributes of a gpu kernel.
This class groups together all the attributes of a gpu kernel.
...
@@ -214,14 +213,14 @@ class Kernel(object):
...
@@ -214,14 +213,14 @@ class Kernel(object):
@staticmethod
@staticmethod
def
get_flags
(
*
types
):
def
get_flags
(
*
types
):
def
get_dtype
(
t
):
def
get_dtype
(
t
):
if
isinstance
(
t
,
str
ing_types
):
if
isinstance
(
t
,
str
):
return
np
.
dtype
(
t
)
return
np
.
dtype
(
t
)
elif
isinstance
(
t
,
Type
):
elif
isinstance
(
t
,
Type
):
return
t
.
dtype
return
t
.
dtype
elif
isinstance
(
t
,
Variable
):
elif
isinstance
(
t
,
Variable
):
return
t
.
type
.
dtype
return
t
.
type
.
dtype
else
:
else
:
raise
TypeError
(
"can't get a dtype from
%
s"
%
(
type
(
t
),
))
raise
TypeError
(
"can't get a dtype from
{}"
.
format
(
type
(
t
)
))
dtypes
=
[
get_dtype
(
t
)
for
t
in
types
]
dtypes
=
[
get_dtype
(
t
)
for
t
in
types
]
flags
=
dict
()
flags
=
dict
()
...
@@ -291,7 +290,7 @@ def get_ctype(dtype):
...
@@ -291,7 +290,7 @@ def get_ctype(dtype):
return
"npy_"
+
dtype
.
name
return
"npy_"
+
dtype
.
name
class
GpuKernelBase
(
object
)
:
class
GpuKernelBase
:
"""
"""
Base class for operations that need to compile kernels.
Base class for operations that need to compile kernels.
...
@@ -334,7 +333,7 @@ class GpuKernelBase(object):
...
@@ -334,7 +333,7 @@ class GpuKernelBase(object):
if
isinstance
(
self
.
params_type
,
ParamsType
)
and
self
.
params_type
.
has_type
(
if
isinstance
(
self
.
params_type
,
ParamsType
)
and
self
.
params_type
.
has_type
(
gpu_context_type
gpu_context_type
):
):
return
"(
%
s->
%
s)"
%
(
return
"(
{}->{})"
.
format
(
params_c_name
,
params_c_name
,
self
.
params_type
.
get_field
(
gpu_context_type
),
self
.
params_type
.
get_field
(
gpu_context_type
),
)
)
...
@@ -351,14 +350,14 @@ class GpuKernelBase(object):
...
@@ -351,14 +350,14 @@ class GpuKernelBase(object):
def
c_headers
(
self
):
def
c_headers
(
self
):
try
:
try
:
o
=
super
(
GpuKernelBase
,
self
)
.
c_headers
()
o
=
super
()
.
c_headers
()
except
MethodNotDefined
:
except
MethodNotDefined
:
o
=
[]
o
=
[]
return
o
+
[
"gpuarray/types.h"
,
"numpy/npy_common.h"
]
return
o
+
[
"gpuarray/types.h"
,
"numpy/npy_common.h"
]
def
c_header_dirs
(
self
):
def
c_header_dirs
(
self
):
try
:
try
:
o
=
super
(
GpuKernelBase
,
self
)
.
c_header_dirs
()
o
=
super
()
.
c_header_dirs
()
except
MethodNotDefined
:
except
MethodNotDefined
:
o
=
[]
o
=
[]
# We rely on the input types for the directory to gpuarray includes
# We rely on the input types for the directory to gpuarray includes
...
@@ -577,7 +576,7 @@ class CGpuKernelBase(COp, GpuKernelBase):
...
@@ -577,7 +576,7 @@ class CGpuKernelBase(COp, GpuKernelBase):
kcode
=
split
[
n
+
1
]
kcode
=
split
[
n
+
1
]
splt2
=
kspec
.
split
(
":"
)
splt2
=
kspec
.
split
(
":"
)
if
len
(
splt2
)
!=
3
:
if
len
(
splt2
)
!=
3
:
raise
ValueError
(
"Bad kernel spec:
%
s"
%
(
kspec
,
))
raise
ValueError
(
"Bad kernel spec:
{}"
.
format
(
kspec
))
kname
=
splt2
[
0
]
.
strip
()
kname
=
splt2
[
0
]
.
strip
()
ktypes
=
[
get_dtype
(
s
.
strip
())
for
s
in
splt2
[
1
]
.
split
(
","
)]
ktypes
=
[
get_dtype
(
s
.
strip
())
for
s
in
splt2
[
1
]
.
split
(
","
)]
kflags
=
splt2
[
2
]
.
strip
()
kflags
=
splt2
[
2
]
.
strip
()
...
@@ -697,7 +696,7 @@ class GpuFromHost(Op):
...
@@ -697,7 +696,7 @@ class GpuFromHost(Op):
self
.
context_name
=
context_name
self
.
context_name
=
context_name
def
__str__
(
self
):
def
__str__
(
self
):
return
"GpuFromHost<
%
s>"
%
(
self
.
context_name
,
)
return
"GpuFromHost<
{}>"
.
format
(
self
.
context_name
)
def
make_node
(
self
,
x
):
def
make_node
(
self
,
x
):
if
not
isinstance
(
x
.
type
,
tensor
.
TensorType
):
if
not
isinstance
(
x
.
type
,
tensor
.
TensorType
):
...
@@ -800,7 +799,7 @@ class GpuToGpu(Op):
...
@@ -800,7 +799,7 @@ class GpuToGpu(Op):
self
.
context_name
=
context_name
self
.
context_name
=
context_name
def
__str__
(
self
):
def
__str__
(
self
):
return
"GpuToGpu<
%
s>"
%
(
self
.
context_name
,
)
return
"GpuToGpu<
{}>"
.
format
(
self
.
context_name
)
def
make_node
(
self
,
x
):
def
make_node
(
self
,
x
):
if
not
isinstance
(
x
.
type
,
GpuArrayType
):
if
not
isinstance
(
x
.
type
,
GpuArrayType
):
...
@@ -893,7 +892,7 @@ class GpuAlloc(HideC, Alloc):
...
@@ -893,7 +892,7 @@ class GpuAlloc(HideC, Alloc):
m
=
"{memset_0=True}"
m
=
"{memset_0=True}"
else
:
else
:
m
=
""
m
=
""
return
"
%
s<
%
s>
%
s"
%
(
self
.
__class__
.
__name__
,
self
.
context_name
,
m
)
return
"
{}<{}>{}"
.
format
(
self
.
__class__
.
__name__
,
self
.
context_name
,
m
)
def
make_node
(
self
,
value
,
*
shape
):
def
make_node
(
self
,
value
,
*
shape
):
value
=
as_gpuarray_variable
(
value
,
context_name
=
self
.
context_name
)
value
=
as_gpuarray_variable
(
value
,
context_name
=
self
.
context_name
)
...
@@ -1436,7 +1435,7 @@ class GpuJoin(HideC, Join):
...
@@ -1436,7 +1435,7 @@ class GpuJoin(HideC, Join):
view
=
self
.
view
view
=
self
.
view
non_empty_tensor
=
tensors
[
view
]
non_empty_tensor
=
tensors
[
view
]
for
i
,
inp
in
enumerate
(
tensors
):
for
i
,
inp
in
enumerate
(
tensors
):
copy_to_list
.
append
(
"als[
%
s] = &
%
s->ga;"
%
(
i
,
inp
))
copy_to_list
.
append
(
"als[
{}] = &{}->ga;"
.
format
(
i
,
inp
))
n
=
len
(
tensors
)
n
=
len
(
tensors
)
fail
=
sub
[
"fail"
]
fail
=
sub
[
"fail"
]
...
@@ -1507,7 +1506,7 @@ class GpuSplit(HideC, Split):
...
@@ -1507,7 +1506,7 @@ class GpuSplit(HideC, Split):
_f16_ok
=
True
_f16_ok
=
True
def
__init__
(
self
,
len_splits
):
def
__init__
(
self
,
len_splits
):
super
(
GpuSplit
,
self
)
.
__init__
(
len_splits
)
super
()
.
__init__
(
len_splits
)
# The GPU version of Split returns splits as views of the input.
# The GPU version of Split returns splits as views of the input.
self
.
view_map
=
{}
self
.
view_map
=
{}
for
i
in
range
(
self
.
len_splits
):
for
i
in
range
(
self
.
len_splits
):
...
...
theano/gpuarray/blas.py
浏览文件 @
bcccce7a
from
six
import
integer_types
import
theano
import
theano
from
theano
import
Apply
,
Op
from
theano
import
Apply
,
Op
from
theano.compile
import
optdb
from
theano.compile
import
optdb
...
@@ -446,7 +444,7 @@ class GpuGemmBatch(BlasOp):
...
@@ -446,7 +444,7 @@ class GpuGemmBatch(BlasOp):
return
Apply
(
self
,
[
C
,
alpha
,
A
,
B
,
beta
],
[
C
.
type
()])
return
Apply
(
self
,
[
C
,
alpha
,
A
,
B
,
beta
],
[
C
.
type
()])
def
c_headers
(
self
):
def
c_headers
(
self
):
return
super
(
GpuGemmBatch
,
self
)
.
c_headers
()
+
[
"<gpuarray/blas.h>"
]
return
super
()
.
c_headers
()
+
[
"<gpuarray/blas.h>"
]
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
vars
=
dict
(
vars
=
dict
(
...
@@ -544,7 +542,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -544,7 +542,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
num_groups
=
1
,
num_groups
=
1
,
unshared
=
False
,
unshared
=
False
,
):
):
if
isinstance
(
border_mode
,
int
eger_types
):
if
isinstance
(
border_mode
,
int
):
if
border_mode
<
0
:
if
border_mode
<
0
:
raise
ValueError
(
raise
ValueError
(
"invalid border_mode {}, which must be a "
"invalid border_mode {}, which must be a "
...
@@ -595,7 +593,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -595,7 +593,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
return
((
0
,
0
),)
*
2
return
((
0
,
0
),)
*
2
def
__str__
(
self
):
def
__str__
(
self
):
return
"
%
s{
%
s,
%
s,
%
s,
%
s,
%
s}"
%
(
return
"
{}{{{}, {}, {}, {}, {}}}"
.
format
(
self
.
__class__
.
__name__
,
self
.
__class__
.
__name__
,
self
.
border_mode
,
self
.
border_mode
,
str
(
self
.
subsample
),
str
(
self
.
subsample
),
...
@@ -1071,9 +1069,7 @@ class GpuCorrMM(BaseGpuCorrMM):
...
@@ -1071,9 +1069,7 @@ class GpuCorrMM(BaseGpuCorrMM):
num_groups
=
1
,
num_groups
=
1
,
unshared
=
False
,
unshared
=
False
,
):
):
super
(
GpuCorrMM
,
self
)
.
__init__
(
super
()
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
,
unshared
)
border_mode
,
subsample
,
filter_dilation
,
num_groups
,
unshared
)
def
make_node
(
self
,
img
,
kern
):
def
make_node
(
self
,
img
,
kern
):
ctx_name
=
infer_context_name
(
img
,
kern
)
ctx_name
=
infer_context_name
(
img
,
kern
)
...
@@ -1108,9 +1104,7 @@ class GpuCorrMM(BaseGpuCorrMM):
...
@@ -1108,9 +1104,7 @@ class GpuCorrMM(BaseGpuCorrMM):
bottom
,
weights
=
inp
bottom
,
weights
=
inp
(
top
,)
=
out_
(
top
,)
=
out_
direction
=
"forward"
direction
=
"forward"
return
super
(
GpuCorrMM
,
self
)
.
c_code_helper
(
return
super
()
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
)
bottom
,
weights
,
top
,
direction
,
sub
)
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
bottom
,
weights
=
inp
bottom
,
weights
=
inp
...
@@ -1152,9 +1146,7 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
...
@@ -1152,9 +1146,7 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
num_groups
=
1
,
num_groups
=
1
,
unshared
=
False
,
unshared
=
False
,
):
):
super
(
GpuCorrMM_gradWeights
,
self
)
.
__init__
(
super
()
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
,
unshared
)
border_mode
,
subsample
,
filter_dilation
,
num_groups
,
unshared
)
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
ctx_name
=
infer_context_name
(
img
,
topgrad
)
ctx_name
=
infer_context_name
(
img
,
topgrad
)
...
@@ -1207,7 +1199,7 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
...
@@ -1207,7 +1199,7 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
height
,
width
=
inp
[
2
:]
or
(
None
,
None
)
height
,
width
=
inp
[
2
:]
or
(
None
,
None
)
(
weights
,)
=
out_
(
weights
,)
=
out_
direction
=
"backprop weights"
direction
=
"backprop weights"
return
super
(
GpuCorrMM_gradWeights
,
self
)
.
c_code_helper
(
return
super
()
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
,
height
,
width
bottom
,
weights
,
top
,
direction
,
sub
,
height
,
width
)
)
...
@@ -1260,9 +1252,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
...
@@ -1260,9 +1252,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
num_groups
=
1
,
num_groups
=
1
,
unshared
=
False
,
unshared
=
False
,
):
):
super
(
GpuCorrMM_gradInputs
,
self
)
.
__init__
(
super
()
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
,
unshared
)
border_mode
,
subsample
,
filter_dilation
,
num_groups
,
unshared
)
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
ctx_name
=
infer_context_name
(
kern
,
topgrad
)
ctx_name
=
infer_context_name
(
kern
,
topgrad
)
...
@@ -1311,7 +1301,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
...
@@ -1311,7 +1301,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
height
,
width
=
inp
[
2
:]
or
(
None
,
None
)
height
,
width
=
inp
[
2
:]
or
(
None
,
None
)
(
bottom
,)
=
out_
(
bottom
,)
=
out_
direction
=
"backprop inputs"
direction
=
"backprop inputs"
return
super
(
GpuCorrMM_gradInputs
,
self
)
.
c_code_helper
(
return
super
()
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
,
height
,
width
bottom
,
weights
,
top
,
direction
,
sub
,
height
,
width
)
)
...
@@ -1376,7 +1366,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
...
@@ -1376,7 +1366,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
filter_dilation
=
(
1
,
1
,
1
),
filter_dilation
=
(
1
,
1
,
1
),
num_groups
=
1
,
num_groups
=
1
,
):
):
if
isinstance
(
border_mode
,
int
eger_types
):
if
isinstance
(
border_mode
,
int
):
border_mode
=
(
border_mode
,
border_mode
,
border_mode
)
border_mode
=
(
border_mode
,
border_mode
,
border_mode
)
if
isinstance
(
border_mode
,
tuple
):
if
isinstance
(
border_mode
,
tuple
):
pad_h
,
pad_w
,
pad_d
=
map
(
int
,
border_mode
)
pad_h
,
pad_w
,
pad_d
=
map
(
int
,
border_mode
)
...
@@ -1409,7 +1399,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
...
@@ -1409,7 +1399,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
return
(
0
,
0
,
0
)
return
(
0
,
0
,
0
)
def
__str__
(
self
):
def
__str__
(
self
):
return
"
%
s{
%
s,
%
s,
%
s,
%
s}"
%
(
return
"
{}{{{}, {}, {}, {}}}"
.
format
(
self
.
__class__
.
__name__
,
self
.
__class__
.
__name__
,
self
.
border_mode
,
self
.
border_mode
,
str
(
self
.
subsample
),
str
(
self
.
subsample
),
...
@@ -1842,9 +1832,7 @@ class GpuCorr3dMM(BaseGpuCorr3dMM):
...
@@ -1842,9 +1832,7 @@ class GpuCorr3dMM(BaseGpuCorr3dMM):
filter_dilation
=
(
1
,
1
,
1
),
filter_dilation
=
(
1
,
1
,
1
),
num_groups
=
1
,
num_groups
=
1
,
):
):
super
(
GpuCorr3dMM
,
self
)
.
__init__
(
super
()
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
def
make_node
(
self
,
img
,
kern
):
def
make_node
(
self
,
img
,
kern
):
ctx_name
=
infer_context_name
(
img
,
kern
)
ctx_name
=
infer_context_name
(
img
,
kern
)
...
@@ -1876,9 +1864,7 @@ class GpuCorr3dMM(BaseGpuCorr3dMM):
...
@@ -1876,9 +1864,7 @@ class GpuCorr3dMM(BaseGpuCorr3dMM):
bottom
,
weights
=
inp
bottom
,
weights
=
inp
(
top
,)
=
out_
(
top
,)
=
out_
direction
=
"forward"
direction
=
"forward"
return
super
(
GpuCorr3dMM
,
self
)
.
c_code_helper
(
return
super
()
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
)
bottom
,
weights
,
top
,
direction
,
sub
)
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
bottom
,
weights
=
inp
bottom
,
weights
=
inp
...
@@ -1911,9 +1897,7 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
...
@@ -1911,9 +1897,7 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
filter_dilation
=
(
1
,
1
,
1
),
filter_dilation
=
(
1
,
1
,
1
),
num_groups
=
1
,
num_groups
=
1
,
):
):
super
(
GpuCorr3dMM_gradWeights
,
self
)
.
__init__
(
super
()
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
ctx_name
=
infer_context_name
(
img
,
topgrad
)
ctx_name
=
infer_context_name
(
img
,
topgrad
)
...
@@ -1958,7 +1942,7 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
...
@@ -1958,7 +1942,7 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
height
,
width
,
depth
=
inp
[
2
:]
or
(
None
,
None
,
None
)
height
,
width
,
depth
=
inp
[
2
:]
or
(
None
,
None
,
None
)
(
weights
,)
=
out_
(
weights
,)
=
out_
direction
=
"backprop weights"
direction
=
"backprop weights"
return
super
(
GpuCorr3dMM_gradWeights
,
self
)
.
c_code_helper
(
return
super
()
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
,
height
,
width
,
depth
bottom
,
weights
,
top
,
direction
,
sub
,
height
,
width
,
depth
)
)
...
@@ -2002,9 +1986,7 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
...
@@ -2002,9 +1986,7 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
filter_dilation
=
(
1
,
1
,
1
),
filter_dilation
=
(
1
,
1
,
1
),
num_groups
=
1
,
num_groups
=
1
,
):
):
super
(
GpuCorr3dMM_gradInputs
,
self
)
.
__init__
(
super
()
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
ctx_name
=
infer_context_name
(
kern
,
topgrad
)
ctx_name
=
infer_context_name
(
kern
,
topgrad
)
...
@@ -2051,7 +2033,7 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
...
@@ -2051,7 +2033,7 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
height
,
width
,
depth
=
inp
[
2
:]
or
(
None
,
None
,
None
)
height
,
width
,
depth
=
inp
[
2
:]
or
(
None
,
None
,
None
)
(
bottom
,)
=
out_
(
bottom
,)
=
out_
direction
=
"backprop inputs"
direction
=
"backprop inputs"
return
super
(
GpuCorr3dMM_gradInputs
,
self
)
.
c_code_helper
(
return
super
()
.
c_code_helper
(
bottom
,
weights
,
top
,
direction
,
sub
,
height
,
width
,
depth
bottom
,
weights
,
top
,
direction
,
sub
,
height
,
width
,
depth
)
)
...
...
theano/gpuarray/cudnn_defs.py
浏览文件 @
bcccce7a
...
@@ -48,7 +48,7 @@ def is_double_config(dtype, precision):
...
@@ -48,7 +48,7 @@ def is_double_config(dtype, precision):
# exclude them from lists of supported algorithms.
# exclude them from lists of supported algorithms.
class
CuDNNV51
(
object
)
:
class
CuDNNV51
:
version
=
5
version
=
5
cudnnConvolutionMode_t
=
CEnumType
(
cudnnConvolutionMode_t
=
CEnumType
(
...
@@ -319,7 +319,7 @@ class CuDNNV6(CuDNNV51):
...
@@ -319,7 +319,7 @@ class CuDNNV6(CuDNNV51):
)
)
def
fwd_algo_supports_dtype_config
(
self
,
algo
,
dtype
,
precision
,
ndim
):
def
fwd_algo_supports_dtype_config
(
self
,
algo
,
dtype
,
precision
,
ndim
):
is_supported
=
super
(
CuDNNV6
,
self
)
.
fwd_algo_supports_dtype_config
(
is_supported
=
super
()
.
fwd_algo_supports_dtype_config
(
algo
,
dtype
,
precision
,
ndim
algo
,
dtype
,
precision
,
ndim
)
)
if
not
is_supported
:
if
not
is_supported
:
...
@@ -339,7 +339,7 @@ class CuDNNV6(CuDNNV51):
...
@@ -339,7 +339,7 @@ class CuDNNV6(CuDNNV51):
return
is_supported
return
is_supported
def
bwd_filter_algo_supports_dtype_config
(
self
,
algo
,
dtype
,
precision
,
ndim
):
def
bwd_filter_algo_supports_dtype_config
(
self
,
algo
,
dtype
,
precision
,
ndim
):
is_supported
=
super
(
CuDNNV6
,
self
)
.
bwd_filter_algo_supports_dtype_config
(
is_supported
=
super
()
.
bwd_filter_algo_supports_dtype_config
(
algo
,
dtype
,
precision
,
ndim
algo
,
dtype
,
precision
,
ndim
)
)
if
not
is_supported
:
if
not
is_supported
:
...
@@ -354,7 +354,7 @@ class CuDNNV6(CuDNNV51):
...
@@ -354,7 +354,7 @@ class CuDNNV6(CuDNNV51):
return
is_supported
return
is_supported
def
bwd_data_algo_supports_dtype_config
(
self
,
algo
,
dtype
,
precision
,
ndim
):
def
bwd_data_algo_supports_dtype_config
(
self
,
algo
,
dtype
,
precision
,
ndim
):
is_supported
=
super
(
CuDNNV6
,
self
)
.
bwd_data_algo_supports_dtype_config
(
is_supported
=
super
()
.
bwd_data_algo_supports_dtype_config
(
algo
,
dtype
,
precision
,
ndim
algo
,
dtype
,
precision
,
ndim
)
)
if
not
is_supported
:
if
not
is_supported
:
...
...
theano/gpuarray/dnn.py
浏览文件 @
bcccce7a
...
@@ -5,7 +5,6 @@ import warnings
...
@@ -5,7 +5,6 @@ import warnings
from
functools
import
reduce
from
functools
import
reduce
import
numpy
as
np
import
numpy
as
np
from
six
import
integer_types
import
theano
import
theano
import
theano.pathparse
import
theano.pathparse
...
@@ -155,19 +154,19 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
...
@@ -155,19 +154,19 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
path_wrapper
=
'"'
if
os
.
name
==
"nt"
else
""
path_wrapper
=
'"'
if
os
.
name
==
"nt"
else
""
params
=
[
"-l"
,
"cudnn"
]
params
=
[
"-l"
,
"cudnn"
]
params
.
extend
(
params
.
extend
(
[
"-I
%
s
%
s
%
s"
%
(
path_wrapper
,
gpuarray_helper_inc_dir
(),
path_wrapper
)]
[
"-I
{}{}{}"
.
format
(
path_wrapper
,
gpuarray_helper_inc_dir
(),
path_wrapper
)]
)
)
if
config
.
dnn
.
include_path
:
if
config
.
dnn
.
include_path
:
params
.
extend
(
params
.
extend
(
[
"-I
%
s
%
s
%
s"
%
(
path_wrapper
,
config
.
dnn
.
include_path
,
path_wrapper
)]
[
"-I
{}{}{}"
.
format
(
path_wrapper
,
config
.
dnn
.
include_path
,
path_wrapper
)]
)
)
if
config
.
cuda
.
include_path
:
if
config
.
cuda
.
include_path
:
params
.
extend
(
params
.
extend
(
[
"-I
%
s
%
s
%
s"
%
(
path_wrapper
,
config
.
cuda
.
include_path
,
path_wrapper
)]
[
"-I
{}{}{}"
.
format
(
path_wrapper
,
config
.
cuda
.
include_path
,
path_wrapper
)]
)
)
if
config
.
dnn
.
library_path
:
if
config
.
dnn
.
library_path
:
params
.
extend
(
params
.
extend
(
[
"-L
%
s
%
s
%
s"
%
(
path_wrapper
,
config
.
dnn
.
library_path
,
path_wrapper
)]
[
"-L
{}{}{}"
.
format
(
path_wrapper
,
config
.
dnn
.
library_path
,
path_wrapper
)]
)
)
# Do not run here the test program. It would run on the
# Do not run here the test program. It would run on the
# default gpu, not the one selected by the user. If mixed
# default gpu, not the one selected by the user. If mixed
...
@@ -462,7 +461,7 @@ class DnnBase(COp):
...
@@ -462,7 +461,7 @@ class DnnBase(COp):
return
[]
return
[]
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
super
(
DnnBase
,
self
)
.
c_code_cache_version
(),
version
(),
4
)
return
(
super
()
.
c_code_cache_version
(),
version
(),
4
)
class
GpuDnnConvDesc
(
COp
):
class
GpuDnnConvDesc
(
COp
):
...
@@ -542,7 +541,7 @@ class GpuDnnConvDesc(COp):
...
@@ -542,7 +541,7 @@ class GpuDnnConvDesc(COp):
if
version
()
<
6000
and
any
([
d
!=
1
for
d
in
dilation
]):
if
version
()
<
6000
and
any
([
d
!=
1
for
d
in
dilation
]):
raise
RuntimeError
(
"Dilation > 1 not supported for cuDNN version < 6."
)
raise
RuntimeError
(
"Dilation > 1 not supported for cuDNN version < 6."
)
if
isinstance
(
border_mode
,
int
eger_types
):
if
isinstance
(
border_mode
,
int
):
border_mode
=
(
border_mode
,)
*
len
(
subsample
)
border_mode
=
(
border_mode
,)
*
len
(
subsample
)
if
isinstance
(
border_mode
,
tuple
):
if
isinstance
(
border_mode
,
tuple
):
assert
len
(
border_mode
)
==
len
(
subsample
)
assert
len
(
border_mode
)
==
len
(
subsample
)
...
@@ -621,7 +620,7 @@ class GpuDnnConvDesc(COp):
...
@@ -621,7 +620,7 @@ class GpuDnnConvDesc(COp):
nb_dims
=
property
(
lambda
self
:
len
(
self
.
subsample
))
nb_dims
=
property
(
lambda
self
:
len
(
self
.
subsample
))
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
super
(
GpuDnnConvDesc
,
self
)
.
c_code_cache_version
(),
version
())
return
(
super
()
.
c_code_cache_version
(),
version
())
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
self
.
__dict__
.
update
(
d
)
...
@@ -646,7 +645,7 @@ def ensure_dt(val, default, name, dtype):
...
@@ -646,7 +645,7 @@ def ensure_dt(val, default, name, dtype):
if
hasattr
(
val
,
"ndim"
)
and
val
.
ndim
==
0
:
if
hasattr
(
val
,
"ndim"
)
and
val
.
ndim
==
0
:
val
=
as_scalar
(
val
)
val
=
as_scalar
(
val
)
if
not
isinstance
(
val
.
type
,
theano
.
scalar
.
Scalar
):
if
not
isinstance
(
val
.
type
,
theano
.
scalar
.
Scalar
):
raise
TypeError
(
"
%
s: expected a scalar value"
%
(
name
,
))
raise
TypeError
(
"
{}: expected a scalar value"
.
format
(
name
))
if
not
val
.
type
.
dtype
==
dtype
:
if
not
val
.
type
.
dtype
==
dtype
:
val
=
val
.
astype
(
dtype
)
val
=
val
.
astype
(
dtype
)
return
val
return
val
...
@@ -2971,7 +2970,9 @@ class GpuDnnRNNOp(DnnBase):
...
@@ -2971,7 +2970,9 @@ class GpuDnnRNNOp(DnnBase):
elif
direction_mode
==
"unidirectional"
:
elif
direction_mode
==
"unidirectional"
:
self
.
num_dirs
=
1
self
.
num_dirs
=
1
else
:
else
:
raise
ValueError
(
"direction_mode is invalid (got
%
s)"
%
(
direction_mode
,))
raise
ValueError
(
"direction_mode is invalid (got {})"
.
format
(
direction_mode
)
)
def
dnn_context
(
self
,
node
):
def
dnn_context
(
self
,
node
):
return
node
.
outputs
[
1
]
.
type
.
context_name
return
node
.
outputs
[
1
]
.
type
.
context_name
...
@@ -3114,7 +3115,7 @@ class GpuDnnRNNGradWeights(DnnBase):
...
@@ -3114,7 +3115,7 @@ class GpuDnnRNNGradWeights(DnnBase):
return
Apply
(
self
,
inputs
,
outputs
)
return
Apply
(
self
,
inputs
,
outputs
)
class
RNNBlock
(
object
)
:
class
RNNBlock
:
"""
"""
An object that allow us to use CuDNN RNN implementation.
An object that allow us to use CuDNN RNN implementation.
TODO: make an example how to use. You can check Theano tests
TODO: make an example how to use. You can check Theano tests
...
...
theano/gpuarray/elemwise.py
浏览文件 @
bcccce7a
...
@@ -103,7 +103,7 @@ class GpuElemwise(HideC, Elemwise):
...
@@ -103,7 +103,7 @@ class GpuElemwise(HideC, Elemwise):
if
self
.
name
is
not
None
:
if
self
.
name
is
not
None
:
return
self
.
name
return
self
.
name
items
=
str
(
sorted
(
self
.
inplace_pattern
.
items
()))
items
=
str
(
sorted
(
self
.
inplace_pattern
.
items
()))
return
"GpuElemwise{
%
s}
%
s<gpuarray>"
%
(
self
.
scalar_op
,
items
)
return
"GpuElemwise{
{{}}}{}<gpuarray>"
.
format
(
self
.
scalar_op
,
items
)
def
max_inputs
(
self
,
node_or_outputs
):
def
max_inputs
(
self
,
node_or_outputs
):
return
max_inputs_to_GpuElemwise
(
node_or_outputs
)
return
max_inputs_to_GpuElemwise
(
node_or_outputs
)
...
@@ -233,7 +233,7 @@ class GpuElemwise(HideC, Elemwise):
...
@@ -233,7 +233,7 @@ class GpuElemwise(HideC, Elemwise):
args[
%(n)
s].typecode =
%(typecode)
s;
args[
%(n)
s].typecode =
%(typecode)
s;
args[
%(n)
s].flags = GE_READ;
args[
%(n)
s].flags = GE_READ;
"""
%
dict
(
"""
%
dict
(
n
=
n
,
name
=
'"
%
s"'
%
(
name
,
),
typecode
=
i
.
type
.
typecode
n
=
n
,
name
=
'"
{}"'
.
format
(
name
),
typecode
=
i
.
type
.
typecode
)
)
p
=
len
(
inps
)
p
=
len
(
inps
)
...
@@ -249,7 +249,7 @@ class GpuElemwise(HideC, Elemwise):
...
@@ -249,7 +249,7 @@ class GpuElemwise(HideC, Elemwise):
args[
%(n)
s].typecode =
%(typecode)
s;
args[
%(n)
s].typecode =
%(typecode)
s;
args[
%(n)
s].flags = GE_WRITE;
args[
%(n)
s].flags = GE_WRITE;
"""
%
dict
(
"""
%
dict
(
n
=
p
,
name
=
'"
%
s"'
%
(
outs
[
n
],
),
typecode
=
o
.
type
.
typecode
n
=
p
,
name
=
'"
{}"'
.
format
(
outs
[
n
]
),
typecode
=
o
.
type
.
typecode
)
)
p
+=
1
p
+=
1
...
@@ -572,8 +572,8 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
...
@@ -572,8 +572,8 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
pre
=
"pre=
%
s,red="
%
str
(
self
.
pre_scalar_op
)
pre
=
"pre=
%
s,red="
%
str
(
self
.
pre_scalar_op
)
ax
=
""
ax
=
""
if
self
.
axis
is
not
None
:
if
self
.
axis
is
not
None
:
ax
=
"{
%
s}"
%
(
", "
.
join
(
str
(
x
)
for
x
in
self
.
axis
),
)
ax
=
"{
{{}}}"
.
format
(
", "
.
join
(
str
(
x
)
for
x
in
self
.
axis
)
)
return
"GpuCAReduceCuda{
%
s
%
s}
%
s"
%
(
pre
,
str
(
self
.
scalar_op
),
ax
)
return
"GpuCAReduceCuda{
{{}{}}}{}"
.
format
(
pre
,
str
(
self
.
scalar_op
),
ax
)
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
self
.
__dict__
.
update
(
d
)
...
@@ -585,7 +585,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
...
@@ -585,7 +585,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
x
=
as_gpuarray_variable
(
x
,
infer_context_name
(
x
))
x
=
as_gpuarray_variable
(
x
,
infer_context_name
(
x
))
if
x
.
type
.
context
.
kind
!=
b
"cuda"
:
if
x
.
type
.
context
.
kind
!=
b
"cuda"
:
raise
TypeError
(
"GpuCAReduceCuda doesn't work for non-cuda devices"
)
raise
TypeError
(
"GpuCAReduceCuda doesn't work for non-cuda devices"
)
ret
=
super
(
GpuCAReduceCuda
,
self
)
.
make_node
(
x
)
ret
=
super
()
.
make_node
(
x
)
self
=
copy
.
copy
(
self
)
self
=
copy
.
copy
(
self
)
self
.
axis
=
ret
.
op
.
axis
self
.
axis
=
ret
.
op
.
axis
if
self
.
pre_scalar_op
:
if
self
.
pre_scalar_op
:
...
@@ -3056,8 +3056,8 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
...
@@ -3056,8 +3056,8 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
def
__str__
(
self
):
def
__str__
(
self
):
ax
=
""
ax
=
""
if
self
.
axis
is
not
None
:
if
self
.
axis
is
not
None
:
ax
=
"{
%
s}"
%
(
", "
.
join
(
str
(
x
)
for
x
in
self
.
axis
),
)
ax
=
"{
{{}}}"
.
format
(
", "
.
join
(
str
(
x
)
for
x
in
self
.
axis
)
)
return
"GpuReduce{
%
s}
%
s"
%
(
self
.
scalar_op
,
ax
)
return
"GpuReduce{
{{}}}{}"
.
format
(
self
.
scalar_op
,
ax
)
def
make_node
(
self
,
input
):
def
make_node
(
self
,
input
):
ctx_name
=
infer_context_name
(
input
)
ctx_name
=
infer_context_name
(
input
)
...
...
theano/gpuarray/extra_ops.py
浏览文件 @
bcccce7a
...
@@ -73,7 +73,7 @@ class GpuCumOp(GpuKernelBase, Op):
...
@@ -73,7 +73,7 @@ class GpuCumOp(GpuKernelBase, Op):
)
)
if
self
.
axis
>=
x
.
ndim
or
self
.
axis
<
-
x
.
ndim
:
if
self
.
axis
>=
x
.
ndim
or
self
.
axis
<
-
x
.
ndim
:
raise
ValueError
(
"axis(={
0
}) out of bounds"
.
format
(
self
.
axis
))
raise
ValueError
(
"axis(={}) out of bounds"
.
format
(
self
.
axis
))
return
Apply
(
self
,
[
x
],
[
x
.
type
()])
return
Apply
(
self
,
[
x
],
[
x
.
type
()])
def
gpu_kernels
(
self
,
node
,
nodename
):
def
gpu_kernels
(
self
,
node
,
nodename
):
...
@@ -500,7 +500,7 @@ class GpuCumOp(GpuKernelBase, Op):
...
@@ -500,7 +500,7 @@ class GpuCumOp(GpuKernelBase, Op):
"""
"""
%
locals
()
%
locals
()
)
)
return
super
(
GpuCumOp
,
self
)
.
c_support_code_struct
(
node
,
nodename
)
+
code
return
super
()
.
c_support_code_struct
(
node
,
nodename
)
+
code
# GpuCumsumOp exists only to serve backward compatibility.
# GpuCumsumOp exists only to serve backward compatibility.
...
...
theano/gpuarray/kernel_codegen.py
浏览文件 @
bcccce7a
...
@@ -27,8 +27,7 @@ def nvcc_kernel(name, params, body):
...
@@ -27,8 +27,7 @@ def nvcc_kernel(name, params, body):
def
flatbody
():
def
flatbody
():
for
b
in
body
:
for
b
in
body
:
if
isinstance
(
b
,
(
list
,
tuple
)):
if
isinstance
(
b
,
(
list
,
tuple
)):
for
bb
in
b
:
yield
from
b
yield
bb
else
:
else
:
yield
b
yield
b
...
@@ -94,8 +93,8 @@ def inline_reduce(N, buf, pos, count, manner_fn):
...
@@ -94,8 +93,8 @@ def inline_reduce(N, buf, pos, count, manner_fn):
rest of the buffer is trashed by this function.
rest of the buffer is trashed by this function.
"""
"""
loop_line
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[i]"
%
(
buf
))
loop_line
=
manner_fn
(
"
{}[{}]"
.
format
(
buf
,
pos
),
"
%
s[i]"
%
(
buf
))
r_n
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[
%
s+_n]"
%
(
buf
,
pos
))
r_n
=
manner_fn
(
"
{}[{}]"
.
format
(
buf
,
pos
),
"{}[{}+_n]"
.
format
(
buf
,
pos
))
return
(
return
(
"""
"""
...
@@ -124,22 +123,22 @@ def inline_reduce(N, buf, pos, count, manner_fn):
...
@@ -124,22 +123,22 @@ def inline_reduce(N, buf, pos, count, manner_fn):
@code_version
(
inline_reduce
.
code_version
)
@code_version
(
inline_reduce
.
code_version
)
def
inline_reduce_max
(
N
,
buf
,
pos
,
count
):
def
inline_reduce_max
(
N
,
buf
,
pos
,
count
):
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"max(
%
s,
%
s)"
%
(
a
,
b
))
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"max(
{}, {})"
.
format
(
a
,
b
))
@code_version
(
inline_reduce
.
code_version
)
@code_version
(
inline_reduce
.
code_version
)
def
inline_reduce_sum
(
N
,
buf
,
pos
,
count
):
def
inline_reduce_sum
(
N
,
buf
,
pos
,
count
):
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"
%
s +
%
s"
%
(
a
,
b
))
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"
{} + {}"
.
format
(
a
,
b
))
@code_version
(
inline_reduce
.
code_version
)
@code_version
(
inline_reduce
.
code_version
)
def
inline_reduce_min
(
N
,
buf
,
pos
,
count
):
def
inline_reduce_min
(
N
,
buf
,
pos
,
count
):
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"min(
%
s,
%
s)"
%
(
a
,
b
))
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"min(
{}, {})"
.
format
(
a
,
b
))
@code_version
(
inline_reduce
.
code_version
)
@code_version
(
inline_reduce
.
code_version
)
def
inline_reduce_prod
(
N
,
buf
,
pos
,
count
):
def
inline_reduce_prod
(
N
,
buf
,
pos
,
count
):
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"
%
s *
%
s"
%
(
a
,
b
))
return
inline_reduce
(
N
,
buf
,
pos
,
count
,
lambda
a
,
b
:
"
{} * {}"
.
format
(
a
,
b
))
@code_version
((
2
,)
+
inline_reduce_max
.
code_version
+
inline_reduce_sum
.
code_version
)
@code_version
((
2
,)
+
inline_reduce_max
.
code_version
+
inline_reduce_sum
.
code_version
)
...
@@ -275,8 +274,8 @@ def inline_reduce_fixed_shared(
...
@@ -275,8 +274,8 @@ def inline_reduce_fixed_shared(
loop_line
=
manner_fn
(
loop_line
=
manner_fn
(
"red"
,
manner_init
(
"
%(load_x)
s(
%(x)
s[i *
%(stride_x)
s])"
%
locals
())
"red"
,
manner_init
(
"
%(load_x)
s(
%(x)
s[i *
%(stride_x)
s])"
%
locals
())
)
)
loop_line2
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[i]"
%
buf
)
loop_line2
=
manner_fn
(
"
{}[{}]"
.
format
(
buf
,
pos
),
"
%
s[i]"
%
buf
)
r_n
=
manner_fn
(
"
%
s[
%
s]"
%
(
buf
,
pos
),
"
%
s[
%
s+_n]"
%
(
buf
,
pos
))
r_n
=
manner_fn
(
"
{}[{}]"
.
format
(
buf
,
pos
),
"{}[{}+_n]"
.
format
(
buf
,
pos
))
ctype
=
gpuarray
.
dtype_to_ctype
(
dtype
)
ctype
=
gpuarray
.
dtype_to_ctype
(
dtype
)
return
(
return
(
...
@@ -331,7 +330,7 @@ def inline_reduce_fixed_shared_max(
...
@@ -331,7 +330,7 @@ def inline_reduce_fixed_shared_max(
load_x
,
load_x
,
pos
,
pos
,
count
,
count
,
lambda
a
,
b
:
"max(
%
s,
%
s)"
%
(
a
,
b
),
lambda
a
,
b
:
"max(
{}, {})"
.
format
(
a
,
b
),
lambda
a
:
a
,
lambda
a
:
a
,
b
,
b
,
stride_b
,
stride_b
,
...
@@ -431,7 +430,7 @@ def inline_softmax_fixed_shared(
...
@@ -431,7 +430,7 @@ def inline_softmax_fixed_shared(
load_x
,
load_x
,
threadPos
,
threadPos
,
threadCount
,
threadCount
,
lambda
a
,
b
:
"
%
s +
%
s"
%
(
a
,
b
),
lambda
a
,
b
:
"
{} + {}"
.
format
(
a
,
b
),
lambda
a
:
"exp(
%
s - row_max)"
%
a
,
lambda
a
:
"exp(
%
s - row_max)"
%
a
,
b
,
b
,
stride_b
,
stride_b
,
...
...
theano/gpuarray/linalg.py
浏览文件 @
bcccce7a
...
@@ -137,7 +137,7 @@ class GpuCusolverSolve(Op):
...
@@ -137,7 +137,7 @@ class GpuCusolverSolve(Op):
if
self
.
inplace
:
if
self
.
inplace
:
self
.
destroy_map
=
{
0
:
[
0
]}
self
.
destroy_map
=
{
0
:
[
0
]}
assert
A_structure
in
MATRIX_STRUCTURES_SOLVE
assert
A_structure
in
MATRIX_STRUCTURES_SOLVE
super
(
GpuCusolverSolve
,
self
)
.
__init__
()
super
()
.
__init__
()
def
make_node
(
self
,
inp1
,
inp2
):
def
make_node
(
self
,
inp1
,
inp2
):
if
not
cusolver_available
:
if
not
cusolver_available
:
...
@@ -358,7 +358,7 @@ class GpuCublasTriangularSolve(Op):
...
@@ -358,7 +358,7 @@ class GpuCublasTriangularSolve(Op):
def
__init__
(
self
,
lower
=
True
,
trans
=
"N"
):
def
__init__
(
self
,
lower
=
True
,
trans
=
"N"
):
self
.
trans
=
trans
self
.
trans
=
trans
self
.
lower
=
lower
self
.
lower
=
lower
super
(
GpuCublasTriangularSolve
,
self
)
.
__init__
()
super
()
.
__init__
()
def
make_node
(
self
,
inp1
,
inp2
):
def
make_node
(
self
,
inp1
,
inp2
):
if
not
cublas_available
:
if
not
cublas_available
:
...
@@ -541,7 +541,7 @@ class GpuCholesky(Op):
...
@@ -541,7 +541,7 @@ class GpuCholesky(Op):
self
.
inplace
=
inplace
self
.
inplace
=
inplace
if
self
.
inplace
:
if
self
.
inplace
:
self
.
destroy_map
=
{
0
:
[
0
]}
self
.
destroy_map
=
{
0
:
[
0
]}
super
(
GpuCholesky
,
self
)
.
__init__
()
super
()
.
__init__
()
def
clone_inplace
(
self
):
def
clone_inplace
(
self
):
return
self
.
__class__
(
lower
=
self
.
lower
,
inplace
=
True
)
return
self
.
__class__
(
lower
=
self
.
lower
,
inplace
=
True
)
...
@@ -788,7 +788,7 @@ class GpuMagmaSVD(GpuMagmaBase):
...
@@ -788,7 +788,7 @@ class GpuMagmaSVD(GpuMagmaBase):
)
)
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
super
(
GpuMagmaSVD
,
self
)
.
prepare_node
(
node
,
storage_map
,
compute_map
,
impl
)
super
()
.
prepare_node
(
node
,
storage_map
,
compute_map
,
impl
)
# Check node to prevent eventual errors with old pickled nodes.
# Check node to prevent eventual errors with old pickled nodes.
if
self
.
compute_uv
:
if
self
.
compute_uv
:
A
,
B
,
C
=
node
.
outputs
A
,
B
,
C
=
node
.
outputs
...
...
theano/gpuarray/multinomial.py
浏览文件 @
bcccce7a
...
@@ -534,4 +534,4 @@ class GPUAMultinomialWOReplacementFromUniform(GPUAChoiceFromUniform):
...
@@ -534,4 +534,4 @@ class GPUAMultinomialWOReplacementFromUniform(GPUAChoiceFromUniform):
DeprecationWarning
,
DeprecationWarning
,
stacklevel
=
2
,
stacklevel
=
2
,
)
)
super
(
GPUAMultinomialWOReplacementFromUniform
,
self
)
.
__init__
(
*
args
,
**
kwargs
)
super
()
.
__init__
(
*
args
,
**
kwargs
)
theano/gpuarray/opt.py
浏览文件 @
bcccce7a
...
@@ -484,7 +484,7 @@ class GraphToGPU(Optimizer):
...
@@ -484,7 +484,7 @@ class GraphToGPU(Optimizer):
for
(
t
,
o
)
in
not_used
[::
-
1
]:
for
(
t
,
o
)
in
not_used
[::
-
1
]:
if
t
>
0
:
if
t
>
0
:
# Skip opt that have 0 times, they probably wasn't even tried.
# Skip opt that have 0 times, they probably wasn't even tried.
print
(
blanc
+
" "
,
"
%.3
fs -
%
s"
%
(
t
,
o
),
file
=
stream
)
print
(
blanc
+
" "
,
"
{:.3f}s - {}"
.
format
(
t
,
o
),
file
=
stream
)
print
(
file
=
stream
)
print
(
file
=
stream
)
@staticmethod
@staticmethod
...
@@ -2182,7 +2182,7 @@ def local_abstractconv3d_gradinputs_gemm_alt(node):
...
@@ -2182,7 +2182,7 @@ def local_abstractconv3d_gradinputs_gemm_alt(node):
class
ConvMetaOptimizer
(
LocalMetaOptimizer
):
class
ConvMetaOptimizer
(
LocalMetaOptimizer
):
def
__init__
(
self
):
def
__init__
(
self
):
super
(
ConvMetaOptimizer
,
self
)
.
__init__
()
super
()
.
__init__
()
def
time_call
(
self
,
fn
):
def
time_call
(
self
,
fn
):
start
=
time
.
time
()
start
=
time
.
time
()
...
...
theano/gpuarray/rng_mrg.py
浏览文件 @
bcccce7a
...
@@ -59,7 +59,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
...
@@ -59,7 +59,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
return
op
(
rstate
,
v_size
)
return
op
(
rstate
,
v_size
)
def
c_headers
(
self
):
def
c_headers
(
self
):
return
super
(
GPUA_mrg_uniform
,
self
)
.
c_headers
()
+
[
"numpy_compat.h"
]
return
super
()
.
c_headers
()
+
[
"numpy_compat.h"
]
def
gpu_kernels
(
self
,
node
,
name
):
def
gpu_kernels
(
self
,
node
,
name
):
write
=
write_w
(
self
.
output_type
.
dtype
)
write
=
write_w
(
self
.
output_type
.
dtype
)
...
...
theano/gpuarray/sort.py
浏览文件 @
bcccce7a
...
@@ -225,7 +225,7 @@ class GpuTopKOp(GpuKernelBase, TopKOp):
...
@@ -225,7 +225,7 @@ class GpuTopKOp(GpuKernelBase, TopKOp):
prep_output
=
""
prep_output
=
""
if
self
.
return_values
:
if
self
.
return_values
:
def_dvstrides
=
"const ssize_t *dvstrides = PyGpuArray_STRIDES(
%
s)"
%
yv
def_dvstrides
=
"const ssize_t *dvstrides = PyGpuArray_STRIDES(
%
s)"
%
yv
params_dv
=
"
%
s->ga.data,
%
s->ga.offset,
\n
"
%
(
yv
,
yv
)
params_dv
=
"
{}->ga.data, {}->ga.offset,
\n
"
.
format
(
yv
,
yv
)
params_dv
+=
""
.
join
(
"dvstrides[
%
d], "
%
i
for
i
in
reordered_axes
)
params_dv
+=
""
.
join
(
"dvstrides[
%
d], "
%
i
for
i
in
reordered_axes
)
prep_output
+=
(
prep_output
+=
(
"""
"""
...
@@ -241,7 +241,7 @@ class GpuTopKOp(GpuKernelBase, TopKOp):
...
@@ -241,7 +241,7 @@ class GpuTopKOp(GpuKernelBase, TopKOp):
if
self
.
return_indices
:
if
self
.
return_indices
:
def_distrides
=
"const ssize_t *distrides = PyGpuArray_STRIDES(
%
s)"
%
yi
def_distrides
=
"const ssize_t *distrides = PyGpuArray_STRIDES(
%
s)"
%
yi
params_di
=
"
%
s->ga.data,
%
s->ga.offset,
\n
"
%
(
yi
,
yi
)
params_di
=
"
{}->ga.data, {}->ga.offset,
\n
"
.
format
(
yi
,
yi
)
params_di
+=
""
.
join
(
"distrides[
%
d], "
%
i
for
i
in
reordered_axes
)
params_di
+=
""
.
join
(
"distrides[
%
d], "
%
i
for
i
in
reordered_axes
)
prep_output
+=
(
prep_output
+=
(
"""
"""
...
...
theano/gpuarray/subtensor.py
浏览文件 @
bcccce7a
import
numpy
as
np
import
numpy
as
np
from
six
import
integer_types
from
six.moves
import
StringIO
from
six.moves
import
StringIO
import
theano.tensor
as
tt
import
theano.tensor
as
tt
...
@@ -157,7 +156,7 @@ class GpuSubtensor(HideC, Subtensor):
...
@@ -157,7 +156,7 @@ class GpuSubtensor(HideC, Subtensor):
def
fix_idx
(
idx
):
def
fix_idx
(
idx
):
if
idx
is
None
:
if
idx
is
None
:
return
"0"
,
1
return
"0"
,
1
elif
isinstance
(
idx
,
(
np
.
integer
,
int
eger_types
)):
elif
isinstance
(
idx
,
(
np
.
integer
,
int
)):
return
str
(
idx
),
0
return
str
(
idx
),
0
elif
isinstance
(
idx
,
gof
.
Type
):
elif
isinstance
(
idx
,
gof
.
Type
):
return
indices
.
pop
(
0
),
0
return
indices
.
pop
(
0
),
0
...
@@ -196,7 +195,7 @@ class GpuSubtensor(HideC, Subtensor):
...
@@ -196,7 +195,7 @@ class GpuSubtensor(HideC, Subtensor):
else
:
else
:
if
isinstance
(
idx
,
gof
.
Type
):
if
isinstance
(
idx
,
gof
.
Type
):
start
=
indices
.
pop
(
0
)
start
=
indices
.
pop
(
0
)
elif
isinstance
(
idx
,
(
np
.
integer
,
int
eger_types
)):
elif
isinstance
(
idx
,
(
np
.
integer
,
int
)):
start
=
idx
start
=
idx
else
:
else
:
assert
0
,
idx
assert
0
,
idx
...
@@ -454,7 +453,7 @@ int sub_setarray(GpuArray *dst, GpuArray *src) {
...
@@ -454,7 +453,7 @@ int sub_setarray(GpuArray *dst, GpuArray *src) {
)
)
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
parent_version
=
super
(
GpuIncSubtensor
,
self
)
.
c_code_cache_version
()
parent_version
=
super
()
.
c_code_cache_version
()
if
not
parent_version
:
if
not
parent_version
:
return
return
return
parent_version
+
(
10
,)
return
parent_version
+
(
10
,)
...
@@ -576,7 +575,7 @@ def check_and_convert_boolean_masks(input, idx_list):
...
@@ -576,7 +575,7 @@ def check_and_convert_boolean_masks(input, idx_list):
return
out_idx_list
return
out_idx_list
class
BaseGpuAdvancedSubtensor
(
object
)
:
class
BaseGpuAdvancedSubtensor
:
def
perform
(
self
,
node
,
inputs
,
out_
):
def
perform
(
self
,
node
,
inputs
,
out_
):
(
out
,)
=
out_
(
out
,)
=
out_
x
=
inputs
[
0
]
x
=
inputs
[
0
]
...
@@ -703,7 +702,7 @@ class GpuAdvancedSubtensor(HideC, BaseGpuAdvancedSubtensor, AdvancedSubtensor):
...
@@ -703,7 +702,7 @@ class GpuAdvancedSubtensor(HideC, BaseGpuAdvancedSubtensor, AdvancedSubtensor):
return
gof
.
Apply
(
self
,
[
x
]
+
rval
.
inputs
[
1
:],
[
otype
()])
return
gof
.
Apply
(
self
,
[
x
]
+
rval
.
inputs
[
1
:],
[
otype
()])
class
BaseGpuAdvancedIncSubtensor
(
object
)
:
class
BaseGpuAdvancedIncSubtensor
:
def
perform
(
self
,
node
,
inp
,
out_
):
def
perform
(
self
,
node
,
inp
,
out_
):
(
out
,)
=
out_
(
out
,)
=
out_
x
=
inp
[
0
]
x
=
inp
[
0
]
...
@@ -1133,7 +1132,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC, GpuAdvancedIncSubtens
...
@@ -1133,7 +1132,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC, GpuAdvancedIncSubtens
return
gof
.
Apply
(
self
,
[
x_
,
y_
,
ilist_
],
[
x_
.
type
()])
return
gof
.
Apply
(
self
,
[
x_
,
y_
,
ilist_
],
[
x_
.
type
()])
def
perform
(
self
,
node
,
inp
,
out
,
params
):
def
perform
(
self
,
node
,
inp
,
out
,
params
):
return
super
(
GpuAdvancedIncSubtensor1_dev20
,
self
)
.
perform
(
node
,
inp
,
out
)
return
super
()
.
perform
(
node
,
inp
,
out
)
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
14
,)
return
(
14
,)
...
@@ -1269,9 +1268,7 @@ if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(params)s->set_instead_of
...
@@ -1269,9 +1268,7 @@ if (GpuArray_vector_add_fast(%(out)s, %(y)s, %(ind)s, %(params)s->set_instead_of
def
c_support_code_struct
(
self
,
node
,
nodename
):
def
c_support_code_struct
(
self
,
node
,
nodename
):
return
(
return
(
super
(
GpuAdvancedIncSubtensor1_dev20
,
self
)
.
c_support_code_struct
(
super
()
.
c_support_code_struct
(
node
,
nodename
)
node
,
nodename
)
+
"""
+
"""
int GpuArray_vector_add_fast(PyGpuArrayObject* py_self,
int GpuArray_vector_add_fast(PyGpuArrayObject* py_self,
PyGpuArrayObject* py_other,
PyGpuArrayObject* py_other,
...
...
theano/gpuarray/type.py
浏览文件 @
bcccce7a
...
@@ -80,7 +80,7 @@ def reg_context(name, ctx):
...
@@ -80,7 +80,7 @@ def reg_context(name, ctx):
"""
"""
if
name
in
_context_reg
:
if
name
in
_context_reg
:
raise
ValueError
(
"context name
%
s is already defined"
%
(
name
,
))
raise
ValueError
(
"context name
{} is already defined"
.
format
(
name
))
if
not
isinstance
(
ctx
,
gpuarray
.
GpuContext
):
if
not
isinstance
(
ctx
,
gpuarray
.
GpuContext
):
raise
TypeError
(
"context is not GpuContext"
)
raise
TypeError
(
"context is not GpuContext"
)
_context_reg
[
name
]
=
ctx
_context_reg
[
name
]
=
ctx
...
@@ -101,7 +101,7 @@ def get_context(name):
...
@@ -101,7 +101,7 @@ def get_context(name):
"""
"""
if
name
not
in
_context_reg
:
if
name
not
in
_context_reg
:
raise
ContextNotDefined
(
"context name
%
s not defined"
%
(
name
,
))
raise
ContextNotDefined
(
"context name
{} not defined"
.
format
(
name
))
return
_context_reg
[
name
]
return
_context_reg
[
name
]
...
@@ -189,7 +189,9 @@ class GpuArrayType(Type):
...
@@ -189,7 +189,9 @@ class GpuArrayType(Type):
self
.
typecode
=
gpuarray
.
dtype_to_typecode
(
self
.
dtype
)
self
.
typecode
=
gpuarray
.
dtype_to_typecode
(
self
.
dtype
)
except
gpuarray
.
GpuArrayException
:
except
gpuarray
.
GpuArrayException
:
raise
TypeError
(
raise
TypeError
(
"Unsupported dtype for
%
s:
%
s"
%
(
self
.
__class__
.
__name__
,
self
.
dtype
)
"Unsupported dtype for {}: {}"
.
format
(
self
.
__class__
.
__name__
,
self
.
dtype
)
)
)
def
clone
(
self
,
dtype
=
None
,
broadcastable
=
None
):
def
clone
(
self
,
dtype
=
None
,
broadcastable
=
None
):
...
@@ -233,7 +235,9 @@ class GpuArrayType(Type):
...
@@ -233,7 +235,9 @@ class GpuArrayType(Type):
bcast
=
str
(
b
)
bcast
=
str
(
b
)
else
:
else
:
bcast
=
"
%
iD"
%
len
(
b
)
bcast
=
"
%
iD"
%
len
(
b
)
return
"GpuArrayType<
%
s>(
%
s,
%
s)"
%
(
self
.
context_name
,
self
.
dtype
,
bcast
)
return
"GpuArrayType<{}>({}, {})"
.
format
(
self
.
context_name
,
self
.
dtype
,
bcast
)
def
filter
(
self
,
data
,
strict
=
False
,
allow_downcast
=
None
):
def
filter
(
self
,
data
,
strict
=
False
,
allow_downcast
=
None
):
return
self
.
filter_inplace
(
return
self
.
filter_inplace
(
...
@@ -450,7 +454,9 @@ class GpuArrayType(Type):
...
@@ -450,7 +454,9 @@ class GpuArrayType(Type):
}[
self
.
dtype
]
}[
self
.
dtype
]
except
KeyError
:
except
KeyError
:
raise
TypeError
(
raise
TypeError
(
"Unsupported dtype for
%
s:
%
s"
%
(
self
.
__class__
.
__name__
,
self
.
dtype
)
"Unsupported dtype for {}: {}"
.
format
(
self
.
__class__
.
__name__
,
self
.
dtype
)
)
)
def
get_shape_info
(
self
,
obj
):
def
get_shape_info
(
self
,
obj
):
...
@@ -474,7 +480,7 @@ class GpuArrayType(Type):
...
@@ -474,7 +480,7 @@ class GpuArrayType(Type):
)
)
def
c_init
(
self
,
name
,
sub
):
def
c_init
(
self
,
name
,
sub
):
return
"
%
s = NULL;"
%
(
name
,
)
return
"
{} = NULL;"
.
format
(
name
)
def
c_extract
(
self
,
name
,
sub
,
check_input
=
True
):
def
c_extract
(
self
,
name
,
sub
,
check_input
=
True
):
# TODO I don't check broadcast stuff for now.
# TODO I don't check broadcast stuff for now.
...
@@ -499,7 +505,7 @@ class GpuArrayType(Type):
...
@@ -499,7 +505,7 @@ class GpuArrayType(Type):
}
}
def
c_cleanup
(
self
,
name
,
sub
):
def
c_cleanup
(
self
,
name
,
sub
):
return
"Py_XDECREF(
%(name)
s);
%(name)
s = NULL;"
%
{
"name"
:
name
}
return
"Py_XDECREF(
{name}); {name} = NULL;"
.
format
(
name
=
name
)
def
c_sync
(
self
,
name
,
sub
):
def
c_sync
(
self
,
name
,
sub
):
return
"""
return
"""
...
@@ -914,10 +920,10 @@ class GpuContextType(Type):
...
@@ -914,10 +920,10 @@ class GpuContextType(Type):
return
a
==
b
return
a
==
b
def
c_declare
(
self
,
name
,
sub
,
check_input
=
True
):
def
c_declare
(
self
,
name
,
sub
,
check_input
=
True
):
return
"PyGpuContextObject *
%
s;"
%
(
name
,
)
return
"PyGpuContextObject *
{};"
.
format
(
name
)
def
c_init
(
self
,
name
,
sub
):
def
c_init
(
self
,
name
,
sub
):
return
"
%
s = NULL;"
%
(
name
,
)
return
"
{} = NULL;"
.
format
(
name
)
def
c_extract
(
self
,
name
,
sub
,
check_input
=
True
):
def
c_extract
(
self
,
name
,
sub
,
check_input
=
True
):
if
check_input
:
if
check_input
:
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论