Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
6366716c
提交
6366716c
authored
3月 12, 2012
作者:
Frederic
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
pep8
上级
d18c322f
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
88 行增加
和
56 行删除
+88
-56
blas.py
theano/sandbox/cuda/blas.py
+88
-56
没有找到文件。
theano/sandbox/cuda/blas.py
浏览文件 @
6366716c
import
os
import
StringIO
from
theano
import
Op
,
Type
,
Apply
,
Variable
,
Constant
from
theano
import
Op
,
Type
,
Apply
,
Variable
,
Constant
from
theano
import
tensor
,
scalar
from
theano
import
tensor
,
scalar
import
StringIO
,
os
import
cuda_ndarray.cuda_ndarray
as
cuda
import
cuda_ndarray.cuda_ndarray
as
cuda
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda
import
GpuOp
from
theano.sandbox.cuda
import
GpuOp
class
GpuDot22
(
GpuOp
):
class
GpuDot22
(
GpuOp
):
"""
"""
Implement dot(2d, 2d) on the gpu.
Implement dot(2d, 2d) on the gpu.
"""
"""
def
__str__
(
self
):
def
__str__
(
self
):
return
'GpuDot22'
return
'GpuDot22'
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
return
type
(
self
)
==
type
(
other
)
...
@@ -25,10 +28,10 @@ class GpuDot22(GpuOp):
...
@@ -25,10 +28,10 @@ class GpuDot22(GpuOp):
raise
TypeError
(
y
)
raise
TypeError
(
y
)
otype
=
CudaNdarrayType
(
otype
=
CudaNdarrayType
(
(
x
.
type
.
broadcastable
[
0
],
y
.
type
.
broadcastable
[
1
]))
(
x
.
type
.
broadcastable
[
0
],
y
.
type
.
broadcastable
[
1
]))
return
Apply
(
self
,
[
x
,
y
],
[
otype
()])
return
Apply
(
self
,
[
x
,
y
],
[
otype
()])
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
1
,
1
)
return
(
1
,
1
)
def
c_code
(
self
,
node
,
nodename
,
inputs
,
outputs
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inputs
,
outputs
,
sub
):
x
,
y
=
inputs
x
,
y
=
inputs
...
@@ -77,12 +80,14 @@ class GpuDot22(GpuOp):
...
@@ -77,12 +80,14 @@ class GpuDot22(GpuOp):
"""
%
locals
()
"""
%
locals
()
gpu_dot22
=
GpuDot22
()
gpu_dot22
=
GpuDot22
()
class
GpuDot22Scalar
(
GpuOp
):
class
GpuDot22Scalar
(
GpuOp
):
"""
"""
Implement dot(2d, 2d) * scalar on the gpu.
Implement dot(2d, 2d) * scalar on the gpu.
"""
"""
def
__str__
(
self
):
def
__str__
(
self
):
return
'GpuDot22Scalar'
return
'GpuDot22Scalar'
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
return
type
(
self
)
==
type
(
other
)
...
@@ -98,10 +103,10 @@ class GpuDot22Scalar(GpuOp):
...
@@ -98,10 +103,10 @@ class GpuDot22Scalar(GpuOp):
raise
TypeError
(
a
)
raise
TypeError
(
a
)
otype
=
CudaNdarrayType
(
otype
=
CudaNdarrayType
(
(
x
.
type
.
broadcastable
[
0
],
y
.
type
.
broadcastable
[
1
]))
(
x
.
type
.
broadcastable
[
0
],
y
.
type
.
broadcastable
[
1
]))
return
Apply
(
self
,
[
x
,
y
,
a
],
[
otype
()])
return
Apply
(
self
,
[
x
,
y
,
a
],
[
otype
()])
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
1
,
1
)
return
(
1
,
1
)
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
x
,
y
,
a
=
inputs
x
,
y
,
a
=
inputs
...
@@ -156,13 +161,14 @@ class GpuDot22Scalar(GpuOp):
...
@@ -156,13 +161,14 @@ class GpuDot22Scalar(GpuOp):
"""
%
locals
()
"""
%
locals
()
gpu_dot22scalar
=
GpuDot22Scalar
()
gpu_dot22scalar
=
GpuDot22Scalar
()
class
GpuGemm
(
GpuOp
):
class
GpuGemm
(
GpuOp
):
"""
"""
implement the gemm on the gpu.
implement the gemm on the gpu.
"""
"""
def
__init__
(
self
,
inplace
):
def
__init__
(
self
,
inplace
):
self
.
__setstate__
({
'inplace'
:
inplace
})
self
.
__setstate__
({
'inplace'
:
inplace
})
def
__str__
(
self
):
def
__str__
(
self
):
if
self
.
inplace
:
if
self
.
inplace
:
...
@@ -187,8 +193,8 @@ class GpuGemm(GpuOp):
...
@@ -187,8 +193,8 @@ class GpuGemm(GpuOp):
return
dict
(
inplace
=
self
.
inplace
)
return
dict
(
inplace
=
self
.
inplace
)
def
make_node
(
self
,
z
,
a
,
x
,
y
,
b
):
def
make_node
(
self
,
z
,
a
,
x
,
y
,
b
):
# the more complicated error checking performed by tensor.gemm
is assumed to already
# the more complicated error checking performed by tensor.gemm
# have been done
#
is assumed to already
have been done
return
Apply
(
self
,
[
z
,
a
,
x
,
y
,
b
],
[
z
.
type
()])
return
Apply
(
self
,
[
z
,
a
,
x
,
y
,
b
],
[
z
.
type
()])
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
...
@@ -270,13 +276,14 @@ class GpuGemm(GpuOp):
...
@@ -270,13 +276,14 @@ class GpuGemm(GpuOp):
gpu_gemm_no_inplace
=
GpuGemm
(
inplace
=
False
)
gpu_gemm_no_inplace
=
GpuGemm
(
inplace
=
False
)
gpu_gemm_inplace
=
GpuGemm
(
inplace
=
True
)
gpu_gemm_inplace
=
GpuGemm
(
inplace
=
True
)
class
GpuGemv
(
GpuOp
):
class
GpuGemv
(
GpuOp
):
"""
"""
implement gemv on the gpu.
implement gemv on the gpu.
"""
"""
def
__init__
(
self
,
inplace
):
def
__init__
(
self
,
inplace
):
self
.
__setstate__
({
'inplace'
:
inplace
})
self
.
__setstate__
({
'inplace'
:
inplace
})
def
__str__
(
self
):
def
__str__
(
self
):
if
self
.
inplace
:
if
self
.
inplace
:
...
@@ -301,8 +308,8 @@ class GpuGemv(GpuOp):
...
@@ -301,8 +308,8 @@ class GpuGemv(GpuOp):
return
dict
(
inplace
=
self
.
inplace
)
return
dict
(
inplace
=
self
.
inplace
)
def
make_node
(
self
,
z
,
a
,
x
,
y
,
b
):
def
make_node
(
self
,
z
,
a
,
x
,
y
,
b
):
# the more complicated error checking performed by tensor.gemv
is assumed to already
# the more complicated error checking performed by tensor.gemv
# have been done
#
is assumed to already
have been done
return
Apply
(
self
,
[
z
,
a
,
x
,
y
,
b
],
[
z
.
type
()])
return
Apply
(
self
,
[
z
,
a
,
x
,
y
,
b
],
[
z
.
type
()])
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
...
@@ -364,13 +371,14 @@ class GpuGemv(GpuOp):
...
@@ -364,13 +371,14 @@ class GpuGemv(GpuOp):
gpu_gemv_no_inplace
=
GpuGemv
(
inplace
=
False
)
gpu_gemv_no_inplace
=
GpuGemv
(
inplace
=
False
)
gpu_gemv_inplace
=
GpuGemv
(
inplace
=
True
)
gpu_gemv_inplace
=
GpuGemv
(
inplace
=
True
)
class
GpuGer
(
GpuOp
):
class
GpuGer
(
GpuOp
):
"""
"""
implement ger on the gpu.
implement ger on the gpu.
"""
"""
def
__init__
(
self
,
inplace
):
def
__init__
(
self
,
inplace
):
self
.
__setstate__
({
'inplace'
:
inplace
})
self
.
__setstate__
({
'inplace'
:
inplace
})
def
__str__
(
self
):
def
__str__
(
self
):
if
self
.
inplace
:
if
self
.
inplace
:
...
@@ -468,6 +476,7 @@ class GpuGer(GpuOp):
...
@@ -468,6 +476,7 @@ class GpuGer(GpuOp):
gpu_ger_no_inplace
=
GpuGer
(
inplace
=
False
)
gpu_ger_no_inplace
=
GpuGer
(
inplace
=
False
)
gpu_ger_inplace
=
GpuGer
(
inplace
=
True
)
gpu_ger_inplace
=
GpuGer
(
inplace
=
True
)
class
GpuOuter
(
GpuOp
):
class
GpuOuter
(
GpuOp
):
""" Implement outer on the gpu."""
""" Implement outer on the gpu."""
def
make_node
(
self
,
x
,
y
):
def
make_node
(
self
,
x
,
y
):
...
@@ -554,10 +563,11 @@ class GpuOuter(GpuOp):
...
@@ -554,10 +563,11 @@ class GpuOuter(GpuOp):
if (
%(name)
sres) {
if (
%(name)
sres) {
%(fail)
s;
%(fail)
s;
}
}
"""
%
dict
(
x
=
x
,
y
=
y
,
A
=
A
,
fail
=
fail
,
name
=
name
)
"""
%
dict
(
x
=
x
,
y
=
y
,
A
=
A
,
fail
=
fail
,
name
=
name
)
gpu_outer
=
GpuOuter
()
gpu_outer
=
GpuOuter
()
##
##
# Not really a BLAS operation, but whatever.
# Not really a BLAS operation, but whatever.
#
#
...
@@ -574,7 +584,7 @@ class GpuConv(GpuOp):
...
@@ -574,7 +584,7 @@ class GpuConv(GpuOp):
raise
ValueError
(
mode
)
raise
ValueError
(
mode
)
def
__init__
(
self
,
border_mode
,
def
__init__
(
self
,
border_mode
,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
logical_img_hw
=
None
,
logical_img_hw
=
None
,
logical_kern_hw
=
None
,
logical_kern_hw
=
None
,
logical_kern_align_top
=
True
,
logical_kern_align_top
=
True
,
...
@@ -591,30 +601,32 @@ class GpuConv(GpuOp):
...
@@ -591,30 +601,32 @@ class GpuConv(GpuOp):
the execution of the convolution. Mostly used for
the execution of the convolution. Mostly used for
optimization or debugging.
optimization or debugging.
:param kshp: The size of the kernel. If provided, can genera
:param kshp: The size of the kernel. If provided, can genera
faster code. If the GpuConv op is automatically inserted,
faster code. If the GpuConv op is automatically
inserted,
we take its value automatically from the Conv op.
we take its value automatically from the Conv op.
:param imshp: The size of the image. Not used for code generation but
:param imshp: The size of the image. Not used for code generation but
allow to select an experimental new version in another repo.
allow to select an experimental new version in another
repo.
"""
"""
self
.
border_mode
=
border_mode
self
.
border_mode
=
border_mode
self
.
subsample
=
subsample
self
.
subsample
=
subsample
if
logical_img_hw
is
not
None
:
if
logical_img_hw
is
not
None
:
h
,
w
=
logical_img_hw
h
,
w
=
logical_img_hw
#TODO: reconsider this... since shapes are not given in
constructor,
#TODO: reconsider this... since shapes are not given in
#
maybe a multiplier + offset is a more appropriate way of passing this logical
#
constructor, maybe a multiplier + offset is a more
# grid
#
appropriate way of passing this logical
grid
logical_img_hw
=
tuple
(
logical_img_hw
)
logical_img_hw
=
tuple
(
logical_img_hw
)
self
.
logical_img_hw
=
logical_img_hw
self
.
logical_img_hw
=
logical_img_hw
if
logical_kern_hw
is
not
None
:
if
logical_kern_hw
is
not
None
:
h
,
w
=
logical_kern_hw
h
,
w
=
logical_kern_hw
#TODO: reconsider this... since shapes are not given in
constructor,
#TODO: reconsider this... since shapes are not given in
#
maybe a multiplier + offset is a more appropriate way of passing this logical
#
constructor, maybe a multiplier + offset is a more
# grid
#
appropriate way of passing this logical
grid
logical_kern_hw
=
tuple
(
logical_kern_hw
)
logical_kern_hw
=
tuple
(
logical_kern_hw
)
self
.
logical_kern_hw
=
logical_kern_hw
self
.
logical_kern_hw
=
logical_kern_hw
self
.
logical_kern_align_top
=
logical_kern_align_top
self
.
logical_kern_align_top
=
logical_kern_align_top
self
.
version
=
version
self
.
version
=
version
self
.
verbose
=
verbose
self
.
verbose
=
verbose
self
.
kshp
=
kshp
self
.
kshp
=
kshp
self
.
imshp
=
imshp
self
.
imshp
=
imshp
...
@@ -632,11 +644,12 @@ class GpuConv(GpuOp):
...
@@ -632,11 +644,12 @@ class GpuConv(GpuOp):
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
self
.
__dict__
.
update
(
d
)
if
not
hasattr
(
self
,
"imshp"
):
if
not
hasattr
(
self
,
"imshp"
):
self
.
imshp
=
None
self
.
imshp
=
None
def
__hash__
(
self
):
def
__hash__
(
self
):
# don't use hash(self.version) as hash(-1)==-2 and hash(-2)==-2 in python!
# don't use hash(self.version) as hash(-1)==-2 and
# hash(-2)==-2 in python!
return
hash
(
type
(
self
))
\
return
hash
(
type
(
self
))
\
^
hash
(
self
.
border_mode
)
\
^
hash
(
self
.
border_mode
)
\
^
hash
(
self
.
subsample
)
\
^
hash
(
self
.
subsample
)
\
...
@@ -649,14 +662,15 @@ class GpuConv(GpuOp):
...
@@ -649,14 +662,15 @@ class GpuConv(GpuOp):
^
hash
(
self
.
imshp
)
^
hash
(
self
.
imshp
)
def
__str__
(
self
):
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s,
%
s,
%
s,
%
s,
%
s,
%
s}'
%
(
self
.
__class__
.
__name__
,
return
'
%
s{
%
s,
%
s,
%
s,
%
s,
%
s,
%
s,
%
s}'
%
(
self
.
border_mode
,
self
.
__class__
.
__name__
,
str
(
self
.
subsample
),
self
.
border_mode
,
str
(
self
.
logical_img_hw
),
str
(
self
.
subsample
),
str
(
self
.
logical_kern_hw
),
str
(
self
.
logical_img_hw
),
str
(
self
.
logical_kern_align_top
),
str
(
self
.
logical_kern_hw
),
str
(
self
.
imshp
),
str
(
self
.
logical_kern_align_top
),
str
(
self
.
kshp
))
str
(
self
.
imshp
),
str
(
self
.
kshp
))
def
make_node
(
self
,
img
,
kern
):
def
make_node
(
self
,
img
,
kern
):
if
img
.
type
.
ndim
!=
4
:
if
img
.
type
.
ndim
!=
4
:
...
@@ -664,26 +678,30 @@ class GpuConv(GpuOp):
...
@@ -664,26 +678,30 @@ class GpuConv(GpuOp):
if
kern
.
type
.
ndim
!=
4
:
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
raise
TypeError
(
'kern must be 4D tensor'
)
broadcastable
=
[
img
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
0
],
False
,
False
]
broadcastable
=
[
img
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
0
],
False
,
False
]
return
Apply
(
self
,
[
img
,
kern
],
[
CudaNdarrayType
(
broadcastable
)()])
return
Apply
(
self
,
[
img
,
kern
],
[
CudaNdarrayType
(
broadcastable
)()])
def
c_compile_args
(
self
):
def
c_compile_args
(
self
):
nb
=
0
nb
=
0
if
self
.
kshp
is
not
None
:
if
self
.
kshp
is
not
None
:
nb
=
self
.
kshp
[
1
]
nb
=
self
.
kshp
[
1
]
return
[
'-DTHEANO_KERN_WID='
+
str
(
nb
)]
#
,'-g','-G']
return
[
'-DTHEANO_KERN_WID='
+
str
(
nb
)]
#
,'-g','-G']
def
c_headers
(
self
):
def
c_headers
(
self
):
return
[
'cuda_ndarray.cuh'
,
'<stdio.h>'
]
return
[
'cuda_ndarray.cuh'
,
'<stdio.h>'
]
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
0
,
17
)
# raise this whenever modifying any of the support_code_files
# raise this whenever modifying any of the support_code_files
return
(
0
,
17
)
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of these files
# REMEMBER TO RAISE c_code_cache_version when changing any of
return
open
(
os
.
path
.
join
(
os
.
path
.
split
(
__file__
)[
0
],
'conv_kernel.cu'
))
.
read
()
+
\
# these files
open
(
os
.
path
.
join
(
os
.
path
.
split
(
__file__
)[
0
],
'conv_full_kernel.cu'
))
.
read
()
+
\
files
=
[
'conv_kernel.cu'
,
'conv_full_kernel.cu'
,
'conv.cu'
]
open
(
os
.
path
.
join
(
os
.
path
.
split
(
__file__
)[
0
],
'conv.cu'
))
.
read
()
codes
=
[
open
(
os
.
path
.
join
(
os
.
path
.
split
(
__file__
)[
0
],
f
))
.
read
()
for
f
in
files
]
return
reduce
(
str
.
__add__
,
codes
)
def
c_code
(
self
,
node
,
nodename
,
inp
,
out_
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out_
,
sub
):
img
,
kern
=
inp
img
,
kern
=
inp
...
@@ -724,7 +742,7 @@ class GpuConv(GpuOp):
...
@@ -724,7 +742,7 @@ class GpuConv(GpuOp):
mode, dx, dy, version, verbose);
mode, dx, dy, version, verbose);
Py_XDECREF(
%(out)
s);
Py_XDECREF(
%(out)
s);
%(out)
s = out2;
%(out)
s = out2;
"""
%
sub
"""
%
sub
class
GpuDownsampleFactorMax
(
GpuOp
):
class
GpuDownsampleFactorMax
(
GpuOp
):
...
@@ -736,13 +754,17 @@ class GpuDownsampleFactorMax(GpuOp):
...
@@ -736,13 +754,17 @@ class GpuDownsampleFactorMax(GpuOp):
self
.
ignore_border
=
ignore_border
self
.
ignore_border
=
ignore_border
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
and
self
.
ds
==
other
.
ds
and
self
.
ignore_border
==
other
.
ignore_border
return
(
type
(
self
)
==
type
(
other
)
and
self
.
ds
==
other
.
ds
and
self
.
ignore_border
==
other
.
ignore_border
)
def
__hash__
(
self
):
def
__hash__
(
self
):
return
hash
(
type
(
self
))
^
hash
(
self
.
ds
)
^
hash
(
self
.
ignore_border
)
return
hash
(
type
(
self
))
^
hash
(
self
.
ds
)
^
hash
(
self
.
ignore_border
)
def
__str__
(
self
):
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s}'
%
(
self
.
__class__
.
__name__
,
self
.
ds
,
self
.
ignore_border
)
return
'
%
s{
%
s,
%
s}'
%
(
self
.
__class__
.
__name__
,
self
.
ds
,
self
.
ignore_border
)
def
make_node
(
self
,
x
):
def
make_node
(
self
,
x
):
if
not
isinstance
(
x
.
type
,
CudaNdarrayType
):
if
not
isinstance
(
x
.
type
,
CudaNdarrayType
):
...
@@ -750,10 +772,12 @@ class GpuDownsampleFactorMax(GpuOp):
...
@@ -750,10 +772,12 @@ class GpuDownsampleFactorMax(GpuOp):
if
not
x
.
type
.
ndim
==
4
:
if
not
x
.
type
.
ndim
==
4
:
raise
TypeError
()
raise
TypeError
()
return
Apply
(
self
,
[
x
],
[
x
.
type
()])
return
Apply
(
self
,
[
x
],
[
x
.
type
()])
#def perform(self, node, input_storage, output_storage):
#def perform(self, node, input_storage, output_storage):
#raise NotImplementedError('only C is implemented')
#raise NotImplementedError('only C is implemented')
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
3
)
return
(
3
)
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
x
,
=
inp
x
,
=
inp
z
,
=
out
z
,
=
out
...
@@ -887,6 +911,7 @@ class GpuDownsampleFactorMax(GpuOp):
...
@@ -887,6 +911,7 @@ class GpuDownsampleFactorMax(GpuOp):
}
}
"""
%
locals
()
"""
%
locals
()
class
GpuDownsampleFactorMaxGrad
(
GpuOp
):
class
GpuDownsampleFactorMaxGrad
(
GpuOp
):
"""
"""
Implement the grad of downsample with max on the gpu.
Implement the grad of downsample with max on the gpu.
...
@@ -896,16 +921,21 @@ class GpuDownsampleFactorMaxGrad(GpuOp):
...
@@ -896,16 +921,21 @@ class GpuDownsampleFactorMaxGrad(GpuOp):
self
.
ignore_border
=
ignore_border
self
.
ignore_border
=
ignore_border
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
and
self
.
ds
==
other
.
ds
and
self
.
ignore_border
==
other
.
ignore_border
return
(
type
(
self
)
==
type
(
other
)
and
self
.
ds
==
other
.
ds
and
self
.
ignore_border
==
other
.
ignore_border
)
def
__hash__
(
self
):
def
__hash__
(
self
):
return
hash
(
type
(
self
))
^
hash
(
self
.
ds
)
^
hash
(
self
.
ignore_border
)
return
hash
(
type
(
self
))
^
hash
(
self
.
ds
)
^
hash
(
self
.
ignore_border
)
def
__str__
(
self
):
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s}'
%
(
self
.
__class__
.
__name__
,
self
.
ds
,
self
.
ignore_border
)
return
'
%
s{
%
s,
%
s}'
%
(
self
.
__class__
.
__name__
,
self
.
ds
,
self
.
ignore_border
)
def
make_node
(
self
,
x
,
z
,
gz
):
def
make_node
(
self
,
x
,
z
,
gz
):
return
Apply
(
self
,
[
x
,
z
,
gz
],
[
x
.
type
()])
return
Apply
(
self
,
[
x
,
z
,
gz
],
[
x
.
type
()])
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
#return ()
#return ()
return
(
5
,)
return
(
5
,)
...
@@ -988,12 +1018,14 @@ class GpuDownsampleFactorMaxGrad(GpuOp):
...
@@ -988,12 +1018,14 @@ class GpuDownsampleFactorMaxGrad(GpuOp):
"""
%
locals
()
"""
%
locals
()
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
# This code considers every position in the output z, andthen computes the gradient for the
# This code considers every position in the output z, andthen
# input pixels that were downsampled to that z-position. It does so by running along every
# computes the gradient for the input pixels that were
# z row (sometimes plus one, to make sure every gx row gets totally filled), and by
# downsampled to that z-position. It does so by running along
# running along every x col. This code is not sensitive to the ignore_border flag along
# every z row (sometimes plus one, to make sure every gx row
# the row dimension (since it runs for every position in the output z), but it is sensitive
# gets totally filled), and by running along every x col. This
# along the col dimension.
# code is not sensitive to the ignore_border flag along the
# row dimension (since it runs for every position in the
# output z), but it is sensitive along the col dimension.
ignore_border
=
int
(
self
.
ignore_border
)
ignore_border
=
int
(
self
.
ignore_border
)
return
"""
return
"""
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论