Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
ed7759fb
提交
ed7759fb
authored
3月 13, 2016
作者:
Chiheb Trabelsi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
basic_ops.py has been modified in order to respect the flake8 style
上级
200babca
显示空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
43 行增加
和
43 行删除
+43
-43
basic_ops.py
theano/sandbox/cuda/basic_ops.py
+43
-43
没有找到文件。
theano/sandbox/cuda/basic_ops.py
浏览文件 @
ed7759fb
...
@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division
...
@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division
import
copy
import
copy
import
logging
import
logging
import
sys
import
sys
import
warnings
import
numpy
import
numpy
from
six
import
iteritems
from
six
import
iteritems
from
six.moves
import
StringIO
,
xrange
from
six.moves
import
StringIO
,
xrange
...
@@ -12,6 +12,9 @@ from theano import gof, Type, Apply
...
@@ -12,6 +12,9 @@ from theano import gof, Type, Apply
from
theano
import
tensor
,
scalar
,
config
from
theano
import
tensor
,
scalar
,
config
from
theano.gradient
import
grad_undefined
from
theano.gradient
import
grad_undefined
from
theano.scalar
import
Scalar
from
theano.scalar
import
Scalar
from
theano.sandbox.cuda
import
GpuOp
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda.elemwise
import
NaiveAlgo
scal
=
scalar
# somewhere scalar gets reassigned to be a function
scal
=
scalar
# somewhere scalar gets reassigned to be a function
...
@@ -24,10 +27,6 @@ try:
...
@@ -24,10 +27,6 @@ try:
except
ImportError
:
except
ImportError
:
pass
pass
from
theano.sandbox.cuda
import
GpuOp
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda.elemwise
import
NaiveAlgo
_logger_name
=
'theano.sandbox.cuda.basic_ops'
_logger_name
=
'theano.sandbox.cuda.basic_ops'
_logger
=
logging
.
getLogger
(
_logger_name
)
_logger
=
logging
.
getLogger
(
_logger_name
)
...
@@ -596,10 +595,8 @@ class GpuCAReduce(GpuOp):
...
@@ -596,10 +595,8 @@ class GpuCAReduce(GpuOp):
if
self
.
pre_scalar_op
:
if
self
.
pre_scalar_op
:
pre
=
"pre=
%
s,red="
%
str
(
self
.
pre_scalar_op
)
pre
=
"pre=
%
s,red="
%
str
(
self
.
pre_scalar_op
)
return
"GpuCAReduce{
%
s
%
s}{
%
s}"
%
(
return
"GpuCAReduce{
%
s
%
s}{
%
s}"
%
(
pre
,
pre
,
str
(
self
.
scalar_op
),
str
(
self
.
scalar_op
),
','
.
join
(
str
(
i
)
for
i
in
self
.
reduce_mask
))
','
.
join
(
str
(
i
)
for
i
in
self
.
reduce_mask
)
)
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
self
.
__dict__
.
update
(
d
)
...
@@ -775,15 +772,18 @@ class GpuCAReduce(GpuOp):
...
@@ -775,15 +772,18 @@ class GpuCAReduce(GpuOp):
# check if the tensor is ccontiguous, if true, use the c_code_reduce_ccontig code.
# check if the tensor is ccontiguous, if true, use the c_code_reduce_ccontig code.
# TODO: check if we are ccontiguous when we un-dimshuffle
# TODO: check if we are ccontiguous when we un-dimshuffle
# TODO: if only some dims are ccontiguous, call version with less dims.
# TODO: if only some dims are ccontiguous, call version with less dims.
print
(
'if(CudaNdarray_is_c_contiguous(
%(x)
s)){'
%
locals
(),
file
=
sio
)
print
(
'if(CudaNdarray_is_c_contiguous(
%(x)
s)){'
%
locals
(),
file
=
sio
)
self
.
c_code_reduce_ccontig
(
sio
,
node
,
name
,
x
,
z
,
fail
)
self
.
c_code_reduce_ccontig
(
sio
,
node
,
name
,
x
,
z
,
fail
)
print
(
"}else{"
,
file
=
sio
)
print
(
"}else{"
,
file
=
sio
)
getattr
(
self
,
'c_code_reduce_
%
s'
%
(
''
.
join
(
getattr
(
self
,
'c_code_reduce_
%
s'
%
(
''
.
join
(
str
(
i
)
for
i
in
self
.
reduce_mask
)))(
sio
,
node
,
name
,
x
,
z
,
fail
)
str
(
i
)
for
i
in
self
.
reduce_mask
)))(
sio
,
node
,
name
,
x
,
z
,
fail
)
print
(
"}"
,
file
=
sio
)
print
(
"}"
,
file
=
sio
)
else
:
else
:
getattr
(
self
,
'c_code_reduce_
%
s'
%
(
''
.
join
(
getattr
(
self
,
'c_code_reduce_
%
s'
%
(
''
.
join
(
str
(
i
)
for
i
in
self
.
reduce_mask
)))(
sio
,
node
,
name
,
x
,
z
,
fail
)
str
(
i
)
for
i
in
self
.
reduce_mask
)))(
sio
,
node
,
name
,
x
,
z
,
fail
)
# \end bracket the reduction ...
# \end bracket the reduction ...
print
(
"""
print
(
"""
...
@@ -976,7 +976,7 @@ class GpuCAReduce(GpuOp):
...
@@ -976,7 +976,7 @@ class GpuCAReduce(GpuOp):
assert
isinstance
(
self
.
scalar_op
,
(
scal
.
Maximum
,
assert
isinstance
(
self
.
scalar_op
,
(
scal
.
Maximum
,
scal
.
Minimum
))
scal
.
Minimum
))
if
self
.
pre_scalar_op
:
if
self
.
pre_scalar_op
:
#dtype = node.inputs[0].dtype
#
dtype = node.inputs[0].dtype
dtype
=
'float32'
dtype
=
'float32'
dummy_var
=
scal
.
Scalar
(
dtype
=
dtype
)()
dummy_var
=
scal
.
Scalar
(
dtype
=
dtype
)()
...
@@ -1834,12 +1834,15 @@ class GpuCAReduce(GpuOp):
...
@@ -1834,12 +1834,15 @@ class GpuCAReduce(GpuOp):
version
=
[
15
]
# the version corresponding to the c code in this Op
version
=
[
15
]
# the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend...
# now we insert versions for the ops on which we depend...
scalar_node
=
Apply
(
self
.
scalar_op
,
Apply
(
self
.
scalar_op
,
[
Scalar
(
dtype
=
input
.
type
.
dtype
)()
for
input
in
node
.
inputs
],
[
Scalar
(
[
Scalar
(
dtype
=
output
.
type
.
dtype
)()
for
output
in
node
.
outputs
])
dtype
=
input
.
type
.
dtype
)()
for
input
in
node
.
inputs
],
[
Scalar
(
dtype
=
output
.
type
.
dtype
)()
for
output
in
node
.
outputs
])
version
.
extend
(
self
.
scalar_op
.
c_code_cache_version
())
version
.
extend
(
self
.
scalar_op
.
c_code_cache_version
())
for
i
in
node
.
inputs
+
node
.
outputs
:
for
i
in
node
.
inputs
+
node
.
outputs
:
version
.
extend
(
Scalar
(
dtype
=
i
.
type
.
dtype
)
.
c_code_cache_version
())
version
.
extend
(
Scalar
(
dtype
=
i
.
type
.
dtype
)
.
c_code_cache_version
())
if
all
(
version
):
if
all
(
version
):
return
tuple
(
version
)
return
tuple
(
version
)
else
:
else
:
...
@@ -1946,10 +1949,11 @@ class GpuCAReduce(GpuOp):
...
@@ -1946,10 +1949,11 @@ class GpuCAReduce(GpuOp):
%(reducebuf)
s
%(reducebuf)
s
}
}
"""
%
locals
(),
file
=
sio
)
"""
%
locals
(),
file
=
sio
)
#01, 011, 0111
#
01, 011, 0111
if
(
0
==
self
.
reduce_mask
[
0
]
and
if
(
0
==
self
.
reduce_mask
[
0
]
and
all
(
self
.
reduce_mask
[
1
:])
and
all
(
self
.
reduce_mask
[
1
:])
and
nd_in
in
[
2
,
3
,
4
]):
nd_in
in
[
2
,
3
,
4
]):
# this kernel uses one block for each row.
# this kernel uses one block for each row.
# threads per block for each element per row.
# threads per block for each element per row.
...
@@ -2117,10 +2121,10 @@ class GpuCAReduce(GpuOp):
...
@@ -2117,10 +2121,10 @@ class GpuCAReduce(GpuOp):
# this kernel uses one block for multiple column(up to 32TODO),
# this kernel uses one block for multiple column(up to 32TODO),
# threads per block for each element per column.
# threads per block for each element per column.
# thread.x = dim 2 contiguous
# thread.x = dim 2 contiguous
# thread.y = dim 1
# thread.y = dim 1
# block.x = dim 0
# block.x = dim 0
# block.y = dim 1 rest
# block.y = dim 1 rest
init
=
self
.
_k_init
(
node
,
nodename
)
init
=
self
.
_k_init
(
node
,
nodename
)
decl
=
self
.
_k_decl
(
node
,
nodename
,
pattern
=
"010_inner"
)
decl
=
self
.
_k_decl
(
node
,
nodename
,
pattern
=
"010_inner"
)
reducebuf
=
self
.
_k_reduce_buf_multiple
(
'Z[i0 * sZ0 + i2*sZ1]'
,
reducebuf
=
self
.
_k_reduce_buf_multiple
(
'Z[i0 * sZ0 + i2*sZ1]'
,
...
@@ -2470,7 +2474,7 @@ class GpuReshape(tensor.Reshape, GpuOp):
...
@@ -2470,7 +2474,7 @@ class GpuReshape(tensor.Reshape, GpuOp):
if
(
x
.
size
%
ss
)
!=
0
:
if
(
x
.
size
%
ss
)
!=
0
:
raise
ValueError
(
"When using -1 in new shape, the computed new shape must be an multiple of the original shape."
)
raise
ValueError
(
"When using -1 in new shape, the computed new shape must be an multiple of the original shape."
)
shp_new
=
numpy
.
copy
(
shp
)
shp_new
=
numpy
.
copy
(
shp
)
shp_new
[
m1_idx
]
=
x
.
size
/
ss
shp_new
[
m1_idx
]
=
x
.
size
/
ss
shp
=
shp_new
shp
=
shp_new
else
:
else
:
...
@@ -2721,7 +2725,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
...
@@ -2721,7 +2725,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
def
perform
(
self
,
node
,
inp
,
out_
):
def
perform
(
self
,
node
,
inp
,
out_
):
# This don't work as CudaNdarray_Subscript() don't support it.
# This don't work as CudaNdarray_Subscript() don't support it.
#super(GpuAdvancedSubtensor1, self).perform(node, inp, out_)
#
super(GpuAdvancedSubtensor1, self).perform(node, inp, out_)
x
,
idx
=
inp
x
,
idx
=
inp
out
,
=
out_
out
,
=
out_
x_orig
=
x
x_orig
=
x
...
@@ -2733,7 +2737,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
...
@@ -2733,7 +2737,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
if
x
.
ndim
<=
3
:
if
x
.
ndim
<=
3
:
# CudaNdarray.take only supports ndim <= 3
# CudaNdarray.take only supports ndim <= 3
if
self
.
perform_using_take
is
not
None
:
if
self
.
perform_using_take
is
not
None
:
assert
self
.
perform_using_take
==
True
,
(
assert
self
.
perform_using_take
is
True
,
(
"GpuAdvancedSubtensor1 used the fast version"
)
"GpuAdvancedSubtensor1 used the fast version"
)
if
idx
.
dtype
!=
numpy
.
int64
:
if
idx
.
dtype
!=
numpy
.
int64
:
if
idx
.
dtype
in
[
numpy
.
int8
,
numpy
.
int16
,
numpy
.
int32
,
if
idx
.
dtype
in
[
numpy
.
int8
,
numpy
.
int16
,
numpy
.
int32
,
...
@@ -2762,7 +2766,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
...
@@ -2762,7 +2766,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
out
[
0
]
=
o
out
[
0
]
=
o
else
:
else
:
if
self
.
perform_using_take
is
not
None
:
if
self
.
perform_using_take
is
not
None
:
assert
self
.
perform_using_take
==
False
,
(
assert
self
.
perform_using_take
is
False
,
(
"GpuAdvancedSubtensor1 didn't use the fast version"
)
"GpuAdvancedSubtensor1 didn't use the fast version"
)
if
out_
[
0
][
0
]
is
None
or
out_
[
0
][
0
]
.
shape
!=
out_shape
:
if
out_
[
0
][
0
]
is
None
or
out_
[
0
][
0
]
.
shape
!=
out_shape
:
o
=
cuda_ndarray
.
cuda_ndarray
.
CudaNdarray
.
zeros
(
out_shape
)
o
=
cuda_ndarray
.
cuda_ndarray
.
CudaNdarray
.
zeros
(
out_shape
)
...
@@ -3006,8 +3010,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
...
@@ -3006,8 +3010,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
convert_map
=
{
8
:
tensor
.
basic
.
_convert_to_int8
,
convert_map
=
{
8
:
tensor
.
basic
.
_convert_to_int8
,
16
:
tensor
.
basic
.
_convert_to_int16
,
16
:
tensor
.
basic
.
_convert_to_int16
,
32
:
tensor
.
basic
.
_convert_to_int32
,
32
:
tensor
.
basic
.
_convert_to_int32
,
64
:
tensor
.
basic
.
_convert_to_int64
64
:
tensor
.
basic
.
_convert_to_int64
}
}
intwidth
=
theano
.
configdefaults
.
python_int_bitwidth
()
intwidth
=
theano
.
configdefaults
.
python_int_bitwidth
()
ilist_
=
convert_map
[
intwidth
](
ilist_
)
ilist_
=
convert_map
[
intwidth
](
ilist_
)
...
@@ -3354,7 +3357,6 @@ class GpuFlatten(gof.HideC, tensor.Flatten, GpuOp):
...
@@ -3354,7 +3357,6 @@ class GpuFlatten(gof.HideC, tensor.Flatten, GpuOp):
return
Apply
(
self
,
[
x
],
[
out_type
()])
return
Apply
(
self
,
[
x
],
[
out_type
()])
def
gpu_flatten
(
x
,
outdim
=
1
):
def
gpu_flatten
(
x
,
outdim
=
1
):
"""
"""
Implement flatten on the gpu.
Implement flatten on the gpu.
...
@@ -3378,9 +3380,9 @@ def gpu_flatten(x, outdim=1):
...
@@ -3378,9 +3380,9 @@ def gpu_flatten(x, outdim=1):
"""
"""
x
=
as_cuda_ndarray_variable
(
x
)
x
=
as_cuda_ndarray_variable
(
x
)
if
outdim
>
1
:
if
outdim
>
1
:
dims
=
tuple
(
x
.
shape
[:
outdim
-
1
])
+
(
-
1
,
)
dims
=
tuple
(
x
.
shape
[:
outdim
-
1
])
+
(
-
1
,
)
else
:
else
:
dims
=
(
-
1
,)
dims
=
(
-
1
,
)
return
GpuReshape
(
outdim
)(
x
,
dims
)
return
GpuReshape
(
outdim
)(
x
,
dims
)
...
@@ -3408,12 +3410,11 @@ class GpuJoin(tensor.Join, GpuOp):
...
@@ -3408,12 +3410,11 @@ class GpuJoin(tensor.Join, GpuOp):
as_tensor_variable_args
=
[
as_cuda_ndarray_variable
(
x
)
as_tensor_variable_args
=
[
as_cuda_ndarray_variable
(
x
)
for
x
in
tensors
]
for
x
in
tensors
]
output_maker
=
\
def
output_maker
(
bcast
):
lambda
bcast
:
CudaNdarrayType
(
broadcastable
=
bcast
)(
)
return
(
CudaNdarrayType
(
broadcastable
=
bcast
)()
)
return
tensor
.
Join
.
_make_node_internal
(
self
,
return
tensor
.
Join
.
_make_node_internal
(
axis
,
tensors
,
self
,
axis
,
tensors
,
as_tensor_variable_args
,
output_maker
)
as_tensor_variable_args
,
output_maker
)
def
perform
(
self
,
node
,
axis_and_tensors
,
out_
):
def
perform
(
self
,
node
,
axis_and_tensors
,
out_
):
out
,
=
out_
out
,
=
out_
...
@@ -3464,7 +3465,7 @@ class GpuJoin(tensor.Join, GpuOp):
...
@@ -3464,7 +3465,7 @@ class GpuJoin(tensor.Join, GpuOp):
# except for 'axis'
# except for 'axis'
def
construct_slices
(
curlen
):
def
construct_slices
(
curlen
):
slices
=
[
slice
(
None
,
None
,
None
)
for
i
in
\
slices
=
[
slice
(
None
,
None
,
None
)
for
i
in
xrange
(
len
(
template_shape
))]
xrange
(
len
(
template_shape
))]
slices
[
axis
]
=
slice
(
curpos
,
curpos
+
curlen
,
None
)
slices
[
axis
]
=
slice
(
curpos
,
curpos
+
curlen
,
None
)
return
tuple
(
slices
)
return
tuple
(
slices
)
...
@@ -3829,8 +3830,8 @@ class GpuAlloc(GpuAllocEmpty):
...
@@ -3829,8 +3830,8 @@ class GpuAlloc(GpuAllocEmpty):
# If the output is a constant, it will have to be deepcopied
# If the output is a constant, it will have to be deepcopied
# each time the function is called. So we do not fold.
# each time the function is called. So we do not fold.
return
False
return
False
elif
(
# T
he following ops work inplace of their input id 0.
# Else if t
he following ops work inplace of their input id 0.
client
[
1
]
==
0
and
elif
(
client
[
1
]
==
0
and
isinstance
(
client
[
0
]
.
op
,
(
isinstance
(
client
[
0
]
.
op
,
(
# Ops that will work inplace on the Alloc. So if they
# Ops that will work inplace on the Alloc. So if they
# get constant_folded, they would copy the
# get constant_folded, they would copy the
...
@@ -3844,8 +3845,7 @@ class GpuAlloc(GpuAllocEmpty):
...
@@ -3844,8 +3845,7 @@ class GpuAlloc(GpuAllocEmpty):
GpuAdvancedIncSubtensor1
,
GpuAdvancedIncSubtensor1
,
theano
.
sandbox
.
cuda
.
blas
.
GpuGemm
,
theano
.
sandbox
.
cuda
.
blas
.
GpuGemm
,
theano
.
sandbox
.
cuda
.
blas
.
GpuGemv
,
theano
.
sandbox
.
cuda
.
blas
.
GpuGemv
,
theano
.
sandbox
.
cuda
.
blas
.
GpuGer
,
theano
.
sandbox
.
cuda
.
blas
.
GpuGer
,))):
))):
return
False
return
False
# If the clients is a transfer, we don't want to fold. We
# If the clients is a transfer, we don't want to fold. We
# let the moving opt finish before deciding what to do.
# let the moving opt finish before deciding what to do.
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论