Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
1392f523
提交
1392f523
authored
10月 29, 2013
作者:
Frederic
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Make GpuSubtensor and GpuIncSubtensor use the new NumPy C API.
This simplify the *IncSubtensor code at the same time.
上级
07068846
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
62 行增加
和
92 行删除
+62
-92
basic_ops.py
theano/sandbox/cuda/basic_ops.py
+55
-70
subtensor.py
theano/tensor/subtensor.py
+7
-22
没有找到文件。
theano/sandbox/cuda/basic_ops.py
浏览文件 @
1392f523
...
...
@@ -2223,12 +2223,6 @@ class GpuReshape(tensor.Reshape, GpuOp):
out
[
0
]
=
x
.
reshape
(
tuple
(
shp
))
# C Code shared by GpuSubtensor and GpuIncSubtensor
_define_set_data
=
"""
#define CudaNdarray_set_device_data2(obj, ptr, base)
\
CudaNdarray_set_device_data(obj, (float *)ptr, base)
"""
class
GpuSubtensor
(
GpuOp
,
tensor
.
Subtensor
):
"""
Implement subtensor on the gpu.
...
...
@@ -2276,16 +2270,27 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
view_ndim
=
node
.
outputs
[
0
]
.
ndim
fail
=
sub
[
'fail'
]
decl
=
"CudaNdarray* xview = NULL;"
get_xview
=
self
.
helper_c_code
(
node
,
name
,
inputs
,
outputs
,
sub
,
self
.
idx_list
,
view_ndim
=
view_ndim
,
c_prefix
=
'CudaNdarray'
,
strides_mul
=
4
,
)
build_view
=
"""
//TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure
CudaNdarray*
xview = (CudaNdarray*) CudaNdarray_New(
%(view_ndim)
s);
xview = (CudaNdarray*) CudaNdarray_New(
%(view_ndim)
s);
if (!xview)
{
%(fail)
s;
}
if (CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(
%(x)
s),
(PyObject*) NULL))
if (CudaNdarray_set_device_data(
xview,
CudaNdarray_DEV_DATA(
%(x)
s) + xview_offset/4,
(PyObject*)
%(x)
s))
{
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
...
...
@@ -2294,43 +2299,24 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
%(fail)
s;
}
cnda_mark_dev_structure_dirty(xview);
"""
%
locals
()
get_xview
=
_define_set_data
+
\
self
.
helper_c_code
(
node
,
name
,
inputs
,
outputs
,
sub
,
self
.
idx_list
,
c_prefix
=
'CudaNdarray'
,
set_data
=
'CudaNdarray_set_device_data2'
,
set_dim
=
'CudaNdarray_set_dim'
,
set_stride
=
'CudaNdarray_set_stride'
,
update_flags
=
""
,
strides_mul
=
4
)
finish_view
=
""
#For broadcasted dimensions, set the strides to 0
#We can't do that only for broadcasted dimensions as this can happen for dimensions of size 0,
#That are rebroadcated later.
for
idx
in
range
(
node
.
outputs
[
0
]
.
ndim
):
finish_view
+=
"""
if(CudaNdarray_HOST_DIMS(xview)[
%(idx)
s]==1)
CudaNdarray_set_stride(xview,
%(idx)
s, 0);
"""
%
locals
()
finish_view
+=
"""
//Set the base only now
if(CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(xview),
%(x)
s)){
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set"
" the base of the view array");
Py_XDECREF(xview);
%(fail)
s;
for(int idx=0;idx <
%(view_ndim)
s; idx++){
//For broadcasted dimensions, set the strides to 0
//We can't do that only for broadcasted dimensions as this can happen
//for dimensions of size 0. That are rebroadcated later.
if(xview_dims[idx]==1)
CudaNdarray_set_stride(xview, idx, 0);
else
CudaNdarray_set_stride(xview, idx, xview_strides[idx]);
CudaNdarray_set_dim(xview, idx, xview_dims[idx]);
}
"""
%
locals
()
finish_view
=
"""
Py_XDECREF(
%(z)
s);
%(z)
s = xview;
"""
%
locals
()
return
build_view
+
"{"
+
get_xview
+
"}"
+
finish_view
return
decl
+
get_xview
+
build_view
+
finish_view
def
c_code_cache_version
(
self
):
hv
=
self
.
helper_c_code_cache_version
()
...
...
@@ -2719,6 +2705,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
"""
%
locals
()
class
GpuIncSubtensor
(
tensor
.
IncSubtensor
,
GpuOp
):
"""
Implement IncSubtensor on the gpu.
...
...
@@ -2756,6 +2743,9 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
return
"""(CudaNdarray*) CudaNdarray_Copy(
%(x)
s)"""
%
locals
()
def
decl_view
(
self
):
return
"CudaNdarray* zview = NULL;"
def
make_view_array
(
self
,
x
,
view_ndim
):
"""
:param x: a string identifying an array to be viewed
...
...
@@ -2765,17 +2755,32 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
This doesn't need to actually set up the view with the
right indexing; we'll do that manually later.
"""
return
"""CudaNdarray* zview = (CudaNdarray*)
CudaNdarray_New(
%(view_ndim)
s)"""
%
locals
()
ret
=
"""zview = (CudaNdarray*) CudaNdarray_New(
%(view_ndim)
s);
if (CudaNdarray_set_device_data(
zview,
CudaNdarray_DEV_DATA(
%(x)
s) + xview_offset/4,
(PyObject*)
%(x)
s))
{
zview = NULL;
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
" devdata field of the view");
}else{
cnda_mark_dev_structure_dirty(zview);
for(int idx=0;idx <
%(view_ndim)
s; idx++){
if(xview_dims[idx]==1)
CudaNdarray_set_stride(zview, idx, 0);
else
CudaNdarray_set_stride(zview, idx, xview_strides[idx]);
CudaNdarray_set_dim(zview, idx, xview_dims[idx]);
}
}
"""
%
locals
()
return
ret
def
get_helper_c_code_args
(
self
):
""" Return a dictionary of arguments to use with helper_c_code"""
return
{
'update_flags'
:
""
,
'c_prefix'
:
'CudaNdarray'
,
'set_data'
:
'CudaNdarray_set_device_data2'
,
'set_dim'
:
'CudaNdarray_set_dim'
,
'set_stride'
:
'CudaNdarray_set_stride'
,
'update_flags'
:
""
,
return
{
'c_prefix'
:
'CudaNdarray'
,
'strides_mul'
:
4
}
...
...
@@ -2789,24 +2794,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
return
"""CudaNdarray_CopyFromCudaNdarray(
%(view)
s,
%(source)
s)"""
%
locals
()
def
define_set_data
(
self
):
return
_define_set_data
def
link_view_array
(
self
,
x
,
fail
):
return
"""
if (CudaNdarray_set_device_data(zview, CudaNdarray_DEV_DATA(
%(x)
s),
(PyObject*) NULL))
{
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
" devdata field of the view");
Py_XDECREF(zview);
%(fail)
s;
}
cnda_mark_dev_structure_dirty(zview);
"""
%
locals
()
def
set_view_base
(
self
,
x
,
fail
):
return
"""
//Set the base only now
...
...
@@ -2823,9 +2810,8 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
def
add_to_zview
(
self
,
x
,
fail
):
return
"""
PyObject * add_result = CudaNdarray_inplace_add((PyObject *) zview,
(PyObject *) py_
%(x)
s);
PyObject * add_result = CudaNdarray_inplace_add((PyObject *) zview,
(PyObject *) py_
%(x)
s);
if (! add_result )
{
...
...
@@ -2839,7 +2825,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
%
locals
()
def
c_code_cache_version
(
self
):
parent_version
=
super
(
GpuIncSubtensor
,
self
)
.
c_code_cache_version
()
if
parent_version
:
return
parent_version
+
(
0
,)
...
...
theano/tensor/subtensor.py
浏览文件 @
1392f523
...
...
@@ -1098,6 +1098,9 @@ class IncSubtensor(Op):
(
x
,
y
)
+
inputs
,
[
x
.
type
()])
def
decl_view
(
self
):
return
"PyArrayObject * zview = NULL;"
def
perform
(
self
,
node
,
inputs
,
out_
):
out
,
=
out_
x
,
y
=
inputs
[:
2
]
...
...
@@ -1171,7 +1174,6 @@ class IncSubtensor(Op):
numpy
.
sum
([
not
isinstance
(
idx
,
slice
)
for
idx
in
self
.
idx_list
]))
decl
=
"PyArrayObject * zview = NULL;"
copy_of_x
=
self
.
copy_of_x
(
x
)
copy_input_if_necessary
=
"""
...
...
@@ -1186,15 +1188,11 @@ class IncSubtensor(Op):
}
else
{
if (
%(z)
s) Py_
DECREF(
%(z)
s);
Py_X
DECREF(
%(z)
s);
%(z)
s =
%(copy_of_x)
s;
}
"""
%
locals
()
alloc_zview
=
self
.
make_view_array
(
z
,
view_ndim
)
# On GPU, it takes two steps to make a view
link_zview
=
self
.
link_view_array
(
z
,
fail
)
# get info needed to make zview: a view of %(z)s
helper_args
=
self
.
get_helper_c_code_args
()
...
...
@@ -1210,6 +1208,8 @@ class IncSubtensor(Op):
)
#Make a view on the output, as we will write into it.
alloc_zview
=
self
.
make_view_array
(
z
,
view_ndim
)
build_view
=
"""
//TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure
...
...
@@ -1218,7 +1218,6 @@ class IncSubtensor(Op):
{
%(fail)
s;
}
%(link_zview)
s;
"""
%
locals
()
copy_into
=
self
.
copy_into
(
"zview"
,
y
)
...
...
@@ -1239,8 +1238,7 @@ class IncSubtensor(Op):
%(add_to_zview)
s
}
"""
%
locals
()
return
(
decl
+
return
(
self
.
decl_view
()
+
copy_input_if_necessary
+
get_zview
+
build_view
+
...
...
@@ -1322,19 +1320,6 @@ class IncSubtensor(Op):
"""
return
"""PyArray_CopyInto(
%(view)
s,
%(source)
s)"""
%
locals
()
def
link_view_array
(
self
,
x
,
fail
):
""" Returns code to complete making zview a view of x"""
# On CPU there is nothing to do, make_view_array already did this
return
""
def
set_view_base
(
self
,
x
,
fail
):
""" Returns code to make zview be a correct view of x,
after helper_c_code is done messing with x"""
# On CPU there is nothing to do
return
""
def
add_to_zview
(
self
,
x
,
fail
):
""" Return C code to add x to zview. Should DECREF zview if the
add fails."""
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论