Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
555be3a5
提交
555be3a5
authored
12月 06, 2013
作者:
Arnaud Bergeron
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
c_code() for GpuCAReduce.
This is not pretty code, but it is working code.
上级
8d26859e
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
219 行增加
和
13 行删除
+219
-13
elemwise.py
theano/sandbox/gpuarray/elemwise.py
+212
-11
test_elemwise.py
theano/sandbox/gpuarray/tests/test_elemwise.py
+7
-2
没有找到文件。
theano/sandbox/gpuarray/elemwise.py
浏览文件 @
555be3a5
...
@@ -11,11 +11,12 @@ try:
...
@@ -11,11 +11,12 @@ try:
from
pygpu.tools
import
ScalarArg
,
ArrayArg
from
pygpu.tools
import
ScalarArg
,
ArrayArg
from
pygpu.elemwise
import
ElemwiseKernel
from
pygpu.elemwise
import
ElemwiseKernel
from
pygpu.reduction
import
ReductionKernel
from
pygpu.reduction
import
ReductionKernel
from
pygpu.gpuarray
import
dtype_to_typecode
from
pygpu.gpuarray
import
dtype_to_typecode
,
dtype_to_ctype
except
ImportError
:
except
ImportError
:
pass
pass
from
theano.sandbox.gpuarray.basic_ops
import
as_gpuarray_variable
,
HideC
from
theano.sandbox.gpuarray.basic_ops
import
(
as_gpuarray_variable
,
HideC
,
GpuKernelBase
)
from
theano.sandbox.gpuarray.type
import
GpuArrayType
from
theano.sandbox.gpuarray.type
import
GpuArrayType
from
theano.gof.utils
import
MethodNotDefined
from
theano.gof.utils
import
MethodNotDefined
...
@@ -480,7 +481,7 @@ class GpuDimShuffle(HideC, DimShuffle):
...
@@ -480,7 +481,7 @@ class GpuDimShuffle(HideC, DimShuffle):
return
(
3
,)
return
(
3
,)
class
GpuCAReduce
(
HideC
,
CAReduceDtype
):
class
GpuCAReduce
(
GpuKernelBase
,
HideC
,
CAReduceDtype
):
def
__init__
(
self
,
scalar_op
,
axis
=
None
,
dtype
=
None
,
acc_dtype
=
None
):
def
__init__
(
self
,
scalar_op
,
axis
=
None
,
dtype
=
None
,
acc_dtype
=
None
):
if
not
hasattr
(
scalar_op
,
'identity'
):
if
not
hasattr
(
scalar_op
,
'identity'
):
raise
ValueError
(
"No identity on scalar op"
)
raise
ValueError
(
"No identity on scalar op"
)
...
@@ -510,18 +511,219 @@ class GpuCAReduce(HideC, CAReduceDtype):
...
@@ -510,18 +511,219 @@ class GpuCAReduce(HideC, CAReduceDtype):
return
Apply
(
res
.
op
,
[
input
],
[
otype
()])
return
Apply
(
res
.
op
,
[
input
],
[
otype
()])
def
make_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
):
def
make_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
):
# cache the kernel object
self
.
get_kernel_cache
(
node
)
return
super
(
GpuCAReduce
,
self
)
.
make_thunk
(
node
,
storage_map
,
compute_map
,
no_recycling
)
def
get_kernel_cache
(
self
,
node
):
attr
=
'@cache_reduction_k'
if
self
.
axis
is
None
:
if
self
.
axis
is
None
:
redux
=
[
True
]
*
node
.
inputs
[
0
]
.
ndim
redux
=
[
True
]
*
node
.
inputs
[
0
]
.
ndim
else
:
else
:
redux
=
self
.
redux
redux
=
self
.
redux
if
not
hasattr
(
node
,
attr
):
acc_dtype
=
getattr
(
self
,
'acc_dtype'
,
None
)
if
acc_dtype
is
None
:
acc_dtype
=
node
.
outputs
[
0
]
.
type
.
dtype
if
any
(
redux
):
setattr
(
node
,
attr
,
self
.
generate_kernel
(
node
,
acc_dtype
,
redux
))
if
any
(
redux
):
return
getattr
(
node
,
attr
)
def
c_kernel_code
(
self
,
node
):
if
not
any
(
getattr
(
self
,
'redux'
,
[
node
.
inputs
[
0
]
.
ndim
!=
0
])):
# Some OpenCL compilers do not accept no-arguments kernels
return
"KERNEL void reduk(GLOBAL_MEM float *a) {}"
else
:
k
=
self
.
get_kernel_cache
(
node
)
_
,
src
,
_
,
_
=
k
.
_get_basic_kernel
(
k
.
init_local_size
,
node
.
inputs
[
0
]
.
ndim
)
return
src
def
c_kernel_name
(
self
):
return
"reduk"
def
c_kernel_params
(
self
,
node
):
if
not
any
(
getattr
(
self
,
'redux'
,
[
node
.
inputs
[
0
]
.
ndim
!=
0
])):
return
[
"GA_FLOAT"
]
else
:
# Make sure this is synced with the call definition in
# pygpu/reduction.py
nd
=
node
.
inputs
[
0
]
.
ndim
res
=
[
"GA_UINT"
,
"GA_BUFFER"
]
res
.
extend
(
"GA_UINT"
for
_
in
range
(
nd
))
res
.
append
(
"GA_BUFFER"
)
res
.
append
(
"GA_UINT"
)
res
.
extend
(
"GA_INT"
for
_
in
range
(
nd
))
return
res
def
c_kernel_flags
(
self
,
node
):
acc_dtype
=
getattr
(
self
,
'acc_dtype'
,
None
)
acc_dtype
=
getattr
(
self
,
'acc_dtype'
,
None
)
if
acc_dtype
is
None
:
if
acc_dtype
is
None
:
acc_dtype
=
node
.
outputs
[
0
]
.
type
.
dtype
acc_dtype
=
node
.
outputs
[
0
]
.
type
.
dtype
if
any
(
redux
):
return
self
.
_get_kernel_flags
(
node
.
inputs
[
0
]
.
type
.
dtype
,
node
.
_cache_reduction_k
=
self
.
generate_kernel
(
node
,
acc_dtype
,
acc_dtype
,
redux
)
node
.
outputs
[
0
]
.
type
.
dtype
)
return
super
(
GpuCAReduce
,
self
)
.
make_thunk
(
node
,
storage_map
,
compute_map
,
no_recycling
)
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
if
not
any
(
getattr
(
self
,
'redux'
,
[
node
.
inputs
[
0
]
.
ndim
!=
0
])):
# We special case the no-reduction case since the gpu
# kernel has trouble handling it.
return
"""
Py_XDECREF(
%(out)
s);
%(out)
s = pygpu_copy(
%(inp)
s, GA_ANY_ORDER);
if (!
%(out)
s) {
%(fail)
s
}
if (
%(sync)
d)
GpuArray_sync(&
%(out)
s->ga);
"""
%
dict
(
out
=
out
[
0
],
inp
=
inp
[
0
],
fail
=
sub
[
'fail'
],
sync
=
bool
(
config
.
gpuarray
.
sync
))
k
=
self
.
get_kernel_cache
(
node
)
_
,
src
,
_
,
ls
=
k
.
_get_basic_kernel
(
k
.
init_local_size
,
node
.
inputs
[
0
]
.
ndim
)
if
self
.
axis
is
None
:
redux
=
[
True
]
*
node
.
inputs
[
0
]
.
ndim
else
:
redux
=
self
.
redux
acc_dtype
=
getattr
(
self
,
'acc_dtype'
,
None
)
if
acc_dtype
is
None
:
acc_dtype
=
node
.
outputs
[
0
]
.
type
.
dtype
input
=
inp
[
0
]
output
=
out
[
0
]
nd_out
=
node
.
outputs
[
0
]
.
ndim
code
=
"""
size_t gs = 1;
unsigned int n = 1;
unsigned int proxy_dim[
%(nd_in)
s];
unsigned int proxy_off;
int proxy_str[
%(nd_in)
s];
void *args[
%(n_args)
s];
PyGpuArrayObject *tmp;
int err;
"""
%
dict
(
n_args
=
4
+
(
node
.
inputs
[
0
]
.
ndim
*
2
),
nd_in
=
node
.
inputs
[
0
]
.
ndim
)
if
nd_out
!=
0
:
code
+=
"""
size_t out_dims[
%(nd_out)
s];
int need_out =
%(output)
s == NULL ||
%(output)
s->ga.nd !=
%(nd_out)
s;
"""
%
dict
(
nd_out
=
nd_out
,
output
=
output
)
j
=
0
for
i
in
range
(
node
.
inputs
[
0
]
.
ndim
):
if
not
self
.
redux
[
i
]:
code
+=
"""
out_dims[
%(j)
s] =
%(input)
s->ga.dimensions[
%(i)
s];
if (!need_out)
need_out |=
%(output)
s->ga.dimensions[
%(j)
s] != out_dims[
%(j)
s];
"""
%
dict
(
j
=
j
,
i
=
i
,
input
=
input
,
output
=
output
)
j
+=
1
code
+=
"""
if (need_out) {
%(output)
s = pygpu_empty(
%(nd_out)
s, out_dims,
%(out_type)
s, GA_C_ORDER, pygpu_default_context(), Py_None);
if (!
%(output)
s) {
%(fail)
s
}
}
"""
%
dict
(
output
=
output
,
nd_out
=
nd_out
,
fail
=
sub
[
'fail'
],
out_type
=
dtype_to_typecode
(
node
.
outputs
[
0
]
.
type
.
dtype
))
else
:
code
+=
"""
if (
%(output)
s == NULL ||
%(output)
s->ga.nd != 0) {
Py_XDECREF(
%(output)
s);
%(output)
s = pygpu_empty(0, NULL,
%(out_type)
s, GA_C_ORDER,
pygpu_default_context(), Py_None);
if (!
%(output)
s) {
%(fail)
s
}
}
"""
%
dict
(
output
=
output
,
fail
=
sub
[
'fail'
],
out_type
=
dtype_to_typecode
(
node
.
outputs
[
0
]
.
type
.
dtype
))
if
acc_dtype
!=
node
.
outputs
[
0
]
.
type
.
dtype
:
code
+=
"""
tmp = pygpu_empty(
%(output)
s->ga.nd,
%(output)
s->ga.dimensions,
%(acc_type)
s, GA_C_ORDER, pygpu_default_context(),
Py_None);
if (!tmp)
%(fail)
s
"""
%
dict
(
output
=
output
,
fail
=
sub
[
'fail'
],
acc_type
=
dtype_to_typecode
(
acc_dtype
))
else
:
code
+=
"""
tmp =
%(output)
s;
Py_INCREF(tmp);
"""
%
dict
(
output
=
output
)
# We need the proxies since we are passing a pointer to the
# data into the call and therefore we need a real copy of the
# data in the proper type.
code
+=
"""
args[0] = &n;
args[1] = &tmp->ga;
"""
%
dict
(
output
=
output
)
p
=
2
for
i
in
range
(
node
.
inputs
[
0
]
.
ndim
):
if
not
redux
[
i
]:
var
=
'gs'
else
:
var
=
'n'
code
+=
"""
proxy_dim[
%(i)
s] =
%(input)
s->ga.dimensions[
%(i)
s];
args[
%(p)
s] = &proxy_dim[
%(i)
s];
%(var)
s *=
%(input)
s->ga.dimensions[
%(i)
s];
"""
%
dict
(
i
=
i
,
p
=
p
,
input
=
input
,
var
=
var
)
p
+=
1
code
+=
"""
args[
%(p)
s] = &
%(input)
s->ga;
proxy_off =
%(input)
s->ga.offset;
args[
%(p)
s+1] = &proxy_off;
"""
%
dict
(
p
=
p
,
input
=
input
)
p
+=
2
for
i
in
range
(
node
.
inputs
[
0
]
.
ndim
):
code
+=
"""
proxy_str[
%(i)
s] =
%(input)
s->ga.strides[
%(i)
s];
args[
%(p)
s] = &proxy_str[
%(i)
s];
"""
%
dict
(
p
=
p
,
i
=
i
,
input
=
input
)
p
+=
1
code
+=
"""
if (gs == 0) gs = 1;
err = GpuKernel_call(&
%(k_var)
s, 0,
%(ls)
s, gs, args);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"compyte error: GpuCAReduce:
%%
s.",
GpuKernel_error(&
%(k_var)
s, err));
%(fail)
s
}
if (
%(cast_out)
d) {
err = GpuArray_move(&
%(output)
s->ga, &tmp->ga);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"compyte error: GpuCAReduce [cast]:
%%
s.",
GpuArray_error(&tmp->ga, err));
%(fail)
s
}
} else {
Py_XDECREF(
%(output)
s);
%(output)
s = tmp;
}
if (
%(sync)
d)
GpuArray_sync(&
%(output)
s->ga);
"""
%
dict
(
k_var
=
self
.
c_kernel_obj
(
name
),
sync
=
bool
(
config
.
gpuarray
.
sync
),
ls
=
ls
,
fail
=
sub
[
'fail'
],
output
=
output
,
input
=
input
,
cast_out
=
bool
(
acc_dtype
!=
node
.
outputs
[
0
]
.
type
.
dtype
))
return
code
def
c_code_cache_version
(
self
):
return
None
def
generate_kernel
(
self
,
node
,
odtype
,
redux
):
def
generate_kernel
(
self
,
node
,
odtype
,
redux
):
if
isinstance
(
self
.
scalar_op
,
scalar
.
basic
.
Add
):
if
isinstance
(
self
.
scalar_op
,
scalar
.
basic
.
Add
):
...
@@ -533,8 +735,7 @@ class GpuCAReduce(HideC, CAReduceDtype):
...
@@ -533,8 +735,7 @@ class GpuCAReduce(HideC, CAReduceDtype):
return
ReductionKernel
(
pygpu
.
get_default_context
(),
odtype
,
return
ReductionKernel
(
pygpu
.
get_default_context
(),
odtype
,
self
.
scalar_op
.
identity
,
reduce_expr
,
redux
,
self
.
scalar_op
.
identity
,
reduce_expr
,
redux
,
arguments
=
[
make_argument
(
node
.
inputs
[
0
],
'a'
)],
arguments
=
[
make_argument
(
node
.
inputs
[
0
],
'a'
)],
init_nd
=
node
.
inputs
[
0
]
.
ndim
init_nd
=
node
.
inputs
[
0
]
.
ndim
)
)
def
perform
(
self
,
node
,
inp
,
out
):
def
perform
(
self
,
node
,
inp
,
out
):
input
,
=
inp
input
,
=
inp
...
@@ -546,7 +747,7 @@ class GpuCAReduce(HideC, CAReduceDtype):
...
@@ -546,7 +747,7 @@ class GpuCAReduce(HideC, CAReduceDtype):
redux
=
self
.
redux
redux
=
self
.
redux
if
any
(
redux
):
if
any
(
redux
):
output
[
0
]
=
node
.
_cache_reduction_k
(
input
)
.
astype
(
copy
=
False
,
output
[
0
]
=
self
.
get_kernel_cache
(
node
)
(
input
)
.
astype
(
copy
=
False
,
dtype
=
node
.
outputs
[
0
]
.
type
.
dtype
)
dtype
=
node
.
outputs
[
0
]
.
type
.
dtype
)
else
:
else
:
output
[
0
]
=
pygpu
.
gpuarray
.
array
(
input
,
copy
=
True
,
output
[
0
]
=
pygpu
.
gpuarray
.
array
(
input
,
copy
=
True
,
...
...
theano/sandbox/gpuarray/tests/test_elemwise.py
浏览文件 @
555be3a5
...
@@ -55,7 +55,12 @@ class test_GpuCAReduce(test_CAReduce):
...
@@ -55,7 +55,12 @@ class test_GpuCAReduce(test_CAReduce):
test_nan
=
True
)
test_nan
=
True
)
def
test_c
(
self
):
def
test_c
(
self
):
raise
SkipTest
(
"no C code"
)
for
dtype
in
self
.
dtypes
+
self
.
bin_dtypes
:
for
op
in
self
.
reds
:
self
.
with_linker
(
gof
.
CLinker
(),
op
,
dtype
=
dtype
)
def
test_c_nan
(
self
):
def
test_c_nan
(
self
):
raise
SkipTest
(
"no C code"
)
for
dtype
in
self
.
dtypes
:
for
op
in
self
.
reds
:
self
.
with_linker
(
gof
.
CLinker
(),
op
,
dtype
=
dtype
,
test_nan
=
True
)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论