Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
b895c6e8
提交
b895c6e8
authored
5月 26, 2016
作者:
Frédéric Bastien
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4542 from abergeron/api_changes
libgpuarray api changes
上级
19619e9a
c59491c7
隐藏空白字符变更
内嵌
并排
正在显示
19 个修改的文件
包含
89 行增加
和
107 行删除
+89
-107
__init__.py
theano/gpuarray/__init__.py
+1
-1
basic_ops.py
theano/gpuarray/basic_ops.py
+4
-4
blockgemv.c
theano/gpuarray/blockgemv.c
+22
-29
blockger.c
theano/gpuarray/blockger.c
+19
-26
dnn.py
theano/gpuarray/dnn.py
+1
-1
dnn_fwd.c
theano/gpuarray/dnn_fwd.c
+3
-3
dnn_gi.c
theano/gpuarray/dnn_gi.c
+3
-3
dnn_gw.c
theano/gpuarray/dnn_gw.c
+3
-3
elemwise.py
theano/gpuarray/elemwise.py
+3
-3
extra_ops.py
theano/gpuarray/extra_ops.py
+6
-9
gemm16.c
theano/gpuarray/gemm16.c
+1
-1
neighbours.py
theano/gpuarray/neighbours.py
+1
-1
nerv.py
theano/gpuarray/nerv.py
+1
-1
nnet.py
theano/gpuarray/nnet.py
+4
-4
opt.py
theano/gpuarray/opt.py
+4
-4
subtensor.py
theano/gpuarray/subtensor.py
+9
-10
test_elemwise.py
theano/gpuarray/tests/test_elemwise.py
+2
-2
test_extra_ops.py
theano/gpuarray/tests/test_extra_ops.py
+1
-1
test_opt.py
theano/gpuarray/tests/test_opt.py
+1
-1
没有找到文件。
theano/gpuarray/__init__.py
浏览文件 @
b895c6e8
...
@@ -42,7 +42,7 @@ register_transfer(transfer)
...
@@ -42,7 +42,7 @@ register_transfer(transfer)
def
init_dev
(
dev
,
name
=
None
):
def
init_dev
(
dev
,
name
=
None
):
v
=
pygpu
.
gpuarray
.
api_version
()
v
=
pygpu
.
gpuarray
.
api_version
()
expected
=
-
999
8
expected
=
-
999
7
if
v
[
0
]
!=
expected
:
if
v
[
0
]
!=
expected
:
raise
RuntimeError
(
"Wrong major API version for gpuarray:"
,
v
[
0
],
raise
RuntimeError
(
"Wrong major API version for gpuarray:"
,
v
[
0
],
"Make sure Theano and libgpuarray/pygpu "
"Make sure Theano and libgpuarray/pygpu "
...
...
theano/gpuarray/basic_ops.py
浏览文件 @
b895c6e8
...
@@ -259,14 +259,14 @@ class GpuKernelBase(object):
...
@@ -259,14 +259,14 @@ class GpuKernelBase(object):
int types[
%(numargs)
u] = {
%(types)
s};
int types[
%(numargs)
u] = {
%(types)
s};
const char *bcode =
%(bvar)
s;
const char *bcode =
%(bvar)
s;
size_t sz = sizeof(
%(bvar)
s);
size_t sz = sizeof(
%(bvar)
s);
if (GpuKernel_init(&
%(ovar)
s,
%(ctx)
s->
ops,
%(ctx)
s->
ctx, 1, &bcode, &sz,
if (GpuKernel_init(&
%(ovar)
s,
%(ctx)
s->ctx, 1, &bcode, &sz,
"
%(kname)
s",
%(numargs)
u, types, GA_USE_BINARY, NULL)
"
%(kname)
s",
%(numargs)
u, types, GA_USE_BINARY, NULL)
!= GA_NO_ERROR) {
!= GA_NO_ERROR) {
if ((err = GpuKernel_init(&
%(ovar)
s,
%(ctx)
s->
ops,
%(ctx)
s->
ctx, 1,
if ((err = GpuKernel_init(&
%(ovar)
s,
%(ctx)
s->ctx, 1,
&
%(cname)
s, NULL, "
%(kname)
s",
%(numargs)
u,
&
%(cname)
s, NULL, "
%(kname)
s",
%(numargs)
u,
types,
%(flags)
s, NULL)) != GA_NO_ERROR) {
types,
%(flags)
s, NULL)) != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuKernel_init error
%%
d:
%%
s",
PyErr_Format(PyExc_RuntimeError, "GpuKernel_init error
%%
d:
%%
s",
err,
Gpu_error(
%(ctx)
s->ops,
%(ctx)
s->ctx, err));
err,
gpucontext_error(
%(ctx)
s->ctx, err));
%(fail)
s
%(fail)
s
}
}
}
}
...
@@ -310,7 +310,7 @@ class GpuKernelBase(object):
...
@@ -310,7 +310,7 @@ class GpuKernelBase(object):
The node that we need the cache version for.
The node that we need the cache version for.
"""
"""
return
(
3
,
self
.
get_params
(
node
)
.
bin_id
)
return
(
4
,
self
.
get_params
(
node
)
.
bin_id
)
class
HostFromGpu
(
Op
):
class
HostFromGpu
(
Op
):
...
...
theano/gpuarray/blockgemv.c
浏览文件 @
b895c6e8
...
@@ -24,16 +24,9 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W,
...
@@ -24,16 +24,9 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W,
size_t
*
offW
=
NULL
;
size_t
*
offW
=
NULL
;
size_t
*
offInp
=
NULL
;
size_t
*
offInp
=
NULL
;
size_t
*
offOut
=
NULL
;
size_t
*
offOut
=
NULL
;
gpuarray_blas_ops
*
blas_ops
;
int
err
;
int
err
;
err
=
ctx
->
ops
->
property
(
ctx
->
ctx
,
NULL
,
NULL
,
err
=
gpublas_setup
(
ctx
->
ctx
);
GA_CTX_PROP_BLAS_OPS
,
&
blas_ops
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
"Can't get blas ops"
);
return
-
1
;
}
err
=
blas_ops
->
setup
(
ctx
->
ctx
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
"Can't setup blas"
);
PyErr_SetString
(
PyExc_RuntimeError
,
"Can't setup blas"
);
return
-
1
;
return
-
1
;
...
@@ -93,29 +86,29 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W,
...
@@ -93,29 +86,29 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W,
}
}
if
(
out
->
ga
.
typecode
==
GA_FLOAT
)
{
if
(
out
->
ga
.
typecode
==
GA_FLOAT
)
{
err
=
blas_ops
->
sgemvBatch
(
cb_fortran
,
transA
,
err
=
gpublas_
sgemvBatch
(
cb_fortran
,
transA
,
PyGpuArray_DIMS
(
out
)[
2
],
PyGpuArray_DIMS
(
out
)[
2
],
PyGpuArray_DIMS
(
h
)[
2
],
1
,
PyGpuArray_DIMS
(
h
)[
2
],
1
,
W_list
,
offW
,
lda
,
W_list
,
offW
,
lda
,
inp_list
,
offInp
,
PyGpuArray_STRIDES
(
h
)[
2
]
/
gpuarray_get_elsize
(
h
->
ga
.
typecode
),
inp_list
,
offInp
,
PyGpuArray_STRIDES
(
h
)[
2
]
/
gpuarray_get_elsize
(
h
->
ga
.
typecode
),
1
,
out_list
,
offOut
,
PyGpuArray_STRIDES
(
out
)[
2
]
/
gpuarray_get_elsize
(
out
->
ga
.
typecode
),
1
,
out_list
,
offOut
,
PyGpuArray_STRIDES
(
out
)[
2
]
/
gpuarray_get_elsize
(
out
->
ga
.
typecode
),
PyGpuArray_DIMS
(
out
)[
1
]
*
PyGpuArray_DIMS
(
h
)[
1
]
*
PyGpuArray_DIMS
(
out
)[
0
],
0
);
PyGpuArray_DIMS
(
out
)[
1
]
*
PyGpuArray_DIMS
(
h
)[
1
]
*
PyGpuArray_DIMS
(
out
)[
0
],
0
);
}
else
if
(
out
->
ga
.
typecode
==
GA_DOUBLE
)
{
}
else
if
(
out
->
ga
.
typecode
==
GA_DOUBLE
)
{
err
=
blas_ops
->
dgemvBatch
(
cb_fortran
,
transA
,
err
=
gpublas_
dgemvBatch
(
cb_fortran
,
transA
,
PyGpuArray_DIMS
(
out
)[
2
],
PyGpuArray_DIMS
(
out
)[
2
],
PyGpuArray_DIMS
(
h
)[
2
],
1
,
PyGpuArray_DIMS
(
h
)[
2
],
1
,
W_list
,
offW
,
lda
,
W_list
,
offW
,
lda
,
inp_list
,
offInp
,
PyGpuArray_STRIDES
(
h
)[
2
]
/
gpuarray_get_elsize
(
h
->
ga
.
typecode
),
inp_list
,
offInp
,
PyGpuArray_STRIDES
(
h
)[
2
]
/
gpuarray_get_elsize
(
h
->
ga
.
typecode
),
1
,
out_list
,
offOut
,
PyGpuArray_STRIDES
(
out
)[
2
]
/
gpuarray_get_elsize
(
out
->
ga
.
typecode
),
1
,
out_list
,
offOut
,
PyGpuArray_STRIDES
(
out
)[
2
]
/
gpuarray_get_elsize
(
out
->
ga
.
typecode
),
PyGpuArray_DIMS
(
out
)[
1
]
*
PyGpuArray_DIMS
(
h
)[
1
]
*
PyGpuArray_DIMS
(
out
)[
0
],
0
);
PyGpuArray_DIMS
(
out
)[
1
]
*
PyGpuArray_DIMS
(
h
)[
1
]
*
PyGpuArray_DIMS
(
out
)[
0
],
0
);
}
else
if
(
out
->
ga
.
typecode
==
GA_HALF
)
{
}
else
if
(
out
->
ga
.
typecode
==
GA_HALF
)
{
err
=
blas_ops
->
sgemvBatch
(
cb_fortran
,
transA
,
err
=
gpublas_
sgemvBatch
(
cb_fortran
,
transA
,
PyGpuArray_DIMS
(
out
)[
2
],
PyGpuArray_DIMS
(
out
)[
2
],
PyGpuArray_DIMS
(
h
)[
2
],
1
,
PyGpuArray_DIMS
(
h
)[
2
],
1
,
W_list
,
offW
,
lda
,
W_list
,
offW
,
lda
,
inp_list
,
offInp
,
PyGpuArray_STRIDES
(
h
)[
2
]
/
gpuarray_get_elsize
(
h
->
ga
.
typecode
),
inp_list
,
offInp
,
PyGpuArray_STRIDES
(
h
)[
2
]
/
gpuarray_get_elsize
(
h
->
ga
.
typecode
),
1
,
out_list
,
offOut
,
PyGpuArray_STRIDES
(
out
)[
2
]
/
gpuarray_get_elsize
(
out
->
ga
.
typecode
),
1
,
out_list
,
offOut
,
PyGpuArray_STRIDES
(
out
)[
2
]
/
gpuarray_get_elsize
(
out
->
ga
.
typecode
),
PyGpuArray_DIMS
(
out
)[
1
]
*
PyGpuArray_DIMS
(
h
)[
1
]
*
PyGpuArray_DIMS
(
out
)[
0
],
0
);
PyGpuArray_DIMS
(
out
)[
1
]
*
PyGpuArray_DIMS
(
h
)[
1
]
*
PyGpuArray_DIMS
(
out
)[
0
],
0
);
}
else
{
}
else
{
err
=
GA_INVALID_ERROR
;
err
=
GA_INVALID_ERROR
;
}
}
...
...
theano/gpuarray/blockger.c
浏览文件 @
b895c6e8
...
@@ -12,16 +12,9 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x,
...
@@ -12,16 +12,9 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x,
size_t
*
offOut
=
NULL
;
size_t
*
offOut
=
NULL
;
size_t
*
offX
=
NULL
;
size_t
*
offX
=
NULL
;
size_t
*
offY
=
NULL
;
size_t
*
offY
=
NULL
;
gpuarray_blas_ops
*
blas_ops
;
int
err
;
int
err
;
err
=
ctx
->
ops
->
property
(
ctx
->
ctx
,
NULL
,
NULL
,
err
=
gpublas_setup
(
ctx
->
ctx
);
GA_CTX_PROP_BLAS_OPS
,
&
blas_ops
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
"Can't get blas ops"
);
return
-
1
;
}
err
=
blas_ops
->
setup
(
ctx
->
ctx
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
"Can't setup blas"
);
PyErr_SetString
(
PyExc_RuntimeError
,
"Can't setup blas"
);
return
-
1
;
return
-
1
;
...
@@ -84,26 +77,26 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x,
...
@@ -84,26 +77,26 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x,
ssize_t
str_out
=
PyGpuArray_STRIDES
(
out
)[
2
]
/
gpuarray_get_elsize
(
out
->
ga
.
typecode
);
ssize_t
str_out
=
PyGpuArray_STRIDES
(
out
)[
2
]
/
gpuarray_get_elsize
(
out
->
ga
.
typecode
);
if
(
out
->
ga
.
typecode
==
GA_FLOAT
)
{
if
(
out
->
ga
.
typecode
==
GA_FLOAT
)
{
err
=
blas_ops
->
sgerBatch
(
cb_fortran
,
err
=
gpublas_
sgerBatch
(
cb_fortran
,
PyGpuArray_DIMS
(
y
)[
2
],
PyGpuArray_DIMS
(
x
)[
2
],
PyGpuArray_DIMS
(
y
)[
2
],
PyGpuArray_DIMS
(
x
)[
2
],
*
(
float
*
)
PyArray_GETPTR1
(
alpha
,
0
),
*
(
float
*
)
PyArray_GETPTR1
(
alpha
,
0
),
y_list
,
offY
,
str_y
,
x_list
,
offX
,
str_x
,
y_list
,
offY
,
str_y
,
x_list
,
offX
,
str_x
,
o_list
,
offOut
,
str_out
,
o_list
,
offOut
,
str_out
,
PyGpuArray_DIMS
(
x
)[
0
]
*
PyGpuArray_DIMS
(
x
)[
1
]
*
PyGpuArray_DIMS
(
y
)[
1
],
0
);
PyGpuArray_DIMS
(
x
)[
0
]
*
PyGpuArray_DIMS
(
x
)[
1
]
*
PyGpuArray_DIMS
(
y
)[
1
],
0
);
}
else
if
(
out
->
ga
.
typecode
==
GA_DOUBLE
)
{
}
else
if
(
out
->
ga
.
typecode
==
GA_DOUBLE
)
{
err
=
blas_ops
->
dgerBatch
(
cb_fortran
,
err
=
gpublas_
dgerBatch
(
cb_fortran
,
PyGpuArray_DIMS
(
y
)[
2
],
PyGpuArray_DIMS
(
x
)[
2
],
PyGpuArray_DIMS
(
y
)[
2
],
PyGpuArray_DIMS
(
x
)[
2
],
*
(
double
*
)
PyArray_GETPTR1
(
alpha
,
0
),
*
(
double
*
)
PyArray_GETPTR1
(
alpha
,
0
),
y_list
,
offY
,
str_y
,
x_list
,
offX
,
str_x
,
y_list
,
offY
,
str_y
,
x_list
,
offX
,
str_x
,
o_list
,
offOut
,
str_out
,
o_list
,
offOut
,
str_out
,
PyGpuArray_DIMS
(
x
)[
0
]
*
PyGpuArray_DIMS
(
x
)[
1
]
*
PyGpuArray_DIMS
(
y
)[
1
],
0
);
PyGpuArray_DIMS
(
x
)[
0
]
*
PyGpuArray_DIMS
(
x
)[
1
]
*
PyGpuArray_DIMS
(
y
)[
1
],
0
);
}
else
if
(
out
->
ga
.
typecode
==
GA_HALF
)
{
}
else
if
(
out
->
ga
.
typecode
==
GA_HALF
)
{
err
=
blas_ops
->
hgerBatch
(
cb_fortran
,
err
=
gpublas_
hgerBatch
(
cb_fortran
,
PyGpuArray_DIMS
(
y
)[
2
],
PyGpuArray_DIMS
(
x
)[
2
],
PyGpuArray_DIMS
(
y
)[
2
],
PyGpuArray_DIMS
(
x
)[
2
],
*
(
float
*
)
PyArray_GETPTR1
(
alpha
,
0
),
*
(
float
*
)
PyArray_GETPTR1
(
alpha
,
0
),
y_list
,
offY
,
str_y
,
x_list
,
offX
,
str_x
,
y_list
,
offY
,
str_y
,
x_list
,
offX
,
str_x
,
o_list
,
offOut
,
str_out
,
o_list
,
offOut
,
str_out
,
PyGpuArray_DIMS
(
x
)[
0
]
*
PyGpuArray_DIMS
(
x
)[
1
]
*
PyGpuArray_DIMS
(
y
)[
1
],
0
);
PyGpuArray_DIMS
(
x
)[
0
]
*
PyGpuArray_DIMS
(
x
)[
1
]
*
PyGpuArray_DIMS
(
y
)[
1
],
0
);
}
else
{
}
else
{
err
=
GA_INVALID_ERROR
;
err
=
GA_INVALID_ERROR
;
}
}
...
...
theano/gpuarray/dnn.py
浏览文件 @
b895c6e8
...
@@ -125,7 +125,7 @@ def dnn_available(context_name):
...
@@ -125,7 +125,7 @@ def dnn_available(context_name):
ctx
=
get_context
(
context_name
)
ctx
=
get_context
(
context_name
)
if
not
ctx
.
kind
==
'cuda'
:
if
not
ctx
.
kind
==
b
'cuda'
:
dnn_available
.
msg
=
"Not on a CUDA device."
dnn_available
.
msg
=
"Not on a CUDA device."
return
False
return
False
...
...
theano/gpuarray/dnn_fwd.c
浏览文件 @
b895c6e8
...
@@ -105,7 +105,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -105,7 +105,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
algo
=
choice
.
algo
;
algo
=
choice
.
algo
;
#else
#else
size_t
free
;
size_t
free
;
int
err2
=
c
->
ops
->
property
(
c
->
ctx
,
NULL
,
NULL
,
GA_CTX_PROP_FREE_GMEM
,
&
free
);
int
err2
=
gpucontext_property
(
c
->
ctx
,
GA_CTX_PROP_FREE_GMEM
,
&
free
);
if
(
err2
!=
GA_NO_ERROR
)
{
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
...
@@ -234,7 +234,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -234,7 +234,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
* to place a nice get_work_mem() function in.
* to place a nice get_work_mem() function in.
*/
*/
if
(
worksize
!=
0
)
{
if
(
worksize
!=
0
)
{
workspace
=
c
->
ops
->
buffer
_alloc
(
c
->
ctx
,
worksize
,
NULL
,
0
,
NULL
);
workspace
=
gpudata
_alloc
(
c
->
ctx
,
worksize
,
NULL
,
0
,
NULL
);
if
(
workspace
==
NULL
)
{
if
(
workspace
==
NULL
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
PyErr_SetString
(
PyExc_RuntimeError
,
"Could not allocate working memory"
);
"Could not allocate working memory"
);
...
@@ -258,7 +258,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -258,7 +258,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
*
output
));
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
*
output
));
if
(
worksize
!=
0
)
if
(
worksize
!=
0
)
c
->
ops
->
buffer
_release
(
workspace
);
gpudata
_release
(
workspace
);
cuda_record
(
input
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
input
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
kerns
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
kerns
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
...
...
theano/gpuarray/dnn_gi.c
浏览文件 @
b895c6e8
...
@@ -106,7 +106,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -106,7 +106,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
algo
=
choice
.
algo
;
algo
=
choice
.
algo
;
#else
#else
size_t
free
;
size_t
free
;
int
err2
=
c
->
ops
->
property
(
c
->
ctx
,
NULL
,
NULL
,
GA_CTX_PROP_FREE_GMEM
,
&
free
);
int
err2
=
gpucontext_property
(
c
->
ctx
,
GA_CTX_PROP_FREE_GMEM
,
&
free
);
if
(
err2
!=
GA_NO_ERROR
)
{
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
...
@@ -204,7 +204,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -204,7 +204,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
}
}
if
(
worksize
!=
0
)
{
if
(
worksize
!=
0
)
{
workspace
=
c
->
ops
->
buffer
_alloc
(
c
->
ctx
,
worksize
,
NULL
,
0
,
NULL
);
workspace
=
gpudata
_alloc
(
c
->
ctx
,
worksize
,
NULL
,
0
,
NULL
);
if
(
workspace
==
NULL
)
{
if
(
workspace
==
NULL
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
PyErr_SetString
(
PyExc_RuntimeError
,
"Could not allocate working memory"
);
"Could not allocate working memory"
);
...
@@ -227,7 +227,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -227,7 +227,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
*
input
));
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
*
input
));
if
(
worksize
!=
0
)
if
(
worksize
!=
0
)
c
->
ops
->
buffer
_release
(
workspace
);
gpudata
_release
(
workspace
);
cuda_record
(
kerns
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
kerns
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
...
...
theano/gpuarray/dnn_gw.c
浏览文件 @
b895c6e8
...
@@ -107,7 +107,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -107,7 +107,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
algo
=
choice
.
algo
;
algo
=
choice
.
algo
;
#else
#else
size_t
free
;
size_t
free
;
int
err2
=
c
->
ops
->
property
(
c
->
ctx
,
NULL
,
NULL
,
GA_CTX_PROP_FREE_GMEM
,
&
free
);
int
err2
=
gpucontext_property
(
c
->
ctx
,
GA_CTX_PROP_FREE_GMEM
,
&
free
);
if
(
err2
!=
GA_NO_ERROR
)
{
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
...
@@ -192,7 +192,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -192,7 +192,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
}
}
if
(
worksize
!=
0
)
{
if
(
worksize
!=
0
)
{
workspace
=
c
->
ops
->
buffer
_alloc
(
c
->
ctx
,
worksize
,
NULL
,
0
,
NULL
);
workspace
=
gpudata
_alloc
(
c
->
ctx
,
worksize
,
NULL
,
0
,
NULL
);
if
(
workspace
==
NULL
)
{
if
(
workspace
==
NULL
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
"Could not allocate working memory"
);
PyErr_SetString
(
PyExc_RuntimeError
,
"Could not allocate working memory"
);
cuda_exit
(
c
->
ctx
);
cuda_exit
(
c
->
ctx
);
...
@@ -214,7 +214,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -214,7 +214,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
*
kerns
));
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
*
kerns
));
if
(
worksize
!=
0
)
if
(
worksize
!=
0
)
c
->
ops
->
buffer
_release
(
workspace
);
gpudata
_release
(
workspace
);
cuda_record
(
input
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
input
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
...
...
theano/gpuarray/elemwise.py
浏览文件 @
b895c6e8
...
@@ -199,7 +199,7 @@ class GpuElemwise(HideC, Elemwise):
...
@@ -199,7 +199,7 @@ class GpuElemwise(HideC, Elemwise):
typecode
=
o
.
type
.
typecode
)
typecode
=
o
.
type
.
typecode
)
res
+=
"""
res
+=
"""
ge = GpuElemwise_new(
%(ctx)
s->
ops,
%(ctx)
s->
ctx,
%(support)
s,
%(kop)
s,
%(nargs)
s, args,
%(nd)
s, 0);
ge = GpuElemwise_new(
%(ctx)
s->ctx,
%(support)
s,
%(kop)
s,
%(nargs)
s, args,
%(nd)
s, 0);
if (ge == NULL) {
if (ge == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Could not initialize elemwise support");
PyErr_SetString(PyExc_RuntimeError, "Could not initialize elemwise support");
%(fail)
s
%(fail)
s
...
@@ -360,7 +360,7 @@ class GpuElemwise(HideC, Elemwise):
...
@@ -360,7 +360,7 @@ class GpuElemwise(HideC, Elemwise):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
ver
=
self
.
scalar_op
.
c_code_cache_version
()
ver
=
self
.
scalar_op
.
c_code_cache_version
()
if
ver
:
if
ver
:
return
(
6
,
ver
)
return
(
7
,
ver
)
else
:
else
:
return
ver
return
ver
...
@@ -554,7 +554,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
...
@@ -554,7 +554,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
def
make_node
(
self
,
x
):
def
make_node
(
self
,
x
):
x
=
as_gpuarray_variable
(
x
,
infer_context_name
(
x
))
x
=
as_gpuarray_variable
(
x
,
infer_context_name
(
x
))
if
x
.
type
.
context
.
kind
!=
'cuda'
:
if
x
.
type
.
context
.
kind
!=
b
'cuda'
:
raise
TypeError
(
"GpuCAReduceCuda doesn't work for non-cuda devices"
)
raise
TypeError
(
"GpuCAReduceCuda doesn't work for non-cuda devices"
)
ret
=
super
(
GpuCAReduceCuda
,
self
)
.
make_node
(
x
)
ret
=
super
(
GpuCAReduceCuda
,
self
)
.
make_node
(
x
)
self
=
copy
.
copy
(
self
)
self
=
copy
.
copy
(
self
)
...
...
theano/gpuarray/extra_ops.py
浏览文件 @
b895c6e8
...
@@ -26,11 +26,8 @@ class GpuCumsum(GpuKernelBase, Op):
...
@@ -26,11 +26,8 @@ class GpuCumsum(GpuKernelBase, Op):
def
__init__
(
self
,
axis
):
def
__init__
(
self
,
axis
):
self
.
axis
=
axis
self
.
axis
=
axis
def
__str__
(
self
):
def
c_code_cache_version
(
self
):
return
"
%
s{
%
s}"
%
(
self
.
__class__
.
__name__
,
self
.
axis
)
return
(
3
,)
def
c_code_cache_version_apply
(
self
,
node
):
return
(
1
,)
def
c_headers
(
self
):
def
c_headers
(
self
):
return
[
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
,
'<gpuarray_helper.h>'
]
return
[
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
,
'<gpuarray_helper.h>'
]
...
@@ -221,7 +218,7 @@ class GpuCumsum(GpuKernelBase, Op):
...
@@ -221,7 +218,7 @@ class GpuCumsum(GpuKernelBase, Op):
return
kernels
return
kernels
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
if
node
.
inputs
[
0
]
.
type
.
context
.
kind
!=
'cuda'
:
if
node
.
inputs
[
0
]
.
type
.
context
.
kind
!=
b
'cuda'
:
raise
NotImplementedError
(
"cuda only"
)
raise
NotImplementedError
(
"cuda only"
)
x
,
=
inp
x
,
=
inp
z
,
=
out
z
,
=
out
...
@@ -249,17 +246,17 @@ class GpuCumsum(GpuKernelBase, Op):
...
@@ -249,17 +246,17 @@ class GpuCumsum(GpuKernelBase, Op):
size_t max_grid_size1;
size_t max_grid_size1;
size_t max_grid_size2;
size_t max_grid_size2;
int err;
int err;
err =
%(ctx)
s->ops->property(
%(ctx)
s->ctx, NULL, NULL
, GA_CTX_PROP_MAXLSIZE0, &max_threads_dim0);
err =
gpucontext_property(
%(ctx)
s->ctx
, GA_CTX_PROP_MAXLSIZE0, &max_threads_dim0);
if (err != GA_NO_ERROR){
if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_threads_dims0");
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_threads_dims0");
%(fail)
s;
%(fail)
s;
}
}
err =
%(ctx)
s->ops->property(
%(ctx)
s->ctx, NULL, NULL
, GA_CTX_PROP_MAXGSIZE1, &max_grid_size1);
err =
gpucontext_property(
%(ctx)
s->ctx
, GA_CTX_PROP_MAXGSIZE1, &max_grid_size1);
if (err != GA_NO_ERROR){
if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size1");
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size1");
%(fail)
s;
%(fail)
s;
}
}
err =
%(ctx)
s->ops->property(
%(ctx)
s->ctx, NULL, NULL
, GA_CTX_PROP_MAXGSIZE2, &max_grid_size2);
err =
gpucontext_property(
%(ctx)
s->ctx
, GA_CTX_PROP_MAXGSIZE2, &max_grid_size2);
if (err != GA_NO_ERROR){
if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size2");
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size2");
%(fail)
s;
%(fail)
s;
...
...
theano/gpuarray/gemm16.c
浏览文件 @
b895c6e8
...
@@ -117,7 +117,7 @@ int gemm16(PyGpuArrayObject *C, float alpha,
...
@@ -117,7 +117,7 @@ int gemm16(PyGpuArrayObject *C, float alpha,
if
(
48
<
n128
&&
n128
<=
64
)
{
if
(
48
<
n128
&&
n128
<=
64
)
{
n64
=
n
/
64
;
n64
=
n
/
64
;
if
(
nprocs
==
0
)
if
(
nprocs
==
0
)
if
(
A
->
ga
.
ops
->
property
(
A
->
context
->
ctx
,
NULL
,
NULL
,
if
(
gpucontext_property
(
A
->
context
->
ctx
,
GA_CTX_PROP_NUMPROCS
,
&
nprocs
))
{
GA_CTX_PROP_NUMPROCS
,
&
nprocs
))
{
nprocs
=
0
;
nprocs
=
0
;
res
=
1
;
res
=
1
;
...
...
theano/gpuarray/neighbours.py
浏览文件 @
b895c6e8
...
@@ -243,7 +243,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
...
@@ -243,7 +243,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
return
kernels
return
kernels
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
if
node
.
inputs
[
0
]
.
type
.
context
.
kind
!=
'cuda'
:
if
node
.
inputs
[
0
]
.
type
.
context
.
kind
!=
b
'cuda'
:
raise
NotImplementedError
(
"cuda only"
)
raise
NotImplementedError
(
"cuda only"
)
dtype_ten4
=
node
.
inputs
[
0
]
.
dtype
dtype_ten4
=
node
.
inputs
[
0
]
.
dtype
dtype_neib_shape
=
node
.
inputs
[
1
]
.
dtype
dtype_neib_shape
=
node
.
inputs
[
1
]
.
dtype
...
...
theano/gpuarray/nerv.py
浏览文件 @
b895c6e8
...
@@ -105,7 +105,7 @@ class Gemm16(COp):
...
@@ -105,7 +105,7 @@ class Gemm16(COp):
return
"""
return
"""
bcode = bin_
%(name)
s;
bcode = bin_
%(name)
s;
sz = sizeof(bin_
%(name)
s);
sz = sizeof(bin_
%(name)
s);
if (GpuKernel_init(&k_
%(name)
s, c->
ops, c->
ctx, 1, &bcode, &sz,
if (GpuKernel_init(&k_
%(name)
s, c->ctx, 1, &bcode, &sz,
"hgemm_
%(name)
s", 13, types, GA_USE_BINARY, NULL)
"hgemm_
%(name)
s", 13, types, GA_USE_BINARY, NULL)
!= GA_NO_ERROR) {
!= GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Could not initialize kernel
%(name)
s");
PyErr_SetString(PyExc_RuntimeError, "Could not initialize kernel
%(name)
s");
...
...
theano/gpuarray/nnet.py
浏览文件 @
b895c6e8
...
@@ -189,7 +189,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
...
@@ -189,7 +189,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
flags
=
flags
,
objvar
=
k_var
)]
flags
=
flags
,
objvar
=
k_var
)]
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
if
node
.
inputs
[
0
]
.
type
.
context
.
kind
!=
'cuda'
:
if
node
.
inputs
[
0
]
.
type
.
context
.
kind
!=
b
'cuda'
:
raise
NotImplementedError
(
'cuda only'
)
raise
NotImplementedError
(
'cuda only'
)
typecode_x
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
node
.
inputs
[
0
]
.
dtype
)
typecode_x
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
node
.
inputs
[
0
]
.
dtype
)
typecode_b
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
node
.
inputs
[
1
]
.
dtype
)
typecode_b
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
node
.
inputs
[
1
]
.
dtype
)
...
@@ -375,7 +375,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
...
@@ -375,7 +375,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
return
[
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
]
return
[
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
]
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
if
node
.
inputs
[
0
]
.
type
.
context
.
kind
!=
'cuda'
:
if
node
.
inputs
[
0
]
.
type
.
context
.
kind
!=
b
'cuda'
:
raise
NotImplementedError
(
"cuda only"
)
raise
NotImplementedError
(
"cuda only"
)
typecode_dx
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
node
.
outputs
[
0
]
.
dtype
)
typecode_dx
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
node
.
outputs
[
0
]
.
dtype
)
itemsize_dnll
=
numpy
.
dtype
(
node
.
inputs
[
0
]
.
dtype
)
.
itemsize
itemsize_dnll
=
numpy
.
dtype
(
node
.
inputs
[
0
]
.
dtype
)
.
itemsize
...
@@ -584,7 +584,7 @@ class GpuSoftmax(GpuKernelBase, Op):
...
@@ -584,7 +584,7 @@ class GpuSoftmax(GpuKernelBase, Op):
return
[
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
]
return
[
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
]
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
if
node
.
inputs
[
0
]
.
type
.
context
.
kind
!=
'cuda'
:
if
node
.
inputs
[
0
]
.
type
.
context
.
kind
!=
b
'cuda'
:
raise
NotImplementedError
(
"cuda only"
)
raise
NotImplementedError
(
"cuda only"
)
dtype_x
=
node
.
inputs
[
0
]
.
dtype
dtype_x
=
node
.
inputs
[
0
]
.
dtype
work_x
=
work_dtype
(
dtype_x
)
work_x
=
work_dtype
(
dtype_x
)
...
@@ -783,7 +783,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
...
@@ -783,7 +783,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
return
[
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
]
return
[
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
]
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
if
node
.
inputs
[
0
]
.
type
.
context
.
kind
!=
'cuda'
:
if
node
.
inputs
[
0
]
.
type
.
context
.
kind
!=
b
'cuda'
:
raise
NotImplementedError
(
'cuda only'
)
raise
NotImplementedError
(
'cuda only'
)
dtype_x
=
node
.
inputs
[
0
]
.
dtype
dtype_x
=
node
.
inputs
[
0
]
.
dtype
dtype_b
=
node
.
inputs
[
1
]
.
dtype
dtype_b
=
node
.
inputs
[
1
]
.
dtype
...
...
theano/gpuarray/opt.py
浏览文件 @
b895c6e8
...
@@ -146,7 +146,7 @@ def op_lifter(OP, cuda_only=False):
...
@@ -146,7 +146,7 @@ def op_lifter(OP, cuda_only=False):
# Check if we should replace
# Check if we should replace
if
(
not
replace
or
if
(
not
replace
or
(
cuda_only
and
(
cuda_only
and
get_context
(
context_name
)
.
kind
!=
'cuda'
)):
get_context
(
context_name
)
.
kind
!=
b
'cuda'
)):
return
False
return
False
# tag the inputs with the context in case
# tag the inputs with the context in case
...
@@ -643,7 +643,7 @@ def local_gpua_advanced_subtensor(node, context_name):
...
@@ -643,7 +643,7 @@ def local_gpua_advanced_subtensor(node, context_name):
def
local_gpua_advanced_incsubtensor
(
node
,
context_name
):
def
local_gpua_advanced_incsubtensor
(
node
,
context_name
):
context
=
get_context
(
context_name
)
context
=
get_context
(
context_name
)
# This is disabled on non-cuda contexts
# This is disabled on non-cuda contexts
if
context
.
kind
!=
'cuda'
:
if
context
.
kind
!=
b
'cuda'
:
return
None
return
None
x
,
y
,
ilist
=
node
.
inputs
x
,
y
,
ilist
=
node
.
inputs
...
@@ -674,12 +674,12 @@ def local_gpua_careduce(node, context_name):
...
@@ -674,12 +674,12 @@ def local_gpua_careduce(node, context_name):
if
isinstance
(
node
.
op
.
scalar_op
,
(
scalar
.
Add
,
scalar
.
Mul
,
if
isinstance
(
node
.
op
.
scalar_op
,
(
scalar
.
Add
,
scalar
.
Mul
,
scalar
.
Maximum
,
scalar
.
Minimum
)):
scalar
.
Maximum
,
scalar
.
Minimum
)):
ctx
=
get_context
(
context_name
)
ctx
=
get_context
(
context_name
)
if
ctx
.
kind
==
'opencl'
:
if
ctx
.
kind
==
b
'opencl'
:
op
=
GpuCAReduceCPY
op
=
GpuCAReduceCPY
if
node
.
op
.
scalar_op
not
in
[
scalar
.
add
,
scalar
.
mul
]:
if
node
.
op
.
scalar_op
not
in
[
scalar
.
add
,
scalar
.
mul
]:
# We don't support yet all reduction with cpy code.
# We don't support yet all reduction with cpy code.
return
return
elif
ctx
.
kind
==
'cuda'
:
elif
ctx
.
kind
==
b
'cuda'
:
op
=
GpuCAReduceCuda
op
=
GpuCAReduceCuda
else
:
else
:
return
False
return
False
...
...
theano/gpuarray/subtensor.py
浏览文件 @
b895c6e8
...
@@ -340,7 +340,7 @@ class GpuIncSubtensor(IncSubtensor):
...
@@ -340,7 +340,7 @@ class GpuIncSubtensor(IncSubtensor):
args[1].name = "b";
args[1].name = "b";
args[1].typecode =
%(type2)
s;
args[1].typecode =
%(type2)
s;
args[1].flags = GE_READ;
args[1].flags = GE_READ;
iadd = GpuElemwise_new(
%(ctx)
s->
ops,
%(ctx)
s->
ctx, "", "a += b",
iadd = GpuElemwise_new(
%(ctx)
s->ctx, "", "a += b",
2, args,
%(nd)
s, 0);
2, args,
%(nd)
s, 0);
if (iadd == NULL) {
if (iadd == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Could not intialize inplace add support");
PyErr_SetString(PyExc_RuntimeError, "Could not intialize inplace add support");
...
@@ -369,7 +369,7 @@ class GpuIncSubtensor(IncSubtensor):
...
@@ -369,7 +369,7 @@ class GpuIncSubtensor(IncSubtensor):
parent_version
=
super
(
GpuIncSubtensor
,
self
)
.
c_code_cache_version
()
parent_version
=
super
(
GpuIncSubtensor
,
self
)
.
c_code_cache_version
()
if
not
parent_version
:
if
not
parent_version
:
return
return
return
parent_version
+
(
5
,)
return
parent_version
+
(
6
,)
class
GpuAdvancedSubtensor1
(
HideC
,
tensor
.
AdvancedSubtensor1
):
class
GpuAdvancedSubtensor1
(
HideC
,
tensor
.
AdvancedSubtensor1
):
...
@@ -437,8 +437,7 @@ if (err != GA_NO_ERROR) {
...
@@ -437,8 +437,7 @@ if (err != GA_NO_ERROR) {
if (err == GA_VALUE_ERROR) {
if (err == GA_VALUE_ERROR) {
PyErr_SetString(PyExc_IndexError, "Index out of bounds.");
PyErr_SetString(PyExc_IndexError, "Index out of bounds.");
} else {
} else {
PyErr_SetString(PyExc_RuntimeError, Gpu_error(
%(v)
s->context->ops,
PyErr_SetString(PyExc_RuntimeError, GpuArray_error(&
%(v)
s->ga, err));
%(v)
s->context->ctx, err));
}
}
%(fail)
s
%(fail)
s
}
}
...
@@ -589,7 +588,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
...
@@ -589,7 +588,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
return
super
(
GpuAdvancedIncSubtensor1_dev20
,
self
)
.
perform
(
node
,
inp
,
out
)
return
super
(
GpuAdvancedIncSubtensor1_dev20
,
self
)
.
perform
(
node
,
inp
,
out
)
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
6
,)
return
(
8
,)
def
c_headers
(
self
):
def
c_headers
(
self
):
return
[
'<numpy_compat.h>'
,
'<gpuarray_helper.h>'
,
return
[
'<numpy_compat.h>'
,
'<gpuarray_helper.h>'
,
...
@@ -600,7 +599,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
...
@@ -600,7 +599,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
ctx
=
self
.
get_params
(
node
)
ctx
=
self
.
get_params
(
node
)
if
ctx
.
kind
!=
'cuda'
:
if
ctx
.
kind
!=
b
'cuda'
:
raise
NotImplementedError
(
"cuda only"
)
raise
NotImplementedError
(
"cuda only"
)
if
(
self
.
set_instead_of_inc
or
if
(
self
.
set_instead_of_inc
or
node
.
inputs
[
0
]
.
ndim
!=
node
.
inputs
[
1
]
.
ndim
or
node
.
inputs
[
0
]
.
ndim
!=
node
.
inputs
[
1
]
.
ndim
or
...
@@ -757,8 +756,8 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
...
@@ -757,8 +756,8 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
int err, kerr = 0;
int err, kerr = 0;
if (threads_per_block[0] > 0 && n_blocks[0] > 0) {
if (threads_per_block[0] > 0 && n_blocks[0] > 0) {
err =
py_self->ga.ops->property(NULL, py_self->ga.data, NULL
,
err =
gpudata_property(py_self->ga.data
,
GA_CTX_PROP_ERRBUF, &errbuf);
GA_CTX_PROP_ERRBUF, &errbuf);
if (err != GA_NO_ERROR) {
if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't fetch error buffer");
PyErr_SetString(PyExc_RuntimeError, "Can't fetch error buffer");
return 1;
return 1;
...
@@ -793,7 +792,7 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
...
@@ -793,7 +792,7 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
GpuKernel_error(&
%(k_var)
s, err));
GpuKernel_error(&
%(k_var)
s, err));
return 1;
return 1;
}
}
err =
py_self->ga.ops->buffer
_read(&kerr, errbuf, 0, sizeof(int));
err =
gpudata
_read(&kerr, errbuf, 0, sizeof(int));
if (err != GA_NO_ERROR) {
if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't read error buffer");
PyErr_SetString(PyExc_RuntimeError, "Can't read error buffer");
return 1;
return 1;
...
@@ -801,7 +800,7 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
...
@@ -801,7 +800,7 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
if (kerr != 0) {
if (kerr != 0) {
PyErr_SetString(PyExc_IndexError, "Index out of bounds");
PyErr_SetString(PyExc_IndexError, "Index out of bounds");
kerr = 0;
kerr = 0;
py_self->ga.ops->buffer
_write(errbuf, 0, &kerr, sizeof(int));
gpudata
_write(errbuf, 0, &kerr, sizeof(int));
return 1;
return 1;
}
}
}
}
...
...
theano/gpuarray/tests/test_elemwise.py
浏览文件 @
b895c6e8
...
@@ -197,7 +197,7 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
...
@@ -197,7 +197,7 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
def
setUp
(
self
):
def
setUp
(
self
):
super
(
test_GpuCAReduceCuda
,
self
)
.
setUp
()
super
(
test_GpuCAReduceCuda
,
self
)
.
setUp
()
if
get_context
(
test_ctx_name
)
.
kind
!=
'cuda'
:
if
get_context
(
test_ctx_name
)
.
kind
!=
b
'cuda'
:
raise
SkipTest
(
"Cuda specific tests"
)
raise
SkipTest
(
"Cuda specific tests"
)
...
@@ -212,7 +212,7 @@ class T_gpureduce_dtype(test_elemwise.T_reduce_dtype):
...
@@ -212,7 +212,7 @@ class T_gpureduce_dtype(test_elemwise.T_reduce_dtype):
'float32'
,
'float64'
]
'float32'
,
'float64'
]
def
setUp
(
self
):
def
setUp
(
self
):
if
get_context
(
test_ctx_name
)
.
kind
!=
'cuda'
:
if
get_context
(
test_ctx_name
)
.
kind
!=
b
'cuda'
:
raise
SkipTest
(
"Cuda specific tests"
)
raise
SkipTest
(
"Cuda specific tests"
)
...
...
theano/gpuarray/tests/test_extra_ops.py
浏览文件 @
b895c6e8
...
@@ -24,7 +24,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
...
@@ -24,7 +24,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
def
setUp
(
self
):
def
setUp
(
self
):
super
(
TestGpuCumsum
,
self
)
.
setUp
()
super
(
TestGpuCumsum
,
self
)
.
setUp
()
test_ctx
=
get_context
(
test_ctx_name
)
test_ctx
=
get_context
(
test_ctx_name
)
if
test_ctx
.
kind
!=
'cuda'
:
if
test_ctx
.
kind
!=
b
'cuda'
:
raise
SkipTest
(
"Cuda specific tests"
)
raise
SkipTest
(
"Cuda specific tests"
)
self
.
max_threads_dim0
=
test_ctx
.
maxlsize0
self
.
max_threads_dim0
=
test_ctx
.
maxlsize0
self
.
max_grid_size1
=
test_ctx
.
maxgsize2
self
.
max_grid_size1
=
test_ctx
.
maxgsize2
...
...
theano/gpuarray/tests/test_opt.py
浏览文件 @
b895c6e8
...
@@ -125,7 +125,7 @@ def test_reduce():
...
@@ -125,7 +125,7 @@ def test_reduce():
topo
=
f
.
maker
.
fgraph
.
toposort
()
topo
=
f
.
maker
.
fgraph
.
toposort
()
ops
=
[
type
(
node
.
op
)
for
node
in
topo
]
ops
=
[
type
(
node
.
op
)
for
node
in
topo
]
if
kind
==
'opencl'
and
method
in
[
"max"
,
"min"
]:
if
kind
==
b
'opencl'
and
method
in
[
"max"
,
"min"
]:
assert
not
(
GpuCAReduceCuda
in
ops
or
GpuCAReduceCPY
in
ops
)
assert
not
(
GpuCAReduceCuda
in
ops
or
GpuCAReduceCPY
in
ops
)
else
:
else
:
assert
GpuCAReduceCuda
in
ops
or
GpuCAReduceCPY
in
ops
assert
GpuCAReduceCuda
in
ops
or
GpuCAReduceCPY
in
ops
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论