Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
4a8fed96
提交
4a8fed96
authored
6月 01, 2017
作者:
Frédéric Bastien
提交者:
GitHub
6月 01, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5923 from nouiz/float16
don't move op that don't support float16 on the GPU
上级
ebc59491
dc7f7a4f
隐藏空白字符变更
内嵌
并排
正在显示
13 个修改的文件
包含
378 行增加
和
296 行删除
+378
-296
ops.py
theano/compile/ops.py
+4
-0
cutils.py
theano/gof/cutils.py
+2
-196
linalg.py
theano/gpuarray/linalg.py
+3
-1
opt.py
theano/gpuarray/opt.py
+46
-7
subtensor.py
theano/gpuarray/subtensor.py
+5
-2
test_blas.py
theano/gpuarray/tests/test_blas.py
+18
-2
test_subtensor.py
theano/gpuarray/tests/test_subtensor.py
+66
-4
test_type.py
theano/gpuarray/tests/test_type.py
+41
-11
gradient.py
theano/gradient.py
+5
-1
inc_code.py
theano/tensor/inc_code.py
+162
-0
subtensor.py
theano/tensor/subtensor.py
+6
-44
test_opt.py
theano/tensor/tests/test_opt.py
+1
-4
test_subtensor.py
theano/tensor/tests/test_subtensor.py
+19
-24
没有找到文件。
theano/compile/ops.py
浏览文件 @
4a8fed96
...
@@ -50,6 +50,7 @@ class ViewOp(gof.Op):
...
@@ -50,6 +50,7 @@ class ViewOp(gof.Op):
# the output variable is %(oname)s.
# the output variable is %(oname)s.
c_code_and_version
=
{}
c_code_and_version
=
{}
__props__
=
()
__props__
=
()
_f16_ok
=
True
def
make_node
(
self
,
x
):
def
make_node
(
self
,
x
):
return
gof
.
Apply
(
self
,
[
x
],
[
x
.
type
()])
return
gof
.
Apply
(
self
,
[
x
],
[
x
.
type
()])
...
@@ -151,6 +152,7 @@ class DeepCopyOp(gof.Op):
...
@@ -151,6 +152,7 @@ class DeepCopyOp(gof.Op):
check_input
=
False
check_input
=
False
__props__
=
()
__props__
=
()
_f16_ok
=
True
def
__init__
(
self
):
def
__init__
(
self
):
pass
pass
...
@@ -659,6 +661,7 @@ class Rebroadcast(gof.Op):
...
@@ -659,6 +661,7 @@ class Rebroadcast(gof.Op):
check_input
=
False
check_input
=
False
__props__
=
(
"axis"
,)
__props__
=
(
"axis"
,)
_f16_ok
=
True
def
__init__
(
self
,
*
axis
):
def
__init__
(
self
,
*
axis
):
# Sort them to make sure we merge all possible case.
# Sort them to make sure we merge all possible case.
...
@@ -820,6 +823,7 @@ class SpecifyShape(gof.Op):
...
@@ -820,6 +823,7 @@ class SpecifyShape(gof.Op):
# the output variable is %(oname)s.
# the output variable is %(oname)s.
c_code_and_version
=
{}
c_code_and_version
=
{}
__props__
=
()
__props__
=
()
_f16_ok
=
True
def
make_node
(
self
,
x
,
shape
):
def
make_node
(
self
,
x
,
shape
):
if
not
isinstance
(
x
,
gof
.
Variable
):
if
not
isinstance
(
x
,
gof
.
Variable
):
...
...
theano/gof/cutils.py
浏览文件 @
4a8fed96
...
@@ -14,189 +14,6 @@ if os.path.exists(os.path.join(config.compiledir, 'cutils_ext.so')):
...
@@ -14,189 +14,6 @@ if os.path.exists(os.path.join(config.compiledir, 'cutils_ext.so')):
os
.
remove
(
os
.
path
.
join
(
config
.
compiledir
,
'cutils_ext.so'
))
os
.
remove
(
os
.
path
.
join
(
config
.
compiledir
,
'cutils_ext.so'
))
def
compile_cutils_code
():
types
=
[
'npy_'
+
t
for
t
in
[
'int8'
,
'int16'
,
'int32'
,
'int64'
,
'int128'
,
'int256'
,
'uint8'
,
'uint16'
,
'uint32'
,
'uint64'
,
'uint128'
,
'uint256'
,
'float16'
,
'float32'
,
'float64'
,
'float80'
,
'float96'
,
'float128'
,
'float256'
]]
complex_types
=
[
'npy_'
+
t
for
t
in
[
'complex32'
,
'complex64'
,
'complex128'
,
'complex160'
,
'complex192'
,
'complex512'
]]
inplace_map_template
=
"""
#if defined(
%(typen)
s)
static void
%(type)
s_inplace_add(PyArrayMapIterObject *mit,
PyArrayIterObject *it, int inc_or_set)
{
int index = mit->size;
while (index--) {
%(op)
s
PyArray_MapIterNext(mit);
PyArray_ITER_NEXT(it);
}
}
#endif
"""
floatadd
=
(
"((
%(type)
s*)mit->dataptr)[0] = "
"(inc_or_set ? ((
%(type)
s*)mit->dataptr)[0] : 0)"
" + ((
%(type)
s*)it->dataptr)[0];"
)
complexadd
=
"""
((
%(type)
s*)mit->dataptr)[0].real =
(inc_or_set ? ((
%(type)
s*)mit->dataptr)[0].real : 0)
+ ((
%(type)
s*)it->dataptr)[0].real;
((
%(type)
s*)mit->dataptr)[0].imag =
(inc_or_set ? ((
%(type)
s*)mit->dataptr)[0].imag : 0)
+ ((
%(type)
s*)it->dataptr)[0].imag;
"""
fns
=
''
.
join
([
inplace_map_template
%
{
'type'
:
t
,
'typen'
:
t
.
upper
(),
'op'
:
floatadd
%
{
'type'
:
t
}}
for
t
in
types
]
+
[
inplace_map_template
%
{
'type'
:
t
,
'typen'
:
t
.
upper
(),
'op'
:
complexadd
%
{
'type'
:
t
}}
for
t
in
complex_types
])
def
gen_binop
(
type
,
typen
):
return
"""
#if defined(
%(typen)
s)
%(type)
s_inplace_add,
#endif
"""
%
dict
(
type
=
type
,
typen
=
typen
)
fn_array
=
(
"static inplace_map_binop addition_funcs[] = {"
+
''
.
join
([
gen_binop
(
type
=
t
,
typen
=
t
.
upper
())
for
t
in
types
+
complex_types
])
+
"NULL};
\n
"
)
def
gen_num
(
typen
):
return
"""
#if defined(
%(typen)
s)
%(typen)
s,
#endif
"""
%
dict
(
type
=
type
,
typen
=
typen
)
type_number_array
=
(
"static int type_numbers[] = {"
+
''
.
join
([
gen_num
(
typen
=
t
.
upper
())
for
t
in
types
+
complex_types
])
+
"-1000};"
)
code
=
(
"""
#if NPY_API_VERSION >= 0x00000008
typedef void (*inplace_map_binop)(PyArrayMapIterObject *,
PyArrayIterObject *, int inc_or_set);
"""
+
fns
+
fn_array
+
type_number_array
+
"""
static int
map_increment(PyArrayMapIterObject *mit, PyObject *op,
inplace_map_binop add_inplace, int inc_or_set)
{
PyArrayObject *arr = NULL;
PyArrayIterObject *it;
PyArray_Descr *descr;
if (mit->ait == NULL) {
return -1;
}
descr = PyArray_DESCR(mit->ait->ao);
Py_INCREF(descr);
arr = (PyArrayObject *)PyArray_FromAny(op, descr,
0, 0, NPY_ARRAY_FORCECAST, NULL);
if (arr == NULL) {
return -1;
}
if ((mit->subspace != NULL) && (mit->consec)) {
PyArray_MapIterSwapAxes(mit, (PyArrayObject **)&arr, 0);
if (arr == NULL) {
return -1;
}
}
it = (PyArrayIterObject*)
PyArray_BroadcastToShape((PyObject*)arr, mit->dimensions, mit->nd);
if (it == NULL) {
Py_DECREF(arr);
return -1;
}
(*add_inplace)(mit, it, inc_or_set);
Py_DECREF(arr);
Py_DECREF(it);
return 0;
}
static PyObject *
inplace_increment(PyObject *dummy, PyObject *args)
{
PyObject *arg_a = NULL, *index=NULL, *inc=NULL;
int inc_or_set = 1;
PyArrayObject *a;
inplace_map_binop add_inplace = NULL;
int type_number = -1;
int i = 0;
PyArrayMapIterObject * mit;
if (!PyArg_ParseTuple(args, "OOO|i", &arg_a, &index,
&inc, &inc_or_set)) {
return NULL;
}
if (!PyArray_Check(arg_a)) {
PyErr_SetString(PyExc_ValueError,
"needs an ndarray as first argument");
return NULL;
}
a = (PyArrayObject *) arg_a;
if (PyArray_FailUnlessWriteable(a, "input/output array") < 0) {
return NULL;
}
if (PyArray_NDIM(a) == 0) {
PyErr_SetString(PyExc_IndexError, "0-d arrays can't be indexed.");
return NULL;
}
type_number = PyArray_TYPE(a);
while (type_numbers[i] >= 0 && addition_funcs[i] != NULL){
if (type_number == type_numbers[i]) {
add_inplace = addition_funcs[i];
break;
}
i++ ;
}
if (add_inplace == NULL) {
PyErr_SetString(PyExc_TypeError, "unsupported type for a");
return NULL;
}
mit = (PyArrayMapIterObject *) PyArray_MapIterArray(a, index);
if (mit == NULL) {
goto fail;
}
if (map_increment(mit, inc, add_inplace, inc_or_set) != 0) {
goto fail;
}
Py_DECREF(mit);
Py_INCREF(Py_None);
return Py_None;
fail:
Py_XDECREF(mit);
return NULL;
}
#endif
"""
)
return
code
def
compile_cutils
():
def
compile_cutils
():
"""
"""
Do just the compilation of cutils_ext.
Do just the compilation of cutils_ext.
...
@@ -204,7 +21,6 @@ def compile_cutils():
...
@@ -204,7 +21,6 @@ def compile_cutils():
"""
"""
code
=
(
"""
code
=
(
"""
#include <Python.h>
#include <Python.h>
#include "numpy/arrayobject.h"
#include "theano_mod_helper.h"
#include "theano_mod_helper.h"
extern "C"{
extern "C"{
...
@@ -226,18 +42,10 @@ def compile_cutils():
...
@@ -226,18 +42,10 @@ def compile_cutils():
int failure = fn(it);
int failure = fn(it);
return Py_BuildValue("i", failure);
return Py_BuildValue("i", failure);
}"""
)
}
static PyMethodDef CutilsExtMethods[] = {
code
+=
compile_cutils_code
()
code
+=
(
"""static PyMethodDef CutilsExtMethods[] = {
{"run_cthunk", run_cthunk, METH_VARARGS|METH_KEYWORDS,
{"run_cthunk", run_cthunk, METH_VARARGS|METH_KEYWORDS,
"Run a theano cthunk."},
"Run a theano cthunk."},
#if NPY_API_VERSION >= 0x00000008
{"inplace_increment", inplace_increment,
METH_VARARGS,
"increments a numpy array inplace at the passed indexes."},
#endif
{NULL, NULL, 0, NULL} /* Sentinel */
{NULL, NULL, 0, NULL} /* Sentinel */
};"""
)
};"""
)
if
PY3
:
if
PY3
:
...
@@ -256,7 +64,6 @@ def compile_cutils():
...
@@ -256,7 +64,6 @@ def compile_cutils():
PyMODINIT_FUNC
PyMODINIT_FUNC
PyInit_cutils_ext(void) {
PyInit_cutils_ext(void) {
import_array();
return PyModule_Create(&moduledef);
return PyModule_Create(&moduledef);
}
}
}
}
...
@@ -266,7 +73,6 @@ def compile_cutils():
...
@@ -266,7 +73,6 @@ def compile_cutils():
PyMODINIT_FUNC
PyMODINIT_FUNC
initcutils_ext(void)
initcutils_ext(void)
{
{
import_array();
(void) Py_InitModule("cutils_ext", CutilsExtMethods);
(void) Py_InitModule("cutils_ext", CutilsExtMethods);
}
}
} //extern C
} //extern C
...
...
theano/gpuarray/linalg.py
浏览文件 @
4a8fed96
...
@@ -261,7 +261,7 @@ class GpuCholesky(Op):
...
@@ -261,7 +261,7 @@ class GpuCholesky(Op):
raise
RuntimeError
(
'CUSOLVER is not available and '
raise
RuntimeError
(
'CUSOLVER is not available and '
'GpuCholesky Op can not be constructed.'
)
'GpuCholesky Op can not be constructed.'
)
if
skcuda
.
__version__
<=
'0.5.1'
:
if
skcuda
.
__version__
<=
'0.5.1'
:
warnings
.
warn
(
'The Gpu
Solve
op requires scikit-cuda > 0.5.1 to work with CUDA 8'
)
warnings
.
warn
(
'The Gpu
Cholesky
op requires scikit-cuda > 0.5.1 to work with CUDA 8'
)
if
not
pygpu_available
:
if
not
pygpu_available
:
raise
RuntimeError
(
'Missing pygpu or triu/tril functions.'
raise
RuntimeError
(
'Missing pygpu or triu/tril functions.'
'Install or update libgpuarray.'
)
'Install or update libgpuarray.'
)
...
@@ -382,6 +382,7 @@ class GpuMagmaSVD(COp):
...
@@ -382,6 +382,7 @@ class GpuMagmaSVD(COp):
A
=
as_gpuarray_variable
(
A
,
ctx_name
)
A
=
as_gpuarray_variable
(
A
,
ctx_name
)
if
A
.
ndim
!=
2
:
if
A
.
ndim
!=
2
:
raise
LinAlgError
(
"Matrix rank error"
)
raise
LinAlgError
(
"Matrix rank error"
)
assert
A
.
dtype
==
'float32'
if
self
.
compute_uv
:
if
self
.
compute_uv
:
return
theano
.
Apply
(
self
,
[
A
],
return
theano
.
Apply
(
self
,
[
A
],
[
A
.
type
(),
[
A
.
type
(),
...
@@ -476,6 +477,7 @@ class GpuMagmaMatrixInverse(COp):
...
@@ -476,6 +477,7 @@ class GpuMagmaMatrixInverse(COp):
def
make_node
(
self
,
x
):
def
make_node
(
self
,
x
):
ctx_name
=
infer_context_name
(
x
)
ctx_name
=
infer_context_name
(
x
)
x
=
as_gpuarray_variable
(
x
,
ctx_name
)
x
=
as_gpuarray_variable
(
x
,
ctx_name
)
assert
x
.
dtype
==
'float32'
if
x
.
ndim
!=
2
:
if
x
.
ndim
!=
2
:
raise
LinAlgError
(
"Matrix rank error"
)
raise
LinAlgError
(
"Matrix rank error"
)
return
theano
.
Apply
(
self
,
[
x
],
[
x
.
type
()])
return
theano
.
Apply
(
self
,
[
x
],
[
x
.
type
()])
...
...
theano/gpuarray/opt.py
浏览文件 @
4a8fed96
...
@@ -1181,6 +1181,14 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
...
@@ -1181,6 +1181,14 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
@op_lifter
([
tensor
.
blas
.
Gemv
,
tensor
.
blas_c
.
CGemv
])
@op_lifter
([
tensor
.
blas
.
Gemv
,
tensor
.
blas_c
.
CGemv
])
@register_opt2
([
tensor
.
blas
.
Gemv
],
'fast_compile'
)
@register_opt2
([
tensor
.
blas
.
Gemv
],
'fast_compile'
)
def
local_gpua_gemv
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpua_gemv
(
op
,
context_name
,
inputs
,
outputs
):
if
inputs
[
0
]
.
dtype
==
'float16'
:
# Use gemm implementation as cublas gemv don't support float16
return
gpugemm_no_inplace
(
inputs
[
0
][:,
None
],
inputs
[
1
],
inputs
[
2
],
inputs
[
3
][:,
None
],
inputs
[
4
])
.
dimshuffle
(
0
)
if
inputs
[
0
]
.
dtype
not
in
[
'float32'
,
'float64'
]:
if
inputs
[
0
]
.
dtype
not
in
[
'float32'
,
'float64'
]:
return
return
if
op
.
inplace
:
if
op
.
inplace
:
...
@@ -1351,6 +1359,8 @@ theano.tensor.nnet.conv2d()
...
@@ -1351,6 +1359,8 @@ theano.tensor.nnet.conv2d()
@op_lifter
([
SparseBlockGemv
])
@op_lifter
([
SparseBlockGemv
])
@register_opt2
([
SparseBlockGemv
],
'fast_compile'
)
@register_opt2
([
SparseBlockGemv
],
'fast_compile'
)
def
local_gpua_sparseblockgemv
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpua_sparseblockgemv
(
op
,
context_name
,
inputs
,
outputs
):
if
inputs
[
0
]
.
dtype
==
'float16'
:
return
if
op
.
inplace
:
if
op
.
inplace
:
return
gpu_sparse_block_gemv_inplace
return
gpu_sparse_block_gemv_inplace
else
:
else
:
...
@@ -1361,6 +1371,8 @@ def local_gpua_sparseblockgemv(op, context_name, inputs, outputs):
...
@@ -1361,6 +1371,8 @@ def local_gpua_sparseblockgemv(op, context_name, inputs, outputs):
@op_lifter
([
SparseBlockOuter
])
@op_lifter
([
SparseBlockOuter
])
@register_opt2
([
SparseBlockOuter
],
'fast_compile'
)
@register_opt2
([
SparseBlockOuter
],
'fast_compile'
)
def
local_gpua_sparseblockouter
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpua_sparseblockouter
(
op
,
context_name
,
inputs
,
outputs
):
if
inputs
[
0
]
.
dtype
==
'float16'
:
return
if
op
.
inplace
:
if
op
.
inplace
:
return
gpu_sparse_block_outer_inplace
return
gpu_sparse_block_outer_inplace
else
:
else
:
...
@@ -1998,7 +2010,13 @@ def _scan_type_infer(node):
...
@@ -1998,7 +2010,13 @@ def _scan_type_infer(node):
@op_lifter
([
tensor
.
MaxAndArgmax
])
@op_lifter
([
tensor
.
MaxAndArgmax
])
@register_opt2
([
tensor
.
MaxAndArgmax
],
'fast_compile'
)
@register_opt2
([
tensor
.
MaxAndArgmax
],
'fast_compile'
)
def
local_gpu_maxandargmax
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpu_maxandargmax
(
op
,
context_name
,
inputs
,
outputs
):
return
GpuMaxAndArgmax
(
op
.
get_params
(
None
))
op
=
GpuMaxAndArgmax
(
op
.
get_params
(
None
))
if
inputs
[
0
]
.
dtype
==
"float16"
:
# For now it is better to copy/cast on the GPU then transfer to the CPU
casted_inputs
=
inputs
[
0
]
.
astype
(
'float32'
)
ret
=
op
(
casted_inputs
)
return
[
ret
[
0
]
.
astype
(
'float16'
),
ret
[
1
]]
return
op
# solve
# solve
...
@@ -2008,9 +2026,15 @@ def local_gpu_maxandargmax(op, context_name, inputs, outputs):
...
@@ -2008,9 +2026,15 @@ def local_gpu_maxandargmax(op, context_name, inputs, outputs):
def
local_gpu_solve
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpu_solve
(
op
,
context_name
,
inputs
,
outputs
):
if
not
cusolver_available
:
if
not
cusolver_available
:
return
return
if
inputs
[
0
]
.
dtype
not
in
[
'float16'
,
'float32'
]:
return
if
op
.
A_structure
not
in
MATRIX_STRUCTURES_SOLVE
:
if
op
.
A_structure
not
in
MATRIX_STRUCTURES_SOLVE
:
return
return
return
GpuCusolverSolve
(
A_structure
=
op
.
A_structure
)
op
=
GpuCusolverSolve
(
A_structure
=
op
.
A_structure
)
if
inputs
[
0
]
.
dtype
==
'float16'
:
return
op
(
inputs
[
0
]
.
astype
(
'float32'
),
inputs
[
1
]
.
astype
(
'float32'
))
.
astype
(
'float16'
)
return
op
@register_inplace
()
@register_inplace
()
...
@@ -2028,7 +2052,13 @@ def local_inplace_gpu_solve(node):
...
@@ -2028,7 +2052,13 @@ def local_inplace_gpu_solve(node):
def
local_gpu_cholesky
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpu_cholesky
(
op
,
context_name
,
inputs
,
outputs
):
if
not
cusolver_available
:
if
not
cusolver_available
:
return
return
return
GpuCholesky
(
lower
=
op
.
lower
,
inplace
=
op
.
destructive
)
if
inputs
[
0
]
.
dtype
not
in
[
'float16'
,
'float32'
]:
return
op
=
GpuCholesky
(
lower
=
op
.
lower
,
inplace
=
op
.
destructive
)
if
inputs
[
0
]
.
dtype
==
'float16'
:
return
op
(
inputs
[
0
]
.
astype
(
'float32'
))
.
astype
(
'float16'
)
return
op
@register_inplace
()
@register_inplace
()
...
@@ -2044,7 +2074,12 @@ def local_inplace_cholesky(node):
...
@@ -2044,7 +2074,12 @@ def local_inplace_cholesky(node):
def
local_gpu_matrix_inverse
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpu_matrix_inverse
(
op
,
context_name
,
inputs
,
outputs
):
if
not
config
.
magma
.
enabled
:
if
not
config
.
magma
.
enabled
:
return
return
return
GpuMagmaMatrixInverse
()
if
inputs
[
0
]
.
dtype
not
in
[
'float16'
,
'float32'
]:
return
op
=
GpuMagmaMatrixInverse
()
if
inputs
[
0
]
.
dtype
==
'float16'
:
return
op
(
inputs
[
0
]
.
astype
(
'float32'
))
.
astype
(
'float16'
)
return
op
@register_inplace
()
@register_inplace
()
...
@@ -2061,9 +2096,13 @@ def local_inplace_matrix_inverse_inplace(node):
...
@@ -2061,9 +2096,13 @@ def local_inplace_matrix_inverse_inplace(node):
def
local_gpu_svd
(
op
,
context_name
,
inputs
,
outputs
):
def
local_gpu_svd
(
op
,
context_name
,
inputs
,
outputs
):
if
not
config
.
magma
.
enabled
:
if
not
config
.
magma
.
enabled
:
return
return
return
GpuMagmaSVD
(
full_matrices
=
op
.
full_matrices
,
if
inputs
[
0
]
.
dtype
not
in
[
'float16'
,
'float32'
]:
compute_uv
=
op
.
compute_uv
)
return
op
=
GpuMagmaSVD
(
full_matrices
=
op
.
full_matrices
,
compute_uv
=
op
.
compute_uv
)
if
inputs
[
0
]
.
dtype
==
'float16'
:
return
op
(
inputs
[
0
]
.
astype
(
'float32'
))
.
astype
(
'float16'
)
return
op
# Do not register in fast_run or fast_compile.
# Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled.
# It will be added to fast_run if the GPU is enabled.
...
...
theano/gpuarray/subtensor.py
浏览文件 @
4a8fed96
...
@@ -232,7 +232,7 @@ class GpuIncSubtensor(IncSubtensor):
...
@@ -232,7 +232,7 @@ class GpuIncSubtensor(IncSubtensor):
if
not
self
.
set_instead_of_inc
:
if
not
self
.
set_instead_of_inc
:
# sub_x += y
# sub_x += y
iadd
=
get_iadd
(
node
.
inputs
[
0
],
node
.
inputs
[
1
])
iadd
=
get_iadd
(
node
.
inputs
[
0
],
node
.
inputs
[
1
])
iadd
(
sub_x
,
y
,
broadcast
=
False
)
iadd
(
sub_x
,
y
)
else
:
else
:
# sub_x[...] = y
# sub_x[...] = y
x
.
__setitem__
(
cdata
,
y
)
x
.
__setitem__
(
cdata
,
y
)
...
@@ -403,6 +403,8 @@ class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):
...
@@ -403,6 +403,8 @@ class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):
"""
"""
AdvancedSubrensor1 on the GPU.
AdvancedSubrensor1 on the GPU.
"""
"""
_f16_ok
=
True
def
make_node
(
self
,
x
,
ilist
):
def
make_node
(
self
,
x
,
ilist
):
ctx_name
=
infer_context_name
(
x
,
ilist
)
ctx_name
=
infer_context_name
(
x
,
ilist
)
x_
=
as_gpuarray_variable
(
x
,
ctx_name
)
x_
=
as_gpuarray_variable
(
x
,
ctx_name
)
...
@@ -807,7 +809,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
...
@@ -807,7 +809,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
"""
"""
ctx_name
=
infer_context_name
(
x
,
y
,
ilist
)
ctx_name
=
infer_context_name
(
x
,
y
,
ilist
)
x_
=
as_gpuarray_variable
(
x
,
ctx_name
)
x_
=
as_gpuarray_variable
(
x
,
ctx_name
)
y_
=
as_gpuarray_variable
(
y
,
ctx_name
)
y_
=
as_gpuarray_variable
(
y
.
astype
(
x
.
dtype
)
,
ctx_name
)
ilist_
=
as_gpuarray_variable
(
ilist
,
ctx_name
)
ilist_
=
as_gpuarray_variable
(
ilist
,
ctx_name
)
assert
x_
.
type
.
ndim
>=
y_
.
type
.
ndim
assert
x_
.
type
.
ndim
>=
y_
.
type
.
ndim
...
@@ -1088,6 +1090,7 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) {
...
@@ -1088,6 +1090,7 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) {
class
GpuExtractDiag
(
Op
):
class
GpuExtractDiag
(
Op
):
__props__
=
(
"offset"
,
"axis1"
,
"axis2"
,
"view"
)
__props__
=
(
"offset"
,
"axis1"
,
"axis2"
,
"view"
)
_f16_ok
=
True
def
__init__
(
self
,
offset
=
0
,
axis1
=
0
,
axis2
=
1
,
view
=
False
):
def
__init__
(
self
,
offset
=
0
,
axis1
=
0
,
axis2
=
1
,
view
=
False
):
self
.
view
=
view
self
.
view
=
view
...
...
theano/gpuarray/tests/test_blas.py
浏览文件 @
4a8fed96
...
@@ -8,7 +8,7 @@ import theano
...
@@ -8,7 +8,7 @@ import theano
from
theano
import
config
from
theano
import
config
from
theano
import
tensor
from
theano
import
tensor
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.blas
import
gemv_inplace
,
gemm_inplace
,
_dot22
,
batched_dot
from
theano.tensor.blas
import
gemv
,
gemv
_inplace
,
gemm_inplace
,
_dot22
,
batched_dot
from
theano.tensor.tests.test_blas
import
TestGer
,
BaseGemv
from
theano.tensor.tests.test_blas
import
TestGer
,
BaseGemv
from
..
import
gpuarray_shared_constructor
from
..
import
gpuarray_shared_constructor
...
@@ -18,7 +18,7 @@ from ..blas import (gpugemv_inplace, gpugemv_no_inplace,
...
@@ -18,7 +18,7 @@ from ..blas import (gpugemv_inplace, gpugemv_no_inplace,
gpugemm_inplace
,
gpugemm_no_inplace
,
gpugemm_inplace
,
gpugemm_no_inplace
,
gpugemmbatch_no_inplace
,
gpugemmbatch_no_inplace
,
gpuger_inplace
,
gpuger_no_inplace
,
gpuger_inplace
,
gpuger_no_inplace
,
GpuGer
,
gpu_dot22
)
GpuGer
,
GpuGemm
,
gpu_dot22
)
GpuGemvTester
=
makeTester
(
GpuGemvTester
=
makeTester
(
...
@@ -42,6 +42,22 @@ GpuGemvTester = makeTester(
...
@@ -42,6 +42,22 @@ GpuGemvTester = makeTester(
def
test_float16
():
def
test_float16
():
# gemv (gemm called)
float16_data
=
[
rand
(
3
)
.
astype
(
'float16'
),
np
.
asarray
(
1
,
dtype
=
np
.
float32
),
rand
(
3
,
3
)
.
astype
(
'float16'
),
rand
(
3
)
.
astype
(
'float16'
),
np
.
asarray
(
0.5
,
dtype
=
np
.
float32
)]
float16_shared
=
[
gpuarray_shared_constructor
(
val
,
target
=
test_ctx_name
)
for
val
in
float16_data
]
o
=
gemv
(
*
float16_shared
)
f
=
theano
.
function
([],
o
,
mode
=
mode_with_gpu
)
y
,
alpha
,
A
,
x
,
beta
=
float16_data
out
=
f
()
utt
.
assert_allclose
(
np
.
asarray
(
out
),
alpha
*
np
.
dot
(
A
,
x
)
+
beta
*
y
)
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
any
([
isinstance
(
n
.
op
,
GpuGemm
)
for
n
in
topo
])
# gemm
# gemm
float16_data
=
[
rand
(
3
,
3
)
.
astype
(
'float16'
),
float16_data
=
[
rand
(
3
,
3
)
.
astype
(
'float16'
),
np
.
asarray
(
1
,
dtype
=
np
.
float32
),
np
.
asarray
(
1
,
dtype
=
np
.
float32
),
...
...
theano/gpuarray/tests/test_subtensor.py
浏览文件 @
4a8fed96
...
@@ -49,6 +49,33 @@ class G_subtensor(test_subtensor.T_subtensor):
...
@@ -49,6 +49,33 @@ class G_subtensor(test_subtensor.T_subtensor):
assert
self
.
sub
==
GpuSubtensor
assert
self
.
sub
==
GpuSubtensor
class
G_subtensorF16
(
test_subtensor
.
T_subtensor
):
def
shortDescription
(
self
):
return
None
def
__init__
(
self
,
name
):
def
shared
(
x
,
**
kwargs
):
return
gpuarray_shared_constructor
(
x
,
target
=
test_ctx_name
,
**
kwargs
)
test_subtensor
.
T_subtensor
.
__init__
(
self
,
name
,
shared
=
shared
,
sub
=
GpuSubtensor
,
inc_sub
=
GpuIncSubtensor
,
adv_sub1
=
GpuAdvancedSubtensor1
,
adv_incsub1
=
GpuAdvancedIncSubtensor1
,
dimshuffle
=
GpuDimShuffle
,
mode
=
mode_with_gpu
,
# avoid errors with limited devices
dtype
=
'float16'
,
# use floatX?
ignore_topo
=
(
HostFromGpu
,
GpuFromHost
,
DeepCopyOp
,
GpuContiguous
))
# GPU opt can't run in fast_compile only.
self
.
fast_compile
=
False
assert
self
.
sub
==
GpuSubtensor
def
test_advinc_subtensor1
():
def
test_advinc_subtensor1
():
# Test the second case in the opt local_gpu_advanced_incsubtensor1
# Test the second case in the opt local_gpu_advanced_incsubtensor1
for
shp
in
[(
3
,
3
),
(
3
,
3
,
3
)]:
for
shp
in
[(
3
,
3
),
(
3
,
3
,
3
)]:
...
@@ -73,7 +100,9 @@ def test_advinc_subtensor1():
...
@@ -73,7 +100,9 @@ def test_advinc_subtensor1():
def
test_advinc_subtensor1_dtype
():
def
test_advinc_subtensor1_dtype
():
# Test the mixed dtype case
# Test the mixed dtype case
shp
=
(
3
,
4
)
shp
=
(
3
,
4
)
for
dtype1
,
dtype2
in
[(
'float32'
,
'int8'
),
(
'float32'
,
'float64'
)]:
for
dtype1
,
dtype2
in
[(
'float32'
,
'int8'
),
(
'float32'
,
'float64'
),
(
'float16'
,
'int8'
),
(
'float16'
,
'float64'
),
(
'float16'
,
'float16'
)]:
shared
=
gpuarray_shared_constructor
shared
=
gpuarray_shared_constructor
xval
=
np
.
arange
(
np
.
prod
(
shp
),
dtype
=
dtype1
)
.
reshape
(
shp
)
+
1
xval
=
np
.
arange
(
np
.
prod
(
shp
),
dtype
=
dtype1
)
.
reshape
(
shp
)
+
1
yval
=
np
.
empty
((
2
,)
+
shp
[
1
:],
dtype
=
dtype2
)
yval
=
np
.
empty
((
2
,)
+
shp
[
1
:],
dtype
=
dtype2
)
...
@@ -95,7 +124,9 @@ def test_advinc_subtensor1_dtype():
...
@@ -95,7 +124,9 @@ def test_advinc_subtensor1_dtype():
def
test_advinc_subtensor1_vector_scalar
():
def
test_advinc_subtensor1_vector_scalar
():
# Test the case where x is a vector and y a scalar
# Test the case where x is a vector and y a scalar
shp
=
(
3
,)
shp
=
(
3
,)
for
dtype1
,
dtype2
in
[(
'float32'
,
'int8'
),
(
'float32'
,
'float64'
)]:
for
dtype1
,
dtype2
in
[(
'float32'
,
'int8'
),
(
'float32'
,
'float64'
),
(
'float16'
,
'int8'
),
(
'float16'
,
'float64'
),
(
'float16'
,
'float16'
)]:
shared
=
gpuarray_shared_constructor
shared
=
gpuarray_shared_constructor
xval
=
np
.
arange
(
np
.
prod
(
shp
),
dtype
=
dtype1
)
.
reshape
(
shp
)
+
1
xval
=
np
.
arange
(
np
.
prod
(
shp
),
dtype
=
dtype1
)
.
reshape
(
shp
)
+
1
yval
=
np
.
asarray
(
10
,
dtype
=
dtype2
)
yval
=
np
.
asarray
(
10
,
dtype
=
dtype2
)
...
@@ -105,7 +136,8 @@ def test_advinc_subtensor1_vector_scalar():
...
@@ -105,7 +136,8 @@ def test_advinc_subtensor1_vector_scalar():
name
=
'y'
)
name
=
'y'
)
expr
=
tensor
.
advanced_inc_subtensor1
(
x
,
y
,
[
0
,
2
])
expr
=
tensor
.
advanced_inc_subtensor1
(
x
,
y
,
[
0
,
2
])
f
=
theano
.
function
([
y
],
expr
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
y
],
expr
,
mode
=
mode_with_gpu
)
assert
sum
([
isinstance
(
node
.
op
,
GpuAdvancedIncSubtensor1_dev20
)
assert
sum
([
isinstance
(
node
.
op
,
(
GpuAdvancedIncSubtensor1_dev20
,
GpuAdvancedIncSubtensor1
))
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
==
1
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
==
1
rval
=
f
(
yval
)
rval
=
f
(
yval
)
rep
=
xval
.
copy
()
rep
=
xval
.
copy
()
...
@@ -169,7 +201,26 @@ class G_advancedsubtensor(test_subtensor.TestAdvancedSubtensor):
...
@@ -169,7 +201,26 @@ class G_advancedsubtensor(test_subtensor.TestAdvancedSubtensor):
sub
=
GpuAdvancedSubtensor
,
sub
=
GpuAdvancedSubtensor
,
mode
=
mode_with_gpu
,
mode
=
mode_with_gpu
,
# avoid errors with limited devices
# avoid errors with limited devices
dtype
=
'float32'
,
dtype
=
'float32'
,
# floatX?
ignore_topo
=
(
HostFromGpu
,
GpuFromHost
,
DeepCopyOp
))
# GPU opt can't run in fast_compile only.
self
.
fast_compile
=
False
assert
self
.
sub
==
GpuAdvancedSubtensor
class
G_advancedsubtensorF16
(
test_subtensor
.
TestAdvancedSubtensor
):
def
shortDescription
(
self
):
return
None
def
__init__
(
self
,
name
):
test_subtensor
.
TestAdvancedSubtensor
.
__init__
(
self
,
name
,
shared
=
gpuarray_shared_constructor
,
sub
=
GpuAdvancedSubtensor
,
mode
=
mode_with_gpu
,
# avoid errors with limited devices
dtype
=
'float16'
,
# floatX?
ignore_topo
=
(
HostFromGpu
,
GpuFromHost
,
ignore_topo
=
(
HostFromGpu
,
GpuFromHost
,
DeepCopyOp
))
DeepCopyOp
))
# GPU opt can't run in fast_compile only.
# GPU opt can't run in fast_compile only.
...
@@ -218,6 +269,17 @@ class test_gpuextractdiag(unittest.TestCase):
...
@@ -218,6 +269,17 @@ class test_gpuextractdiag(unittest.TestCase):
GpuExtractDiag
(
offset
,
axis1
,
axis2
)(
x
)
.
eval
({
x
:
np_x
}),
GpuExtractDiag
(
offset
,
axis1
,
axis2
)(
x
)
.
eval
({
x
:
np_x
}),
np_x
.
diagonal
(
offset
,
axis1
,
axis2
))
np_x
.
diagonal
(
offset
,
axis1
,
axis2
))
def
test_tensor_float16
(
self
):
x
=
tensor
.
tensor4
()
np_x
=
np
.
arange
(
30107
)
.
reshape
(
7
,
11
,
17
,
23
)
.
astype
(
'float16'
)
for
offset
,
axis1
,
axis2
in
[
(
1
,
0
,
1
),
(
-
1
,
0
,
1
),
(
0
,
1
,
0
),
(
-
2
,
1
,
0
),
(
-
3
,
1
,
0
),
(
-
2
,
2
,
0
),
(
3
,
3
,
0
),
(
-
1
,
3
,
2
),
(
2
,
2
,
3
),
(
-
1
,
2
,
1
),
(
1
,
3
,
1
),
(
-
1
,
1
,
3
)]:
assert
np
.
allclose
(
GpuExtractDiag
(
offset
,
axis1
,
axis2
)(
x
)
.
eval
({
x
:
np_x
}),
np_x
.
diagonal
(
offset
,
axis1
,
axis2
))
class
test_gpuallocdiag
(
unittest
.
TestCase
):
class
test_gpuallocdiag
(
unittest
.
TestCase
):
def
test_matrix
(
self
):
def
test_matrix
(
self
):
...
...
theano/gpuarray/tests/test_type.py
浏览文件 @
4a8fed96
...
@@ -7,7 +7,7 @@ import numpy as np
...
@@ -7,7 +7,7 @@ import numpy as np
import
theano
import
theano
from
theano.compat
import
PY3
from
theano.compat
import
PY3
from
theano
import
config
from
theano
import
config
from
theano.compile
import
DeepCopyOp
from
theano.compile
import
DeepCopyOp
,
Rebroadcast
,
ViewOp
from
theano.misc.pkl_utils
import
CompatUnpickler
from
theano.misc.pkl_utils
import
CompatUnpickler
# Disabled for now
# Disabled for now
...
@@ -21,16 +21,45 @@ import pygpu
...
@@ -21,16 +21,45 @@ import pygpu
def
test_deep_copy
():
def
test_deep_copy
():
a
=
rand_gpuarray
(
20
,
dtype
=
'float32'
)
for
dtype
in
[
'float16'
,
'float32'
]:
g
=
GpuArrayType
(
dtype
=
'float32'
,
broadcastable
=
(
False
,))(
'g'
)
a
=
rand_gpuarray
(
20
,
dtype
=
dtype
)
g
=
GpuArrayType
(
dtype
=
dtype
,
broadcastable
=
(
False
,))(
'g'
)
f
=
theano
.
function
([
g
],
g
)
assert
isinstance
(
f
.
maker
.
fgraph
.
toposort
()[
0
]
.
op
,
DeepCopyOp
)
res
=
f
(
a
)
assert
GpuArrayType
.
values_eq
(
res
,
a
)
def
test_view
():
for
dtype
in
[
'float16'
,
'float32'
]:
a
=
rand_gpuarray
(
20
,
dtype
=
dtype
)
g
=
GpuArrayType
(
dtype
=
dtype
,
broadcastable
=
(
False
,))(
'g'
)
f
=
theano
.
function
([
g
],
g
)
f
=
theano
.
function
([
g
],
ViewOp
()(
g
)
)
assert
isinstance
(
f
.
maker
.
fgraph
.
toposort
()[
0
]
.
op
,
DeepCopy
Op
)
assert
isinstance
(
f
.
maker
.
fgraph
.
toposort
()[
0
]
.
op
,
View
Op
)
res
=
f
(
a
)
res
=
f
(
a
)
assert
GpuArrayType
.
values_eq
(
res
,
a
)
assert
GpuArrayType
.
values_eq
(
res
,
a
)
def
test_rebroadcast
():
for
dtype
in
[
'float16'
,
'float32'
]:
a
=
rand_gpuarray
(
1
,
dtype
=
dtype
)
g
=
GpuArrayType
(
dtype
=
dtype
,
broadcastable
=
(
False
,))(
'g'
)
f
=
theano
.
function
([
g
],
Rebroadcast
((
0
,
True
))(
g
))
assert
isinstance
(
f
.
maker
.
fgraph
.
toposort
()[
0
]
.
op
,
Rebroadcast
)
res
=
f
(
a
)
assert
GpuArrayType
.
values_eq
(
res
,
a
)
def
test_values_eq_approx
():
def
test_values_eq_approx
():
...
@@ -45,10 +74,11 @@ def test_values_eq_approx():
...
@@ -45,10 +74,11 @@ def test_values_eq_approx():
def
test_specify_shape
():
def
test_specify_shape
():
a
=
rand_gpuarray
(
20
,
dtype
=
'float32'
)
for
dtype
in
[
'float16'
,
'float32'
]:
g
=
GpuArrayType
(
dtype
=
'float32'
,
broadcastable
=
(
False
,))(
'g'
)
a
=
rand_gpuarray
(
20
,
dtype
=
dtype
)
f
=
theano
.
function
([
g
],
theano
.
tensor
.
specify_shape
(
g
,
[
20
]))
g
=
GpuArrayType
(
dtype
=
dtype
,
broadcastable
=
(
False
,))(
'g'
)
f
(
a
)
f
=
theano
.
function
([
g
],
theano
.
tensor
.
specify_shape
(
g
,
[
20
]))
f
(
a
)
def
test_filter_float
():
def
test_filter_float
():
...
...
theano/gradient.py
浏览文件 @
4a8fed96
...
@@ -1482,7 +1482,11 @@ class numeric_grad(object):
...
@@ -1482,7 +1482,11 @@ class numeric_grad(object):
The tuple (abs_err, rel_err) is returned
The tuple (abs_err, rel_err) is returned
"""
"""
abs_err
=
abs
(
a
-
b
)
abs_err
=
abs
(
a
-
b
)
rel_err
=
abs_err
/
np
.
maximum
(
abs
(
a
)
+
abs
(
b
),
1e-8
)
# 1e-8 is to prevent division by zeros.
# [] is to make sure that if a and b are float16, 1e-8 don't get
# dowcasted to float16 as that give 0! This would add back the
# division by zero
rel_err
=
abs_err
/
np
.
maximum
(
abs
(
a
)
+
abs
(
b
),
[
1e-8
])
# The numpy.asarray are needed as if a or b is a sparse matrix
# The numpy.asarray are needed as if a or b is a sparse matrix
# this would result in a numpy.matrix and not a numpy.ndarray
# this would result in a numpy.matrix and not a numpy.ndarray
# and the behave differently causing problem later.
# and the behave differently causing problem later.
...
...
theano/tensor/inc_code.py
0 → 100644
浏览文件 @
4a8fed96
from
__future__
import
absolute_import
,
print_function
,
division
def inc_code():
    """Build and return the C support code for ``inplace_increment``.

    The returned string contains, in order:

    * a ``typedef`` for the per-dtype worker signature
      (``inplace_map_binop``);
    * one ``<ctype>_inplace_add`` worker per supported NumPy C type, each
      guarded by ``#if defined(<TYPENUM>)`` so that types missing from the
      NumPy headers are simply left out;
    * the ``addition_funcs`` table (``NULL`` terminated) and the parallel
      ``type_numbers`` table (terminated by a negative value);
    * ``map_increment`` and ``inplace_increment``, the latter being the
      entry point: it returns ``0`` on success and ``-1`` on failure.

    In every worker ``inc_or_set`` selects the operation: non-zero adds
    the new value to the existing element, zero overwrites the element.

    NOTE(review): ``npy_float16`` is presumably an integer typedef in the
    NumPy C headers, in which case the plain ``+`` generated for it adds
    bit patterns rather than half-precision values -- confirm before
    relying on the float16 worker.

    NOTE(review): callers that cache compiled modules by a version number
    probably need to bump that version whenever this text changes.
    """
    types = ['npy_' + t for t in ['int8', 'int16', 'int32', 'int64',
                                  'uint8', 'uint16', 'uint32', 'uint64',
                                  'float16', 'float32', 'float64']]

    complex_types = ['npy_' + t for t in ['complex32', 'complex64',
                                          'complex128']]

    # The loop counter is an npy_intp, matching the type of mit->size, so
    # that very large index sets are not truncated to a C int.
    inplace_map_template = """
    #if defined(%(typen)s)
    static void %(type)s_inplace_add(PyArrayMapIterObject *mit,
                                     PyArrayIterObject *it, int inc_or_set)
    {
        npy_intp index = mit->size;
        while (index--) {
            %(op)s

            PyArray_MapIterNext(mit);
            PyArray_ITER_NEXT(it);
        }
    }
    #endif
    """

    # Element update for real types: a single scalar addition.
    floatadd = (
        "((%(type)s*)mit->dataptr)[0] = "
        "(inc_or_set ? ((%(type)s*)mit->dataptr)[0] : 0)"
        " + ((%(type)s*)it->dataptr)[0];")

    # Element update for complex types: real and imaginary parts are
    # updated separately through the struct fields.
    complexadd = """
    ((%(type)s*)mit->dataptr)[0].real =
        (inc_or_set ? ((%(type)s*)mit->dataptr)[0].real : 0)
        + ((%(type)s*)it->dataptr)[0].real;
    ((%(type)s*)mit->dataptr)[0].imag =
        (inc_or_set ? ((%(type)s*)mit->dataptr)[0].imag : 0)
        + ((%(type)s*)it->dataptr)[0].imag;
    """

    fns = ''.join([inplace_map_template % {'type': t, 'typen': t.upper(),
                                           'op': floatadd % {'type': t}}
                   for t in types] +
                  [inplace_map_template % {'type': t, 'typen': t.upper(),
                                           'op': complexadd % {'type': t}}
                   for t in complex_types])

    def gen_binop(ctype, typen):
        # One guarded entry of the worker table.
        return """
    #if defined(%(typen)s)
    %(type)s_inplace_add,
    #endif
    """ % dict(type=ctype, typen=typen)

    fn_array = ("static inplace_map_binop addition_funcs[] = {" +
                ''.join([gen_binop(ctype=t, typen=t.upper())
                         for t in types + complex_types]) + "NULL};\n")

    def gen_num(typen):
        # One guarded entry of the type-number table; it must use the same
        # guard as gen_binop so both tables stay index-aligned.
        return """
    #if defined(%(typen)s)
    %(typen)s,
    #endif
    """ % dict(typen=typen)

    type_number_array = ("static int type_numbers[] = {" +
                         ''.join([gen_num(typen=t.upper())
                                  for t in types + complex_types]) +
                         "-1000};")

    # inplace_increment returns a plain int, so the success path must not
    # take a reference on Py_None: nothing would ever release it.
    code = ("""
    typedef void (*inplace_map_binop)(PyArrayMapIterObject *,
                                      PyArrayIterObject *, int inc_or_set);
    """ + fns + fn_array + type_number_array + """
static int
map_increment(PyArrayMapIterObject *mit, PyArrayObject *op,
              inplace_map_binop add_inplace, int inc_or_set)
{
    PyArrayObject *arr = NULL;
    PyArrayIterObject *it;
    PyArray_Descr *descr;
    if (mit->ait == NULL) {
        return -1;
    }
    descr = PyArray_DESCR(mit->ait->ao);
    Py_INCREF(descr);
    arr = (PyArrayObject *)PyArray_FromAny((PyObject *)op, descr,
                                0, 0, NPY_ARRAY_FORCECAST, NULL);
    if (arr == NULL) {
        return -1;
    }
    if ((mit->subspace != NULL) && (mit->consec)) {
        PyArray_MapIterSwapAxes(mit, (PyArrayObject **)&arr, 0);
        if (arr == NULL) {
            return -1;
        }
    }
    it = (PyArrayIterObject*)
            PyArray_BroadcastToShape((PyObject*)arr, mit->dimensions, mit->nd);
    if (it == NULL) {
        Py_DECREF(arr);
        return -1;
    }

    (*add_inplace)(mit, it, inc_or_set);

    Py_DECREF(arr);
    Py_DECREF(it);
    return 0;
}


static int
inplace_increment(PyArrayObject *a, PyObject *index, PyArrayObject *inc,
                  int inc_or_set)
{
    inplace_map_binop add_inplace = NULL;
    int type_number = -1;
    int i = 0;
    PyArrayMapIterObject * mit;

    if (PyArray_FailUnlessWriteable(a, "input/output array") < 0) {
        return -1;
    }

    if (PyArray_NDIM(a) == 0) {
        PyErr_SetString(PyExc_IndexError, "0-d arrays can't be indexed.");
        return -1;
    }
    type_number = PyArray_TYPE(a);

    while (type_numbers[i] >= 0 && addition_funcs[i] != NULL){
        if (type_number == type_numbers[i]) {
            add_inplace = addition_funcs[i];
            break;
        }
        i++ ;
    }

    if (add_inplace == NULL) {
        PyErr_SetString(PyExc_TypeError, "unsupported type for a");
        return -1;
    }
    mit = (PyArrayMapIterObject *) PyArray_MapIterArray(a, index);
    if (mit == NULL) {
        goto fail;
    }
    if (map_increment(mit, inc, add_inplace, inc_or_set) != 0) {
        goto fail;
    }

    Py_DECREF(mit);

    return 0;

fail:
    Py_XDECREF(mit);

    return -1;
}
""")

    return code
theano/tensor/subtensor.py
浏览文件 @
4a8fed96
...
@@ -22,9 +22,7 @@ from theano.tensor.elemwise import DimShuffle
...
@@ -22,9 +22,7 @@ from theano.tensor.elemwise import DimShuffle
from
theano.tensor.type_other
import
NoneConst
,
SliceType
,
NoneTypeT
,
make_slice
from
theano.tensor.type_other
import
NoneConst
,
SliceType
,
NoneTypeT
,
make_slice
from
theano
import
config
from
theano
import
config
if
config
.
cxx
:
from
.inc_code
import
inc_code
import
theano.gof.cutils
# needed to import cutils_ext
from
cutils_ext.cutils_ext
import
inplace_increment
_logger
=
logging
.
getLogger
(
"theano.tensor.subtensor"
)
_logger
=
logging
.
getLogger
(
"theano.tensor.subtensor"
)
...
@@ -1943,8 +1941,7 @@ class AdvancedIncSubtensor1(Op):
...
@@ -1943,8 +1941,7 @@ class AdvancedIncSubtensor1(Op):
NPY_ARRAY_ENSURECOPY, NULL)"""
%
locals
()
NPY_ARRAY_ENSURECOPY, NULL)"""
%
locals
()
def
c_support_code
(
self
):
def
c_support_code
(
self
):
from
theano.gof.cutils
import
compile_cutils_code
return
inc_code
()
return
compile_cutils_code
()
def
c_code
(
self
,
node
,
name
,
input_names
,
output_names
,
sub
):
def
c_code
(
self
,
node
,
name
,
input_names
,
output_names
,
sub
):
numpy_ver
=
[
int
(
n
)
for
n
in
np
.
__version__
.
split
(
'.'
)[:
2
]]
numpy_ver
=
[
int
(
n
)
for
n
in
np
.
__version__
.
split
(
'.'
)[:
2
]]
...
@@ -1976,17 +1973,14 @@ class AdvancedIncSubtensor1(Op):
...
@@ -1976,17 +1973,14 @@ class AdvancedIncSubtensor1(Op):
Py_XDECREF(
%(out)
s);
Py_XDECREF(
%(out)
s);
%(out)
s =
%(copy_of_x)
s;
%(out)
s =
%(copy_of_x)
s;
}
}
PyObject *arglist = Py_BuildValue("OOOi",
%(out)
s,
%(idx)
s,
%(y)
s,
%(inc_or_set)
d);
if (inplace_increment(
%(out)
s, (PyObject *)
%(idx)
s,
%(y)
s,
%(inc_or_set)
d)) {
rval = inplace_increment(NULL, arglist);
Py_XDECREF(arglist);
if (rval == NULL) {
%(fail)
s;
%(fail)
s;
}
}
Py_XDECREF(rval);
Py_XDECREF(rval);
"""
%
locals
()
"""
%
locals
()
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
3
,)
return
(
4
,)
def
perform
(
self
,
node
,
inp
,
out_
):
def
perform
(
self
,
node
,
inp
,
out_
):
# TODO opt to make this inplace
# TODO opt to make this inplace
...
@@ -2001,36 +1995,10 @@ class AdvancedIncSubtensor1(Op):
...
@@ -2001,36 +1995,10 @@ class AdvancedIncSubtensor1(Op):
if
self
.
set_instead_of_inc
:
if
self
.
set_instead_of_inc
:
x
[
idx
]
=
y
x
[
idx
]
=
y
else
:
else
:
if
config
.
cxx
and
node
.
inputs
[
0
]
.
dtype
!=
'float16'
:
np
.
add
.
at
(
x
,
idx
,
y
)
increment
=
inplace_increment
else
:
increment
=
self
.
inplace_increment1d_slow
increment
(
x
,
idx
,
y
)
out
[
0
]
=
x
out
[
0
]
=
x
def inplace_increment1d_slow(self, x, idx, y):
    """Add `y` into `x` in place, one index of `idx` at a time.

    Each index is applied with its own ``+=``, so an index that occurs
    several times in `idx` is incremented several times.

    When `y` has fewer dimensions than `x`, the whole of `y` is added to
    every selected ``x[i]``.  When both have the same number of
    dimensions, `y` is walked jointly with `idx`, except that a `y` of
    length one has its single entry added at every index.
    """
    # `make_node` should already guarantee this.
    assert y.ndim <= x.ndim

    if y.ndim != x.ndim:
        # Lower-dimensional `y`: add it as a whole at each position.
        for target in idx:
            x[target] += y
        return

    if len(y) == 1:
        # Allow broadcasting of y[0].
        only_entry = y[0]
        for target in idx:
            x[target] += only_entry
        return

    # Same rank and more than one entry: pair idx[k] with y[k].
    assert len(y) == len(idx)
    for pos, target in enumerate(idx):
        x[target] += y[pos]
def
infer_shape
(
self
,
node
,
ishapes
):
def
infer_shape
(
self
,
node
,
ishapes
):
x
,
y
,
ilist
=
ishapes
x
,
y
,
ilist
=
ishapes
return
[
x
]
return
[
x
]
...
@@ -2246,14 +2214,8 @@ class AdvancedIncSubtensor(Op):
...
@@ -2246,14 +2214,8 @@ class AdvancedIncSubtensor(Op):
if
self
.
set_instead_of_inc
:
if
self
.
set_instead_of_inc
:
out
[
0
][
inputs
[
2
:]]
=
inputs
[
1
]
out
[
0
][
inputs
[
2
:]]
=
inputs
[
1
]
elif
config
.
cxx
:
inplace_increment
(
out
[
0
],
tuple
(
inputs
[
2
:]),
inputs
[
1
])
else
:
else
:
raise
NotImplementedError
(
np
.
add
.
at
(
out
[
0
],
tuple
(
inputs
[
2
:]),
inputs
[
1
])
'Could not import inplace_increment, so advanced '
'indexing is disabled. '
'Please make sure that you have a working C++ compiler '
'and that config.cxx is correctly set.'
)
def
infer_shape
(
self
,
node
,
ishapes
):
def
infer_shape
(
self
,
node
,
ishapes
):
return
[
ishapes
[
0
]]
return
[
ishapes
[
0
]]
...
...
theano/tensor/tests/test_opt.py
浏览文件 @
4a8fed96
...
@@ -2936,10 +2936,7 @@ def test_local_IncSubtensor_serialize():
...
@@ -2936,10 +2936,7 @@ def test_local_IncSubtensor_serialize():
i
=
T
.
vector
(
'i'
,
dtype
=
'int64'
)
i
=
T
.
vector
(
'i'
,
dtype
=
'int64'
)
j
=
T
.
vector
(
'j'
,
dtype
=
'int64'
)
j
=
T
.
vector
(
'j'
,
dtype
=
'int64'
)
t
=
T
.
scalar
(
't'
)
t
=
T
.
scalar
(
't'
)
if
theano
.
tensor
.
subtensor
.
inplace_increment
:
y
=
(
W
[
i
]
+
W
[
j
]
+
W
[
1
]
+
W
[
i
,
j
])
.
sum
()
y
=
(
W
[
i
]
+
W
[
j
]
+
W
[
1
]
+
W
[
i
,
j
])
.
sum
()
else
:
y
=
(
W
[
i
]
+
W
[
j
]
+
W
[
1
])
.
sum
()
cost
=
T
.
sqr
(
t
-
y
)
cost
=
T
.
sqr
(
t
-
y
)
dW
=
theano
.
grad
(
cost
,
W
)
dW
=
theano
.
grad
(
cost
,
W
)
mode
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'fusion'
)
mode
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'fusion'
)
...
...
theano/tensor/tests/test_subtensor.py
浏览文件 @
4a8fed96
...
@@ -5,7 +5,6 @@ import sys
...
@@ -5,7 +5,6 @@ import sys
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
from
nose.plugins.skip
import
SkipTest
from
nose.tools
import
assert_equal
from
nose.tools
import
assert_equal
from
numpy.testing
import
assert_array_equal
from
numpy.testing
import
assert_array_equal
from
six
import
StringIO
from
six
import
StringIO
...
@@ -524,10 +523,11 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
...
@@ -524,10 +523,11 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
gn
=
theano
.
grad
(
t
.
sum
(),
n
)
gn
=
theano
.
grad
(
t
.
sum
(),
n
)
g
=
self
.
function
([],
gn
,
op
=
self
.
adv_incsub1
)
g
=
self
.
function
([],
gn
,
op
=
self
.
adv_incsub1
)
utt
.
verify_grad
(
lambda
m
:
m
[[
1
,
3
]],
utt
.
verify_grad
(
lambda
m
:
m
[[
1
,
3
]],
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
)])
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
)],
mode
=
self
.
mode
)
g
()
g
()
utt
.
verify_grad
(
lambda
m
:
m
[
idx
],
utt
.
verify_grad
(
lambda
m
:
m
[
idx
],
[
data
])
[
data
]
,
mode
=
self
.
mode
)
def
test_noncontiguous_idx
(
self
):
def
test_noncontiguous_idx
(
self
):
data
=
rand
(
4
,
2
,
3
)
data
=
rand
(
4
,
2
,
3
)
...
@@ -597,17 +597,20 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
...
@@ -597,17 +597,20 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
self
.
assertTrue
(
np
.
allclose
(
g_00
,
2
))
self
.
assertTrue
(
np
.
allclose
(
g_00
,
2
))
utt
.
verify_grad
(
lambda
m
:
m
[[
1
,
3
]],
utt
.
verify_grad
(
lambda
m
:
m
[[
1
,
3
]],
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
)])
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
)],
mode
=
self
.
mode
)
def
fun
(
x
,
y
):
def
fun
(
x
,
y
):
return
advanced_inc_subtensor1
(
x
,
y
,
[
1
,
3
])
return
advanced_inc_subtensor1
(
x
,
y
,
[
1
,
3
])
utt
.
verify_grad
(
fun
,
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
),
utt
.
verify_grad
(
fun
,
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
),
np
.
random
.
rand
(
2
,
5
)
.
astype
(
self
.
dtype
)])
np
.
random
.
rand
(
2
,
5
)
.
astype
(
self
.
dtype
)],
mode
=
self
.
mode
)
def
fun
(
x
,
y
):
def
fun
(
x
,
y
):
return
advanced_set_subtensor1
(
x
,
y
,
[
1
,
3
])
return
advanced_set_subtensor1
(
x
,
y
,
[
1
,
3
])
utt
.
verify_grad
(
fun
,
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
),
utt
.
verify_grad
(
fun
,
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
),
np
.
random
.
rand
(
2
,
5
)
.
astype
(
self
.
dtype
)])
np
.
random
.
rand
(
2
,
5
)
.
astype
(
self
.
dtype
)],
mode
=
self
.
mode
)
# test set_subtensor broadcast
# test set_subtensor broadcast
self
.
dtype
=
'float32'
self
.
dtype
=
'float32'
...
@@ -872,12 +875,12 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
...
@@ -872,12 +875,12 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
def
fct
(
t
):
def
fct
(
t
):
return
theano
.
tensor
.
sum
(
t
[
idx_
])
return
theano
.
tensor
.
sum
(
t
[
idx_
])
utt
.
verify_grad
(
fct
,
[
data
])
utt
.
verify_grad
(
fct
,
[
data
]
,
mode
=
self
.
mode
)
# Test the grad of the grad (e.i. AdvancedIncSubtensor1.grad)
# Test the grad of the grad (e.i. AdvancedIncSubtensor1.grad)
def
fct2
(
t
):
def
fct2
(
t
):
return
theano
.
tensor
.
grad
(
theano
.
tensor
.
sum
(
t
[
idx_
]),
t
)
return
theano
.
tensor
.
grad
(
theano
.
tensor
.
sum
(
t
[
idx_
]),
t
)
utt
.
verify_grad
(
fct2
,
[
data
])
utt
.
verify_grad
(
fct2
,
[
data
]
,
mode
=
self
.
mode
)
# Test shape of AdvancedIncSubtensor1 and AdvancedSubtensor1
# Test shape of AdvancedIncSubtensor1 and AdvancedSubtensor1
if
not
self
.
fast_compile
:
if
not
self
.
fast_compile
:
...
@@ -958,7 +961,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
...
@@ -958,7 +961,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
# vector
# vector
utt
.
verify_grad
(
utt
.
verify_grad
(
inc_slice
(
slice
(
2
,
4
,
None
)),
inc_slice
(
slice
(
2
,
4
,
None
)),
(
np
.
asarray
([
0
,
1
,
2
,
3
,
4
,
5.
]),
np
.
asarray
([
9
,
9.
]),))
(
np
.
asarray
([
0
,
1
,
2
,
3
,
4
,
5.
]),
np
.
asarray
([
9
,
9.
]),),
mode
=
self
.
mode
)
# matrix
# matrix
utt
.
verify_grad
(
utt
.
verify_grad
(
...
@@ -1498,9 +1502,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
...
@@ -1498,9 +1502,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
utt
.
assert_allclose
(
rval
,
aval
)
utt
.
assert_allclose
(
rval
,
aval
)
def
test_inc_adv_subtensor_w_2vec
(
self
):
def
test_inc_adv_subtensor_w_2vec
(
self
):
if
not
config
.
cxx
:
raise
SkipTest
(
'config.cxx empty'
)
subt
=
self
.
m
[
self
.
ix1
,
self
.
ix12
]
subt
=
self
.
m
[
self
.
ix1
,
self
.
ix12
]
a
=
inc_subtensor
(
subt
,
subt
)
a
=
inc_subtensor
(
subt
,
subt
)
...
@@ -1519,9 +1520,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
...
@@ -1519,9 +1520,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
[
.
5
,
.
3
*
2
,
.
15
]]),
aval
[
.
5
,
.
3
*
2
,
.
15
]]),
aval
def
test_inc_adv_subtensor_with_broadcasting
(
self
):
def
test_inc_adv_subtensor_with_broadcasting
(
self
):
if
not
config
.
cxx
:
raise
SkipTest
(
'config.cxx empty'
)
inc
=
dscalar
()
inc
=
dscalar
()
a
=
inc_subtensor
(
self
.
m
[
self
.
ix1
,
self
.
ix12
],
inc
)
a
=
inc_subtensor
(
self
.
m
[
self
.
ix1
,
self
.
ix12
],
inc
)
g_inc
=
tensor
.
grad
(
a
.
sum
(),
inc
)
g_inc
=
tensor
.
grad
(
a
.
sum
(),
inc
)
...
@@ -1542,9 +1540,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
...
@@ -1542,9 +1540,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
assert
np
.
allclose
(
gval
,
3.0
),
gval
assert
np
.
allclose
(
gval
,
3.0
),
gval
def
test_inc_adv_subtensor1_with_broadcasting
(
self
):
def
test_inc_adv_subtensor1_with_broadcasting
(
self
):
if
not
config
.
cxx
:
raise
SkipTest
(
'config.cxx empty'
)
inc
=
dscalar
()
inc
=
dscalar
()
a
=
inc_subtensor
(
self
.
m
[
self
.
ix1
],
inc
)
a
=
inc_subtensor
(
self
.
m
[
self
.
ix1
],
inc
)
g_inc
=
tensor
.
grad
(
a
.
sum
(),
inc
)
g_inc
=
tensor
.
grad
(
a
.
sum
(),
inc
)
...
@@ -1564,9 +1559,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
...
@@ -1564,9 +1559,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
assert
np
.
allclose
(
gval
,
9.0
),
gval
assert
np
.
allclose
(
gval
,
9.0
),
gval
def
test_inc_adv_subtensor_with_index_broadcasting
(
self
):
def
test_inc_adv_subtensor_with_index_broadcasting
(
self
):
if
not
config
.
cxx
:
raise
SkipTest
(
'config.cxx empty'
)
a
=
inc_subtensor
(
self
.
m
[
self
.
ix1
,
self
.
ix2
],
2.1
)
a
=
inc_subtensor
(
self
.
m
[
self
.
ix1
,
self
.
ix2
],
2.1
)
assert
a
.
type
==
self
.
m
.
type
,
(
a
.
type
,
self
.
m
.
type
)
assert
a
.
type
==
self
.
m
.
type
,
(
a
.
type
,
self
.
m
.
type
)
...
@@ -1640,17 +1632,20 @@ class TestAdvancedSubtensor(unittest.TestCase):
...
@@ -1640,17 +1632,20 @@ class TestAdvancedSubtensor(unittest.TestCase):
self
.
assertTrue
(
isinstance
(
t
.
owner
.
op
,
tensor
.
AdvancedSubtensor
))
self
.
assertTrue
(
isinstance
(
t
.
owner
.
op
,
tensor
.
AdvancedSubtensor
))
utt
.
verify_grad
(
lambda
m
:
m
[[
1
,
3
],
[
2
,
4
]],
utt
.
verify_grad
(
lambda
m
:
m
[[
1
,
3
],
[
2
,
4
]],
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
)])
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
)],
mode
=
self
.
mode
)
def
fun
(
x
,
y
):
def
fun
(
x
,
y
):
return
advanced_inc_subtensor
(
x
,
y
,
[
1
,
3
],
[
2
,
4
])
return
advanced_inc_subtensor
(
x
,
y
,
[
1
,
3
],
[
2
,
4
])
utt
.
verify_grad
(
fun
,
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
),
utt
.
verify_grad
(
fun
,
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
),
np
.
random
.
rand
(
2
)
.
astype
(
self
.
dtype
)])
np
.
random
.
rand
(
2
)
.
astype
(
self
.
dtype
)],
mode
=
self
.
mode
)
def
fun
(
x
,
y
):
def
fun
(
x
,
y
):
return
advanced_set_subtensor
(
x
,
y
,
[
1
,
3
],
[
2
,
4
])
return
advanced_set_subtensor
(
x
,
y
,
[
1
,
3
],
[
2
,
4
])
utt
.
verify_grad
(
fun
,
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
),
utt
.
verify_grad
(
fun
,
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
),
np
.
random
.
rand
(
2
)
.
astype
(
self
.
dtype
)])
np
.
random
.
rand
(
2
)
.
astype
(
self
.
dtype
)],
mode
=
self
.
mode
)
class
TestInferShape
(
utt
.
InferShapeTester
):
class
TestInferShape
(
utt
.
InferShapeTester
):
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论