Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
4a8fed96
提交
4a8fed96
authored
6月 01, 2017
作者:
Frédéric Bastien
提交者:
GitHub
6月 01, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5923 from nouiz/float16
don't move op that don't support float16 on the GPU
上级
ebc59491
dc7f7a4f
显示空白字符变更
内嵌
并排
正在显示
13 个修改的文件
包含
371 行增加
和
289 行删除
+371
-289
ops.py
theano/compile/ops.py
+4
-0
cutils.py
theano/gof/cutils.py
+2
-196
linalg.py
theano/gpuarray/linalg.py
+3
-1
opt.py
theano/gpuarray/opt.py
+45
-6
subtensor.py
theano/gpuarray/subtensor.py
+5
-2
test_blas.py
theano/gpuarray/tests/test_blas.py
+18
-2
test_subtensor.py
theano/gpuarray/tests/test_subtensor.py
+66
-4
test_type.py
theano/gpuarray/tests/test_type.py
+35
-5
gradient.py
theano/gradient.py
+5
-1
inc_code.py
theano/tensor/inc_code.py
+162
-0
subtensor.py
theano/tensor/subtensor.py
+7
-45
test_opt.py
theano/tensor/tests/test_opt.py
+0
-3
test_subtensor.py
theano/tensor/tests/test_subtensor.py
+19
-24
没有找到文件。
theano/compile/ops.py
浏览文件 @
4a8fed96
...
...
@@ -50,6 +50,7 @@ class ViewOp(gof.Op):
# the output variable is %(oname)s.
c_code_and_version
=
{}
__props__
=
()
_f16_ok
=
True
def
make_node
(
self
,
x
):
return
gof
.
Apply
(
self
,
[
x
],
[
x
.
type
()])
...
...
@@ -151,6 +152,7 @@ class DeepCopyOp(gof.Op):
check_input
=
False
__props__
=
()
_f16_ok
=
True
def
__init__
(
self
):
pass
...
...
@@ -659,6 +661,7 @@ class Rebroadcast(gof.Op):
check_input
=
False
__props__
=
(
"axis"
,)
_f16_ok
=
True
def
__init__
(
self
,
*
axis
):
# Sort them to make sure we merge all possible case.
...
...
@@ -820,6 +823,7 @@ class SpecifyShape(gof.Op):
# the output variable is %(oname)s.
c_code_and_version
=
{}
__props__
=
()
_f16_ok
=
True
def
make_node
(
self
,
x
,
shape
):
if
not
isinstance
(
x
,
gof
.
Variable
):
...
...
theano/gof/cutils.py
浏览文件 @
4a8fed96
...
...
@@ -14,189 +14,6 @@ if os.path.exists(os.path.join(config.compiledir, 'cutils_ext.so')):
os
.
remove
(
os
.
path
.
join
(
config
.
compiledir
,
'cutils_ext.so'
))
def
compile_cutils_code
():
types
=
[
'npy_'
+
t
for
t
in
[
'int8'
,
'int16'
,
'int32'
,
'int64'
,
'int128'
,
'int256'
,
'uint8'
,
'uint16'
,
'uint32'
,
'uint64'
,
'uint128'
,
'uint256'
,
'float16'
,
'float32'
,
'float64'
,
'float80'
,
'float96'
,
'float128'
,
'float256'
]]
complex_types
=
[
'npy_'
+
t
for
t
in
[
'complex32'
,
'complex64'
,
'complex128'
,
'complex160'
,
'complex192'
,
'complex512'
]]
inplace_map_template
=
"""
#if defined(
%(typen)
s)
static void
%(type)
s_inplace_add(PyArrayMapIterObject *mit,
PyArrayIterObject *it, int inc_or_set)
{
int index = mit->size;
while (index--) {
%(op)
s
PyArray_MapIterNext(mit);
PyArray_ITER_NEXT(it);
}
}
#endif
"""
floatadd
=
(
"((
%(type)
s*)mit->dataptr)[0] = "
"(inc_or_set ? ((
%(type)
s*)mit->dataptr)[0] : 0)"
" + ((
%(type)
s*)it->dataptr)[0];"
)
complexadd
=
"""
((
%(type)
s*)mit->dataptr)[0].real =
(inc_or_set ? ((
%(type)
s*)mit->dataptr)[0].real : 0)
+ ((
%(type)
s*)it->dataptr)[0].real;
((
%(type)
s*)mit->dataptr)[0].imag =
(inc_or_set ? ((
%(type)
s*)mit->dataptr)[0].imag : 0)
+ ((
%(type)
s*)it->dataptr)[0].imag;
"""
fns
=
''
.
join
([
inplace_map_template
%
{
'type'
:
t
,
'typen'
:
t
.
upper
(),
'op'
:
floatadd
%
{
'type'
:
t
}}
for
t
in
types
]
+
[
inplace_map_template
%
{
'type'
:
t
,
'typen'
:
t
.
upper
(),
'op'
:
complexadd
%
{
'type'
:
t
}}
for
t
in
complex_types
])
def
gen_binop
(
type
,
typen
):
return
"""
#if defined(
%(typen)
s)
%(type)
s_inplace_add,
#endif
"""
%
dict
(
type
=
type
,
typen
=
typen
)
fn_array
=
(
"static inplace_map_binop addition_funcs[] = {"
+
''
.
join
([
gen_binop
(
type
=
t
,
typen
=
t
.
upper
())
for
t
in
types
+
complex_types
])
+
"NULL};
\n
"
)
def
gen_num
(
typen
):
return
"""
#if defined(
%(typen)
s)
%(typen)
s,
#endif
"""
%
dict
(
type
=
type
,
typen
=
typen
)
type_number_array
=
(
"static int type_numbers[] = {"
+
''
.
join
([
gen_num
(
typen
=
t
.
upper
())
for
t
in
types
+
complex_types
])
+
"-1000};"
)
code
=
(
"""
#if NPY_API_VERSION >= 0x00000008
typedef void (*inplace_map_binop)(PyArrayMapIterObject *,
PyArrayIterObject *, int inc_or_set);
"""
+
fns
+
fn_array
+
type_number_array
+
"""
static int
map_increment(PyArrayMapIterObject *mit, PyObject *op,
inplace_map_binop add_inplace, int inc_or_set)
{
PyArrayObject *arr = NULL;
PyArrayIterObject *it;
PyArray_Descr *descr;
if (mit->ait == NULL) {
return -1;
}
descr = PyArray_DESCR(mit->ait->ao);
Py_INCREF(descr);
arr = (PyArrayObject *)PyArray_FromAny(op, descr,
0, 0, NPY_ARRAY_FORCECAST, NULL);
if (arr == NULL) {
return -1;
}
if ((mit->subspace != NULL) && (mit->consec)) {
PyArray_MapIterSwapAxes(mit, (PyArrayObject **)&arr, 0);
if (arr == NULL) {
return -1;
}
}
it = (PyArrayIterObject*)
PyArray_BroadcastToShape((PyObject*)arr, mit->dimensions, mit->nd);
if (it == NULL) {
Py_DECREF(arr);
return -1;
}
(*add_inplace)(mit, it, inc_or_set);
Py_DECREF(arr);
Py_DECREF(it);
return 0;
}
static PyObject *
inplace_increment(PyObject *dummy, PyObject *args)
{
PyObject *arg_a = NULL, *index=NULL, *inc=NULL;
int inc_or_set = 1;
PyArrayObject *a;
inplace_map_binop add_inplace = NULL;
int type_number = -1;
int i = 0;
PyArrayMapIterObject * mit;
if (!PyArg_ParseTuple(args, "OOO|i", &arg_a, &index,
&inc, &inc_or_set)) {
return NULL;
}
if (!PyArray_Check(arg_a)) {
PyErr_SetString(PyExc_ValueError,
"needs an ndarray as first argument");
return NULL;
}
a = (PyArrayObject *) arg_a;
if (PyArray_FailUnlessWriteable(a, "input/output array") < 0) {
return NULL;
}
if (PyArray_NDIM(a) == 0) {
PyErr_SetString(PyExc_IndexError, "0-d arrays can't be indexed.");
return NULL;
}
type_number = PyArray_TYPE(a);
while (type_numbers[i] >= 0 && addition_funcs[i] != NULL){
if (type_number == type_numbers[i]) {
add_inplace = addition_funcs[i];
break;
}
i++ ;
}
if (add_inplace == NULL) {
PyErr_SetString(PyExc_TypeError, "unsupported type for a");
return NULL;
}
mit = (PyArrayMapIterObject *) PyArray_MapIterArray(a, index);
if (mit == NULL) {
goto fail;
}
if (map_increment(mit, inc, add_inplace, inc_or_set) != 0) {
goto fail;
}
Py_DECREF(mit);
Py_INCREF(Py_None);
return Py_None;
fail:
Py_XDECREF(mit);
return NULL;
}
#endif
"""
)
return
code
def
compile_cutils
():
"""
Do just the compilation of cutils_ext.
...
...
@@ -204,7 +21,6 @@ def compile_cutils():
"""
code
=
(
"""
#include <Python.h>
#include "numpy/arrayobject.h"
#include "theano_mod_helper.h"
extern "C"{
...
...
@@ -226,18 +42,10 @@ def compile_cutils():
int failure = fn(it);
return Py_BuildValue("i", failure);
}"""
)
code
+=
compile_cutils_code
()
code
+=
(
"""static PyMethodDef CutilsExtMethods[] = {
}
static PyMethodDef CutilsExtMethods[] = {
{"run_cthunk", run_cthunk, METH_VARARGS|METH_KEYWORDS,
"Run a theano cthunk."},
#if NPY_API_VERSION >= 0x00000008
{"inplace_increment", inplace_increment,
METH_VARARGS,
"increments a numpy array inplace at the passed indexes."},
#endif
{NULL, NULL, 0, NULL} /* Sentinel */
};"""
)
if
PY3
:
...
...
@@ -256,7 +64,6 @@ def compile_cutils():
PyMODINIT_FUNC
PyInit_cutils_ext(void) {
import_array();
return PyModule_Create(&moduledef);
}
}
...
...
@@ -266,7 +73,6 @@ def compile_cutils():
PyMODINIT_FUNC
initcutils_ext(void)
{
import_array();
(void) Py_InitModule("cutils_ext", CutilsExtMethods);
}
} //extern C
...
...
theano/gpuarray/linalg.py
浏览文件 @
4a8fed96
...
...
@@ -261,7 +261,7 @@ class GpuCholesky(Op):
raise
RuntimeError
(
'CUSOLVER is not available and '
'GpuCholesky Op can not be constructed.'
)
if
skcuda
.
__version__
<=
'0.5.1'
:
warnings
.
warn
(
'The Gpu
Solve
op requires scikit-cuda > 0.5.1 to work with CUDA 8'
)
warnings
.
warn
(
'The Gpu
Cholesky
op requires scikit-cuda > 0.5.1 to work with CUDA 8'
)
if
not
pygpu_available
:
raise
RuntimeError
(
'Missing pygpu or triu/tril functions.'
'Install or update libgpuarray.'
)
...
...
@@ -382,6 +382,7 @@ class GpuMagmaSVD(COp):
A
=
as_gpuarray_variable
(
A
,
ctx_name
)
if
A
.
ndim
!=
2
:
raise
LinAlgError
(
"Matrix rank error"
)
assert
A
.
dtype
==
'float32'
if
self
.
compute_uv
:
return
theano
.
Apply
(
self
,
[
A
],
[
A
.
type
(),
...
...
@@ -476,6 +477,7 @@ class GpuMagmaMatrixInverse(COp):
def
make_node
(
self
,
x
):
ctx_name
=
infer_context_name
(
x
)
x
=
as_gpuarray_variable
(
x
,
ctx_name
)
assert
x
.
dtype
==
'float32'
if
x
.
ndim
!=
2
:
raise
LinAlgError
(
"Matrix rank error"
)
return
theano
.
Apply
(
self
,
[
x
],
[
x
.
type
()])
...
...
theano/gpuarray/opt.py
浏览文件 @
4a8fed96
...
...
@@ -1181,6 +1181,14 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
@op_lifter
([
tensor
.
blas
.
Gemv
,
tensor
.
blas_c
.
CGemv
])
@register_opt2
([
tensor
.
blas
.
Gemv
],
'fast_compile'
)
def
local_gpua_gemv
(
op
,
context_name
,
inputs
,
outputs
):
if
inputs
[
0
]
.
dtype
==
'float16'
:
# Use gemm implementation as cublas gemv don't support float16
return
gpugemm_no_inplace
(
inputs
[
0
][:,
None
],
inputs
[
1
],
inputs
[
2
],
inputs
[
3
][:,
None
],
inputs
[
4
])
.
dimshuffle
(
0
)
if
inputs
[
0
]
.
dtype
not
in
[
'float32'
,
'float64'
]:
return
if
op
.
inplace
:
...
...
@@ -1351,6 +1359,8 @@ theano.tensor.nnet.conv2d()
@op_lifter
([
SparseBlockGemv
])
@register_opt2
([
SparseBlockGemv
],
'fast_compile'
)
def
local_gpua_sparseblockgemv
(
op
,
context_name
,
inputs
,
outputs
):
if
inputs
[
0
]
.
dtype
==
'float16'
:
return
if
op
.
inplace
:
return
gpu_sparse_block_gemv_inplace
else
:
...
...
@@ -1361,6 +1371,8 @@ def local_gpua_sparseblockgemv(op, context_name, inputs, outputs):
@op_lifter
([
SparseBlockOuter
])
@register_opt2
([
SparseBlockOuter
],
'fast_compile'
)
def
local_gpua_sparseblockouter
(
op
,
context_name
,
inputs
,
outputs
):
if
inputs
[
0
]
.
dtype
==
'float16'
:
return
if
op
.
inplace
:
return
gpu_sparse_block_outer_inplace
else
:
...
...
@@ -1998,7 +2010,13 @@ def _scan_type_infer(node):
@op_lifter
([
tensor
.
MaxAndArgmax
])
@register_opt2
([
tensor
.
MaxAndArgmax
],
'fast_compile'
)
def
local_gpu_maxandargmax
(
op
,
context_name
,
inputs
,
outputs
):
return
GpuMaxAndArgmax
(
op
.
get_params
(
None
))
op
=
GpuMaxAndArgmax
(
op
.
get_params
(
None
))
if
inputs
[
0
]
.
dtype
==
"float16"
:
# For now it is better to copy/cast on the GPU then transfer to the CPU
casted_inputs
=
inputs
[
0
]
.
astype
(
'float32'
)
ret
=
op
(
casted_inputs
)
return
[
ret
[
0
]
.
astype
(
'float16'
),
ret
[
1
]]
return
op
# solve
...
...
@@ -2008,9 +2026,15 @@ def local_gpu_maxandargmax(op, context_name, inputs, outputs):
def
local_gpu_solve
(
op
,
context_name
,
inputs
,
outputs
):
if
not
cusolver_available
:
return
if
inputs
[
0
]
.
dtype
not
in
[
'float16'
,
'float32'
]:
return
if
op
.
A_structure
not
in
MATRIX_STRUCTURES_SOLVE
:
return
return
GpuCusolverSolve
(
A_structure
=
op
.
A_structure
)
op
=
GpuCusolverSolve
(
A_structure
=
op
.
A_structure
)
if
inputs
[
0
]
.
dtype
==
'float16'
:
return
op
(
inputs
[
0
]
.
astype
(
'float32'
),
inputs
[
1
]
.
astype
(
'float32'
))
.
astype
(
'float16'
)
return
op
@register_inplace
()
...
...
@@ -2028,7 +2052,13 @@ def local_inplace_gpu_solve(node):
def
local_gpu_cholesky
(
op
,
context_name
,
inputs
,
outputs
):
if
not
cusolver_available
:
return
return
GpuCholesky
(
lower
=
op
.
lower
,
inplace
=
op
.
destructive
)
if
inputs
[
0
]
.
dtype
not
in
[
'float16'
,
'float32'
]:
return
op
=
GpuCholesky
(
lower
=
op
.
lower
,
inplace
=
op
.
destructive
)
if
inputs
[
0
]
.
dtype
==
'float16'
:
return
op
(
inputs
[
0
]
.
astype
(
'float32'
))
.
astype
(
'float16'
)
return
op
@register_inplace
()
...
...
@@ -2044,7 +2074,12 @@ def local_inplace_cholesky(node):
def
local_gpu_matrix_inverse
(
op
,
context_name
,
inputs
,
outputs
):
if
not
config
.
magma
.
enabled
:
return
return
GpuMagmaMatrixInverse
()
if
inputs
[
0
]
.
dtype
not
in
[
'float16'
,
'float32'
]:
return
op
=
GpuMagmaMatrixInverse
()
if
inputs
[
0
]
.
dtype
==
'float16'
:
return
op
(
inputs
[
0
]
.
astype
(
'float32'
))
.
astype
(
'float16'
)
return
op
@register_inplace
()
...
...
@@ -2061,9 +2096,13 @@ def local_inplace_matrix_inverse_inplace(node):
def
local_gpu_svd
(
op
,
context_name
,
inputs
,
outputs
):
if
not
config
.
magma
.
enabled
:
return
return
GpuMagmaSVD
(
full_matrices
=
op
.
full_matrices
,
if
inputs
[
0
]
.
dtype
not
in
[
'float16'
,
'float32'
]:
return
op
=
GpuMagmaSVD
(
full_matrices
=
op
.
full_matrices
,
compute_uv
=
op
.
compute_uv
)
if
inputs
[
0
]
.
dtype
==
'float16'
:
return
op
(
inputs
[
0
]
.
astype
(
'float32'
))
.
astype
(
'float16'
)
return
op
# Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled.
...
...
theano/gpuarray/subtensor.py
浏览文件 @
4a8fed96
...
...
@@ -232,7 +232,7 @@ class GpuIncSubtensor(IncSubtensor):
if
not
self
.
set_instead_of_inc
:
# sub_x += y
iadd
=
get_iadd
(
node
.
inputs
[
0
],
node
.
inputs
[
1
])
iadd
(
sub_x
,
y
,
broadcast
=
False
)
iadd
(
sub_x
,
y
)
else
:
# sub_x[...] = y
x
.
__setitem__
(
cdata
,
y
)
...
...
@@ -403,6 +403,8 @@ class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):
"""
AdvancedSubrensor1 on the GPU.
"""
_f16_ok
=
True
def
make_node
(
self
,
x
,
ilist
):
ctx_name
=
infer_context_name
(
x
,
ilist
)
x_
=
as_gpuarray_variable
(
x
,
ctx_name
)
...
...
@@ -807,7 +809,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
"""
ctx_name
=
infer_context_name
(
x
,
y
,
ilist
)
x_
=
as_gpuarray_variable
(
x
,
ctx_name
)
y_
=
as_gpuarray_variable
(
y
,
ctx_name
)
y_
=
as_gpuarray_variable
(
y
.
astype
(
x
.
dtype
)
,
ctx_name
)
ilist_
=
as_gpuarray_variable
(
ilist
,
ctx_name
)
assert
x_
.
type
.
ndim
>=
y_
.
type
.
ndim
...
...
@@ -1088,6 +1090,7 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) {
class
GpuExtractDiag
(
Op
):
__props__
=
(
"offset"
,
"axis1"
,
"axis2"
,
"view"
)
_f16_ok
=
True
def
__init__
(
self
,
offset
=
0
,
axis1
=
0
,
axis2
=
1
,
view
=
False
):
self
.
view
=
view
...
...
theano/gpuarray/tests/test_blas.py
浏览文件 @
4a8fed96
...
...
@@ -8,7 +8,7 @@ import theano
from
theano
import
config
from
theano
import
tensor
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.blas
import
gemv_inplace
,
gemm_inplace
,
_dot22
,
batched_dot
from
theano.tensor.blas
import
gemv
,
gemv
_inplace
,
gemm_inplace
,
_dot22
,
batched_dot
from
theano.tensor.tests.test_blas
import
TestGer
,
BaseGemv
from
..
import
gpuarray_shared_constructor
...
...
@@ -18,7 +18,7 @@ from ..blas import (gpugemv_inplace, gpugemv_no_inplace,
gpugemm_inplace
,
gpugemm_no_inplace
,
gpugemmbatch_no_inplace
,
gpuger_inplace
,
gpuger_no_inplace
,
GpuGer
,
gpu_dot22
)
GpuGer
,
GpuGemm
,
gpu_dot22
)
GpuGemvTester
=
makeTester
(
...
...
@@ -42,6 +42,22 @@ GpuGemvTester = makeTester(
def
test_float16
():
# gemv (gemm called)
float16_data
=
[
rand
(
3
)
.
astype
(
'float16'
),
np
.
asarray
(
1
,
dtype
=
np
.
float32
),
rand
(
3
,
3
)
.
astype
(
'float16'
),
rand
(
3
)
.
astype
(
'float16'
),
np
.
asarray
(
0.5
,
dtype
=
np
.
float32
)]
float16_shared
=
[
gpuarray_shared_constructor
(
val
,
target
=
test_ctx_name
)
for
val
in
float16_data
]
o
=
gemv
(
*
float16_shared
)
f
=
theano
.
function
([],
o
,
mode
=
mode_with_gpu
)
y
,
alpha
,
A
,
x
,
beta
=
float16_data
out
=
f
()
utt
.
assert_allclose
(
np
.
asarray
(
out
),
alpha
*
np
.
dot
(
A
,
x
)
+
beta
*
y
)
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
any
([
isinstance
(
n
.
op
,
GpuGemm
)
for
n
in
topo
])
# gemm
float16_data
=
[
rand
(
3
,
3
)
.
astype
(
'float16'
),
np
.
asarray
(
1
,
dtype
=
np
.
float32
),
...
...
theano/gpuarray/tests/test_subtensor.py
浏览文件 @
4a8fed96
...
...
@@ -49,6 +49,33 @@ class G_subtensor(test_subtensor.T_subtensor):
assert
self
.
sub
==
GpuSubtensor
class
G_subtensorF16
(
test_subtensor
.
T_subtensor
):
def
shortDescription
(
self
):
return
None
def
__init__
(
self
,
name
):
def
shared
(
x
,
**
kwargs
):
return
gpuarray_shared_constructor
(
x
,
target
=
test_ctx_name
,
**
kwargs
)
test_subtensor
.
T_subtensor
.
__init__
(
self
,
name
,
shared
=
shared
,
sub
=
GpuSubtensor
,
inc_sub
=
GpuIncSubtensor
,
adv_sub1
=
GpuAdvancedSubtensor1
,
adv_incsub1
=
GpuAdvancedIncSubtensor1
,
dimshuffle
=
GpuDimShuffle
,
mode
=
mode_with_gpu
,
# avoid errors with limited devices
dtype
=
'float16'
,
# use floatX?
ignore_topo
=
(
HostFromGpu
,
GpuFromHost
,
DeepCopyOp
,
GpuContiguous
))
# GPU opt can't run in fast_compile only.
self
.
fast_compile
=
False
assert
self
.
sub
==
GpuSubtensor
def
test_advinc_subtensor1
():
# Test the second case in the opt local_gpu_advanced_incsubtensor1
for
shp
in
[(
3
,
3
),
(
3
,
3
,
3
)]:
...
...
@@ -73,7 +100,9 @@ def test_advinc_subtensor1():
def
test_advinc_subtensor1_dtype
():
# Test the mixed dtype case
shp
=
(
3
,
4
)
for
dtype1
,
dtype2
in
[(
'float32'
,
'int8'
),
(
'float32'
,
'float64'
)]:
for
dtype1
,
dtype2
in
[(
'float32'
,
'int8'
),
(
'float32'
,
'float64'
),
(
'float16'
,
'int8'
),
(
'float16'
,
'float64'
),
(
'float16'
,
'float16'
)]:
shared
=
gpuarray_shared_constructor
xval
=
np
.
arange
(
np
.
prod
(
shp
),
dtype
=
dtype1
)
.
reshape
(
shp
)
+
1
yval
=
np
.
empty
((
2
,)
+
shp
[
1
:],
dtype
=
dtype2
)
...
...
@@ -95,7 +124,9 @@ def test_advinc_subtensor1_dtype():
def
test_advinc_subtensor1_vector_scalar
():
# Test the case where x is a vector and y a scalar
shp
=
(
3
,)
for
dtype1
,
dtype2
in
[(
'float32'
,
'int8'
),
(
'float32'
,
'float64'
)]:
for
dtype1
,
dtype2
in
[(
'float32'
,
'int8'
),
(
'float32'
,
'float64'
),
(
'float16'
,
'int8'
),
(
'float16'
,
'float64'
),
(
'float16'
,
'float16'
)]:
shared
=
gpuarray_shared_constructor
xval
=
np
.
arange
(
np
.
prod
(
shp
),
dtype
=
dtype1
)
.
reshape
(
shp
)
+
1
yval
=
np
.
asarray
(
10
,
dtype
=
dtype2
)
...
...
@@ -105,7 +136,8 @@ def test_advinc_subtensor1_vector_scalar():
name
=
'y'
)
expr
=
tensor
.
advanced_inc_subtensor1
(
x
,
y
,
[
0
,
2
])
f
=
theano
.
function
([
y
],
expr
,
mode
=
mode_with_gpu
)
assert
sum
([
isinstance
(
node
.
op
,
GpuAdvancedIncSubtensor1_dev20
)
assert
sum
([
isinstance
(
node
.
op
,
(
GpuAdvancedIncSubtensor1_dev20
,
GpuAdvancedIncSubtensor1
))
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
==
1
rval
=
f
(
yval
)
rep
=
xval
.
copy
()
...
...
@@ -169,7 +201,26 @@ class G_advancedsubtensor(test_subtensor.TestAdvancedSubtensor):
sub
=
GpuAdvancedSubtensor
,
mode
=
mode_with_gpu
,
# avoid errors with limited devices
dtype
=
'float32'
,
dtype
=
'float32'
,
# floatX?
ignore_topo
=
(
HostFromGpu
,
GpuFromHost
,
DeepCopyOp
))
# GPU opt can't run in fast_compile only.
self
.
fast_compile
=
False
assert
self
.
sub
==
GpuAdvancedSubtensor
class
G_advancedsubtensorF16
(
test_subtensor
.
TestAdvancedSubtensor
):
def
shortDescription
(
self
):
return
None
def
__init__
(
self
,
name
):
test_subtensor
.
TestAdvancedSubtensor
.
__init__
(
self
,
name
,
shared
=
gpuarray_shared_constructor
,
sub
=
GpuAdvancedSubtensor
,
mode
=
mode_with_gpu
,
# avoid errors with limited devices
dtype
=
'float16'
,
# floatX?
ignore_topo
=
(
HostFromGpu
,
GpuFromHost
,
DeepCopyOp
))
# GPU opt can't run in fast_compile only.
...
...
@@ -218,6 +269,17 @@ class test_gpuextractdiag(unittest.TestCase):
GpuExtractDiag
(
offset
,
axis1
,
axis2
)(
x
)
.
eval
({
x
:
np_x
}),
np_x
.
diagonal
(
offset
,
axis1
,
axis2
))
def
test_tensor_float16
(
self
):
x
=
tensor
.
tensor4
()
np_x
=
np
.
arange
(
30107
)
.
reshape
(
7
,
11
,
17
,
23
)
.
astype
(
'float16'
)
for
offset
,
axis1
,
axis2
in
[
(
1
,
0
,
1
),
(
-
1
,
0
,
1
),
(
0
,
1
,
0
),
(
-
2
,
1
,
0
),
(
-
3
,
1
,
0
),
(
-
2
,
2
,
0
),
(
3
,
3
,
0
),
(
-
1
,
3
,
2
),
(
2
,
2
,
3
),
(
-
1
,
2
,
1
),
(
1
,
3
,
1
),
(
-
1
,
1
,
3
)]:
assert
np
.
allclose
(
GpuExtractDiag
(
offset
,
axis1
,
axis2
)(
x
)
.
eval
({
x
:
np_x
}),
np_x
.
diagonal
(
offset
,
axis1
,
axis2
))
class
test_gpuallocdiag
(
unittest
.
TestCase
):
def
test_matrix
(
self
):
...
...
theano/gpuarray/tests/test_type.py
浏览文件 @
4a8fed96
...
...
@@ -7,7 +7,7 @@ import numpy as np
import
theano
from
theano.compat
import
PY3
from
theano
import
config
from
theano.compile
import
DeepCopyOp
from
theano.compile
import
DeepCopyOp
,
Rebroadcast
,
ViewOp
from
theano.misc.pkl_utils
import
CompatUnpickler
# Disabled for now
...
...
@@ -21,8 +21,9 @@ import pygpu
def
test_deep_copy
():
a
=
rand_gpuarray
(
20
,
dtype
=
'float32'
)
g
=
GpuArrayType
(
dtype
=
'float32'
,
broadcastable
=
(
False
,))(
'g'
)
for
dtype
in
[
'float16'
,
'float32'
]:
a
=
rand_gpuarray
(
20
,
dtype
=
dtype
)
g
=
GpuArrayType
(
dtype
=
dtype
,
broadcastable
=
(
False
,))(
'g'
)
f
=
theano
.
function
([
g
],
g
)
...
...
@@ -33,6 +34,34 @@ def test_deep_copy():
assert
GpuArrayType
.
values_eq
(
res
,
a
)
def
test_view
():
for
dtype
in
[
'float16'
,
'float32'
]:
a
=
rand_gpuarray
(
20
,
dtype
=
dtype
)
g
=
GpuArrayType
(
dtype
=
dtype
,
broadcastable
=
(
False
,))(
'g'
)
f
=
theano
.
function
([
g
],
ViewOp
()(
g
))
assert
isinstance
(
f
.
maker
.
fgraph
.
toposort
()[
0
]
.
op
,
ViewOp
)
res
=
f
(
a
)
assert
GpuArrayType
.
values_eq
(
res
,
a
)
def
test_rebroadcast
():
for
dtype
in
[
'float16'
,
'float32'
]:
a
=
rand_gpuarray
(
1
,
dtype
=
dtype
)
g
=
GpuArrayType
(
dtype
=
dtype
,
broadcastable
=
(
False
,))(
'g'
)
f
=
theano
.
function
([
g
],
Rebroadcast
((
0
,
True
))(
g
))
assert
isinstance
(
f
.
maker
.
fgraph
.
toposort
()[
0
]
.
op
,
Rebroadcast
)
res
=
f
(
a
)
assert
GpuArrayType
.
values_eq
(
res
,
a
)
def
test_values_eq_approx
():
a
=
rand_gpuarray
(
20
,
dtype
=
'float32'
)
assert
GpuArrayType
.
values_eq_approx
(
a
,
a
)
...
...
@@ -45,8 +74,9 @@ def test_values_eq_approx():
def
test_specify_shape
():
a
=
rand_gpuarray
(
20
,
dtype
=
'float32'
)
g
=
GpuArrayType
(
dtype
=
'float32'
,
broadcastable
=
(
False
,))(
'g'
)
for
dtype
in
[
'float16'
,
'float32'
]:
a
=
rand_gpuarray
(
20
,
dtype
=
dtype
)
g
=
GpuArrayType
(
dtype
=
dtype
,
broadcastable
=
(
False
,))(
'g'
)
f
=
theano
.
function
([
g
],
theano
.
tensor
.
specify_shape
(
g
,
[
20
]))
f
(
a
)
...
...
theano/gradient.py
浏览文件 @
4a8fed96
...
...
@@ -1482,7 +1482,11 @@ class numeric_grad(object):
The tuple (abs_err, rel_err) is returned
"""
abs_err
=
abs
(
a
-
b
)
rel_err
=
abs_err
/
np
.
maximum
(
abs
(
a
)
+
abs
(
b
),
1e-8
)
# 1e-8 is to prevent division by zeros.
# [] is to make sure that if a and b are float16, 1e-8 don't get
# dowcasted to float16 as that give 0! This would add back the
# division by zero
rel_err
=
abs_err
/
np
.
maximum
(
abs
(
a
)
+
abs
(
b
),
[
1e-8
])
# The numpy.asarray are needed as if a or b is a sparse matrix
# this would result in a numpy.matrix and not a numpy.ndarray
# and the behave differently causing problem later.
...
...
theano/tensor/inc_code.py
0 → 100644
浏览文件 @
4a8fed96
from
__future__
import
absolute_import
,
print_function
,
division
def
inc_code
():
types
=
[
'npy_'
+
t
for
t
in
[
'int8'
,
'int16'
,
'int32'
,
'int64'
,
'uint8'
,
'uint16'
,
'uint32'
,
'uint64'
,
'float16'
,
'float32'
,
'float64'
]]
complex_types
=
[
'npy_'
+
t
for
t
in
[
'complex32'
,
'complex64'
,
'complex128'
]]
inplace_map_template
=
"""
#if defined(
%(typen)
s)
static void
%(type)
s_inplace_add(PyArrayMapIterObject *mit,
PyArrayIterObject *it, int inc_or_set)
{
int index = mit->size;
while (index--) {
%(op)
s
PyArray_MapIterNext(mit);
PyArray_ITER_NEXT(it);
}
}
#endif
"""
floatadd
=
(
"((
%(type)
s*)mit->dataptr)[0] = "
"(inc_or_set ? ((
%(type)
s*)mit->dataptr)[0] : 0)"
" + ((
%(type)
s*)it->dataptr)[0];"
)
complexadd
=
"""
((
%(type)
s*)mit->dataptr)[0].real =
(inc_or_set ? ((
%(type)
s*)mit->dataptr)[0].real : 0)
+ ((
%(type)
s*)it->dataptr)[0].real;
((
%(type)
s*)mit->dataptr)[0].imag =
(inc_or_set ? ((
%(type)
s*)mit->dataptr)[0].imag : 0)
+ ((
%(type)
s*)it->dataptr)[0].imag;
"""
fns
=
''
.
join
([
inplace_map_template
%
{
'type'
:
t
,
'typen'
:
t
.
upper
(),
'op'
:
floatadd
%
{
'type'
:
t
}}
for
t
in
types
]
+
[
inplace_map_template
%
{
'type'
:
t
,
'typen'
:
t
.
upper
(),
'op'
:
complexadd
%
{
'type'
:
t
}}
for
t
in
complex_types
])
def
gen_binop
(
type
,
typen
):
return
"""
#if defined(
%(typen)
s)
%(type)
s_inplace_add,
#endif
"""
%
dict
(
type
=
type
,
typen
=
typen
)
fn_array
=
(
"static inplace_map_binop addition_funcs[] = {"
+
''
.
join
([
gen_binop
(
type
=
t
,
typen
=
t
.
upper
())
for
t
in
types
+
complex_types
])
+
"NULL};
\n
"
)
def
gen_num
(
typen
):
return
"""
#if defined(
%(typen)
s)
%(typen)
s,
#endif
"""
%
dict
(
type
=
type
,
typen
=
typen
)
type_number_array
=
(
"static int type_numbers[] = {"
+
''
.
join
([
gen_num
(
typen
=
t
.
upper
())
for
t
in
types
+
complex_types
])
+
"-1000};"
)
code
=
(
"""
typedef void (*inplace_map_binop)(PyArrayMapIterObject *,
PyArrayIterObject *, int inc_or_set);
"""
+
fns
+
fn_array
+
type_number_array
+
"""
static int
map_increment(PyArrayMapIterObject *mit, PyArrayObject *op,
inplace_map_binop add_inplace, int inc_or_set)
{
PyArrayObject *arr = NULL;
PyArrayIterObject *it;
PyArray_Descr *descr;
if (mit->ait == NULL) {
return -1;
}
descr = PyArray_DESCR(mit->ait->ao);
Py_INCREF(descr);
arr = (PyArrayObject *)PyArray_FromAny((PyObject *)op, descr,
0, 0, NPY_ARRAY_FORCECAST, NULL);
if (arr == NULL) {
return -1;
}
if ((mit->subspace != NULL) && (mit->consec)) {
PyArray_MapIterSwapAxes(mit, (PyArrayObject **)&arr, 0);
if (arr == NULL) {
return -1;
}
}
it = (PyArrayIterObject*)
PyArray_BroadcastToShape((PyObject*)arr, mit->dimensions, mit->nd);
if (it == NULL) {
Py_DECREF(arr);
return -1;
}
(*add_inplace)(mit, it, inc_or_set);
Py_DECREF(arr);
Py_DECREF(it);
return 0;
}
static int
inplace_increment(PyArrayObject *a, PyObject *index, PyArrayObject *inc,
int inc_or_set)
{
inplace_map_binop add_inplace = NULL;
int type_number = -1;
int i = 0;
PyArrayMapIterObject * mit;
if (PyArray_FailUnlessWriteable(a, "input/output array") < 0) {
return -1;
}
if (PyArray_NDIM(a) == 0) {
PyErr_SetString(PyExc_IndexError, "0-d arrays can't be indexed.");
return -1;
}
type_number = PyArray_TYPE(a);
while (type_numbers[i] >= 0 && addition_funcs[i] != NULL){
if (type_number == type_numbers[i]) {
add_inplace = addition_funcs[i];
break;
}
i++ ;
}
if (add_inplace == NULL) {
PyErr_SetString(PyExc_TypeError, "unsupported type for a");
return -1;
}
mit = (PyArrayMapIterObject *) PyArray_MapIterArray(a, index);
if (mit == NULL) {
goto fail;
}
if (map_increment(mit, inc, add_inplace, inc_or_set) != 0) {
goto fail;
}
Py_DECREF(mit);
Py_INCREF(Py_None);
return 0;
fail:
Py_XDECREF(mit);
return -1;
}
"""
)
return
code
theano/tensor/subtensor.py
浏览文件 @
4a8fed96
...
...
@@ -22,9 +22,7 @@ from theano.tensor.elemwise import DimShuffle
from
theano.tensor.type_other
import
NoneConst
,
SliceType
,
NoneTypeT
,
make_slice
from
theano
import
config
if
config
.
cxx
:
import
theano.gof.cutils
# needed to import cutils_ext
from
cutils_ext.cutils_ext
import
inplace_increment
from
.inc_code
import
inc_code
_logger
=
logging
.
getLogger
(
"theano.tensor.subtensor"
)
...
...
@@ -1943,8 +1941,7 @@ class AdvancedIncSubtensor1(Op):
NPY_ARRAY_ENSURECOPY, NULL)"""
%
locals
()
def
c_support_code
(
self
):
from
theano.gof.cutils
import
compile_cutils_code
return
compile_cutils_code
()
return
inc_code
()
def
c_code
(
self
,
node
,
name
,
input_names
,
output_names
,
sub
):
numpy_ver
=
[
int
(
n
)
for
n
in
np
.
__version__
.
split
(
'.'
)[:
2
]]
...
...
@@ -1976,17 +1973,14 @@ class AdvancedIncSubtensor1(Op):
Py_XDECREF(
%(out)
s);
%(out)
s =
%(copy_of_x)
s;
}
PyObject *arglist = Py_BuildValue("OOOi",
%(out)
s,
%(idx)
s,
%(y)
s,
%(inc_or_set)
d);
rval = inplace_increment(NULL, arglist);
Py_XDECREF(arglist);
if (rval == NULL) {
if (inplace_increment(
%(out)
s, (PyObject *)
%(idx)
s,
%(y)
s,
%(inc_or_set)
d)) {
%(fail)
s;
}
Py_XDECREF(rval);
"""
%
locals
()
def
c_code_cache_version
(
self
):
return
(
3
,)
return
(
4
,)
def
perform
(
self
,
node
,
inp
,
out_
):
# TODO opt to make this inplace
...
...
@@ -2001,36 +1995,10 @@ class AdvancedIncSubtensor1(Op):
if
self
.
set_instead_of_inc
:
x
[
idx
]
=
y
else
:
if
config
.
cxx
and
node
.
inputs
[
0
]
.
dtype
!=
'float16'
:
increment
=
inplace_increment
else
:
increment
=
self
.
inplace_increment1d_slow
increment
(
x
,
idx
,
y
)
np
.
add
.
at
(
x
,
idx
,
y
)
out
[
0
]
=
x
def
inplace_increment1d_slow
(
self
,
x
,
idx
,
y
):
# If `y` has as many dimensions as `x`, then we want to iterate
# jointly on `x` and `y`. Otherwise, it means `y` should be
# broadcasted to fill all relevant rows of `x`.
assert
y
.
ndim
<=
x
.
ndim
# Should be guaranteed by `make_node`
if
y
.
ndim
==
x
.
ndim
:
if
len
(
y
)
==
1
:
# Allow broadcasting of y[0]
y_0
=
y
[
0
]
for
i
in
idx
:
x
[
i
]
+=
y_0
else
:
assert
len
(
y
)
==
len
(
idx
)
j
=
0
for
i
in
idx
:
x
[
i
]
+=
y
[
j
]
j
+=
1
else
:
for
i
in
idx
:
x
[
i
]
+=
y
def
infer_shape
(
self
,
node
,
ishapes
):
x
,
y
,
ilist
=
ishapes
return
[
x
]
...
...
@@ -2246,14 +2214,8 @@ class AdvancedIncSubtensor(Op):
if
self
.
set_instead_of_inc
:
out
[
0
][
inputs
[
2
:]]
=
inputs
[
1
]
elif
config
.
cxx
:
inplace_increment
(
out
[
0
],
tuple
(
inputs
[
2
:]),
inputs
[
1
])
else
:
raise
NotImplementedError
(
'Could not import inplace_increment, so advanced '
'indexing is disabled. '
'Please make sure that you have a working C++ compiler '
'and that config.cxx is correctly set.'
)
else
:
np
.
add
.
at
(
out
[
0
],
tuple
(
inputs
[
2
:]),
inputs
[
1
])
def
infer_shape
(
self
,
node
,
ishapes
):
return
[
ishapes
[
0
]]
...
...
theano/tensor/tests/test_opt.py
浏览文件 @
4a8fed96
...
...
@@ -2936,10 +2936,7 @@ def test_local_IncSubtensor_serialize():
i
=
T
.
vector
(
'i'
,
dtype
=
'int64'
)
j
=
T
.
vector
(
'j'
,
dtype
=
'int64'
)
t
=
T
.
scalar
(
't'
)
if
theano
.
tensor
.
subtensor
.
inplace_increment
:
y
=
(
W
[
i
]
+
W
[
j
]
+
W
[
1
]
+
W
[
i
,
j
])
.
sum
()
else
:
y
=
(
W
[
i
]
+
W
[
j
]
+
W
[
1
])
.
sum
()
cost
=
T
.
sqr
(
t
-
y
)
dW
=
theano
.
grad
(
cost
,
W
)
mode
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'fusion'
)
...
...
theano/tensor/tests/test_subtensor.py
浏览文件 @
4a8fed96
...
...
@@ -5,7 +5,6 @@ import sys
import
unittest
import
numpy
as
np
from
nose.plugins.skip
import
SkipTest
from
nose.tools
import
assert_equal
from
numpy.testing
import
assert_array_equal
from
six
import
StringIO
...
...
@@ -524,10 +523,11 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
gn
=
theano
.
grad
(
t
.
sum
(),
n
)
g
=
self
.
function
([],
gn
,
op
=
self
.
adv_incsub1
)
utt
.
verify_grad
(
lambda
m
:
m
[[
1
,
3
]],
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
)])
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
)],
mode
=
self
.
mode
)
g
()
utt
.
verify_grad
(
lambda
m
:
m
[
idx
],
[
data
])
[
data
]
,
mode
=
self
.
mode
)
def
test_noncontiguous_idx
(
self
):
data
=
rand
(
4
,
2
,
3
)
...
...
@@ -597,17 +597,20 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
self
.
assertTrue
(
np
.
allclose
(
g_00
,
2
))
utt
.
verify_grad
(
lambda
m
:
m
[[
1
,
3
]],
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
)])
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
)],
mode
=
self
.
mode
)
def
fun
(
x
,
y
):
return
advanced_inc_subtensor1
(
x
,
y
,
[
1
,
3
])
utt
.
verify_grad
(
fun
,
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
),
np
.
random
.
rand
(
2
,
5
)
.
astype
(
self
.
dtype
)])
np
.
random
.
rand
(
2
,
5
)
.
astype
(
self
.
dtype
)],
mode
=
self
.
mode
)
def
fun
(
x
,
y
):
return
advanced_set_subtensor1
(
x
,
y
,
[
1
,
3
])
utt
.
verify_grad
(
fun
,
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
),
np
.
random
.
rand
(
2
,
5
)
.
astype
(
self
.
dtype
)])
np
.
random
.
rand
(
2
,
5
)
.
astype
(
self
.
dtype
)],
mode
=
self
.
mode
)
# test set_subtensor broadcast
self
.
dtype
=
'float32'
...
...
@@ -872,12 +875,12 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
def
fct
(
t
):
return
theano
.
tensor
.
sum
(
t
[
idx_
])
utt
.
verify_grad
(
fct
,
[
data
])
utt
.
verify_grad
(
fct
,
[
data
]
,
mode
=
self
.
mode
)
# Test the grad of the grad (e.i. AdvancedIncSubtensor1.grad)
def
fct2
(
t
):
return
theano
.
tensor
.
grad
(
theano
.
tensor
.
sum
(
t
[
idx_
]),
t
)
utt
.
verify_grad
(
fct2
,
[
data
])
utt
.
verify_grad
(
fct2
,
[
data
]
,
mode
=
self
.
mode
)
# Test shape of AdvancedIncSubtensor1 and AdvancedSubtensor1
if
not
self
.
fast_compile
:
...
...
@@ -958,7 +961,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
# vector
utt
.
verify_grad
(
inc_slice
(
slice
(
2
,
4
,
None
)),
(
np
.
asarray
([
0
,
1
,
2
,
3
,
4
,
5.
]),
np
.
asarray
([
9
,
9.
]),))
(
np
.
asarray
([
0
,
1
,
2
,
3
,
4
,
5.
]),
np
.
asarray
([
9
,
9.
]),),
mode
=
self
.
mode
)
# matrix
utt
.
verify_grad
(
...
...
@@ -1498,9 +1502,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
utt
.
assert_allclose
(
rval
,
aval
)
def
test_inc_adv_subtensor_w_2vec
(
self
):
if
not
config
.
cxx
:
raise
SkipTest
(
'config.cxx empty'
)
subt
=
self
.
m
[
self
.
ix1
,
self
.
ix12
]
a
=
inc_subtensor
(
subt
,
subt
)
...
...
@@ -1519,9 +1520,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
[
.
5
,
.
3
*
2
,
.
15
]]),
aval
def
test_inc_adv_subtensor_with_broadcasting
(
self
):
if
not
config
.
cxx
:
raise
SkipTest
(
'config.cxx empty'
)
inc
=
dscalar
()
a
=
inc_subtensor
(
self
.
m
[
self
.
ix1
,
self
.
ix12
],
inc
)
g_inc
=
tensor
.
grad
(
a
.
sum
(),
inc
)
...
...
@@ -1542,9 +1540,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
assert
np
.
allclose
(
gval
,
3.0
),
gval
def
test_inc_adv_subtensor1_with_broadcasting
(
self
):
if
not
config
.
cxx
:
raise
SkipTest
(
'config.cxx empty'
)
inc
=
dscalar
()
a
=
inc_subtensor
(
self
.
m
[
self
.
ix1
],
inc
)
g_inc
=
tensor
.
grad
(
a
.
sum
(),
inc
)
...
...
@@ -1564,9 +1559,6 @@ class TestAdvancedSubtensor(unittest.TestCase):
assert
np
.
allclose
(
gval
,
9.0
),
gval
def
test_inc_adv_subtensor_with_index_broadcasting
(
self
):
if
not
config
.
cxx
:
raise
SkipTest
(
'config.cxx empty'
)
a
=
inc_subtensor
(
self
.
m
[
self
.
ix1
,
self
.
ix2
],
2.1
)
assert
a
.
type
==
self
.
m
.
type
,
(
a
.
type
,
self
.
m
.
type
)
...
...
@@ -1640,17 +1632,20 @@ class TestAdvancedSubtensor(unittest.TestCase):
self
.
assertTrue
(
isinstance
(
t
.
owner
.
op
,
tensor
.
AdvancedSubtensor
))
utt
.
verify_grad
(
lambda
m
:
m
[[
1
,
3
],
[
2
,
4
]],
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
)])
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
)],
mode
=
self
.
mode
)
def
fun
(
x
,
y
):
return
advanced_inc_subtensor
(
x
,
y
,
[
1
,
3
],
[
2
,
4
])
utt
.
verify_grad
(
fun
,
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
),
np
.
random
.
rand
(
2
)
.
astype
(
self
.
dtype
)])
np
.
random
.
rand
(
2
)
.
astype
(
self
.
dtype
)],
mode
=
self
.
mode
)
def
fun
(
x
,
y
):
return
advanced_set_subtensor
(
x
,
y
,
[
1
,
3
],
[
2
,
4
])
utt
.
verify_grad
(
fun
,
[
np
.
random
.
rand
(
5
,
5
)
.
astype
(
self
.
dtype
),
np
.
random
.
rand
(
2
)
.
astype
(
self
.
dtype
)])
np
.
random
.
rand
(
2
)
.
astype
(
self
.
dtype
)],
mode
=
self
.
mode
)
class
TestInferShape
(
utt
.
InferShapeTester
):
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论