Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
44726397
提交
44726397
authored
5月 01, 2017
作者:
Frédéric Bastien
提交者:
GitHub
5月 01, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5778 from aam-at/magma_gpu
Add magma gpu svd and matrix inverse
上级
b6a4757f
1399245f
隐藏空白字符变更
内嵌
并排
正在显示
9 个修改的文件
包含
698 行增加
和
45 行删除
+698
-45
config.txt
doc/library/config.txt
+23
-0
configdefaults.py
theano/configdefaults.py
+16
-0
linalg.py
theano/gpuarray/linalg.py
+175
-13
magma_inv.c
theano/gpuarray/magma_inv.c
+99
-0
magma_svd.c
theano/gpuarray/magma_svd.c
+175
-0
opt.py
theano/gpuarray/opt.py
+30
-1
test_linalg.py
theano/gpuarray/tests/test_linalg.py
+111
-9
nlinalg.py
theano/tensor/nlinalg.py
+27
-8
test_nlinalg.py
theano/tensor/tests/test_nlinalg.py
+42
-14
没有找到文件。
doc/library/config.txt
浏览文件 @
44726397
...
@@ -679,6 +679,29 @@ import theano and print the config variable, as in:
...
@@ -679,6 +679,29 @@ import theano and print the config variable, as in:
``'guess_once'``, ``'guess_on_shape_change'``, ``'time_once'``,
``'guess_once'``, ``'guess_on_shape_change'``, ``'time_once'``,
``'time_on_shape_change'``.
``'time_on_shape_change'``.
.. attribute:: config.magma.enabled
String value: ``'True'``, ``'False'``
Default: ``'False'``
If ``'True'``, use `magma <http://icl.cs.utk.edu/magma/>`_ for matrix
computations.
If ``'False'``, disable magma.
.. attribute:: config.magma.include_path
Default: ``''``
Location of the magma headers.
.. attribute:: config.magma.library_path
Default: ``''``
Location of the magma library.
.. attribute:: config.gcc.cxxflags
.. attribute:: config.gcc.cxxflags
Default: ``""``
Default: ``""``
...
...
theano/configdefaults.py
浏览文件 @
44726397
...
@@ -361,6 +361,22 @@ AddConfigVar('dnn.enabled',
...
@@ -361,6 +361,22 @@ AddConfigVar('dnn.enabled',
EnumStr
(
"auto"
,
"True"
,
"False"
,
"no_check"
),
EnumStr
(
"auto"
,
"True"
,
"False"
,
"no_check"
),
in_c_key
=
False
)
in_c_key
=
False
)
AddConfigVar
(
'magma.include_path'
,
"Location of the magma header"
,
StrParam
(
''
),
in_c_key
=
False
)
AddConfigVar
(
'magma.library_path'
,
"Location of the magma library"
,
StrParam
(
''
),
in_c_key
=
False
)
AddConfigVar
(
'magma.enabled'
,
" If True, use magma for matrix computation."
" If False, disable magma"
,
BoolParam
(
False
),
in_c_key
=
False
)
# This flag determines whether or not to raise error/warning message if
# This flag determines whether or not to raise error/warning message if
# there is a CPU Op in the computational graph.
# there is a CPU Op in the computational graph.
AddConfigVar
(
AddConfigVar
(
...
...
theano/gpuarray/linalg.py
浏览文件 @
44726397
from
__future__
import
absolute_import
,
division
,
print_function
from
__future__
import
absolute_import
,
division
,
print_function
import
pkg_resources
import
os
import
theano
import
warnings
import
warnings
from
theano
import
Op
import
pkg_resources
from
theano.gpuarray
import
basic_ops
,
GpuArrayType
import
numpy
as
np
import
numpy
as
np
from
numpy.linalg.linalg
import
LinAlgError
from
numpy.linalg.linalg
import
LinAlgError
import
theano
from
theano
import
Op
,
config
,
tensor
from
theano.gof
import
COp
from
theano.gpuarray
import
GpuArrayType
from
.basic_ops
import
as_gpuarray_variable
,
gpu_contiguous
,
infer_context_name
from
.type
import
gpu_context_type
try
:
try
:
import
pygpu
import
pygpu
from
pygpu.basic
import
triu
,
tril
from
pygpu.basic
import
triu
,
tril
...
@@ -94,13 +99,13 @@ class GpuCusolverSolve(Op):
...
@@ -94,13 +99,13 @@ class GpuCusolverSolve(Op):
'GpuCusolverSolve Op can not be constructed.'
)
'GpuCusolverSolve Op can not be constructed.'
)
if
skcuda
.
__version__
<=
'0.5.1'
:
if
skcuda
.
__version__
<=
'0.5.1'
:
warnings
.
warn
(
'The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8'
)
warnings
.
warn
(
'The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8'
)
context_name
=
basic_ops
.
infer_context_name
(
inp1
,
inp2
)
context_name
=
infer_context_name
(
inp1
,
inp2
)
inp1
=
basic_ops
.
as_gpuarray_variable
(
inp1
,
context_name
)
inp1
=
as_gpuarray_variable
(
inp1
,
context_name
)
inp2
=
basic_ops
.
as_gpuarray_variable
(
inp2
,
context_name
)
inp2
=
as_gpuarray_variable
(
inp2
,
context_name
)
inp1
=
basic_ops
.
gpu_contiguous
(
inp1
)
inp1
=
gpu_contiguous
(
inp1
)
inp2
=
basic_ops
.
gpu_contiguous
(
inp2
)
inp2
=
gpu_contiguous
(
inp2
)
# this op can only operate on float32 matrices
# this op can only operate on float32 matrices
assert
inp1
.
ndim
==
2
assert
inp1
.
ndim
==
2
...
@@ -260,11 +265,11 @@ class GpuCholesky(Op):
...
@@ -260,11 +265,11 @@ class GpuCholesky(Op):
if
not
pygpu_available
:
if
not
pygpu_available
:
raise
RuntimeError
(
'Missing pygpu or triu/tril functions.'
raise
RuntimeError
(
'Missing pygpu or triu/tril functions.'
'Install or update libgpuarray.'
)
'Install or update libgpuarray.'
)
context_name
=
basic_ops
.
infer_context_name
(
inp
)
context_name
=
infer_context_name
(
inp
)
inp
=
basic_ops
.
as_gpuarray_variable
(
inp
,
context_name
)
inp
=
as_gpuarray_variable
(
inp
,
context_name
)
inp
=
basic_ops
.
gpu_contiguous
(
inp
)
inp
=
gpu_contiguous
(
inp
)
# this op can only operate on float32 matrices
# this op can only operate on float32 matrices
# because of current implementation of triu/tril.
# because of current implementation of triu/tril.
...
@@ -341,3 +346,160 @@ class GpuCholesky(Op):
...
@@ -341,3 +346,160 @@ class GpuCholesky(Op):
def
gpu_cholesky
(
A
,
lower
=
True
):
def
gpu_cholesky
(
A
,
lower
=
True
):
return
GpuCholesky
(
lower
)(
A
)
return
GpuCholesky
(
lower
)(
A
)
class
GpuMagmaSVD
(
COp
):
"""Computes the svd of a matrix :math:`A` using magma library.
"""
__props__
=
(
'full_matrices'
,
'compute_uv'
)
params_type
=
gpu_context_type
def
__init__
(
self
,
full_matrices
=
True
,
compute_uv
=
True
):
self
.
full_matrices
=
full_matrices
self
.
compute_uv
=
compute_uv
COp
.
__init__
(
self
,
[
'magma_svd.c'
],
'APPLY_SPECIFIC(magma_svd)'
)
def
c_headers
(
self
):
return
[
'gpuarray/types.h'
,
'gpuarray/array.h'
,
'gpuarray/ext_cuda.h'
,
'gpuarray_helper.h'
,
'magma.h'
]
def
c_header_dirs
(
self
):
dirs
=
[
os
.
path
.
dirname
(
__file__
),
pygpu
.
get_include
()]
if
config
.
magma
.
include_path
:
dirs
.
append
(
config
.
magma
.
include_path
)
return
dirs
def
c_libraries
(
self
):
return
[
'magma'
]
def
c_lib_dirs
(
self
):
if
config
.
magma
.
library_path
:
return
[
config
.
magma
.
library_path
]
return
[]
def
make_node
(
self
,
A
):
ctx_name
=
infer_context_name
(
A
)
A
=
as_gpuarray_variable
(
A
,
ctx_name
)
if
A
.
ndim
!=
2
:
raise
LinAlgError
(
"Matrix rank error"
)
if
self
.
compute_uv
:
return
theano
.
Apply
(
self
,
[
A
],
[
A
.
type
(),
GpuArrayType
(
A
.
dtype
,
broadcastable
=
[
False
],
context_name
=
ctx_name
)(),
A
.
type
()])
else
:
return
theano
.
Apply
(
self
,
[
A
],
[
GpuArrayType
(
A
.
dtype
,
broadcastable
=
[
False
],
context_name
=
ctx_name
)()])
def
get_params
(
self
,
node
):
return
node
.
inputs
[
0
]
.
type
.
context
def
get_op_params
(
self
):
params
=
[]
if
self
.
compute_uv
:
params
.
append
((
'COMPUTE_UV'
,
'1'
))
if
self
.
full_matrices
:
params
.
append
((
'FULL_MATRICES'
,
'1'
))
return
params
def
infer_shape
(
self
,
node
,
shapes
):
x_shape
,
=
shapes
M
,
N
=
x_shape
K
=
tensor
.
minimum
(
M
,
N
)
s_shape
=
(
K
,
)
if
self
.
compute_uv
:
u_shape
=
(
M
,
M
)
if
self
.
full_matrices
else
(
M
,
K
)
vt_shape
=
(
N
,
N
)
if
self
.
full_matrices
else
(
K
,
N
)
return
[
u_shape
,
s_shape
,
vt_shape
]
else
:
return
[
s_shape
]
def
gpu_svd
(
a
,
full_matrices
=
1
,
compute_uv
=
1
):
"""
This function performs the SVD on GPU.
Parameters
----------
full_matrices : bool, optional
If True (default), u and v have the shapes (M, M) and (N, N),
respectively.
Otherwise, the shapes are (M, K) and (K, N), respectively,
where K = min(M, N).
compute_uv : bool, optional
Whether or not to compute u and v in addition to s.
True by default.
Returns
-------
U, V, D : matrices
"""
return
GpuMagmaSVD
(
full_matrices
,
compute_uv
)(
a
)
class
GpuMagmaMatrixInverse
(
COp
):
"""Computes the inverse of a matrix :math:`A` using magma library.
"""
__props__
=
(
'inplace'
,
)
params_type
=
gpu_context_type
def
__init__
(
self
,
inplace
=
False
):
COp
.
__init__
(
self
,
[
'magma_inv.c'
],
'APPLY_SPECIFIC(magma_inv)'
)
self
.
inplace
=
inplace
if
self
.
inplace
:
self
.
destroy_map
=
{
0
:
[
0
]}
def
c_headers
(
self
):
return
[
'gpuarray/types.h'
,
'gpuarray/array.h'
,
'gpuarray/ext_cuda.h'
,
'gpuarray_helper.h'
,
'magma.h'
]
def
c_header_dirs
(
self
):
dirs
=
[
os
.
path
.
dirname
(
__file__
),
pygpu
.
get_include
()]
if
config
.
magma
.
include_path
:
dirs
.
append
(
config
.
magma
.
include_path
)
return
dirs
def
c_libraries
(
self
):
return
[
'magma'
]
def
c_lib_dirs
(
self
):
if
config
.
magma
.
library_path
:
return
[
config
.
magma
.
library_path
]
return
[]
def
clone_inplace
(
self
):
return
self
.
__class__
(
inplace
=
True
)
def
make_node
(
self
,
x
):
ctx_name
=
infer_context_name
(
x
)
x
=
as_gpuarray_variable
(
x
,
ctx_name
)
if
x
.
ndim
!=
2
:
raise
LinAlgError
(
"Matrix rank error"
)
return
theano
.
Apply
(
self
,
[
x
],
[
x
.
type
()])
def
get_params
(
self
,
node
):
return
node
.
inputs
[
0
]
.
type
.
context
def
get_op_params
(
self
):
if
self
.
inplace
:
return
[(
'INPLACE'
,
'1'
)]
else
:
return
[]
def
infer_shape
(
self
,
node
,
shapes
):
return
shapes
def
gpu_matrix_inverse
(
a
):
"""
This function performs the matrix inverse on GPU.
Returns
-------
a_inv: matrix
"""
return
GpuMagmaMatrixInverse
()(
a
)
theano/gpuarray/magma_inv.c
0 → 100644
浏览文件 @
44726397
#section init_code
setup_ext_cuda
();
#section support_code_struct
int
APPLY_SPECIFIC
(
magma_inv
)(
PyGpuArrayObject
*
A
,
PyGpuArrayObject
**
A_inv
,
PyGpuContextObject
*
c
)
{
const
size_t
*
dims
;
magma_int_t
N
,
ldwork
,
info
;
magma_int_t
*
piv
=
NULL
;
gpudata
*
dwork
=
NULL
;
int
res
=
-
1
;
if
(
A
->
ga
.
typecode
!=
GA_FLOAT
)
{
PyErr_SetString
(
PyExc_TypeError
,
"GpuMagmaMatrixInverse: Unsupported data type"
);
return
-
1
;
}
// This is early to match the exit() in the fail label.
cuda_enter
(
c
->
ctx
);
magma_init
();
if
(
!
GpuArray_IS_C_CONTIGUOUS
(
&
A
->
ga
))
{
PyErr_SetString
(
PyExc_ValueError
,
"GpuMagmaMatrixInverse: requires data to be C-contiguous"
);
return
1
;
}
if
(
PyGpuArray_NDIM
(
A
)
!=
2
)
{
PyErr_SetString
(
PyExc_ValueError
,
"GpuMagmaMatrixInverse: matrix rank error"
);
goto
fail
;
}
dims
=
PyGpuArray_DIMS
(
A
);
if
(
dims
[
0
]
!=
dims
[
1
])
{
PyErr_SetString
(
PyExc_ValueError
,
"GpuMagmaMatrixInverse: matrix is not square"
);
goto
fail
;
}
#ifdef INPLACE
Py_XDECREF
(
*
A_inv
);
*
A_inv
=
A
;
Py_INCREF
(
*
A_inv
);
#else
*
A_inv
=
theano_try_copy
(
*
A_inv
,
A
);
if
(
*
A_inv
==
NULL
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
"GpuMagmaMatrixInverse: failed to allocate memory for the output"
);
goto
fail
;
}
#endif
// magma matrix inverse
N
=
dims
[
0
];
ldwork
=
N
*
magma_get_sgetri_nb
(
N
);
dwork
=
gpudata_alloc
(
c
->
ctx
,
ldwork
*
sizeof
(
float
),
NULL
,
0
,
NULL
);
if
(
dwork
==
NULL
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
"GpuMagmaMatrixInverse: failed to allocate working memory"
);
goto
fail
;
}
if
(
magma_imalloc_cpu
(
&
piv
,
N
))
{
PyErr_SetString
(
PyExc_RuntimeError
,
"GpuMagmaMatrixInverse: failed to allocate memory for the pivot array"
);
goto
fail
;
}
magma_sgetrf_gpu
(
N
,
N
,
(
float
*
)
PyGpuArray_DEV_DATA
(
*
A_inv
),
N
,
piv
,
&
info
);
if
(
info
!=
0
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuMagmaMatrixInverse: magma_sgetrf_gpu returned error %d: %s."
,
info
,
magma_strerror
(
info
));
goto
fail
;
}
magma_sgetri_gpu
(
N
,
(
float
*
)
PyGpuArray_DEV_DATA
(
*
A_inv
),
N
,
piv
,
*
(
float
**
)
dwork
,
ldwork
,
&
info
);
if
(
info
!=
0
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuMagmaMatrixInverse: magma_sgetri_gpu returned error %d: %s."
,
info
,
magma_strerror
(
info
));
goto
fail
;
}
res
=
0
;
fail:
if
(
piv
!=
NULL
)
magma_free
(
piv
);
if
(
dwork
!=
NULL
)
gpudata_release
(
dwork
);
magma_finalize
();
cuda_exit
(
c
->
ctx
);
return
res
;
}
theano/gpuarray/magma_svd.c
0 → 100644
浏览文件 @
44726397
#section init_code
setup_ext_cuda
();
#section support_code_struct
int
APPLY_SPECIFIC
(
magma_svd
)(
PyGpuArrayObject
*
A
,
#ifdef COMPUTE_UV
PyGpuArrayObject
**
U
,
#endif
PyGpuArrayObject
**
S
,
#ifdef COMPUTE_UV
PyGpuArrayObject
**
VT
,
#endif
PyGpuContextObject
*
c
)
{
magma_int_t
*
iwork
=
NULL
,
iunused
[
1
];
magma_int_t
M
,
N
,
K
,
ldu
,
ldv
,
M_U
,
N_VT
,
info
;
magma_vec_t
jobz
;
size_t
s_dims
[
1
],
u_dims
[
2
],
vt_dims
[
2
];
float
*
a_data
=
NULL
,
*
s_data
=
NULL
,
*
u_data
=
NULL
,
*
vt_data
=
NULL
,
*
work
=
NULL
;
float
dummy
[
1
];
int
res
=
-
1
,
lwork
;
if
(
A
->
ga
.
typecode
!=
GA_FLOAT
)
{
PyErr_SetString
(
PyExc_TypeError
,
"GpuMagmaMatrixInverse: Unsupported data type"
);
return
-
1
;
}
// This is early to match the exit() in the fail label.
cuda_enter
(
c
->
ctx
);
magma_init
();
if
(
!
GpuArray_IS_C_CONTIGUOUS
(
&
A
->
ga
))
{
PyErr_SetString
(
PyExc_ValueError
,
"GpuMagmaMatrixInverse: requires data to be C-contiguous"
);
return
1
;
}
if
(
PyGpuArray_NDIM
(
A
)
!=
2
)
{
PyErr_SetString
(
PyExc_ValueError
,
"GpuMagmaMatrixInverse: matrix rank error"
);
goto
fail
;
}
// magma matrix svd
// reverse dimensions because MAGMA expects column-major matrices:
M
=
PyGpuArray_DIM
(
A
,
1
);
N
=
PyGpuArray_DIM
(
A
,
0
);
K
=
std
::
min
(
M
,
N
);
if
(
MAGMA_SUCCESS
!=
magma_smalloc_pinned
(
&
a_data
,
M
*
N
))
{
PyErr_SetString
(
PyExc_RuntimeError
,
"GpuMagmaSVD: failed to allocate memory"
);
goto
fail
;
}
cudaMemcpy
(
a_data
,
PyGpuArray_DEV_DATA
(
A
),
M
*
N
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
);
if
(
MAGMA_SUCCESS
!=
magma_smalloc_pinned
(
&
s_data
,
K
))
{
PyErr_SetString
(
PyExc_RuntimeError
,
"GpuMagmaSVD: failed to allocate memory"
);
goto
fail
;
}
#ifdef COMPUTE_UV
#ifdef FULL_MATRICES
jobz
=
MagmaAllVec
;
#else
jobz
=
MagmaSomeVec
;
#endif
M_U
=
(
jobz
==
MagmaAllVec
?
M
:
K
);
N_VT
=
(
jobz
==
MagmaAllVec
?
N
:
K
);
ldu
=
M
;
ldv
=
N_VT
;
if
(
MAGMA_SUCCESS
!=
magma_smalloc_pinned
(
&
u_data
,
M_U
*
M
))
{
PyErr_SetString
(
PyExc_RuntimeError
,
"GpuMagmaSVD: failed to allocate memory"
);
goto
fail
;
}
if
(
MAGMA_SUCCESS
!=
magma_smalloc_pinned
(
&
vt_data
,
N
*
N_VT
))
{
PyErr_SetString
(
PyExc_RuntimeError
,
"GpuMagmaSVD: failed to allocate memory"
);
goto
fail
;
}
#else
jobz
=
MagmaNoVec
;
ldu
=
M
;
ldv
=
N
;
#endif
// query for workspace size
magma_sgesdd
(
jobz
,
M
,
N
,
NULL
,
M
,
NULL
,
NULL
,
ldu
,
NULL
,
ldv
,
dummy
,
-
1
,
iunused
,
&
info
);
lwork
=
(
magma_int_t
)
MAGMA_S_REAL
(
dummy
[
0
]);
if
(
MAGMA_SUCCESS
!=
magma_smalloc_pinned
(
&
work
,
lwork
))
{
PyErr_SetString
(
PyExc_RuntimeError
,
"GpuMagmaSVD: failed to allocate working memory"
);
goto
fail
;
}
if
(
MAGMA_SUCCESS
!=
magma_imalloc_cpu
(
&
iwork
,
8
*
K
))
{
PyErr_SetString
(
PyExc_RuntimeError
,
"GpuMagmaSVD: failed to allocate working memory"
);
goto
fail
;
}
// compute svd
magma_sgesdd
(
jobz
,
M
,
N
,
a_data
,
M
,
s_data
,
u_data
,
ldu
,
vt_data
,
ldv
,
work
,
lwork
,
iwork
,
&
info
);
if
(
info
>
0
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuMagmaSVD: the updating process of SBDSDC did not converge (error: %d)"
,
info
);
goto
fail
;
}
else
if
(
info
<
0
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuMagmaSVD: magma_sgesdd_gpu argument %d has an illegal value"
,
-
info
);
goto
fail
;
}
s_dims
[
0
]
=
K
;
if
(
theano_prep_output
(
S
,
1
,
s_dims
,
A
->
ga
.
typecode
,
GA_C_ORDER
,
c
)
!=
0
){
PyErr_SetString
(
PyExc_RuntimeError
,
"GpuMagmaSVD: failed to allocate memory"
);
goto
fail
;
}
cudaMemcpy
(
PyGpuArray_DEV_DATA
(
*
S
),
s_data
,
K
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
);
#ifdef COMPUTE_UV
u_dims
[
0
]
=
N
;
u_dims
[
1
]
=
N_VT
;
if
(
theano_prep_output
(
U
,
2
,
u_dims
,
A
->
ga
.
typecode
,
GA_C_ORDER
,
c
)
!=
0
){
PyErr_SetString
(
PyExc_RuntimeError
,
"GpuMagmaSVD: failed to allocate memory"
);
goto
fail
;
}
// magma expects column-major matrices. Exchange u_data -> VT and vt_data -> U
// to match numpy.linalg.svd output
cudaMemcpy
(
PyGpuArray_DEV_DATA
(
*
U
),
vt_data
,
N
*
N_VT
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
);
vt_dims
[
0
]
=
M_U
;
vt_dims
[
1
]
=
M
;
if
(
theano_prep_output
(
VT
,
2
,
vt_dims
,
A
->
ga
.
typecode
,
GA_C_ORDER
,
c
)
!=
0
){
PyErr_SetString
(
PyExc_RuntimeError
,
"GpuMagmaSVD: failed to allocate memory"
);
goto
fail
;
}
// magma expects column-major matrices. Exchange u_data -> VT and vt_data -> U
// to match numpy.linalg.svd output
cudaMemcpy
(
PyGpuArray_DEV_DATA
(
*
VT
),
u_data
,
M_U
*
M
*
sizeof
(
float
),
cudaMemcpyDeviceToDevice
);
#endif
res
=
0
;
fail:
if
(
a_data
!=
NULL
)
magma_free_pinned
(
a_data
);
if
(
s_data
!=
NULL
)
magma_free_pinned
(
s_data
);
if
(
u_data
!=
NULL
)
magma_free_pinned
(
u_data
);
if
(
vt_data
!=
NULL
)
magma_free_pinned
(
vt_data
);
if
(
work
!=
NULL
)
magma_free_pinned
(
work
);
if
(
iwork
!=
NULL
)
magma_free_cpu
(
iwork
);
magma_finalize
();
cuda_exit
(
c
->
ctx
);
return
res
;
}
theano/gpuarray/opt.py
浏览文件 @
44726397
...
@@ -32,6 +32,7 @@ from theano.tensor.nnet.abstract_conv import (BaseAbstractConv,
...
@@ -32,6 +32,7 @@ from theano.tensor.nnet.abstract_conv import (BaseAbstractConv,
AbstractConv3d
,
AbstractConv3d
,
AbstractConv3d_gradWeights
,
AbstractConv3d_gradWeights
,
AbstractConv3d_gradInputs
)
AbstractConv3d_gradInputs
)
import
theano.tensor.nlinalg
as
nlinalg
import
theano.tensor.signal.pool
as
pool
import
theano.tensor.signal.pool
as
pool
import
theano.tensor.slinalg
as
slinalg
import
theano.tensor.slinalg
as
slinalg
...
@@ -71,7 +72,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
...
@@ -71,7 +72,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
from
.opt_util
import
alpha_merge
,
output_merge
,
pad_dims
,
unpad_dims
from
.opt_util
import
alpha_merge
,
output_merge
,
pad_dims
,
unpad_dims
from
.reduction
import
GpuMaxAndArgmax
from
.reduction
import
GpuMaxAndArgmax
from
.linalg
import
(
GpuCusolverSolve
,
MATRIX_STRUCTURES_SOLVE
,
GpuCholesky
,
from
.linalg
import
(
GpuCusolverSolve
,
MATRIX_STRUCTURES_SOLVE
,
GpuCholesky
,
cusolver_available
)
cusolver_available
,
GpuMagmaMatrixInverse
,
GpuMagmaSVD
)
_logger
=
logging
.
getLogger
(
"theano.gpuarray.opt"
)
_logger
=
logging
.
getLogger
(
"theano.gpuarray.opt"
)
...
@@ -2004,6 +2005,34 @@ def local_inplace_cholesky(node):
...
@@ -2004,6 +2005,34 @@ def local_inplace_cholesky(node):
if
isinstance
(
node
.
op
,
GpuCholesky
)
and
not
node
.
op
.
inplace
:
if
isinstance
(
node
.
op
,
GpuCholesky
)
and
not
node
.
op
.
inplace
:
return
[
GpuCholesky
(
lower
=
node
.
op
.
lower
,
inplace
=
True
)(
*
node
.
inputs
)]
return
[
GpuCholesky
(
lower
=
node
.
op
.
lower
,
inplace
=
True
)(
*
node
.
inputs
)]
@register_opt
(
'magma'
,
'fast_compile'
)
@op_lifter
([
nlinalg
.
MatrixInverse
])
@register_opt2
([
theano
.
tensor
.
nlinalg
.
MatrixInverse
],
'magma'
,
'fast_compile'
)
def
local_gpu_matrix_inverse
(
op
,
context_name
,
inputs
,
outputs
):
if
not
config
.
magma
.
enabled
:
return
return
GpuMagmaMatrixInverse
()
@register_inplace
()
@local_optimizer
([
GpuMagmaMatrixInverse
])
def
local_inplace_matrix_inverse_inplace
(
node
):
if
isinstance
(
node
.
op
,
GpuMagmaMatrixInverse
):
if
not
node
.
op
.
inplace
:
return
[
node
.
op
.
clone_inplace
()(
*
node
.
inputs
)]
@register_opt
(
'magma'
,
'fast_compile'
)
@op_lifter
([
nlinalg
.
SVD
])
@register_opt2
([
theano
.
tensor
.
nlinalg
.
SVD
],
'magma'
,
'fast_compile'
)
def
local_gpu_svd
(
op
,
context_name
,
inputs
,
outputs
):
if
not
config
.
magma
.
enabled
:
return
return
GpuMagmaSVD
(
full_matrices
=
op
.
full_matrices
,
compute_uv
=
op
.
compute_uv
)
# Do not register in fast_run or fast_compile.
# Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled.
# It will be added to fast_run if the GPU is enabled.
optdb
.
register
(
'gpua_scanOp_make_inplace'
,
optdb
.
register
(
'gpua_scanOp_make_inplace'
,
...
...
theano/gpuarray/tests/test_linalg.py
浏览文件 @
44726397
from
__future__
import
absolute_import
,
division
,
print_function
from
__future__
import
absolute_import
,
division
,
print_function
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
import
theano
from
numpy.linalg.linalg
import
LinAlgError
import
theano
from
theano
import
config
from
theano.gpuarray.linalg
import
(
GpuCholesky
,
GpuMagmaMatrixInverse
,
cusolver_available
,
gpu_matrix_inverse
,
gpu_solve
,
gpu_svd
)
from
theano.tensor.nlinalg
import
matrix_inverse
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests
import
unittest_tools
as
utt
from
.config
import
mode_with_gpu
from
numpy.linalg.linalg
import
LinAlgError
from
..
import
gpuarray_shared_constructor
from
.config
import
mode_with_gpu
,
mode_without_gpu
from
.test_basic_ops
import
rand
# Skip tests if cuda_ndarray is not available.
from
nose.plugins.skip
import
SkipTest
from
theano.gpuarray.linalg
import
(
cusolver_available
,
gpu_solve
,
GpuCholesky
)
if
not
cusolver_available
:
class
TestCusolver
(
unittest
.
TestCase
):
raise
SkipTest
(
'Optional package scikits.cuda.cusolver not available'
)
def
setUp
(
self
):
if
not
cusolver_available
:
self
.
skipTest
(
'Optional package scikits.cuda.cusolver not available'
)
class
TestCusolver
(
unittest
.
TestCase
):
def
run_gpu_solve
(
self
,
A_val
,
x_val
,
A_struct
=
None
):
def
run_gpu_solve
(
self
,
A_val
,
x_val
,
A_struct
=
None
):
b_val
=
np
.
dot
(
A_val
,
x_val
)
b_val
=
np
.
dot
(
A_val
,
x_val
)
b_val_trans
=
np
.
dot
(
A_val
.
T
,
x_val
)
b_val_trans
=
np
.
dot
(
A_val
.
T
,
x_val
)
...
@@ -117,6 +123,8 @@ class TestCusolver(unittest.TestCase):
...
@@ -117,6 +123,8 @@ class TestCusolver(unittest.TestCase):
class
TestGpuCholesky
(
unittest
.
TestCase
):
class
TestGpuCholesky
(
unittest
.
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
if
not
cusolver_available
:
self
.
skipTest
(
'Optional package scikits.cuda.cusolver not available'
)
utt
.
seed_rng
()
utt
.
seed_rng
()
def
get_gpu_cholesky_func
(
self
,
lower
=
True
,
inplace
=
False
):
def
get_gpu_cholesky_func
(
self
,
lower
=
True
,
inplace
=
False
):
...
@@ -191,3 +199,97 @@ class TestGpuCholesky(unittest.TestCase):
...
@@ -191,3 +199,97 @@ class TestGpuCholesky(unittest.TestCase):
A_val
=
-
M_val
.
dot
(
M_val
.
T
)
A_val
=
-
M_val
.
dot
(
M_val
.
T
)
fn
=
self
.
get_gpu_cholesky_func
(
True
,
False
)
fn
=
self
.
get_gpu_cholesky_func
(
True
,
False
)
self
.
assertRaises
(
LinAlgError
,
fn
,
A_val
)
self
.
assertRaises
(
LinAlgError
,
fn
,
A_val
)
class
TestMagma
(
unittest
.
TestCase
):
def
setUp
(
self
):
if
not
config
.
magma
.
enabled
:
self
.
skipTest
(
'Magma is not enabled, skipping test'
)
def
test_gpu_matrix_inverse
(
self
):
A
=
theano
.
tensor
.
fmatrix
(
"A"
)
fn
=
theano
.
function
([
A
],
gpu_matrix_inverse
(
A
),
mode
=
mode_with_gpu
)
N
=
1000
A_val
=
rand
(
N
,
N
)
.
astype
(
'float32'
)
A_val_inv
=
fn
(
A_val
)
utt
.
assert_allclose
(
np
.
dot
(
A_val_inv
,
A_val
),
np
.
eye
(
N
),
atol
=
1e-3
)
def
test_gpu_matrix_inverse_inplace
(
self
):
N
=
1000
A_val_gpu
=
gpuarray_shared_constructor
(
rand
(
N
,
N
)
.
astype
(
'float32'
))
A_val_copy
=
A_val_gpu
.
get_value
()
fn
=
theano
.
function
([],
GpuMagmaMatrixInverse
(
inplace
=
True
)(
A_val_gpu
),
mode
=
mode_with_gpu
,
accept_inplace
=
True
)
fn
()
utt
.
assert_allclose
(
np
.
dot
(
A_val_gpu
.
get_value
(),
A_val_copy
),
np
.
eye
(
N
),
atol
=
1e-3
)
def
test_gpu_matrix_inverse_inplace_opt
(
self
):
A
=
theano
.
tensor
.
fmatrix
(
"A"
)
fn
=
theano
.
function
([
A
],
matrix_inverse
(
A
),
mode
=
mode_with_gpu
)
assert
any
([
node
.
op
.
inplace
for
node
in
fn
.
maker
.
fgraph
.
toposort
()
if
isinstance
(
node
.
op
,
GpuMagmaMatrixInverse
)
])
def
run_gpu_svd
(
self
,
A_val
,
full_matrices
=
True
,
compute_uv
=
True
):
A
=
theano
.
tensor
.
fmatrix
(
"A"
)
f
=
theano
.
function
(
[
A
],
gpu_svd
(
A
,
full_matrices
=
full_matrices
,
compute_uv
=
compute_uv
),
mode
=
mode_with_gpu
)
return
f
(
A_val
)
def
assert_column_orthonormal
(
self
,
Ot
):
utt
.
assert_allclose
(
np
.
dot
(
Ot
.
T
,
Ot
),
np
.
eye
(
Ot
.
shape
[
1
]))
def
check_svd
(
self
,
A
,
U
,
S
,
VT
,
rtol
=
None
,
atol
=
None
):
S_m
=
np
.
zeros_like
(
A
)
np
.
fill_diagonal
(
S_m
,
S
)
utt
.
assert_allclose
(
np
.
dot
(
np
.
dot
(
U
,
S_m
),
VT
),
A
,
rtol
=
rtol
,
atol
=
atol
)
def
test_gpu_svd_wide
(
self
):
A
=
rand
(
100
,
50
)
.
astype
(
'float32'
)
M
,
N
=
A
.
shape
U
,
S
,
VT
=
self
.
run_gpu_svd
(
A
)
self
.
assert_column_orthonormal
(
U
)
self
.
assert_column_orthonormal
(
VT
.
T
)
self
.
check_svd
(
A
,
U
,
S
,
VT
)
U
,
S
,
VT
=
self
.
run_gpu_svd
(
A
,
full_matrices
=
False
)
self
.
assertEqual
(
U
.
shape
[
1
],
min
(
M
,
N
))
self
.
assert_column_orthonormal
(
U
)
self
.
assertEqual
(
VT
.
shape
[
0
],
min
(
M
,
N
))
self
.
assert_column_orthonormal
(
VT
.
T
)
def
test_gpu_svd_tall
(
self
):
A
=
rand
(
50
,
100
)
.
astype
(
'float32'
)
M
,
N
=
A
.
shape
U
,
S
,
VT
=
self
.
run_gpu_svd
(
A
)
self
.
assert_column_orthonormal
(
U
)
self
.
assert_column_orthonormal
(
VT
.
T
)
self
.
check_svd
(
A
,
U
,
S
,
VT
)
U
,
S
,
VT
=
self
.
run_gpu_svd
(
A
,
full_matrices
=
False
)
self
.
assertEqual
(
U
.
shape
[
1
],
min
(
M
,
N
))
self
.
assert_column_orthonormal
(
U
)
self
.
assertEqual
(
VT
.
shape
[
0
],
min
(
M
,
N
))
self
.
assert_column_orthonormal
(
VT
.
T
)
def
test_gpu_singular_values
(
self
):
A
=
theano
.
tensor
.
fmatrix
(
"A"
)
f_cpu
=
theano
.
function
(
[
A
],
theano
.
tensor
.
nlinalg
.
svd
(
A
,
compute_uv
=
False
),
mode
=
mode_without_gpu
)
f_gpu
=
theano
.
function
(
[
A
],
gpu_svd
(
A
,
compute_uv
=
False
),
mode
=
mode_with_gpu
)
A_val
=
rand
(
50
,
100
)
.
astype
(
'float32'
)
utt
.
assert_allclose
(
f_cpu
(
A_val
),
f_gpu
(
A_val
))
A_val
=
rand
(
100
,
50
)
.
astype
(
'float32'
)
utt
.
assert_allclose
(
f_cpu
(
A_val
),
f_gpu
(
A_val
))
theano/tensor/nlinalg.py
浏览文件 @
44726397
...
@@ -616,18 +616,37 @@ class SVD(Op):
...
@@ -616,18 +616,37 @@ class SVD(Op):
def
make_node
(
self
,
x
):
def
make_node
(
self
,
x
):
x
=
as_tensor_variable
(
x
)
x
=
as_tensor_variable
(
x
)
assert
x
.
ndim
==
2
,
"The input of svd function should be a matrix."
assert
x
.
ndim
==
2
,
"The input of svd function should be a matrix."
w
=
theano
.
tensor
.
matrix
(
dtype
=
x
.
dtype
)
s
=
theano
.
tensor
.
vector
(
dtype
=
x
.
dtype
)
u
=
theano
.
tensor
.
vector
(
dtype
=
x
.
dtype
)
if
self
.
compute_uv
:
v
=
theano
.
tensor
.
matrix
(
dtype
=
x
.
dtype
)
u
=
theano
.
tensor
.
matrix
(
dtype
=
x
.
dtype
)
return
Apply
(
self
,
[
x
],
[
w
,
u
,
v
])
vt
=
theano
.
tensor
.
matrix
(
dtype
=
x
.
dtype
)
return
Apply
(
self
,
[
x
],
[
u
,
s
,
vt
])
else
:
return
Apply
(
self
,
[
x
],
[
s
])
def
perform
(
self
,
node
,
inputs
,
outputs
):
def
perform
(
self
,
node
,
inputs
,
outputs
):
(
x
,)
=
inputs
(
x
,)
=
inputs
(
w
,
u
,
v
)
=
outputs
assert
x
.
ndim
==
2
,
"The input of svd function should be a matrix."
assert
x
.
ndim
==
2
,
"The input of svd function should be a matrix."
w
[
0
],
u
[
0
],
v
[
0
]
=
self
.
_numop
(
x
,
if
self
.
compute_uv
:
self
.
full_matrices
,
u
,
s
,
vt
=
outputs
self
.
compute_uv
)
u
[
0
],
s
[
0
],
vt
[
0
]
=
self
.
_numop
(
x
,
self
.
full_matrices
,
self
.
compute_uv
)
else
:
s
,
=
outputs
s
[
0
]
=
self
.
_numop
(
x
,
self
.
full_matrices
,
self
.
compute_uv
)
def
infer_shape
(
self
,
node
,
shapes
):
x_shape
,
=
shapes
M
,
N
=
x_shape
K
=
tensor
.
minimum
(
M
,
N
)
s_shape
=
(
K
,
)
if
self
.
compute_uv
:
u_shape
=
(
M
,
M
)
if
self
.
full_matrices
else
(
M
,
K
)
vt_shape
=
(
N
,
N
)
if
self
.
full_matrices
else
(
K
,
N
)
return
[
u_shape
,
s_shape
,
vt_shape
]
else
:
return
[
s_shape
]
def
svd
(
a
,
full_matrices
=
1
,
compute_uv
=
1
):
def
svd
(
a
,
full_matrices
=
1
,
compute_uv
=
1
):
...
...
theano/tensor/tests/test_nlinalg.py
浏览文件 @
44726397
from
__future__
import
absolute_import
,
print_function
,
division
from
__future__
import
absolute_import
,
print_function
,
division
import
unittest
import
unittest
import
itertools
import
numpy
as
np
import
numpy
as
np
import
numpy.linalg
import
numpy.linalg
from
numpy.testing
import
assert_array_almost_equal
from
numpy.testing
import
assert_array_almost_equal
...
@@ -20,7 +21,7 @@ from theano.tensor.nlinalg import (
...
@@ -20,7 +21,7 @@ from theano.tensor.nlinalg import (
AllocDiag
,
alloc_diag
,
ExtractDiag
,
extract_diag
,
diag
,
AllocDiag
,
alloc_diag
,
ExtractDiag
,
extract_diag
,
diag
,
trace
,
Det
,
det
,
Eig
,
eig
,
Eigh
,
EighGrad
,
eigh
,
trace
,
Det
,
det
,
Eig
,
eig
,
Eigh
,
EighGrad
,
eigh
,
matrix_dot
,
_zero_disconnected
,
qr
,
matrix_power
,
matrix_dot
,
_zero_disconnected
,
qr
,
matrix_power
,
norm
,
svd
,
TensorInv
,
tensorinv
,
tensorsolve
)
norm
,
svd
,
SVD
,
TensorInv
,
tensorinv
,
tensorsolve
)
from
nose.plugins.attrib
import
attr
from
nose.plugins.attrib
import
attr
from
nose.plugins.skip
import
SkipTest
from
nose.plugins.skip
import
SkipTest
...
@@ -128,18 +129,46 @@ def test_qr_modes():
...
@@ -128,18 +129,46 @@ def test_qr_modes():
assert
"name 'complete' is not defined"
in
str
(
e
)
assert
"name 'complete' is not defined"
in
str
(
e
)
def
test_svd
():
class
test_SVD
(
utt
.
InferShapeTester
):
rng
=
np
.
random
.
RandomState
(
utt
.
fetch_seed
())
op_class
=
SVD
A
=
tensor
.
matrix
(
"A"
,
dtype
=
theano
.
config
.
floatX
)
dtype
=
'float32'
U
,
V
,
T
=
svd
(
A
)
fn
=
function
([
A
],
[
U
,
V
,
T
])
a
=
rng
.
rand
(
4
,
4
)
.
astype
(
theano
.
config
.
floatX
)
n_u
,
n_v
,
n_t
=
np
.
linalg
.
svd
(
a
)
t_u
,
t_v
,
t_t
=
fn
(
a
)
assert
_allclose
(
n_u
,
t_u
)
def
setUp
(
self
):
assert
_allclose
(
n_v
,
t_v
)
super
(
test_SVD
,
self
)
.
setUp
()
assert
_allclose
(
n_t
,
t_t
)
self
.
rng
=
np
.
random
.
RandomState
(
utt
.
fetch_seed
())
self
.
A
=
theano
.
tensor
.
matrix
(
dtype
=
self
.
dtype
)
self
.
op
=
svd
def
test_svd
(
self
):
A
=
tensor
.
matrix
(
"A"
,
dtype
=
self
.
dtype
)
U
,
S
,
VT
=
svd
(
A
)
fn
=
function
([
A
],
[
U
,
S
,
VT
])
a
=
self
.
rng
.
rand
(
4
,
4
)
.
astype
(
self
.
dtype
)
n_u
,
n_s
,
n_vt
=
np
.
linalg
.
svd
(
a
)
t_u
,
t_s
,
t_vt
=
fn
(
a
)
assert
_allclose
(
n_u
,
t_u
)
assert
_allclose
(
n_s
,
t_s
)
assert
_allclose
(
n_vt
,
t_vt
)
fn
=
function
([
A
],
svd
(
A
,
compute_uv
=
False
))
t_s
=
fn
(
a
)
assert
_allclose
(
n_s
,
t_s
)
def
test_svd_infer_shape
(
self
):
self
.
validate_shape
((
4
,
4
),
full_matrices
=
True
,
compute_uv
=
True
)
self
.
validate_shape
((
4
,
4
),
full_matrices
=
False
,
compute_uv
=
True
)
self
.
validate_shape
((
2
,
4
),
full_matrices
=
False
,
compute_uv
=
True
)
self
.
validate_shape
((
4
,
2
),
full_matrices
=
False
,
compute_uv
=
True
)
self
.
validate_shape
((
4
,
4
),
compute_uv
=
False
)
def
validate_shape
(
self
,
shape
,
compute_uv
=
True
,
full_matrices
=
True
):
A
=
self
.
A
A_v
=
self
.
rng
.
rand
(
*
shape
)
.
astype
(
self
.
dtype
)
outputs
=
self
.
op
(
A
,
full_matrices
=
full_matrices
,
compute_uv
=
compute_uv
)
if
not
compute_uv
:
outputs
=
[
outputs
]
self
.
_compile_and_check
([
A
],
outputs
,
[
A_v
],
self
.
op_class
,
warn
=
False
)
def
test_tensorsolve
():
def
test_tensorsolve
():
...
@@ -403,8 +432,7 @@ class test_Eig(utt.InferShapeTester):
...
@@ -403,8 +432,7 @@ class test_Eig(utt.InferShapeTester):
super
(
test_Eig
,
self
)
.
setUp
()
super
(
test_Eig
,
self
)
.
setUp
()
self
.
rng
=
np
.
random
.
RandomState
(
utt
.
fetch_seed
())
self
.
rng
=
np
.
random
.
RandomState
(
utt
.
fetch_seed
())
self
.
A
=
theano
.
tensor
.
matrix
(
dtype
=
self
.
dtype
)
self
.
A
=
theano
.
tensor
.
matrix
(
dtype
=
self
.
dtype
)
self
.
X
=
np
.
asarray
(
self
.
rng
.
rand
(
5
,
5
),
self
.
X
=
np
.
asarray
(
self
.
rng
.
rand
(
5
,
5
),
dtype
=
self
.
dtype
)
dtype
=
self
.
dtype
)
self
.
S
=
self
.
X
.
dot
(
self
.
X
.
T
)
self
.
S
=
self
.
X
.
dot
(
self
.
X
.
T
)
def
test_infer_shape
(
self
):
def
test_infer_shape
(
self
):
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论