提交 b7cd3ce3 作者: Alexander Matyasko

Fix inplace matrix inverse and add tests for it

上级 3feae315
...@@ -357,8 +357,7 @@ class GpuMagmaSVD(COp): ...@@ -357,8 +357,7 @@ class GpuMagmaSVD(COp):
def __init__(self, full_matrices=True, compute_uv=True):
    """
    Record the SVD options on the instance and initialise the COp base.

    Parameters
    ----------
    full_matrices : bool, optional
        Stored unchanged as ``self.full_matrices``.
    compute_uv : bool, optional
        Stored unchanged as ``self.compute_uv``.

    """
    self.full_matrices = full_matrices
    self.compute_uv = compute_uv
    # 'magma_svd.c' is the C source handed to COp; the second argument is
    # presumably the name of the C entry point -- confirm against COp docs.
    COp.__init__(self, ['magma_svd.c'], 'APPLY_SPECIFIC(magma_svd)')
def c_headers(self): def c_headers(self):
return ['gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/ext_cuda.h', return ['gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/ext_cuda.h',
...@@ -429,9 +428,10 @@ class GpuMagmaMatrixInverse(COp): ...@@ -429,9 +428,10 @@ class GpuMagmaMatrixInverse(COp):
params_type = gpu_context_type params_type = gpu_context_type
def __init__(self, inplace=False):
    """
    Initialise the COp base and record whether the op works in place.

    Parameters
    ----------
    inplace : bool, optional
        Stored as ``self.inplace``. When true, a ``destroy_map`` is also
        set on the instance.

    """
    # 'magma_inv.c' is the C source handed to COp; the second argument is
    # presumably the name of the C entry point -- confirm against COp docs.
    COp.__init__(self, ['magma_inv.c'], 'APPLY_SPECIFIC(magma_inv)')
    self.inplace = inplace
    if self.inplace:
        # Theano destroy_map convention: output 0 overwrites input 0, so
        # the graph machinery must not reuse that input afterwards.
        self.destroy_map = {0: [0]}
def c_headers(self): def c_headers(self):
return ['gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/ext_cuda.h', return ['gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/ext_cuda.h',
...@@ -451,6 +451,9 @@ class GpuMagmaMatrixInverse(COp): ...@@ -451,6 +451,9 @@ class GpuMagmaMatrixInverse(COp):
return [config.magma.library_path] return [config.magma.library_path]
return [] return []
def clone_inplace(self):
    """
    Build a new op of the same class as ``self`` with ``inplace=True``.

    Returns
    -------
    A fresh instance of ``self.__class__`` constructed with
    ``inplace=True``; ``self`` is not modified.

    """
    op_class = self.__class__
    return op_class(inplace=True)
def make_node(self, x): def make_node(self, x):
ctx_name = infer_context_name(x) ctx_name = infer_context_name(x)
x = as_gpuarray_variable(x, ctx_name) x = as_gpuarray_variable(x, ctx_name)
...@@ -471,4 +474,18 @@ class GpuMagmaMatrixInverse(COp): ...@@ -471,4 +474,18 @@ class GpuMagmaMatrixInverse(COp):
return shapes return shapes
def gpu_matrix_inverse(a, inplace=False):
    """
    This function performs the matrix inverse on GPU.

    Parameters
    ----------
    a : matrix
        The matrix to invert. It is passed directly to a
        `GpuMagmaMatrixInverse` op.
    inplace : bool, optional
        Whether or not to compute matrix inverse inplace.

    Returns
    -------
    a_inv : matrix
        The result of applying ``GpuMagmaMatrixInverse(inplace=inplace)``
        to `a`.

    """
    # A new op instance is created per call so that `inplace` can differ
    # between call sites.
    return GpuMagmaMatrixInverse(inplace=inplace)(a)
...@@ -40,9 +40,9 @@ int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **_A_inv, ...@@ -40,9 +40,9 @@ int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **_A_inv,
goto fail; goto fail;
} }
#ifdef INPLACE #ifdef INPLACE
Py_XDECREF(out); Py_XDECREF(A_inv);
A_inv = A; A_inv = A;
Py_INCREF(out); Py_INCREF(A_inv);
#else #else
A_inv = theano_try_copy(A_inv, A); A_inv = theano_try_copy(A_inv, A);
if (A_inv == NULL) { if (A_inv == NULL) {
......
...@@ -2015,6 +2015,14 @@ def local_gpu_matrix_inverse(op, context_name, inputs, outputs): ...@@ -2015,6 +2015,14 @@ def local_gpu_matrix_inverse(op, context_name, inputs, outputs):
return GpuMagmaMatrixInverse() return GpuMagmaMatrixInverse()
@register_inplace()
@local_optimizer([GpuMagmaMatrixInverse])
def local_inplace_matrix_inverse_inplace(node):
    """
    Replace a non-inplace `GpuMagmaMatrixInverse` node by its inplace clone.

    Returns a one-element list holding the result of applying
    ``node.op.clone_inplace()`` to the node's inputs, or ``None`` when the
    node's op is not a `GpuMagmaMatrixInverse` or is already inplace.

    """
    op = node.op
    # Nothing to rewrite for other ops or for ops that are already inplace.
    if not isinstance(op, GpuMagmaMatrixInverse) or op.inplace:
        return None
    inplace_op = op.clone_inplace()
    return [inplace_op(*node.inputs)]
@register_opt('magma', 'fast_compile') @register_opt('magma', 'fast_compile')
@op_lifter([nlinalg.SVD]) @op_lifter([nlinalg.SVD])
@register_opt2([theano.tensor.nlinalg.SVD], 'magma', 'fast_compile') @register_opt2([theano.tensor.nlinalg.SVD], 'magma', 'fast_compile')
......
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
import unittest import unittest
import numpy as np import numpy as np
import theano from numpy.linalg.linalg import LinAlgError
import theano
from theano import config
from theano.gpuarray.linalg import (GpuCholesky, GpuMagmaMatrixInverse,
cusolver_available, gpu_matrix_inverse,
gpu_solve, gpu_svd)
from theano.tensor.nlinalg import matrix_inverse
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from .. import gpuarray_shared_constructor
from .config import mode_with_gpu, mode_without_gpu from .config import mode_with_gpu, mode_without_gpu
from .test_basic_ops import rand from .test_basic_ops import rand
from numpy.linalg.linalg import LinAlgError
from theano import config
from theano.gpuarray.linalg import (cusolver_available, gpu_solve, GpuCholesky,
gpu_matrix_inverse, gpu_svd)
class TestCusolver(unittest.TestCase): class TestCusolver(unittest.TestCase):
...@@ -199,6 +202,7 @@ class TestGpuCholesky(unittest.TestCase): ...@@ -199,6 +202,7 @@ class TestGpuCholesky(unittest.TestCase):
class TestMagma(unittest.TestCase): class TestMagma(unittest.TestCase):
def setUp(self): def setUp(self):
if not config.magma.enabled: if not config.magma.enabled:
self.skipTest('Magma is not enabled, skipping test') self.skipTest('Magma is not enabled, skipping test')
...@@ -208,10 +212,30 @@ class TestMagma(unittest.TestCase): ...@@ -208,10 +212,30 @@ class TestMagma(unittest.TestCase):
fn = theano.function([A], gpu_matrix_inverse(A), mode=mode_with_gpu.including('magma')) fn = theano.function([A], gpu_matrix_inverse(A), mode=mode_with_gpu.including('magma'))
N = 1000 N = 1000
A_val = np.random.rand(N, N).astype(np.float32) A_val = rand(N, N)
A_val_inv = fn(A_val) A_val_inv = fn(A_val)
utt.assert_allclose(np.dot(A_val_inv, A_val), np.eye(N), atol=1e-3) utt.assert_allclose(np.dot(A_val_inv, A_val), np.eye(N), atol=1e-3)
def test_gpu_matrix_inverse_inplace(self):
    """
    Check that the inplace inverse overwrites the shared GPU matrix.

    The shared variable's value is read before the function runs; after
    the run, the shared variable's new value times the saved one is
    expected to be close to the identity.
    """
    N = 1000
    shared_matrix = gpuarray_shared_constructor(rand(N, N))
    # Snapshot taken before the inplace op runs.
    original_value = shared_matrix.get_value()
    inverse_expr = gpu_matrix_inverse(shared_matrix, inplace=True)
    # accept_inplace=True is passed because the graph handed to
    # theano.function already contains an inplace op.
    fn = theano.function([], inverse_expr,
                         mode=mode_with_gpu.including('magma'),
                         accept_inplace=True)
    fn()
    product = np.dot(shared_matrix.get_value(), original_value)
    utt.assert_allclose(product, np.eye(N), atol=1e-3)
def test_gpu_matrix_inverse_inplace_opt(self):
    """
    Check that compiling `matrix_inverse` with the GPU/magma mode leaves at
    least one inplace `GpuMagmaMatrixInverse` node in the compiled graph.
    """
    A = theano.tensor.fmatrix("A")
    fn = theano.function([A], matrix_inverse(A),
                         mode=mode_with_gpu.including('magma'))
    # Keep only the ops of the nodes that are GPU magma inverses.
    inverse_ops = [node.op for node in fn.maker.fgraph.toposort()
                   if isinstance(node.op, GpuMagmaMatrixInverse)]
    assert any(op.inplace for op in inverse_ops)
def run_gpu_svd(self, A_val, full_matrices=True, compute_uv=True): def run_gpu_svd(self, A_val, full_matrices=True, compute_uv=True):
A = theano.tensor.fmatrix("A") A = theano.tensor.fmatrix("A")
f = theano.function( f = theano.function(
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论