Commit 91835cc8 authored by notoraptor, committed by GitHub

Merge pull request #5895 from tfjgeorge/gpu_solve_inplace

Added an optimization for inplace gpu_cusolver_solve and tests
......@@ -60,6 +60,11 @@ def attach_cusolver_handle_to_context(ctx):
with ctx:
ctx.cusolver_handle = cusolver.cusolverDnCreate()
# it is a subset of all cases available in slinalg's MATRIX_STRUCTURE
MATRIX_STRUCTURES_SOLVE = (
'general',
'symmetric')
class GpuCusolverSolve(Op):
"""
......@@ -79,7 +84,8 @@ class GpuCusolverSolve(Op):
self.inplace = inplace
self.A_structure = A_structure
if self.inplace:
self.destroy_map = {0: [0, 1]}
self.destroy_map = {0: [0]}
assert A_structure in MATRIX_STRUCTURES_SOLVE
super(GpuCusolverSolve, self).__init__()
def make_node(self, inp1, inp2):
......
......@@ -70,7 +70,8 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedIncSubtensor1_dev20)
from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
from .reduction import GpuMaxAndArgmax
from .linalg import (GpuCusolverSolve, GpuCholesky, cusolver_available)
from .linalg import (GpuCusolverSolve, MATRIX_STRUCTURES_SOLVE, GpuCholesky,
cusolver_available)
_logger = logging.getLogger("theano.gpuarray.opt")
......@@ -1974,7 +1975,17 @@ def local_gpu_maxandargmax(op, context_name, inputs, outputs):
def local_gpu_solve(op, context_name, inputs, outputs):
    """Lift a CPU ``slinalg.Solve`` op to ``GpuCusolverSolve``.

    Returns None (meaning: no replacement) when cuSOLVER is not
    available, or when the op's matrix structure is not one the GPU
    solver supports (see ``MATRIX_STRUCTURES_SOLVE``).
    """
    if not cusolver_available:
        return
    # The GPU solver only handles a subset of slinalg's matrix
    # structures ('general' and 'symmetric'); bail out otherwise.
    if op.A_structure not in MATRIX_STRUCTURES_SOLVE:
        return
    # NOTE: the unconditional `return GpuCusolverSolve()` that preceded
    # the structure check was a stale pre-merge line making the guard
    # above unreachable; it is removed so A_structure is propagated.
    return GpuCusolverSolve(A_structure=op.A_structure)
@register_inplace()
@local_optimizer([GpuCusolverSolve], inplace=True)
def local_inplace_gpu_solve(node):
    """Swap a non-inplace ``GpuCusolverSolve`` for its inplace variant.

    Returns None when the node is not a ``GpuCusolverSolve`` or is
    already inplace, so the optimizer leaves it untouched.
    """
    op = node.op
    if not isinstance(op, GpuCusolverSolve) or op.inplace:
        return
    inplace_op = GpuCusolverSolve(A_structure=op.A_structure,
                                  trans=op.trans,
                                  inplace=True)
    return [inplace_op(*node.inputs)]
# Cholesky decomposition
......
......@@ -587,13 +587,30 @@ def test_local_lift_solve():
f_gpu = theano.function([A, b], o, mode=mode_with_gpu)
assert not any(isinstance(n.op, slinalg.Solve)
for n in f_gpu.maker.fgraph.apply_nodes)
assert any(isinstance(n.op, GpuCusolverSolve)
assert any(isinstance(n.op, GpuCusolverSolve) and n.op.inplace
for n in f_gpu.maker.fgraph.apply_nodes)
A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
b_val = np.random.uniform(-0.4, 0.4, (5, 3)).astype("float32")
utt.assert_allclose(f_cpu(A_val, b_val), f_gpu(A_val, b_val))
def test_gpu_solve_not_inplace():
    """Check that the solve stays non-inplace when its input is reused.

    Here ``A`` is consumed both by the solve and by the surrounding
    ``dot``, so the inplace optimization must NOT fire (it would
    destroy ``A`` before the dot reads it). Exactly one non-inplace
    GpuCusolverSolve node must remain, and CPU/GPU results must agree.
    """
    if not cusolver_available:
        raise SkipTest('No cuSolver')
    A = tensor.fmatrix()
    b = tensor.fmatrix()
    s = slinalg.solve(A, b)
    # A is reused by the dot product below, preventing inplace solve.
    o = tensor.dot(A, s)
    # Pass mode= by keyword for both functions (the original passed it
    # positionally for f_cpu only, inconsistently with sibling tests).
    f_cpu = theano.function([A, b], o, mode=mode_without_gpu)
    f_gpu = theano.function([A, b], o, mode=mode_with_gpu)
    count_not_inplace = sum(
        1 for n in f_gpu.maker.fgraph.apply_nodes
        if isinstance(n.op, GpuCusolverSolve) and not n.op.inplace)
    assert count_not_inplace == 1, count_not_inplace
    A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
    b_val = np.random.uniform(-0.4, 0.4, (5, 3)).astype("float32")
    utt.assert_allclose(f_cpu(A_val, b_val), f_gpu(A_val, b_val))
def test_local_lift_cholesky():
if not cusolver_available:
raise SkipTest('No cuSolver')
......
Markdown formatting is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment