提交 8a005114,作者:Alexander Matyasko

Add local optimizers for magma cholesky op

上级 118c35e8
...@@ -258,6 +258,9 @@ class GpuCholesky(Op): ...@@ -258,6 +258,9 @@ class GpuCholesky(Op):
self.destroy_map = {0: [0]} self.destroy_map = {0: [0]}
super(GpuCholesky, self).__init__() super(GpuCholesky, self).__init__()
def clone_inplace(self):
    """Return a new op of the same class with ``inplace=True``.

    The ``lower`` setting of this op is carried over unchanged; this op
    itself is not modified.
    """
    op_factory = self.__class__
    return op_factory(lower=self.lower, inplace=True)
def make_node(self, inp): def make_node(self, inp):
if not cusolver_available: if not cusolver_available:
raise RuntimeError('CUSOLVER is not available and ' raise RuntimeError('CUSOLVER is not available and '
...@@ -554,6 +557,9 @@ class GpuMagmaCholesky(CGpuKernelBase): ...@@ -554,6 +557,9 @@ class GpuMagmaCholesky(CGpuKernelBase):
return [config.magma.library_path] return [config.magma.library_path]
return [] return []
def clone_inplace(self):
    """Build the in-place variant of this op.

    Returns a fresh instance of this op's class that keeps the current
    ``lower`` value and has ``inplace`` switched on.
    """
    settings = dict(lower=self.lower, inplace=True)
    return self.__class__(**settings)
def make_node(self, A): def make_node(self, A):
ctx_name = infer_context_name(A) ctx_name = infer_context_name(A)
A = as_gpuarray_variable(A, ctx_name) A = as_gpuarray_variable(A, ctx_name)
......
...@@ -2127,9 +2127,26 @@ def local_gpu_cholesky(op, context_name, inputs, outputs): ...@@ -2127,9 +2127,26 @@ def local_gpu_cholesky(op, context_name, inputs, outputs):
@local_optimizer([GpuCholesky], inplace=True) @local_optimizer([GpuCholesky], inplace=True)
def local_inplace_cholesky(node): def local_inplace_cholesky(node):
if isinstance(node.op, GpuCholesky) and not node.op.inplace: if isinstance(node.op, GpuCholesky) and not node.op.inplace:
return [GpuCholesky(lower=node.op.lower, inplace=True)(*node.inputs)] return [node.op.clone_inplace()(*node.inputs)]
@register_opt('magma', 'fast_compile')
@op_lifter([slinalg.Cholesky, GpuCholesky])
@register_opt2([slinalg.Cholesky, GpuCholesky], 'magma', 'fast_compile')
def local_gpu_magma_cholesky(op, context_name, inputs, outputs):
    """Replace a Cholesky op with ``GpuMagmaCholesky`` when magma is enabled.

    Parameters
    ----------
    op
        The op being replaced; its ``lower`` flag is forwarded, together
        with its ``destructive`` flag (or ``inplace`` when the op has no
        ``destructive`` attribute).
    context_name, inputs, outputs
        Supplied by the lifter machinery; not used by this function.

    Returns
    -------
    GpuMagmaCholesky or None
        ``None`` when ``config.magma.enabled`` is false, otherwise the
        replacement op.
    """
    if not config.magma.enabled:
        return
    # The op classes listed above do not share one flag name: GpuCholesky
    # is built with ``inplace=`` elsewhere in this file, so fall back to
    # that attribute when ``destructive`` is absent instead of raising.
    if hasattr(op, 'destructive'):
        inplace = op.destructive
    else:
        inplace = op.inplace
    return GpuMagmaCholesky(lower=op.lower, inplace=inplace)
@register_inplace()
@local_optimizer([GpuMagmaCholesky], inplace=True)
def local_inplace_gpu_magma_cholesky(node):
    """Swap a non-inplace ``GpuMagmaCholesky`` node for its in-place clone.

    Returns a one-element list holding the in-place op applied to the
    node's inputs, or ``None`` when the node's op is not a
    ``GpuMagmaCholesky`` or is already in-place.
    """
    if not isinstance(node.op, GpuMagmaCholesky) or node.op.inplace:
        return None
    inplace_op = node.op.clone_inplace()
    return [inplace_op(*node.inputs)]
# Matrix inverse
@register_opt('magma', 'fast_compile') @register_opt('magma', 'fast_compile')
@op_lifter([nlinalg.MatrixInverse]) @op_lifter([nlinalg.MatrixInverse])
@register_opt2([theano.tensor.nlinalg.MatrixInverse], 'magma', 'fast_compile') @register_opt2([theano.tensor.nlinalg.MatrixInverse], 'magma', 'fast_compile')
...@@ -2146,12 +2163,12 @@ def local_gpu_matrix_inverse(op, context_name, inputs, outputs): ...@@ -2146,12 +2163,12 @@ def local_gpu_matrix_inverse(op, context_name, inputs, outputs):
@register_inplace() @register_inplace()
@local_optimizer([GpuMagmaMatrixInverse]) @local_optimizer([GpuMagmaMatrixInverse])
def local_inplace_matrix_inverse_inplace(node): def local_inplace_gpu_matrix_inverse(node):
if isinstance(node.op, GpuMagmaMatrixInverse): if isinstance(node.op, GpuMagmaMatrixInverse) and not node.op.inplace:
if not node.op.inplace:
return [node.op.clone_inplace()(*node.inputs)] return [node.op.clone_inplace()(*node.inputs)]
# Singular Value Decomposition
@register_opt('magma', 'fast_compile') @register_opt('magma', 'fast_compile')
@op_lifter([nlinalg.SVD]) @op_lifter([nlinalg.SVD])
@register_opt2([theano.tensor.nlinalg.SVD], 'magma', 'fast_compile') @register_opt2([theano.tensor.nlinalg.SVD], 'magma', 'fast_compile')
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论