提交 cf4e0264 authored 作者: notoraptor's avatar notoraptor

Addressed nouiz comments.

上级 4c87b1b3
......@@ -5,7 +5,6 @@ import theano
import warnings
from theano import Op
from theano.gpuarray import basic_ops, GpuArrayType
import numpy as np
......@@ -254,7 +253,7 @@ class GpuCholesky(Op):
warnings.warn('The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8')
if not pygpu_available:
raise RuntimeError('Missing pygpu or triu/tril functions.'
'Try updating libgpuarray?')
'Install or update libgpuarray.')
context_name = basic_ops.infer_context_name(inp)
inp = basic_ops.as_gpuarray_variable(inp, context_name)
......@@ -262,14 +261,12 @@ class GpuCholesky(Op):
inp = basic_ops.gpu_contiguous(inp)
# this op can only operate on float32 matrices
# because of current implementation of triu/tril.
# TODO: support float64 for triu/tril in GpuArray and for GpuCholesky/GpuCusolverSolve in Theano.
assert inp.ndim == 2
assert inp.dtype == 'float32'
return theano.Apply(
self, [inp],
[GpuArrayType('float32',
broadcastable=inp.broadcastable,
context_name=context_name)()])
return theano.Apply(self, [inp], [inp.type()])
def prepare_node(self, node, storage_map, compute_map, impl):
ctx = node.inputs[0].type.context
......
......@@ -1977,6 +1977,13 @@ def local_gpu_cholesky(op, context_name, inputs, outputs):
return
return GpuCholesky(lower=op.lower, inplace=op.destructive)
@register_inplace()
@local_optimizer([GpuCholesky], inplace=True)
def local_inplace_cholesky(node):
    """Replace a non-inplace GpuCholesky node with its inplace variant.

    Parameters
    ----------
    node
        The apply node being considered by the optimizer.

    Returns
    -------
    list or None
        A one-element list holding the output of a new
        ``GpuCholesky(lower=..., inplace=True)`` applied to the same inputs,
        or ``None`` when the node is not a ``GpuCholesky`` or is already
        inplace (no replacement is proposed in that case).
    """
    op = node.op
    # Nothing to do for unrelated ops or ops that are already inplace.
    if not isinstance(op, GpuCholesky) or op.inplace:
        return None
    # Keep the same `lower` setting; only the inplace flag changes.
    inplace_op = GpuCholesky(lower=op.lower, inplace=True)
    return [inplace_op(*node.inputs)]
# Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled.
optdb.register('gpua_scanOp_make_inplace',
......
......@@ -593,7 +593,8 @@ def test_local_lift_cholesky():
f_gpu = theano.function([A], o, mode=mode_with_gpu)
assert not any(isinstance(n.op, slinalg.Cholesky)
for n in f_gpu.maker.fgraph.apply_nodes)
assert any(isinstance(n.op, GpuCholesky)
# GpuCholesky op in this graph should be inplace (as its input is not reused by any other op).
assert any(isinstance(n.op, GpuCholesky) and n.op.inplace
for n in f_gpu.maker.fgraph.apply_nodes)
M_val = np.random.normal(size=(3, 3)).astype("float32")
# A = M.dot(M.T) will be positive definite for all non-singular M
......@@ -601,6 +602,25 @@ def test_local_lift_cholesky():
utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))
def test_gpu_cholesky_not_inplace():
    """Check that GpuCholesky is not made inplace when its input is reused.

    The graph computes ``cholesky(A**2) + A**2``, so the Cholesky input is
    also consumed by the addition. The test compiles the graph with
    ``mode_without_gpu`` and ``mode_with_gpu``, asserts that the GPU graph
    contains exactly one ``GpuCholesky`` node whose op has ``inplace`` false,
    and compares both outputs with ``utt.assert_allclose``.

    Raises
    ------
    SkipTest
        If ``cusolver_available`` is false.
    """
    if not cusolver_available:
        raise SkipTest('No cuSolver')

    A = tensor.fmatrix()
    A_squared = A**2
    # A_squared is used twice: as the Cholesky input and in the addition.
    D = slinalg.cholesky(A_squared) + A_squared

    f_cpu = theano.function([A], D, mode=mode_without_gpu)
    f_gpu = theano.function([A], D, mode=mode_with_gpu)

    # GpuCholesky op in this graph should NOT be inplace
    # (as its input is reused by another op).
    not_inplace_count = sum(
        1 for node in f_gpu.maker.fgraph.apply_nodes
        if isinstance(node.op, GpuCholesky) and not node.op.inplace)
    assert not_inplace_count == 1, not_inplace_count

    M_val = np.random.normal(size=(3, 3)).astype("float32")
    # A = M.dot(M.T) will be positive definite for all non-singular M
    A_val = M_val.dot(M_val.T)
    utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))
def test_local_gpua_advanced_incsubtensor():
# test a corner case reported at gh-5589
target = tensor.ftensor4()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论