提交 4be04f9a authored 作者: Frederic's avatar Frederic

Make DiagonalSubtensor work on the GPU.

This needs Theano PR https://github.com/Theano/Theano/pull/1436. It is tested indirectly by the test for conv3d.
上级 631b672c
import theano
from theano.gradient import DisconnectedType
from theano.gof import Op, Apply
from theano import tensor
import theano.sandbox.cuda as cuda
def get_diagonal_subtensor_view(x, i0, i1):
......@@ -16,6 +18,9 @@ def get_diagonal_subtensor_view(x, i0, i1):
class DiagonalSubtensor(Op):
"""
Work on the GPU.
"""
def __init__(self, inplace):
self.inplace = inplace
if inplace:
......@@ -169,3 +174,26 @@ def conv3d(signals, filters,
else:
raise ValueError('invalid border mode', border_mode[1])
return out_5d
@cuda.opt.register_opt()
@theano.gof.local_optimizer([])
def local_gpu_diagonal_subtensor(node):
    """
    Local optimizer that moves DiagonalSubtensor across host/GPU transfers.

    Two rewrites are attempted, in this order:

    diagonal_subtensor(host_from_gpu()) -> host_from_gpu(diagonal_subtensor)
    gpu_from_host(diagonal_subtensor) -> diagonal_subtensor(gpu_from_host)

    :param node: the Apply node currently inspected by the optimizer.
    :return: a one-element list holding the replacement output variable,
        or False when neither pattern matches.
    """
    # Pattern 1: the op consumes a value that was just copied back from the
    # GPU.  Apply the op to a GPU version of that value and transfer the
    # result to the host instead.
    if isinstance(node.op, DiagonalSubtensor):
        data = node.inputs[0]  # named `data` to avoid shadowing builtin input
        if data.owner and isinstance(data.owner.op, cuda.HostFromGpu):
            on_gpu = diagonal_subtensor(cuda.gpu_from_host(data),
                                        *node.inputs[1:])
            return [cuda.host_from_gpu(on_gpu)]

    # Pattern 2: the result of the op is being sent to the GPU.  Transfer
    # the op's first input instead and apply the op after the transfer.
    if node.op == cuda.gpu_from_host:
        host_input = node.inputs[0]
        if host_input.owner and isinstance(host_input.owner.op,
                                           DiagonalSubtensor):
            diag_node = host_input.owner
            # Use this module's `diagonal_subtensor`, exactly as pattern 1
            # does; the name is not looked up on `theano.tensor`.
            return [diagonal_subtensor(
                cuda.gpu_from_host(diag_node.inputs[0]),
                *diag_node.inputs[1:])]

    return False
......@@ -8,6 +8,14 @@ from theano.sandbox import cuda
from conv3d2d import *
# Choose the base compilation mode once, then derive the two test modes
# from it.  When the configured mode is FAST_COMPILE, FAST_RUN is used as
# the base instead of the default mode (presumably so that the 'gpu'
# optimizations are actually applied -- TODO confirm).
if theano.config.mode != 'FAST_COMPILE':
    _base_mode = theano.compile.mode.get_default_mode()
else:
    _base_mode = theano.compile.mode.get_mode('FAST_RUN')

# Same base mode, with the 'gpu' optimizations included / excluded.
mode_with_gpu = _base_mode.including('gpu')
mode_without_gpu = _base_mode.excluding('gpu')
def test_get_diagonal_subtensor_view():
x = numpy.arange(20).reshape(5, 4)
......@@ -106,15 +114,21 @@ def test_conv3d():
pyconv3d(signals, filters)
print time.time() - t0
s_signals = theano.shared(signals)
s_filters = theano.shared(filters)
s_output = theano.shared(signals*0)
modes = [(mode_without_gpu, theano.tensor._shared)]
if cuda.cuda_available:
modes.append((mode_with_gpu, cuda.shared_constructor))
newconv3d = theano.function([],[],
updates={s_output: conv3d(s_signals, s_filters,
signals_shape=signals.shape,
filters_shape=filters.shape)})
for mode, shared in modes:
s_signals = shared(signals)
s_filters = shared(filters)
s_output = shared(signals*0)
t0 = time.time()
newconv3d()
print time.time() - t0
newconv3d = theano.function([], [],
updates={s_output: conv3d(s_signals, s_filters,
signals_shape=signals.shape,
filters_shape=filters.shape)},
mode=mode)
t0 = time.time()
newconv3d()
print time.time() - t0
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论