Commit cf85d105 authored by James Bergstra

Merge pull request #2 from nouiz/gpu_diagonal_subtensor

Gpu diagonal subtensor
import theano
from theano.gradient import DisconnectedType from theano.gradient import DisconnectedType
from theano.gof import Op, Apply from theano.gof import Op, Apply
from theano import tensor from theano import tensor
import theano.sandbox.cuda as cuda
def get_diagonal_subtensor_view(x, i0, i1): def get_diagonal_subtensor_view(x, i0, i1):
...@@ -16,6 +18,9 @@ def get_diagonal_subtensor_view(x, i0, i1): ...@@ -16,6 +18,9 @@ def get_diagonal_subtensor_view(x, i0, i1):
class DiagonalSubtensor(Op): class DiagonalSubtensor(Op):
"""
Work on the GPU.
"""
def __init__(self, inplace): def __init__(self, inplace):
self.inplace = inplace self.inplace = inplace
if inplace: if inplace:
...@@ -169,3 +174,26 @@ def conv3d(signals, filters, ...@@ -169,3 +174,26 @@ def conv3d(signals, filters,
else: else:
raise ValueError('invalid border mode', border_mode[1]) raise ValueError('invalid border mode', border_mode[1])
return out_5d return out_5d
@cuda.opt.register_opt()
@theano.gof.local_optimizer([])
def local_gpu_diagonal_subtensor(node):
    """Move a DiagonalSubtensor onto the GPU when its input lives there.

    Two rewrites are attempted:

        diagonal_subtensor(host_from_gpu(x)) -> host_from_gpu(diagonal_subtensor(x))
        gpu_from_host(diagonal_subtensor(x)) -> diagonal_subtensor(gpu_from_host(x))

    :param node: an Apply node inspected by the optimizer.
    :returns: a one-element list with the replacement variable, or False
        when the pattern does not match (the convention for local optimizers).
    """
    if isinstance(node.op, DiagonalSubtensor):
        # Renamed from `input` to avoid shadowing the builtin.
        inp = node.inputs[0]
        if inp.owner and isinstance(inp.owner.op, cuda.HostFromGpu):
            # The value was already on the GPU: take the view there and
            # transfer only the result back to the host.
            return [cuda.host_from_gpu(
                diagonal_subtensor(cuda.gpu_from_host(inp),
                                   *node.inputs[1:]))]
    if node.op == cuda.gpu_from_host:
        host_input = node.inputs[0]
        if host_input.owner and isinstance(host_input.owner.op,
                                           DiagonalSubtensor):
            diag_node = host_input.owner
            # BUG FIX: the original called tensor.diagonal_subtensor, but
            # theano.tensor has no such attribute; use the module-level
            # diagonal_subtensor op (same one used in the branch above).
            return [diagonal_subtensor(
                cuda.gpu_from_host(diag_node.inputs[0]),
                *diag_node.inputs[1:])]
    return False
import time import time
from conv3d2d import *
import numpy
import numpy
from scipy import ndimage from scipy import ndimage
import theano import theano
from theano.sandbox import cuda
from conv3d2d import *
# Build the compilation modes used to compare CPU and GPU runs.
# NOTE(review): when the configured mode is FAST_COMPILE we fall back to
# FAST_RUN — presumably because FAST_COMPILE would skip the gpu-transfer
# optimizations these tests rely on; confirm against the optimizer tags.
if theano.config.mode == 'FAST_COMPILE':
    _base_mode = theano.compile.mode.get_mode('FAST_RUN')
else:
    _base_mode = theano.compile.mode.get_default_mode()
mode_with_gpu = _base_mode.including('gpu')
mode_without_gpu = _base_mode.excluding('gpu')
def test_get_diagonal_subtensor_view(): def test_get_diagonal_subtensor_view():
x = numpy.arange(20).reshape(5,4) x = numpy.arange(20).reshape(5, 4)
xv01 = get_diagonal_subtensor_view(x, 0, 1) xv01 = get_diagonal_subtensor_view(x, 0, 1)
# test that it works in 2d # test that it works in 2d
assert numpy.all(xv01 == [[12, 9, 6, 3], [16, 13, 10, 7]]) assert numpy.all(xv01 == [[12, 9, 6, 3], [16, 13, 10, 7]])
x = numpy.arange(24).reshape(4,3,2) x = numpy.arange(24).reshape(4, 3, 2)
xv01 = get_diagonal_subtensor_view(x, 0, 1) xv01 = get_diagonal_subtensor_view(x, 0, 1)
xv02 = get_diagonal_subtensor_view(x, 0, 2) xv02 = get_diagonal_subtensor_view(x, 0, 2)
xv12 = get_diagonal_subtensor_view(x, 1, 2) xv12 = get_diagonal_subtensor_view(x, 1, 2)
...@@ -22,20 +32,55 @@ def test_get_diagonal_subtensor_view(): ...@@ -22,20 +32,55 @@ def test_get_diagonal_subtensor_view():
#print 'x', x #print 'x', x
#print 'xv01', xv01 #print 'xv01', xv01
#print 'xv02', xv02 #print 'xv02', xv02
assert numpy.all(xv01 ==[ assert numpy.all(xv01 == [
[[12, 13], [8, 9], [4, 5]], [[12, 13], [8, 9], [4, 5]],
[[18, 19], [14, 15], [10, 11]]]) [[18, 19], [14, 15], [10, 11]]])
assert numpy.all(xv02 == [ assert numpy.all(xv02 == [
[[6, 1], [8,3], [10, 5]], [[6, 1], [8, 3], [10, 5]],
[[12, 7], [14, 9], [16, 11]],
[[18, 13], [20, 15], [22, 17]],
])
# diagonal views of each leading matrix is the same
# as the slices out of the diagonal view of the entire 3d tensor
for xi, xvi in zip(x, xv12):
assert numpy.all(xvi == get_diagonal_subtensor_view(xi, 0, 1))
def test_get_diagonal_subtensor_view_gpu():
    """Check diagonal views taken on a GPU-resident 2d array, then repeat
    the 3d checks of the plain test."""
    # 2d case: upload to the GPU, take the view, download and compare.
    host_data = numpy.arange(20, dtype='float32').reshape(5, 4)
    gpu_data = cuda.CudaNdarray(host_data)
    view01 = get_diagonal_subtensor_view(gpu_data, 0, 1)
    assert numpy.all(numpy.asarray(view01) ==
                     [[12, 9, 6, 3], [16, 13, 10, 7]])

    # 3d case.  NOTE(review): this part operates on a plain numpy array,
    # so it never touches the GPU — same path as the non-gpu test.
    x = numpy.arange(24).reshape(4, 3, 2)
    view01 = get_diagonal_subtensor_view(x, 0, 1)
    view02 = get_diagonal_subtensor_view(x, 0, 2)
    view12 = get_diagonal_subtensor_view(x, 1, 2)
    assert numpy.all(numpy.asarray(view01) == [
        [[12, 13], [8, 9], [4, 5]],
        [[18, 19], [14, 15], [10, 11]]])
    assert numpy.all(numpy.asarray(view02) == [
        [[6, 1], [8, 3], [10, 5]],
        [[12, 7], [14, 9], [16, 11]],
        [[18, 13], [20, 15], [22, 17]],
        ])
    # The diagonal view of each leading matrix must equal the matching
    # slice of the diagonal view of the whole 3d tensor.
    for leading_mat, view_slice in zip(x, numpy.asarray(view12)):
        assert numpy.all(
            numpy.asarray(view_slice) ==
            numpy.asarray(get_diagonal_subtensor_view(leading_mat, 0, 1)))
def pyconv3d(signals, filters): def pyconv3d(signals, filters):
Ns, Ts, C, Hs, Ws = signals.shape Ns, Ts, C, Hs, Ws = signals.shape
...@@ -56,6 +101,7 @@ def pyconv3d(signals, filters): ...@@ -56,6 +101,7 @@ def pyconv3d(signals, filters):
#print s_i.shape, f_i.shape, r_i.shape, o_i.shape #print s_i.shape, f_i.shape, r_i.shape, o_i.shape
r_i += o_i[Tf2:-Tf2, Hf2:-Hf2, Wf2:-Wf2] r_i += o_i[Tf2:-Tf2, Hf2:-Hf2, Wf2:-Wf2]
def test_conv3d(): def test_conv3d():
Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32 Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32
...@@ -68,15 +114,21 @@ def test_conv3d(): ...@@ -68,15 +114,21 @@ def test_conv3d():
pyconv3d(signals, filters) pyconv3d(signals, filters)
print time.time() - t0 print time.time() - t0
s_signals = theano.shared(signals) modes = [(mode_without_gpu, theano.tensor._shared)]
s_filters = theano.shared(filters) if cuda.cuda_available:
s_output = theano.shared(signals*0) modes.append((mode_with_gpu, cuda.shared_constructor))
newconv3d = theano.function([],[], for mode, shared in modes:
updates={s_output: conv3d(s_signals, s_filters, s_signals = shared(signals)
signals_shape=signals.shape, s_filters = shared(filters)
filters_shape=filters.shape)}) s_output = shared(signals*0)
t0 = time.time() newconv3d = theano.function([], [],
newconv3d() updates={s_output: conv3d(s_signals, s_filters,
print time.time() - t0 signals_shape=signals.shape,
filters_shape=filters.shape)},
mode=mode)
t0 = time.time()
newconv3d()
print time.time() - t0
Markdown formatting is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Finish editing this comment first!
Register or sign in to post a comment