提交 d65475af，作者：James Bergstra

Merge pull request #3 from nouiz/gpu_inc_diagonal_subtensor

Gpu inc diagonal subtensor and optimization refactoring.
...@@ -21,7 +21,12 @@ class DiagonalSubtensor(Op): ...@@ -21,7 +21,12 @@ class DiagonalSubtensor(Op):
""" """
Work on the GPU. Work on the GPU.
""" """
def __init__(self, inplace): def __str__(self):
if self.inplace:
return "%s{inplace}" % self.__class__.__name__
return "%s" % self.__class__.__name__
def __init__(self, inplace=False):
self.inplace = inplace self.inplace = inplace
if inplace: if inplace:
self.view_map = {0: [0]} self.view_map = {0: [0]}
...@@ -57,7 +62,12 @@ diagonal_subtensor = DiagonalSubtensor(False) ...@@ -57,7 +62,12 @@ diagonal_subtensor = DiagonalSubtensor(False)
class IncDiagonalSubtensor(Op): class IncDiagonalSubtensor(Op):
def __init__(self, inplace): def __str__(self):
if self.inplace:
return "%s{inplace}" % self.__class__.__name__
return "%s" % self.__class__.__name__
def __init__(self, inplace=False):
self.inplace = inplace self.inplace = inplace
if inplace: if inplace:
self.destroy_map = {0: [0]} self.destroy_map = {0: [0]}
...@@ -176,24 +186,49 @@ def conv3d(signals, filters, ...@@ -176,24 +186,49 @@ def conv3d(signals, filters,
return out_5d return out_5d
@cuda.opt.register_opt() def make_gpu_optimizer(op, to_gpu):
@theano.gof.local_optimizer([]) """This function create optimizer that move some inputs to the GPU
def local_gpu_diagonal_subtensor(node): for op that work on both CPU and GPU.
"""
diagonal_subtensor(host_from_gpu()) -> host_from_gpu(diagonal_subtensor) The op object is created by calling op(), so good default value
gpu_from_host(diagonal_subtensor) -> diagonal_subtensor(gpu_from_host) are needed.
We assume the same op works with both CPU and GPU inputs.
:param op: the op that supports GPU inputs
:param to_gpu: a list of indices of the op inputs that are moved to the GPU.
""" """
if isinstance(node.op, DiagonalSubtensor): @theano.gof.local_optimizer([])
input = node.inputs[0] def local_to_gpu(node):
if input.owner and isinstance(input.owner.op, cuda.HostFromGpu): """
return [cuda.host_from_gpu(diagonal_subtensor(cuda.gpu_from_host(input), op(host_from_gpu()) -> host_from_gpu(op)
*node.inputs[1:]))] gpu_from_host(op) -> op(gpu_from_host)
if node.op == cuda.gpu_from_host: """
host_input = node.inputs[0] if isinstance(node.op, op):
if host_input.owner and isinstance(host_input.owner.op, #op(host_from_gpu()) -> host_from_gpu(op)
DiagonalSubtensor): #If any of the input that go on the GPU are on the GPU,
diag_node = host_input.owner #move the op to the gpu.
return [tensor.diagonal_subtensor( if any(node.inputs[idx].owner and
cuda.gpu_from_host(diag_node.inputs[0]), isinstance(node.inputs[idx].owner.op, cuda.HostFromGpu)
*diag_node.inputs[1:])] for idx in to_gpu):
return False new_inp = list(node.inputs)
for idx in to_gpu:
new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
return [cuda.host_from_gpu(op()(*new_inp))]
if node.op == cuda.gpu_from_host:
#gpu_from_host(op) -> op(gpu_from_host)
host_input = node.inputs[0]
if host_input.owner and isinstance(host_input.owner.op,
op):
op_node = host_input.owner
new_inp = list(op_node.inputs)
for idx in to_gpu:
new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
return [op()(*new_inp)]
return False
local_to_gpu.__name__ = "local_to_gpu_" + op.__name__
cuda.opt.register_opt()(local_to_gpu)
make_gpu_optimizer(DiagonalSubtensor, [0])
make_gpu_optimizer(IncDiagonalSubtensor, [0, 3])
...@@ -123,12 +123,24 @@ def test_conv3d(): ...@@ -123,12 +123,24 @@ def test_conv3d():
s_filters = shared(filters) s_filters = shared(filters)
s_output = shared(signals*0) s_output = shared(signals*0)
out = conv3d(s_signals, s_filters,
signals_shape=signals.shape,
filters_shape=filters.shape)
newconv3d = theano.function([], [], newconv3d = theano.function([], [],
updates={s_output: conv3d(s_signals, s_filters, updates={s_output: out},
signals_shape=signals.shape,
filters_shape=filters.shape)},
mode=mode) mode=mode)
t0 = time.time() t0 = time.time()
newconv3d() newconv3d()
print time.time() - t0 print time.time() - t0
gsignals, gfilters = theano.grad(out.sum(), [s_signals, s_filters])
gnewconv3d = theano.function([], [],
updates=[(s_filters, gfilters),
(s_signals, gsignals)],
mode=mode,
name='grad')
t0 = time.time()
gnewconv3d()
print 'grad', time.time() - t0
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论