提交 d65475af authored 作者: James Bergstra's avatar James Bergstra

Merge pull request #3 from nouiz/gpu_inc_diagonal_subtensor

Gpu inc diagonal subtensor and optimization refactoring.
...@@ -21,7 +21,12 @@ class DiagonalSubtensor(Op): ...@@ -21,7 +21,12 @@ class DiagonalSubtensor(Op):
""" """
Work on the GPU. Work on the GPU.
""" """
def __str__(self):
    """Return the Op's class name, suffixed with ``{inplace}`` when it works in-place."""
    tag = "{inplace}" if self.inplace else ""
    return self.__class__.__name__ + tag
def __init__(self, inplace=False):
self.inplace = inplace self.inplace = inplace
if inplace: if inplace:
self.view_map = {0: [0]} self.view_map = {0: [0]}
...@@ -57,7 +62,12 @@ diagonal_subtensor = DiagonalSubtensor(False) ...@@ -57,7 +62,12 @@ diagonal_subtensor = DiagonalSubtensor(False)
class IncDiagonalSubtensor(Op): class IncDiagonalSubtensor(Op):
def __str__(self):
    """Printable name of the Op; in-place instances are tagged ``{inplace}``."""
    if not self.inplace:
        return "%s" % self.__class__.__name__
    return "%s{inplace}" % self.__class__.__name__
def __init__(self, inplace=False):
self.inplace = inplace self.inplace = inplace
if inplace: if inplace:
self.destroy_map = {0: [0]} self.destroy_map = {0: [0]}
...@@ -176,24 +186,49 @@ def conv3d(signals, filters, ...@@ -176,24 +186,49 @@ def conv3d(signals, filters,
return out_5d return out_5d
def make_gpu_optimizer(op, to_gpu):
    """Create and register a local optimizer that moves ``op`` to the GPU.

    Builds a Theano local optimizer for an Op class that works on both
    CPU and GPU inputs, and registers it in the CUDA optimizer database.
    The Op is re-instantiated with ``op()``, so the Op's constructor must
    provide usable default values for all its arguments.

    :param op: the Op *class* (not an instance) that supports GPU inputs.
    :param to_gpu: indices of the op's inputs that should be moved to the
        GPU; all other inputs are left untouched.
    """
    @theano.gof.local_optimizer([])
    def local_to_gpu(node):
        """
        op(host_from_gpu()) -> host_from_gpu(op)
        gpu_from_host(op) -> op(gpu_from_host)
        """
        if isinstance(node.op, op):
            # op(host_from_gpu()) -> host_from_gpu(op)
            # If any of the inputs that should go on the GPU is already
            # on the GPU, move the whole op to the GPU.
            if any(node.inputs[idx].owner and
                   isinstance(node.inputs[idx].owner.op, cuda.HostFromGpu)
                   for idx in to_gpu):
                new_inp = list(node.inputs)
                for idx in to_gpu:
                    new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
                return [cuda.host_from_gpu(op()(*new_inp))]
        if node.op == cuda.gpu_from_host:
            # gpu_from_host(op) -> op(gpu_from_host)
            host_input = node.inputs[0]
            if host_input.owner and isinstance(host_input.owner.op, op):
                op_node = host_input.owner
                new_inp = list(op_node.inputs)
                for idx in to_gpu:
                    new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
                return [op()(*new_inp)]
        # No replacement applies: returning False tells the optimizer
        # to leave this node unchanged.
        return False
    # Give each generated optimizer a distinct name so it can be told
    # apart in optimizer profiling/debug output.
    local_to_gpu.__name__ = "local_to_gpu_" + op.__name__
    cuda.opt.register_opt()(local_to_gpu)
# Register the GPU-transfer optimizers for both Ops.  Only the input
# indices listed here are wrapped in gpu_from_host: input 0 for
# DiagonalSubtensor, inputs 0 and 3 for IncDiagonalSubtensor.
make_gpu_optimizer(DiagonalSubtensor, [0])
make_gpu_optimizer(IncDiagonalSubtensor, [0, 3])
...@@ -123,12 +123,24 @@ def test_conv3d(): ...@@ -123,12 +123,24 @@ def test_conv3d():
s_filters = shared(filters) s_filters = shared(filters)
s_output = shared(signals*0) s_output = shared(signals*0)
newconv3d = theano.function([], [], out = conv3d(s_signals, s_filters,
updates={s_output: conv3d(s_signals, s_filters,
signals_shape=signals.shape, signals_shape=signals.shape,
filters_shape=filters.shape)}, filters_shape=filters.shape)
newconv3d = theano.function([], [],
updates={s_output: out},
mode=mode) mode=mode)
t0 = time.time() t0 = time.time()
newconv3d() newconv3d()
print time.time() - t0 print time.time() - t0
gsignals, gfilters = theano.grad(out.sum(), [s_signals, s_filters])
gnewconv3d = theano.function([], [],
updates=[(s_filters, gfilters),
(s_signals, gsignals)],
mode=mode,
name='grad')
t0 = time.time()
gnewconv3d()
print 'grad', time.time() - t0
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论