Commit 54d16f99 authored by Arnaud Bergeron

Generalize alpha_merge and output_merge.

Parent 08957330
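In short, the hand-written blocksparse optimizers below are replaced by two reusable decorator factories. As a minimal sketch of the intended usage (MyOuterOp and my_outer_op are hypothetical placeholders; the real application to SparseBlockOuterSS is in the diff below):

    from theano.sandbox.cuda.opt_util import alpha_merge

    # alpha_in=5 says the op's scalar multiplier sits at position 5 of
    # its input list; nd=4 is the rank of the broadcast scalar matched.
    @alpha_merge(MyOuterOp, alpha_in=5, nd=4)  # MyOuterOp: hypothetical GpuOp
    def local_merge_my_outer_alpha(node, *inputs):
        # By the time this runs, the decorator has matched
        # GpuElemwise{mul}(lr, MyOuterOp(...)) and folded lr into
        # inputs[alpha_in]; the maker only rebuilds the op.
        return [my_outer_op(*inputs)]  # my_outer_op: hypothetical constructor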
--- a/theano/sandbox/cuda/blocksparse.py
+++ b/theano/sandbox/cuda/blocksparse.py
@@ -12,7 +12,7 @@ if cuda_available:
                                      opt, GpuFromHost,
                                      HostFromGpu, host_from_gpu,
                                      GpuDimShuffle)
+    from theano.sandbox.cuda.opt_util import alpha_merge, output_merge
 
     class SparseBlockGemvSS(GpuOp):
         """
@@ -647,47 +647,17 @@ if cuda_available:
     # Should be run before elemwise fusion
     @opt.register_opt()
-    @opt.local_optimizer([GpuElemwise])
-    def local_merge_blocksparse_alpha(node):
+    @alpha_merge(SparseBlockOuterSS, alpha_in=5, nd=4)
+    def local_merge_blocksparse_alpha(node, *inputs):
         """
         GpuElemwise{mul}(lr, SparseBlockOuterSS) -> SparseBlockOuterSS(..., alpha=lr)
         """
-        if (isinstance(node.op, GpuElemwise) and
-                node.op.scalar_op == scalar.mul and
-                node.nin == 2):
-            ger = opt.find_node(node.inputs[0], SparseBlockOuterSS)
-            if ger is None:
-                ger = opt.find_node(node.inputs[1], SparseBlockOuterSS)
-                lr = opt.grab_cpu_scalar(node.inputs[0], nd=4)
-            else:
-                lr = opt.grab_cpu_scalar(node.inputs[1], nd=4)
-            if lr is None or ger is None:
-                return None
-            alpha = lr * ger.inputs[5]
-            return [sparse_block_outer_ss(*(ger.inputs[:5] + [alpha]))]
+        return [sparse_block_outer_ss(*inputs)]
 
     @opt.register_opt()
-    @opt.local_optimizer([GpuElemwise])
-    def local_merge_blocksparse_output(node):
-        if (isinstance(node.op, GpuElemwise) and
-                (node.op.scalar_op == scalar.sub or
-                 node.op.scalar_op == scalar.add) and
-                node.nin == 2):
-            ger = opt.find_node(node.inputs[0], SparseBlockOuterSS)
-            W = node.inputs[1]
-            if ger is None:
-                ger = opt.find_node(node.inputs[1], SparseBlockOuterSS)
-                W = node.inputs[0]
-            if ger is None:
-                return None
-            if node.op.scalar_op == scalar.sub:
-                alpha = -ger.inputs[5]
-                W = W - ger.inputs[0]
-            else:
-                alpha = ger.inputs[5]
-                W = W + ger.inputs[0]
-            return [sparse_block_outer_ss(*([W] + ger.inputs[1:5] +
-                                            [alpha]))]
+    @output_merge(SparseBlockOuterSS, alpha_in=5, out_in=0, nd=4)
+    def local_merge_blocksparse_output(node, *inputs):
+        return [sparse_block_outer_ss(*inputs)]
 
     def sparse_block_dot_SS(W, h, inputIdx, b, outputIdx):
--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -10,7 +10,7 @@ import numpy
 
 import theano
 from theano import scalar as scal
-from theano import config, tensor, gof, Constant
+from theano import config, tensor, gof
 import theano.ifelse
 from theano.compile import optdb
@@ -47,7 +47,7 @@ from theano.sandbox.cuda.var import CudaNdarrayConstant
 from theano.sandbox.cuda import gpu_optimizer, register_opt, gpu_seqopt, GpuOp
 from theano.scan_module import scan_utils, scan_op, scan_opt
 from theano.tensor.blas import _is_real_vector, _is_real_matrix
-from theano.tensor import nlinalg, DimShuffle
+from theano.tensor import nlinalg
 from theano.tensor.nnet.Conv3D import Conv3D
 
 try:
@@ -89,37 +89,6 @@ register_opt(name='gpu_constant_folding')(
     tensor.opt.constant_folding)
 
-def grab_cpu_scalar(v, nd):
-    if v.owner is not None:
-        n = v.owner
-        if (isinstance(n.op, GpuDimShuffle) and
-                n.op.new_order == ('x',) * nd):
-            return host_from_gpu(n.inputs[0])
-        elif (isinstance(n.op, DimShuffle) and
-                n.op.new_order == ('x',) * nd):
-            return n.inputs[0]
-        elif isinstance(n.op, GpuFromHost):
-            return grab_cpu_scalar(n.inputs[0], nd=nd)
-        else:
-            return None
-    else:
-        if (isinstance(v, Constant) and
-                v.broadcastable == (True,) * nd):
-            return v.dimshuffle(())
-
-
-def find_node(v, cls):
-    # This digs through possibly redundant transfers to find the node
-    # that has the op class specified.
-    if v.owner is not None:
-        if isinstance(v.owner.op, cls):
-            return v.owner
-        elif (isinstance(v.owner.op, GpuFromHost) and
-              v.owner.inputs[0].owner is not None and
-              isinstance(v.owner.inputs[0].owner.op, HostFromGpu)):
-            return find_node(v.owner.inputs[0].owner.inputs[0], cls)
-        else:
-            return None
-
 # This is a partial list of CPU ops that can be in some circumstance
 # moved to the GPU. This list is used by an optimization.
 # Hopefully, we can keep this list up to date.
--- /dev/null
+++ b/theano/sandbox/cuda/opt_util.py
+from functools import wraps
+
+import numpy
+
+import theano
+from theano import scalar as scal, Constant
+from theano.gof import local_optimizer
+from theano.tensor import DimShuffle
+
+from theano.sandbox.cuda.basic_ops import (
+    GpuFromHost, HostFromGpu, host_from_gpu, GpuDimShuffle, GpuElemwise)
+
+
+def grab_cpu_scalar(v, nd):
+    # Return a CPU scalar equivalent to `v` if `v` is a scalar broadcast
+    # to `nd` dimensions, possibly transferred to the GPU; else None.
+    if v.owner is not None:
+        n = v.owner
+        if (isinstance(n.op, GpuDimShuffle) and
+                n.op.new_order == ('x',) * nd):
+            return host_from_gpu(n.inputs[0])
+        elif (isinstance(n.op, DimShuffle) and
+                n.op.new_order == ('x',) * nd):
+            return n.inputs[0]
+        elif isinstance(n.op, GpuFromHost):
+            return grab_cpu_scalar(n.inputs[0], nd=nd)
+        else:
+            return None
+    else:
+        if (isinstance(v, Constant) and
+                v.broadcastable == (True,) * nd):
+            return v.dimshuffle(())
+
+
+def find_node(v, cls):
+    # This digs through possibly redundant transfers to find the node
+    # that has the op class specified.
+    if v.owner is not None:
+        if isinstance(v.owner.op, cls):
+            return v.owner
+        elif (isinstance(v.owner.op, GpuFromHost) and
+              v.owner.inputs[0].owner is not None and
+              isinstance(v.owner.inputs[0].owner.op, HostFromGpu)):
+            return find_node(v.owner.inputs[0].owner.inputs[0], cls)
+        else:
+            return None
+
+
+def alpha_merge(cls, alpha_in, nd):
+    # Decorator factory: fold a scalar multiplier (e.g. a learning rate)
+    # into the `alpha` input, at position `alpha_in`, of an op of class
+    # `cls` whose tensor arguments have rank `nd`.
+    def wrapper(maker):
+        @local_optimizer([GpuElemwise])
+        @wraps(maker)
+        def opt(node):
+            if (isinstance(node.op, GpuElemwise) and
+                    node.op.scalar_op == scal.mul and
+                    node.nin == 2):
+                targ = find_node(node.inputs[0], cls)
+                if targ is None:
+                    targ = find_node(node.inputs[1], cls)
+                    lr = grab_cpu_scalar(node.inputs[0], nd=nd)
+                else:
+                    lr = grab_cpu_scalar(node.inputs[1], nd=nd)
+                if lr is None or targ is None:
+                    return None
+                inputs = list(targ.inputs)
+                inputs[alpha_in] = lr * targ.inputs[alpha_in]
+                return maker(targ, *inputs)
+        return opt
+    return wrapper
+
+
+def output_merge(cls, alpha_in, out_in, nd):
+    # Decorator factory: fold an elementwise add/sub with another
+    # variable into the accumulated-output input, at position `out_in`,
+    # of an op of class `cls`, negating `alpha` for the sub case
+    # (which matches patterns like W - op(...), e.g. an SGD update).
+    def wrapper(maker):
+        @local_optimizer([GpuElemwise])
+        @wraps(maker)
+        def opt(node):
+            if (isinstance(node.op, GpuElemwise) and
+                    (node.op.scalar_op == scal.sub or
+                     node.op.scalar_op == scal.add) and
+                    node.nin == 2):
+                targ = find_node(node.inputs[0], cls)
+                W = node.inputs[1]
+                if targ is None:
+                    targ = find_node(node.inputs[1], cls)
+                    W = node.inputs[0]
+                if targ is None:
+                    return None
+                if node.op.scalar_op == scal.sub:
+                    alpha = -targ.inputs[alpha_in]
+                    W = W - targ.inputs[out_in]
+                else:
+                    alpha = targ.inputs[alpha_in]
+                    W = W + targ.inputs[out_in]
+                inputs = list(targ.inputs)
+                inputs[out_in] = W
+                inputs[alpha_in] = alpha
+                return maker(targ, *inputs)
+        return opt
+    return wrapper
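As a rough sketch (not part of the commit, and assuming a CUDA-enabled Theano), grab_cpu_scalar is what lets alpha_merge treat a broadcast, possibly GPU-transferred learning rate as a plain CPU scalar:

    import theano.tensor as T
    from theano.sandbox.cuda.basic_ops import gpu_from_host
    from theano.sandbox.cuda.opt_util import grab_cpu_scalar

    lr = T.scalar('lr')
    # Broadcast the scalar to 4 dimensions and move it to the GPU, as a
    # graph like gpu_v * sparse_block_outer_ss(...) would contain.
    gpu_v = gpu_from_host(lr.dimshuffle('x', 'x', 'x', 'x'))
    # Digs back through GpuFromHost and the DimShuffle to recover `lr`,
    # which alpha_merge then multiplies into the op's alpha input.
    assert grab_cpu_scalar(gpu_v, nd=4) is lr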