Commit 96f7cdf1 authored by Frederic Bastien

Add missing GraphToGPULocalOptDB and use it.

Parent 40d372f4
theano/gof/__init__.py
@@ -66,7 +66,7 @@ from theano.gof.opt import (
     OpKeyOptimizer)
 from theano.gof.optdb import \
-    DB, Query, \
+    DB, LocalGroupDB, Query, \
     EquilibriumDB, SequenceDB, ProxyDB
 from theano.gof.toolbox import \
......
theano/gof/opt.py
@@ -1384,6 +1384,48 @@ class LocalOptGroup(LocalOptimizer):
             opt.add_requirements(fgraph)
 
 
+class GraphToGPULocalOptGroup(LocalOptGroup):
+    """
+    This is the equivalent of LocalOptGroup for GraphToGPU.
+
+    The main difference is the signature of the local optimizers:
+    they take (op, context_name, inputs, outputs) instead of a node.
+
+    apply_all_opts=True is not supported.
+
+    """
+    def __init__(self, *optimizers, **kwargs):
+        super(GraphToGPULocalOptGroup, self).__init__(*optimizers, **kwargs)
+        assert self.apply_all_opts is False
+
+    def transform(self, op, context_name, inputs, outputs):
+        if len(self.opts) == 0:
+            return
+        fgraph = outputs[0].fgraph
+        repl = None
+        while True:
+            opts = (self.track_map[type(op)] +
+                    self.track_map[op] +
+                    self.track_map[None])
+            new_repl = None
+            for opt in opts:
+                opt_start = time.time()
+                new_repl = opt.transform(op, context_name, inputs, outputs)
+                opt_finish = time.time()
+                if self.profile:
+                    # Accumulate the elapsed time spent in this optimizer.
+                    self.time_opts[opt] += opt_finish - opt_start
+                    self.process_count[opt] += 1
+                if not new_repl:
+                    continue
+                if self.profile:
+                    self.node_created[opt] += len(
+                        graph.ops(fgraph.variables, new_repl))
+                    self.applied_true[opt] += 1
+                break  # Break from the for loop over optimizations.
+            if not new_repl:  # No optimization applied in the last iteration
+                return repl
+            # Only 1 iteration, or we are at the start of the graph.
+            if not self.apply_all_opts or not new_repl[0].owner:
+                return new_repl
+            if len(new_repl) > 1:
+                s = set(v.owner for v in new_repl)
+                assert len(s) == 1
+            repl = new_repl
+            node = repl[0].owner
+
+
 class OpSub(LocalOptimizer):
     """
......
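The dispatch in GraphToGPULocalOptGroup.transform above is inherited from LocalOptGroup: optimizers are indexed in a track_map by op type, op instance, or None (a wildcard), and tried in that order until one returns a replacement. Below is a minimal, self-contained sketch of that first-match dispatch pattern; the Toy* classes are illustrative stand-ins, not Theano's API.

# Toy sketch of the track_map dispatch used by LocalOptGroup and
# GraphToGPULocalOptGroup; only the dispatch pattern matches Theano.
from collections import defaultdict


class ToyLocalOpt(object):
    def __init__(self, name, tracks, fn):
        self.name = name
        self.tracks = tracks  # list of op types, or [None] for "any op"
        self.fn = fn

    def transform(self, op, *args):
        return self.fn(op, *args)


class ToyOptGroup(object):
    def __init__(self, *optimizers):
        # Index optimizers by what they track: an op type, an op
        # instance, or None (wildcard).
        self.track_map = defaultdict(list)
        for o in optimizers:
            for t in (o.tracks or [None]):
                self.track_map[t].append(o)

    def transform(self, op, *args):
        # Try type-specific, instance-specific, then wildcard
        # optimizers; the first one returning a replacement wins.
        candidates = (self.track_map[type(op)] +
                      self.track_map[op] +
                      self.track_map[None])
        for o in candidates:
            repl = o.transform(op, *args)
            if repl:
                return repl
        return None


class Pool(object):
    pass

group = ToyOptGroup(ToyLocalOpt("lift_pool", [Pool],
                                lambda op, *a: ["GpuPool"]))
print(group.transform(Pool()))  # -> ['GpuPool']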
theano/gof/optdb.py
@@ -405,12 +405,14 @@ class LocalGroupDB(DB):
     """
-    def __init__(self, apply_all_opts=False, profile=False):
+    def __init__(self, apply_all_opts=False, profile=False,
+                 local_opt=opt.LocalOptGroup):
         super(LocalGroupDB, self).__init__()
         self.failure_callback = None
         self.apply_all_opts = apply_all_opts
         self.profile = profile
         self.__position__ = {}
+        self.local_opt = local_opt
 
     def register(self, name, obj, *tags, **kwargs):
         super(LocalGroupDB, self).register(name, obj, *tags)
@@ -429,9 +431,9 @@ class LocalGroupDB(DB):
         opts = list(super(LocalGroupDB, self).query(*tags, **kwtags))
         opts.sort(key=lambda obj: (self.__position__[obj.name], obj.name))
-        ret = opt.LocalOptGroup(*opts,
-                                apply_all_opts=self.apply_all_opts,
-                                profile=self.profile)
+        ret = self.local_opt(*opts,
+                             apply_all_opts=self.apply_all_opts,
+                             profile=self.profile)
         return ret
......
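The only change to LocalGroupDB is making the group class injectable, so query() can build a GraphToGPULocalOptGroup instead of a LocalOptGroup. A toy sketch of this constructor-injection pattern follows; ToyGroupDB and the group classes are illustrative names, not Theano's.

# Toy sketch of the LocalGroupDB change: the class used to wrap the
# queried optimizers is now a constructor parameter, not hard-coded.
class ToyGroup(object):
    def __init__(self, *opts):
        self.opts = opts


class ToyGPUGroup(ToyGroup):
    """Variant with a different transform() signature, as in the patch."""


class ToyGroupDB(object):
    def __init__(self, local_opt=ToyGroup):
        self.entries = []
        self.local_opt = local_opt  # constructor used by query()

    def register(self, name, obj, position):
        self.entries.append((position, name, obj))

    def query(self):
        # Sort by (position, name) and wrap in the configured group class.
        ordered = [obj for _, _, obj in sorted(self.entries)]
        return self.local_opt(*ordered)


db = ToyGroupDB(local_opt=ToyGPUGroup)
db.register("b", "opt_b", position=1)
db.register("a", "opt_a", position=0)
assert isinstance(db.query(), ToyGPUGroup)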
theano/gpuarray/dnn.py
@@ -38,7 +38,7 @@ from .elemwise import GpuElemwise
 # These don't exist in gpuarray
 # GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
 from .nnet import GpuSoftmax
-from .opt import (gpu_seqopt, register_opt,
+from .opt import (gpu_seqopt, register_opt, pool_db, pool_db2,
                   op_lifter, register_opt2)
 from .opt_util import alpha_merge, output_merge, inplace_allocempty, pad_dims, unpad_dims
@@ -2736,9 +2736,6 @@ def local_dnn_convi_output_merge(node, *inputs):
     return [gpu_dnn_conv_gradI(algo=node.op.algo)(*inputs)]
 
-@register_opt('cudnn', 'fast_compile')
-@op_lifter([Pool])
-@register_opt2([Pool], 'fast_compile', 'cudnn')
 def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs):
     if not dnn_available(ctx_name):
         raise_no_cudnn()
@@ -2758,11 +2755,16 @@ def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs):
         img_padded = pad_dims(img, 2, nd)
         ret_padded = dnn_pool(img_padded, ws, stride=stride, pad=pad, mode=mode)
         return unpad_dims(ret_padded, img, 2, nd)
+pool_db.register("local_gpua_pool_dnn_alternative",
+                 op_lifter([Pool])(local_gpua_pool_dnn_alternative),
+                 'gpuarray', 'fast_compile', 'fast_run', 'cudnn',
+                 position=0)
+pool_db2.register("local_gpua_pool_dnn_alternative",
+                  local_optimizer([Pool])(local_gpua_pool_dnn_alternative),
+                  'gpuarray', 'fast_compile', 'fast_run', 'cudnn',
+                  position=0)
 
-@register_opt('cudnn', 'fast_compile')
-@op_lifter([MaxPoolGrad])
-@register_opt2([MaxPoolGrad], 'fast_compile', 'cudnn')
 def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
     if not dnn_available(ctx_name):
         raise_no_cudnn()
@@ -2797,11 +2799,16 @@ def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
                                  stride,
                                  pad)
         return unpad_dims(ret_padded, inp, 2, nd)
+pool_db.register("local_gpua_pool_dnn_grad_stride",
+                 op_lifter([MaxPoolGrad])(local_gpua_pool_dnn_grad_stride),
+                 'gpuarray', 'fast_compile', 'fast_run', 'cudnn',
+                 position=0)
+pool_db2.register("local_gpua_pool_dnn_grad_stride",
+                  local_optimizer([MaxPoolGrad])(local_gpua_pool_dnn_grad_stride),
+                  'gpuarray', 'fast_compile', 'fast_run', 'cudnn',
+                  position=0)
 
-@register_opt('cudnn', 'fast_compile')
-@op_lifter([AveragePoolGrad])
-@register_opt2([AveragePoolGrad], 'fast_compile', 'cudnn')
 def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
     if not dnn_available(ctx_name):
         raise_no_cudnn()
@@ -2832,6 +2839,14 @@ def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
                                  stride,
                                  pad)
         return unpad_dims(ret_padded, inp, 2, nd)
+pool_db.register("local_gpua_avg_pool_dnn_grad_stride",
+                 op_lifter([AveragePoolGrad])(local_gpua_avg_pool_dnn_grad_stride),
+                 'gpuarray', 'fast_compile', 'fast_run', 'cudnn',
+                 position=0)
+pool_db2.register("local_gpua_avg_pool_dnn_grad_stride",
+                  local_optimizer([AveragePoolGrad])(local_gpua_avg_pool_dnn_grad_stride),
+                  'gpuarray', 'fast_compile', 'fast_run', 'cudnn',
+                  position=0)
 
 @register_opt('cudnn', 'fast_compile')
......
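These registrations put the cuDNN pooling lifters at position=0 in pool_db/pool_db2, ahead of the generic GPU versions registered at position=1 in theano/gpuarray/opt.py below. Since the group applies the first optimizer that returns a replacement, the ordering replaces the old dnn_available() guard that the generic lifters used to carry. A toy sketch of that first-match fallback, with hypothetical names:

# Toy sketch of position-ordered, first-replacement-wins dispatch.
# dnn_ok and the optimizer names are hypothetical, not Theano's.
dnn_ok = False  # pretend cuDNN is unavailable on this machine

def pool_with_cudnn(node):
    return ["GpuDnnPool"] if dnn_ok else None  # decline without cuDNN

def pool_generic_gpu(node):
    return ["GpuPool"]

registry = [(0, pool_with_cudnn), (1, pool_generic_gpu)]

def apply_first(node):
    # Run optimizers in position order; the first replacement wins.
    for _, opt in sorted(registry):
        repl = opt(node)
        if repl is not None:
            return repl

print(apply_first("pool_node"))  # -> ['GpuPool'] (fallback path taken)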
theano/gpuarray/opt.py
@@ -13,6 +13,7 @@ from theano import tensor, scalar, gof, config
 from theano.compile import optdb
 from theano.compile.ops import shape_i
 from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer,
+                        LocalGroupDB,
                         SequenceDB, Optimizer, DB, toolbox, graph)
 from theano.ifelse import IfElse
 from theano.misc.ordered_set import OrderedSet
@@ -129,7 +130,10 @@ def register_opt2(tracks, *tags, **kwargs):
     '''
     def f(local_opt):
         name = (kwargs and kwargs.pop('name')) or local_opt.__name__
-        opt = theano.gof.local_optimizer(tracks)(local_opt)
+        if isinstance(local_opt, theano.gof.DB):
+            opt = local_opt
+        else:
+            opt = theano.gof.local_optimizer(tracks)(local_opt)
         gpu_optimizer2.register(name, opt, 'fast_run', 'gpuarray', *tags)
         return local_opt
     return f
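register_opt2 can now register a pre-built optimizer database (such as pool_db2 below) as well as a plain local-optimizer function. A self-contained sketch of this accept-function-or-DB pattern; ToyDB, REGISTRY, and register2 are illustrative stand-ins, not Theano's names.

# Toy sketch of a decorator that accepts either a raw function (wrapped
# before registration) or an already-built optimizer database (as-is).
REGISTRY = {}


class ToyDB(object):
    """Stand-in for theano.gof.DB."""


def register2(name=None):
    def f(local_opt):
        reg_name = name or local_opt.__name__
        if isinstance(local_opt, ToyDB):
            opt = local_opt               # already a DB: register unchanged
        else:
            opt = ("wrapped", local_opt)  # plain function: wrap it first
        REGISTRY[reg_name] = opt
        return local_opt
    return f


db = ToyDB()
db.__name__ = "pool_db2"  # DBs need a __name__, as the patch sets below
register2()(db)
assert REGISTRY["pool_db2"] is db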
@@ -1592,15 +1596,8 @@ def local_gpua_lift_abstractconv_graph(op, context_name, inputs, outputs):
     return [op(*inps)]
 
-@register_opt()
-@op_lifter([pool.Pool])
-@register_opt2([pool.Pool])
 def local_gpu_pool(op, ctx_name, inputs, outputs):
-    from .dnn import dnn_available
     assert op.__props__ == ('ignore_border', 'mode', 'ndim')
-    if op.ignore_border and dnn_available(ctx_name):
-        return
     inp, ws, stride, pad = inputs
     nd = op.ndim
     if nd not in (2, 3):
@@ -1615,16 +1612,23 @@ def local_gpu_pool(op, ctx_name, inputs, outputs):
         inp_padded = pad_dims(inp, 2, nd)
         ret_padded = op(inp_padded, ws, stride, pad)
         return unpad_dims(ret_padded, inp, 2, nd)
+pool_db = LocalGroupDB()
+pool_db2 = LocalGroupDB(local_opt=theano.gof.opt.GraphToGPULocalOptGroup)
+pool_db2.__name__ = "pool_db2"
+lifter = op_lifter([pool.Pool])(local_gpu_pool)
+pool_db.register("local_gpu_pool", lifter,
+                 'gpuarray', 'fast_compile', 'fast_run',
+                 position=1)
+pool_db2.register("local_gpu_pool",
+                  local_optimizer([pool.Pool])(local_gpu_pool),
+                  'gpuarray', 'fast_compile', 'fast_run',
+                  position=1)
+register_opt('fast_compile', name='pool_db')(pool_db)
+register_opt2([pool.Pool], 'fast_compile', name='pool_db2')(pool_db2)
 
-@register_opt()
-@op_lifter([pool.MaxPoolGrad])
-@register_opt2([pool.MaxPoolGrad])
 def local_gpu_max_pool_grad(op, ctx_name, inputs, outputs):
-    from .dnn import dnn_available
     assert op.__props__ == ('ignore_border', 'mode', 'ndim')
-    if op.ignore_border and dnn_available(ctx_name):
-        return
     inp, out, out_grad, ws, stride, pad = inputs
     nd = op.ndim
@@ -1645,16 +1649,18 @@ def local_gpu_max_pool_grad(op, ctx_name, inputs, outputs):
         ret_padded = op(inp_padded, out_padded, out_grad_padded,
                         ws, stride, pad)
         return unpad_dims(ret_padded, inp, 2, nd)
+lifter = op_lifter([pool.MaxPoolGrad])(local_gpu_max_pool_grad)
+pool_db.register("local_gpu_max_pool_grad", lifter,
+                 'gpuarray', 'fast_compile', 'fast_run',
+                 position=1)
+pool_db2.register("local_gpu_max_pool_grad",
+                  local_optimizer([pool.MaxPoolGrad])(local_gpu_max_pool_grad),
+                  'gpuarray', 'fast_compile', 'fast_run',
+                  position=1)
 
-@register_opt()
-@op_lifter([pool.AveragePoolGrad])
-@register_opt2([pool.AveragePoolGrad])
 def local_gpu_average_pool_grad(op, ctx_name, inputs, outputs):
-    from .dnn import dnn_available
     assert op.__props__ == ('ignore_border', 'mode', 'ndim')
-    if op.ignore_border and dnn_available(ctx_name):
-        return
     inp, out_grad, ws, stride, pad = inputs
     nd = op.ndim
@@ -1673,6 +1679,14 @@ def local_gpu_average_pool_grad(op, ctx_name, inputs, outputs):
         ret_padded = op(inp_padded, out_grad_padded,
                         ws, stride, pad)
         return unpad_dims(ret_padded, inp, 2, nd)
+lifter = op_lifter([pool.AveragePoolGrad])(local_gpu_average_pool_grad)
+pool_db.register("local_gpu_average_pool_grad", lifter,
+                 'gpuarray', 'fast_compile', 'fast_run',
+                 position=1)
+pool_db2.register("local_gpu_average_pool_grad",
+                  local_optimizer([pool.AveragePoolGrad])(local_gpu_average_pool_grad),
+                  'gpuarray', 'fast_compile', 'fast_run',
+                  position=1)
 
 @register_opt()
......
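Each pooling lifter above is registered twice: once wrapped with op_lifter for the node-based optimizer (pool_db), and once with local_optimizer for the GraphToGPU signature (pool_db2). The sketch below illustrates only the signature-adaptation side of this, under the assumption that the body is written against (op, context_name, inputs, outputs); Theano's op_lifter does more (it also decides whether the node should move to the GPU at all), and all names here are illustrative.

# Toy sketch: one optimizer body, two calling conventions.
def local_gpu_pool_body(op, ctx_name, inputs, outputs):
    return ["GpuPool(%s)" % (inputs,)]

def as_node_opt(fn):
    # Node-based convention: unpack the node, then call the body.
    def node_opt(node):
        return fn(node.op, None, node.inputs, node.outputs)
    return node_opt

class Node(object):
    def __init__(self, op, inputs, outputs):
        self.op, self.inputs, self.outputs = op, inputs, outputs

node = Node(op="pool", inputs=["x"], outputs=["y"])
node_version = as_node_opt(local_gpu_pool_body)  # pool_db-style entry
graph_to_gpu_version = local_gpu_pool_body       # pool_db2-style entry
print(node_version(node))
print(graph_to_gpu_version("pool", "ctx", ["x"], ["y"]))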
theano/gpuarray/tests/test_pool.py
@@ -54,7 +54,7 @@ def test_pool2d():
     ref_mode = copy.copy(mode_without_gpu)
     ref_mode.check_py_code = False
-    gpu_mode = copy.copy(mode_with_gpu)
+    gpu_mode = copy.copy(mode_with_gpu).excluding("cudnn")
     gpu_mode.check_py_code = False
 
     for shp in shps:
@@ -147,7 +147,7 @@ def test_pool3d():
     ref_mode = copy.copy(mode_without_gpu)
     ref_mode.check_py_code = False
-    gpu_mode = copy.copy(mode_with_gpu)
+    gpu_mode = copy.copy(mode_with_gpu).excluding("cudnn")
     gpu_mode.check_py_code = False
 
     for shp in shps:
......
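With the cuDNN pooling optimizations now registered in the same group ahead of the generic GPU versions, these tests exclude the 'cudnn' tag, presumably so that the generic GpuPool path keeps being exercised even on machines where cuDNN is available.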