提交 6be35ca3，作者：Pascal Lamblin

Merge pull request #2533 from nouiz/cleanup

Code simplification/small speed up for downsample
......@@ -1676,8 +1676,10 @@ class OpKeyOptimizer(NavigatorOptimizer):
class ChangeTracker:
def __init__(self):
self.changed = False
self.nb_imported = 0
def on_import(self, fgraph, node, reason):
self.nb_imported += 1
self.changed = True
def on_change_input(self, fgraph, node, i, r, new_r, reason):
......@@ -1742,13 +1744,14 @@ class EquilibriumOptimizer(NavigatorOptimizer):
def add_requirements(self, fgraph):
super(EquilibriumOptimizer, self).add_requirements(fgraph)
fgraph.attach_feature(ChangeTracker())
for opt in self.get_local_optimizers():
opt.add_requirements(fgraph)
for opt in self.global_optimizers:
opt.add_requirements(fgraph)
def apply(self, fgraph, start_from=None):
change_tracker = ChangeTracker()
fgraph.attach_feature(change_tracker)
if start_from is None:
start_from = fgraph.outputs
else:
......@@ -1769,9 +1772,11 @@ class EquilibriumOptimizer(NavigatorOptimizer):
time_opts = {}
io_toposort_timing = []
nb_nodes = []
node_created = {}
for opt in self.global_optimizers + list(self.get_local_optimizers()):
global_process_count.setdefault(opt, 0)
time_opts.setdefault(opt, 0)
node_created.setdefault(opt, 0)
while changed and not max_use_abort:
process_count = {}
......@@ -1780,15 +1785,17 @@ class EquilibriumOptimizer(NavigatorOptimizer):
#apply global optimizers
for gopt in self.global_optimizers:
fgraph.change_tracker.reset()
change_tracker.reset()
nb = change_tracker.nb_imported
t_opt = time.time()
gopt.apply(fgraph)
time_opts[gopt] += time.time() - t_opt
if fgraph.change_tracker.changed:
if change_tracker.changed:
process_count.setdefault(gopt, 0)
process_count[gopt] += 1
global_process_count[gopt] += 1
changed = True
node_created[gopt] += change_tracker.nb_imported - nb
if global_process_count[gopt] > max_use:
max_use_abort = True
opt_name = (getattr(gopt, "name", None)
......@@ -1825,6 +1832,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
for lopt in (self.local_optimizers_all +
self.local_optimizers_map.get(type(node.op), []) +
self.local_optimizers_map.get(node.op, [])):
nb = change_tracker.nb_imported
t_opt = time.time()
lopt_change = self.process_node(fgraph, node, lopt)
time_opts[lopt] += time.time() - t_opt
......@@ -1833,6 +1841,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
process_count[lopt] += 1
global_process_count[lopt] += 1
changed = True
node_created[lopt] += change_tracker.nb_imported - nb
if global_process_count[lopt] > max_use:
max_use_abort = True
opt_name = (getattr(lopt, "name", None)
......@@ -1853,10 +1862,11 @@ class EquilibriumOptimizer(NavigatorOptimizer):
+ ". You can safely raise the current threshold of "
+ "%f with the theano flag 'optdb.max_use_ratio'." %
config.optdb.max_use_ratio)
fgraph.remove_feature(change_tracker)
return (self, loop_timing, loop_process_count,
(start_nb_nodes, end_nb_nodes, max_nb_nodes),
global_opt_timing, nb_nodes, time_opts, io_toposort_timing)
global_opt_timing, nb_nodes, time_opts, io_toposort_timing,
node_created)
def print_summary(self, stream=sys.stdout, level=0, depth=-1):
name = getattr(self, 'name', None)
......@@ -1871,7 +1881,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
def print_profile(stream, prof, level=0):
(opt, loop_timing, loop_process_count,
(start_nb_nodes, end_nb_nodes, max_nb_nodes),
global_opt_timing, nb_nodes, time_opts, io_toposort_timing) = prof
global_opt_timing, nb_nodes, time_opts, io_toposort_timing,
node_created) = prof
blanc = (' ' * level)
print >> stream, blanc, "EquilibriumOptimizer",
......@@ -1915,18 +1926,19 @@ class EquilibriumOptimizer(NavigatorOptimizer):
process_count[o] += v
for opt, count in process_count.iteritems():
if count > 0:
count_opt.append((time_opts[opt], count, opt))
count_opt.append((time_opts[opt], count,
node_created[opt], opt))
else:
not_used.append((time_opts[opt], opt))
not_used_time += time_opts[opt]
if count_opt:
print >> stream, blanc, \
' times - times applied - name:'
' times - times applied - nb node created - name:'
count_opt.sort()
for (t, count, opt) in count_opt[::-1]:
print >> stream, blanc, ' %.3fs - %d - %s' % (
t, count, opt)
for (t, count, n_created, opt) in count_opt[::-1]:
print >> stream, blanc, ' %.3fs - %d - %d - %s' % (
t, count, n_created, opt)
print >> stream, blanc, ' %.3fs - in %d optimization that where not used (display only those with a runtime > 0)' % (
not_used_time, len(not_used))
not_used.sort()
......
......@@ -304,7 +304,11 @@ def local_gpu_elemwise_1(node):
def local_gpu_split(node):
if isinstance(node.op, tensor.Split):
input = node.inputs[0]
if input.owner and isinstance(input.owner.op, HostFromGpu):
outs_clients = reduce(list.__add__,
[out.clients for out in node.outputs])
if (input.owner and isinstance(input.owner.op, HostFromGpu) or
any([c != 'output' and isinstance(c.op, GpuFromHost) for c, idx
in outs_clients])):
new_op = GpuSplit(node.op.len_splits)
split_res = new_op(gpu_from_host(input), *node.inputs[1:])
return [host_from_gpu(o) for o in split_res]
......
......@@ -289,7 +289,7 @@ def test_local_gpu_subtensor():
assert any([isinstance(node.op, cuda.GpuElemwise) for node in topo])
def test_local_split():
def test_local_gpu_split():
""" Test that the GpuSplit op is being applied and works """
# Construct symbolic split
x = tensor.fvector()
......@@ -310,6 +310,17 @@ def test_local_split():
# Check equality
assert all([(cpu == gpu).all() for cpu, gpu in zip(cpu_res, gpu_res)])
# Test the other path of the optimizer, when it is the output that
# is moved to the GPU.
ra = cuda.gpu_from_host(ra)
f = theano.function([x, splits], [ra, rb, rc],
mode=mode_with_gpu.excluding("InputToGpuOptimizer"))
gpu_res = f([0, 1, 2, 3, 4, 5], [3, 2, 1])
l = f.maker.fgraph.toposort()
assert any([isinstance(o.op, theano.sandbox.cuda.GpuSplit) for o in l])
# Check equality
assert all([(cpu == gpu).all() for cpu, gpu in zip(cpu_res, gpu_res)])
def test_print_op():
""" Test that print ops don't block gpu optimization"""
......
......@@ -197,13 +197,11 @@ class DownsampleFactorMax(Op):
'DownsampleFactorMax requires 4D input for now')
z_shape = self.out_shape(x.shape, self.ds, self.ignore_border, self.st)
if (z[0] is None) or (z[0].shape != z_shape):
z[0] = numpy.zeros(self.out_shape(x.shape, self.ds,
self.ignore_border, self.st))
z[0] = theano._asarray(z[0], dtype=x.dtype)
z[0] = numpy.empty(self.out_shape(x.shape, self.ds,
self.ignore_border, self.st),
dtype=x.dtype)
zz = z[0]
## zz needs to be initialized with -inf for the following to work
zz -= numpy.inf
#number of pooling output rows
pr = zz.shape[-2]
#number of pooling output cols
......@@ -221,11 +219,8 @@ class DownsampleFactorMax(Op):
for c in xrange(pc):
col_st = c * st1
col_end = __builtin__.min(col_st + ds1, img_cols)
for row_ind in xrange(row_st, row_end):
for col_ind in xrange(col_st, col_end):
zz[n, k, r, c] = \
__builtin__.max(zz[n, k, r, c],
x[n, k, row_ind, col_ind])
zz[n, k, r, c] = x[
n, k, row_st:row_end, col_st:col_end].max()
def infer_shape(self, node, in_shapes):
shp = self.out_shape(in_shapes[0], self.ds,
......@@ -594,8 +589,8 @@ class DownsampleFactorMaxGradGrad(Op):
z_shape = self.out_shape(x.shape, self.ds, self.ignore_border, self.st)
if (z[0] is None) or (z[0].shape != z_shape):
z[0] = numpy.zeros(self.out_shape(x.shape, self.ds,
self.ignore_border, self.st))
z[0] = theano._asarray(z[0], dtype=x.dtype)
self.ignore_border, self.st),
dtype=x.dtype)
ggz = z[0]
#number of pooling output rows
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请注册或者登录后发表评论