Commit b5af3406, authored by Frédéric Bastien

Merge pull request #1669 from abergeron/eq_opt

Make EquilibriumOptimizer use a dict to map each tracked op (class or instance) to the local optimizers that apply to it, rather than running every local optimizer on every node.
...@@ -736,6 +736,14 @@ class LocalOptimizer(object): ...@@ -736,6 +736,14 @@ class LocalOptimizer(object):
_optimizer_idx[0] += 1 _optimizer_idx[0] += 1
return self._optimizer_idx return self._optimizer_idx
def tracks(self):
"""
Return the list of op classes that this opt applies to.
Return None to apply to all nodes.
"""
return None
def transform(self, node): def transform(self, node):
"""Transform a subgraph whose output is `node`. """Transform a subgraph whose output is `node`.
...@@ -772,8 +780,6 @@ class LocalOptimizer(object): ...@@ -772,8 +780,6 @@ class LocalOptimizer(object):
class FromFunctionLocalOptimizer(LocalOptimizer): class FromFunctionLocalOptimizer(LocalOptimizer):
"""WRITEME""" """WRITEME"""
def __init__(self, fn, tracks=None): def __init__(self, fn, tracks=None):
if tracks is None:
tracks = []
self.transform = fn self.transform = fn
self._tracks = tracks self._tracks = tracks
...@@ -791,9 +797,15 @@ class FromFunctionLocalOptimizer(LocalOptimizer): ...@@ -791,9 +797,15 @@ class FromFunctionLocalOptimizer(LocalOptimizer):
id(self)) id(self))
def local_optimizer(*tracks): def local_optimizer(tracks):
def decorator(f): def decorator(f):
"""WRITEME""" """WRITEME"""
if tracks is not None:
if len(tracks) is 0:
raise ValueError, ("Use None instead of an empty list to apply to all nodes.", f.__module__, f.__name__)
for t in tracks:
if not (isinstance(t, op.Op) or issubclass(t, op.PureOp)):
raise ValueError, ("Tracks are op classes or instances", f.__module__, f.__name__)
rval = FromFunctionLocalOptimizer(f, tracks) rval = FromFunctionLocalOptimizer(f, tracks)
rval.__name__ = f.__name__ rval.__name__ = f.__name__
return rval return rval
...@@ -870,7 +882,7 @@ class OpSub(LocalOptimizer): ...@@ -870,7 +882,7 @@ class OpSub(LocalOptimizer):
return self.op1 return self.op1
def tracks(self): def tracks(self):
return [[self.op1]] return [self.op1]
def transform(self, node): def transform(self, node):
if node.op != self.op1: if node.op != self.op1:
...@@ -901,7 +913,7 @@ class OpRemove(LocalOptimizer): ...@@ -901,7 +913,7 @@ class OpRemove(LocalOptimizer):
return self.op return self.op
def tracks(self): def tracks(self):
return [[self.op]] return [self.op]
def transform(self, node): def transform(self, node):
if node.op != self.op: if node.op != self.op:
...@@ -1008,17 +1020,7 @@ class PatternSub(LocalOptimizer): ...@@ -1008,17 +1020,7 @@ class PatternSub(LocalOptimizer):
return self.op return self.op
def tracks(self): def tracks(self):
def helper(pattern, sofar): return [self.op]
if isinstance(pattern, (list, tuple)):
sofar = sofar + (pattern[0],)
return reduce(tuple.__add__,
tuple(helper(p, sofar) for p in pattern[1:]),
())
elif isinstance(pattern, dict):
return helper(pattern['pattern'], sofar)
else:
return (sofar,)
return set(helper(self.in_pattern, ()))
def transform(self, node): def transform(self, node):
""" """
...@@ -1500,12 +1502,17 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1500,12 +1502,17 @@ class EquilibriumOptimizer(NavigatorOptimizer):
None, None,
ignore_newtrees=True, ignore_newtrees=True,
failure_callback=failure_callback) failure_callback=failure_callback)
self.local_optimizers = [] self.local_optimizers_map = dict()
self.local_optimizers_all = []
self.global_optimizers = [] self.global_optimizers = []
for opt in optimizers: for opt in optimizers:
if isinstance(opt, LocalOptimizer): if isinstance(opt, LocalOptimizer):
self.local_optimizers.append(opt) if opt.tracks() is None:
self.local_optimizers_all.append(opt)
else:
for c in opt.tracks():
self.local_optimizers_map.setdefault(c, []).append(opt)
else: else:
self.global_optimizers.append(opt) self.global_optimizers.append(opt)
self.max_depth = max_depth self.max_depth = max_depth
...@@ -1513,10 +1520,21 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1513,10 +1520,21 @@ class EquilibriumOptimizer(NavigatorOptimizer):
assert self.max_use_ratio is not None, ( assert self.max_use_ratio is not None, (
'max_use_ratio has to be a number') 'max_use_ratio has to be a number')
def get_local_optimizers(self):
for opt in self.local_optimizers_all:
yield opt
# if repeat is not a problem we can drop the set
s = set()
for lopt in self.local_optimizers_map.values():
for opt in lopt:
if opt not in s:
yield opt
s.add(opt)
def add_requirements(self, fgraph): def add_requirements(self, fgraph):
super(EquilibriumOptimizer, self).add_requirements(fgraph) super(EquilibriumOptimizer, self).add_requirements(fgraph)
fgraph.attach_feature(ChangeTracker()) fgraph.attach_feature(ChangeTracker())
for opt in self.local_optimizers: for opt in self.get_local_optimizers():
opt.add_requirements(fgraph) opt.add_requirements(fgraph)
for opt in self.global_optimizers: for opt in self.global_optimizers:
opt.add_requirements(fgraph) opt.add_requirements(fgraph)
...@@ -1542,7 +1560,7 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1542,7 +1560,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
time_opts = {} time_opts = {}
io_toposort_timing = [] io_toposort_timing = []
nb_nodes = [] nb_nodes = []
for opt in self.global_optimizers + self.local_optimizers: for opt in self.global_optimizers + list(self.get_local_optimizers()):
global_process_count.setdefault(opt, 0) global_process_count.setdefault(opt, 0)
time_opts.setdefault(opt, 0) time_opts.setdefault(opt, 0)
...@@ -1595,7 +1613,9 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1595,7 +1613,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
node = q.pop() node = q.pop()
current_node = node current_node = node
for lopt in self.local_optimizers: for lopt in (self.local_optimizers_all +
self.local_optimizers_map.get(type(node.op), []) +
self.local_optimizers_map.get(node.op, [])):
t_opt = time.time() t_opt = time.time()
lopt_change = self.process_node(fgraph, node, lopt) lopt_change = self.process_node(fgraph, node, lopt)
time_opts[lopt] += time.time() - t_opt time_opts[lopt] += time.time() - t_opt
...@@ -1634,7 +1654,7 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1634,7 +1654,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
print >> stream, "%s%s %s id=%i" % ( print >> stream, "%s%s %s id=%i" % (
(' ' * level), self.__class__.__name__, name, id(self)) (' ' * level), self.__class__.__name__, name, id(self))
if depth != 0: if depth != 0:
for lopt in self.local_optimizers: for lopt in self.get_local_optimizers():
lopt.print_summary(stream, level=(level + 2), lopt.print_summary(stream, level=(level + 2),
depth=(depth - 1)) depth=(depth - 1))
...@@ -1654,7 +1674,7 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1654,7 +1674,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
start_nb_nodes, end_nb_nodes, max_nb_nodes) start_nb_nodes, end_nb_nodes, max_nb_nodes)
print >> stream, blanc, " time io_toposort %.3fs" % sum( print >> stream, blanc, " time io_toposort %.3fs" % sum(
io_toposort_timing) io_toposort_timing)
s = sum([time_opts[o] for o in opt.local_optimizers]) s = sum([time_opts[o] for o in opt.get_local_optimizers()])
print >> stream, blanc, " time in local optimizers %.3fs" % s print >> stream, blanc, " time in local optimizers %.3fs" % s
s = sum([time_opts[o] for o in opt.global_optimizers]) s = sum([time_opts[o] for o in opt.global_optimizers])
print >> stream, blanc, " time in global optimizers %.3fs" % s print >> stream, blanc, " time in global optimizers %.3fs" % s
...@@ -1679,7 +1699,7 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1679,7 +1699,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
not_used = 0 not_used = 0
not_used_time = 0 not_used_time = 0
process_count = {} process_count = {}
for o in opt.global_optimizers + opt.local_optimizers: for o in opt.global_optimizers + list(opt.get_local_optimizers()):
process_count.setdefault(o, 0) process_count.setdefault(o, 0)
for count in loop_process_count: for count in loop_process_count:
for o, v in count.iteritems(): for o, v in count.iteritems():
...@@ -1707,8 +1727,8 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1707,8 +1727,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
#(opt, loop_timing, loop_process_count, max_nb_nodes, #(opt, loop_timing, loop_process_count, max_nb_nodes,
# global_opt_timing, nb_nodes, time_opts, io_toposort_timing) = prof1 # global_opt_timing, nb_nodes, time_opts, io_toposort_timing) = prof1
local_optimizers = set(prof1[0].local_optimizers).union( local_optimizers = set(prof1[0].get_local_optimizers()).union(
prof2[0].local_optimizers) prof2[0].get_local_optimizers())
global_optimizers = set(prof1[0].global_optimizers).union( global_optimizers = set(prof1[0].global_optimizers).union(
prof2[0].global_optimizers) prof2[0].global_optimizers)
new_opt = EquilibriumOptimizer( new_opt = EquilibriumOptimizer(
......
...@@ -384,7 +384,7 @@ def ifelse(condition, then_branch, else_branch, name=None): ...@@ -384,7 +384,7 @@ def ifelse(condition, then_branch, else_branch, name=None):
return tuple(rval) return tuple(rval)
@gof.local_optimizer([None]) @gof.local_optimizer([IfElse])
def cond_make_inplace(node): def cond_make_inplace(node):
op = node.op op = node.op
if isinstance(op, IfElse) and not op.as_view: if isinstance(op, IfElse) and not op.as_view:
...@@ -445,7 +445,7 @@ acceptable_ops = (theano.tensor.basic.Dot, ...@@ -445,7 +445,7 @@ acceptable_ops = (theano.tensor.basic.Dot,
theano.tensor.elemwise.DimShuffle) theano.tensor.elemwise.DimShuffle)
@gof.local_optimizer([None]) @gof.local_optimizer(acceptable_ops)
def ifelse_lift_single_if_through_acceptable_ops(main_node): def ifelse_lift_single_if_through_acceptable_ops(main_node):
"""This optimization lifts up certain ifelse instances. """This optimization lifts up certain ifelse instances.
...@@ -493,7 +493,7 @@ def ifelse_lift_single_if_through_acceptable_ops(main_node): ...@@ -493,7 +493,7 @@ def ifelse_lift_single_if_through_acceptable_ops(main_node):
return nw_outs return nw_outs
@gof.local_optimizer([None]) @gof.local_optimizer([IfElse])
def cond_merge_ifs_true(node): def cond_merge_ifs_true(node):
op = node.op op = node.op
if not isinstance(op, IfElse): if not isinstance(op, IfElse):
...@@ -517,7 +517,7 @@ def cond_merge_ifs_true(node): ...@@ -517,7 +517,7 @@ def cond_merge_ifs_true(node):
return op(*old_ins, **dict(return_list=True)) return op(*old_ins, **dict(return_list=True))
@gof.local_optimizer([None]) @gof.local_optimizer([IfElse])
def cond_merge_ifs_false(node): def cond_merge_ifs_false(node):
op = node.op op = node.op
if not isinstance(op, IfElse): if not isinstance(op, IfElse):
...@@ -592,7 +592,7 @@ class CondMerge(gof.Optimizer): ...@@ -592,7 +592,7 @@ class CondMerge(gof.Optimizer):
fgraph.replace_all_validate(pairs, reason='cond_merge') fgraph.replace_all_validate(pairs, reason='cond_merge')
@gof.local_optimizer([None]) @gof.local_optimizer([IfElse])
def cond_remove_identical(node): def cond_remove_identical(node):
op = node.op op = node.op
...@@ -643,7 +643,7 @@ def cond_remove_identical(node): ...@@ -643,7 +643,7 @@ def cond_remove_identical(node):
return rval return rval
@gof.local_optimizer([None]) @gof.local_optimizer([IfElse])
def cond_merge_random_op(main_node): def cond_merge_random_op(main_node):
if isinstance(main_node.op, IfElse): if isinstance(main_node.op, IfElse):
return False return False
......
...@@ -284,7 +284,7 @@ conv_rows_stack( float* img, float* kern, float* bias, float* out, ...@@ -284,7 +284,7 @@ conv_rows_stack( float* img, float* kern, float* bias, float* out,
gpu_convd = GpuConv3D() gpu_convd = GpuConv3D()
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([Conv3D])
def local_gpu_conv3d(node): def local_gpu_conv3d(node):
if isinstance(node.op, Conv3D): if isinstance(node.op, Conv3D):
if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]): if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]):
......
...@@ -341,7 +341,7 @@ convgrad_rows_stack( float* img, float* dCdH, float* dCdW, ...@@ -341,7 +341,7 @@ convgrad_rows_stack( float* img, float* dCdH, float* dCdW,
gpu_conv_grad3d = GpuConvGrad3D() gpu_conv_grad3d = GpuConvGrad3D()
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([ConvGrad3D])
def local_gpu_conv_gradd(node): def local_gpu_conv_gradd(node):
if isinstance(node.op, ConvGrad3D): if isinstance(node.op, ConvGrad3D):
if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]): if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]):
......
...@@ -348,7 +348,7 @@ conv_transp_rows_stack( float* H, float* kern, float* bias, float* R, ...@@ -348,7 +348,7 @@ conv_transp_rows_stack( float* H, float* kern, float* bias, float* R,
gpu_conv_transpd = GpuConvTransp3D() gpu_conv_transpd = GpuConvTransp3D()
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([ConvTransp3D])
def local_gpu_conv_transpd(node): def local_gpu_conv_transpd(node):
if isinstance(node.op, ConvTransp3D): if isinstance(node.op, ConvTransp3D):
if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]): if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]):
......
...@@ -405,7 +405,7 @@ def gpu_images2neibs(ten4, neib_shape, neib_step=None, mode='valid'): ...@@ -405,7 +405,7 @@ def gpu_images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
return GpuImages2Neibs(mode)(ten4, neib_shape, neib_step) return GpuImages2Neibs(mode)(ten4, neib_shape, neib_step)
@local_optimizer() @local_optimizer([Images2Neibs])
def use_gpu_images2neibs(node): def use_gpu_images2neibs(node):
if (type(node.op) is Images2Neibs and if (type(node.op) is Images2Neibs and
node.inputs[0].dtype == 'float32' and node.inputs[0].dtype == 'float32' and
......
差异被折叠。
...@@ -346,7 +346,7 @@ class CURAND_RandomStreams(object): ...@@ -346,7 +346,7 @@ class CURAND_RandomStreams(object):
return rval return rval
@local_optimizer([None]) @local_optimizer([CURAND_Base])
def local_destructive(node): def local_destructive(node):
op = node.op op = node.op
if isinstance(op, CURAND_Base) and not op.destructive: if isinstance(op, CURAND_Base) and not op.destructive:
......
...@@ -112,7 +112,7 @@ gpu_seqopt.register('InputToGpuArrayOptimizer', InputToGpuOptimizer(), ...@@ -112,7 +112,7 @@ gpu_seqopt.register('InputToGpuArrayOptimizer', InputToGpuOptimizer(),
0, 'fast_run', 'fast_compile', 'merge') 0, 'fast_run', 'fast_compile', 'merge')
@local_optimizer([]) @local_optimizer([gpu_from_host, host_from_gpu])
def local_cut_gpu_host_gpu(node): def local_cut_gpu_host_gpu(node):
if tensor.opt.opt.check_chain(node, gpu_from_host, host_from_gpu): if tensor.opt.opt.check_chain(node, gpu_from_host, host_from_gpu):
return [node.inputs[0].owner.inputs[0]] return [node.inputs[0].owner.inputs[0]]
......
...@@ -72,7 +72,7 @@ def hints(variable): ...@@ -72,7 +72,7 @@ def hints(variable):
@register_canonicalize @register_canonicalize
@local_optimizer([]) @local_optimizer([Hint])
def remove_hint_nodes(node): def remove_hint_nodes(node):
if is_hint_node(node): if is_hint_node(node):
# transfer hints from graph to Feature # transfer hints from graph to Feature
...@@ -224,7 +224,7 @@ def is_positive(v): ...@@ -224,7 +224,7 @@ def is_positive(v):
@register_stabilize @register_stabilize
@local_optimizer([]) @local_optimizer([Dot, Dot22])
def inv_as_solve(node): def inv_as_solve(node):
if not imported_scipy: if not imported_scipy:
return False return False
...@@ -242,7 +242,7 @@ def inv_as_solve(node): ...@@ -242,7 +242,7 @@ def inv_as_solve(node):
@register_canonicalize @register_canonicalize
@register_stabilize @register_stabilize
@register_specialize @register_specialize
@local_optimizer([]) @local_optimizer([DimShuffle])
def no_transpose_symmetric(node): def no_transpose_symmetric(node):
if isinstance(node.op, DimShuffle): if isinstance(node.op, DimShuffle):
x = node.inputs[0] x = node.inputs[0]
...@@ -253,7 +253,7 @@ def no_transpose_symmetric(node): ...@@ -253,7 +253,7 @@ def no_transpose_symmetric(node):
@register_stabilize @register_stabilize
@local_optimizer([]) @local_optimizer(None) # XXX: solve is defined later and can't be used here
def psd_solve_with_chol(node): def psd_solve_with_chol(node):
if node.op == solve: if node.op == solve:
A, b = node.inputs # result is solution Ax=b A, b = node.inputs # result is solution Ax=b
...@@ -269,7 +269,7 @@ def psd_solve_with_chol(node): ...@@ -269,7 +269,7 @@ def psd_solve_with_chol(node):
@register_stabilize @register_stabilize
@register_specialize @register_specialize
@local_optimizer([]) @local_optimizer(None) # XXX: det is defined later and can't be used here
def local_det_chol(node): def local_det_chol(node):
""" """
If we have det(X) and there is already an L=cholesky(X) If we have det(X) and there is already an L=cholesky(X)
...@@ -287,7 +287,7 @@ def local_det_chol(node): ...@@ -287,7 +287,7 @@ def local_det_chol(node):
@register_canonicalize @register_canonicalize
@register_stabilize @register_stabilize
@register_specialize @register_specialize
@local_optimizer([]) @local_optimizer([tensor.log])
def local_log_prod_sqr(node): def local_log_prod_sqr(node):
if node.op == tensor.log: if node.op == tensor.log:
x, = node.inputs x, = node.inputs
...@@ -307,7 +307,7 @@ def local_log_prod_sqr(node): ...@@ -307,7 +307,7 @@ def local_log_prod_sqr(node):
@register_canonicalize @register_canonicalize
@register_stabilize @register_stabilize
@register_specialize @register_specialize
@local_optimizer([]) @local_optimizer([tensor.log])
def local_log_pow(node): def local_log_pow(node):
if node.op == tensor.log: if node.op == tensor.log:
x, = node.inputs x, = node.inputs
......
...@@ -337,7 +337,7 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp): ...@@ -337,7 +337,7 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
""" % locals() """ % locals()
@local_optimizer() @local_optimizer([MultinomialFromUniform])
def local_gpu_multinomial(node): def local_gpu_multinomial(node):
if type(node.op) is MultinomialFromUniform: if type(node.op) is MultinomialFromUniform:
p, u = node.inputs p, u = node.inputs
......
...@@ -941,7 +941,7 @@ class MRG_RandomStreams(object): ...@@ -941,7 +941,7 @@ class MRG_RandomStreams(object):
return final_samples return final_samples
@local_optimizer([None]) @local_optimizer([mrg_uniform])
def mrg_random_make_inplace(node): def mrg_random_make_inplace(node):
op = node.op op = node.op
if isinstance(op, mrg_uniform) and not op.inplace: if isinstance(op, mrg_uniform) and not op.inplace:
......
...@@ -49,7 +49,7 @@ def info(*msg): ...@@ -49,7 +49,7 @@ def info(*msg):
_logger.info('INFO theano.scan: ' + ' '.join(msg)) _logger.info('INFO theano.scan: ' + ' '.join(msg))
@gof.local_optimizer([None]) @gof.local_optimizer([scan_op.Scan])
def remove_constants_and_unused_inputs_scan(node): def remove_constants_and_unused_inputs_scan(node):
''' '''
Move constants into the inner graph, and remove unused inputs. Move constants into the inner graph, and remove unused inputs.
...@@ -1337,7 +1337,7 @@ def make_equiv(lo, li): ...@@ -1337,7 +1337,7 @@ def make_equiv(lo, li):
return left, right return left, right
@gof.local_optimizer([None]) @gof.local_optimizer([scan_op.Scan])
def scan_merge_inouts(node): def scan_merge_inouts(node):
if not isinstance(node.op, scan_op.Scan): if not isinstance(node.op, scan_op.Scan):
return False return False
......
...@@ -32,7 +32,7 @@ sparse.register_specialize(local_csm_properties_csm) ...@@ -32,7 +32,7 @@ sparse.register_specialize(local_csm_properties_csm)
# This is tested in tests/test_basic.py:test_remove0 # This is tested in tests/test_basic.py:test_remove0
@gof.local_optimizer([None]) @gof.local_optimizer([sparse.Remove0])
def local_inplace_remove0(node): def local_inplace_remove0(node):
""" """
Optimization to insert inplace versions of Remove0. Optimization to insert inplace versions of Remove0.
...@@ -49,7 +49,7 @@ theano.compile.optdb.register('local_inplace_remove0', ...@@ -49,7 +49,7 @@ theano.compile.optdb.register('local_inplace_remove0',
gof.TopoOptimizer(local_inplace_remove0, gof.TopoOptimizer(local_inplace_remove0,
failure_callback=gof.TopoOptimizer.warn_inplace), failure_callback=gof.TopoOptimizer.warn_inplace),
60, 'fast_run', 'inplace') 60, 'fast_run', 'inplace')
@gof.local_optimizer([None]) @gof.local_optimizer([sparse.AddSD])
def local_inplace_addsd(node): def local_inplace_addsd(node):
""" """
Optimization to insert inplace versions of AddSD. Optimization to insert inplace versions of AddSD.
......
...@@ -1645,7 +1645,7 @@ class Dot22(GemmRelated): ...@@ -1645,7 +1645,7 @@ class Dot22(GemmRelated):
_dot22 = Dot22() _dot22 = Dot22()
@local_optimizer([T._dot]) @local_optimizer([T.Dot])
def local_dot_to_dot22(node): def local_dot_to_dot22(node):
# This works for tensor.outer too because basic.outer is a macro that # This works for tensor.outer too because basic.outer is a macro that
# produces a dot(dimshuffle,dimshuffle) of form 4 below # produces a dot(dimshuffle,dimshuffle) of form 4 below
...@@ -2025,7 +2025,7 @@ blas_optdb.register('local_dot22_to_dot22scalar', ...@@ -2025,7 +2025,7 @@ blas_optdb.register('local_dot22_to_dot22scalar',
#from opt import register_specialize, register_canonicalize #from opt import register_specialize, register_canonicalize
#@register_specialize #@register_specialize
@local_optimizer([]) @local_optimizer([T.sub, T.add])
def local_print_as_we_go_along(node): def local_print_as_we_go_along(node):
if node.op in (T.sub, T.add): if node.op in (T.sub, T.add):
debugprint(node) debugprint(node)
...@@ -266,7 +266,7 @@ def make_gpu_optimizer(op, to_gpu): ...@@ -266,7 +266,7 @@ def make_gpu_optimizer(op, to_gpu):
:param to_gpu: a list of op inputs that are moved to the GPU. :param to_gpu: a list of op inputs that are moved to the GPU.
""" """
@theano.gof.local_optimizer([]) @theano.gof.local_optimizer([op, cuda.gpu_from_host])
def local_to_gpu(node): def local_to_gpu(node):
""" """
op(host_from_gpu()) -> host_from_gpu(op) op(host_from_gpu()) -> host_from_gpu(op)
...@@ -302,7 +302,7 @@ if cuda.cuda_available: ...@@ -302,7 +302,7 @@ if cuda.cuda_available:
make_gpu_optimizer(IncDiagonalSubtensor, [0, 3]) make_gpu_optimizer(IncDiagonalSubtensor, [0, 3])
@theano.gof.local_optimizer([None]) @theano.gof.local_optimizer([DiagonalSubtensor, IncDiagonalSubtensor])
def local_inplace_DiagonalSubtensor(node): def local_inplace_DiagonalSubtensor(node):
""" also work for IncDiagonalSubtensor """ """ also work for IncDiagonalSubtensor """
if (isinstance(node.op, (DiagonalSubtensor, IncDiagonalSubtensor)) and if (isinstance(node.op, (DiagonalSubtensor, IncDiagonalSubtensor)) and
......
...@@ -589,7 +589,7 @@ opt.local_mul_canonizer.add_simplifier(softmax_simplifier, ...@@ -589,7 +589,7 @@ opt.local_mul_canonizer.add_simplifier(softmax_simplifier,
if 0: if 0:
@opt.register_specialize @opt.register_specialize
@gof.local_optimizer([]) @gof.local_optimizer([tensor.add])
def local_softmax_grad(node): def local_softmax_grad(node):
'''dy*sm - DimShuffle{0,'x'}(sum{1}(dy*sm))*sm -> softmax_grad(dy,sm)''' '''dy*sm - DimShuffle{0,'x'}(sum{1}(dy*sm))*sm -> softmax_grad(dy,sm)'''
#TODO what if the signs are changed? #TODO what if the signs are changed?
...@@ -1417,7 +1417,7 @@ def _is_const(z, val, approx=False): ...@@ -1417,7 +1417,7 @@ def _is_const(z, val, approx=False):
@opt.register_specialize @opt.register_specialize
@gof.local_optimizer([]) @gof.local_optimizer([subtensor.AdvancedSubtensor, tensor.log])
def local_advanced_indexing_crossentropy_onehot(node): def local_advanced_indexing_crossentropy_onehot(node):
log = None log = None
sm = None sm = None
......
差异被折叠。
...@@ -55,7 +55,7 @@ def local_max_and_argmax(node): ...@@ -55,7 +55,7 @@ def local_max_and_argmax(node):
return [new, None] return [new, None]
@register_uncanonicalize @register_uncanonicalize
@gof.local_optimizer([T._shape]) @gof.local_optimizer([T.neg])
def local_max_to_min(node): def local_max_to_min(node):
""" """
change -(max(-x)) to min change -(max(-x)) to min
......
...@@ -816,7 +816,7 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5], ...@@ -816,7 +816,7 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5],
return op(random_state, size, n, pvals) return op(random_state, size, n, pvals)
@gof.local_optimizer([None]) @gof.local_optimizer([RandomFunction])
def random_make_inplace(node): def random_make_inplace(node):
op = node.op op = node.op
if isinstance(op, RandomFunction) and not op.inplace: if isinstance(op, RandomFunction) and not op.inplace:
......
...@@ -3361,10 +3361,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3361,10 +3361,8 @@ class T_Join_and_Split(unittest.TestCase):
utt.verify_grad((lambda a, b: join(0, a, b)), [a_val, b_val], rng=rng) utt.verify_grad((lambda a, b: join(0, a, b)), [a_val, b_val], rng=rng)
def test_broadcastable_single_input_broadcastable_dimension(self): def test_broadcastable_single_input_broadcastable_dimension(self):
""" # Test that all broadcastable flags are preserved by a
Test that all broadcastable flags are preserved by a # single-input join.
single-input join.
"""
rng = numpy.random.RandomState(seed=utt.fetch_seed()) rng = numpy.random.RandomState(seed=utt.fetch_seed())
a_val = rng.rand(1, 4, 1).astype(self.floatX) a_val = rng.rand(1, 4, 1).astype(self.floatX)
a = self.shared(a_val, broadcastable=(True, False, True)) a = self.shared(a_val, broadcastable=(True, False, True))
...@@ -3387,10 +3385,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3387,10 +3385,8 @@ class T_Join_and_Split(unittest.TestCase):
#self.assertRaises(TypeError, f, bad_a_val) #self.assertRaises(TypeError, f, bad_a_val)
def test_broadcastable_flags_many_dims_and_inputs(self): def test_broadcastable_flags_many_dims_and_inputs(self):
""" # Test that the right broadcastable flags get set for a join
Test that the right broadcastable flags get set for a join # with many inputs and many input dimensions.
with many inputs and many input dimensions.
"""
a = TensorType(dtype=self.floatX, broadcastable=[1, 0, 1, 0, 0, 0])() a = TensorType(dtype=self.floatX, broadcastable=[1, 0, 1, 0, 0, 0])()
b = TensorType(dtype=self.floatX, broadcastable=[1, 1, 1, 0, 0, 0])() b = TensorType(dtype=self.floatX, broadcastable=[1, 1, 1, 0, 0, 0])()
c = TensorType(dtype=self.floatX, broadcastable=[1, 0, 0, 0, 0, 0])() c = TensorType(dtype=self.floatX, broadcastable=[1, 0, 0, 0, 0, 0])()
...@@ -3479,20 +3475,16 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3479,20 +3475,16 @@ class T_Join_and_Split(unittest.TestCase):
f(get_mat(3, 4), get_mat(3, 4), get_mat(2, 5)) f(get_mat(3, 4), get_mat(3, 4), get_mat(2, 5))
def test_rebroadcast(self): def test_rebroadcast(self):
""" # Regression test for a crash that used to happen when rebroadcasting.
Regression test for a crash that used to happen when rebroadcasting.
"""
x = tensor.TensorType(self.floatX, [False, False, True])() x = tensor.TensorType(self.floatX, [False, False, True])()
u = tensor.TensorType(self.floatX, [False, False, True])() u = tensor.TensorType(self.floatX, [False, False, True])()
# This line used to crash. # This line used to crash.
z = tensor.concatenate([x, -u], axis=2) z = tensor.concatenate([x, -u], axis=2)
def test_concatenate_same(self): def test_concatenate_same(self):
""" # Test that we can concatenate the same tensor multiple time.
Test that we can concatenate the same tensor multiple time.
In the past it was broken on the GPU. # In the past it was broken on the GPU.
"""
rng = numpy.random.RandomState(seed=utt.fetch_seed()) rng = numpy.random.RandomState(seed=utt.fetch_seed())
T_shared = self.shared(rng.rand(3, 4).astype(self.floatX)) T_shared = self.shared(rng.rand(3, 4).astype(self.floatX))
Tout = tensor.concatenate([T_shared, T_shared]) Tout = tensor.concatenate([T_shared, T_shared])
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论