Commit b5af3406 authored by Frédéric Bastien

Merge pull request #1669 from abergeron/eq_opt

make EquilibriumOptimizer use a dict to map ops to their local optimizers rather than running every optimizer on every node.
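The core of the change: a LocalOptimizer can now declare, through tracks(), which op classes or instances it applies to, and EquilibriumOptimizer keeps a dict from those tracked ops to the optimizers. Each node is then only offered to the catch-all optimizers plus the ones tracking its op, instead of every optimizer being tried on every node. Below is a minimal, self-contained sketch of that dispatch scheme; the Add/Mul ops and optimizer entries are hypothetical toys, not the actual Theano classes:

class Add(object):
    pass

class Mul(object):
    pass

class Node(object):
    def __init__(self, op):
        self.op = op

def build_dispatch(optimizers):
    # Split optimizers into a catch-all list (tracks is None) and an
    # op -> [optimizers] map, mirroring the local_optimizers_all and
    # local_optimizers_map attributes added in the diff below.
    opts_all, opts_map = [], {}
    for name, tracks in optimizers:
        if tracks is None:
            opts_all.append(name)
        else:
            for t in tracks:  # each track is an op class or op instance
                opts_map.setdefault(t, []).append(name)
    return opts_all, opts_map

def candidates(node, opts_all, opts_map):
    # Optimizers worth trying on this node: the catch-alls, plus those
    # tracking the node's op class, plus those tracking the op instance.
    return (opts_all +
            opts_map.get(type(node.op), []) +
            opts_map.get(node.op, []))

optimizers = [("fold_add", [Add]),
              ("fuse_mul", [Mul]),
              ("constant_fold", None)]
opts_all, opts_map = build_dispatch(optimizers)
print(candidates(Node(Add()), opts_all, opts_map))
# -> ['constant_fold', 'fold_add']
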
...@@ -736,6 +736,14 @@ class LocalOptimizer(object): ...@@ -736,6 +736,14 @@ class LocalOptimizer(object):
_optimizer_idx[0] += 1 _optimizer_idx[0] += 1
return self._optimizer_idx return self._optimizer_idx
def tracks(self):
"""
Return the list of op classes that this opt applies to.
Return None to apply to all nodes.
"""
return None
def transform(self, node): def transform(self, node):
"""Transform a subgraph whose output is `node`. """Transform a subgraph whose output is `node`.
...@@ -772,8 +780,6 @@ class LocalOptimizer(object): ...@@ -772,8 +780,6 @@ class LocalOptimizer(object):
class FromFunctionLocalOptimizer(LocalOptimizer): class FromFunctionLocalOptimizer(LocalOptimizer):
"""WRITEME""" """WRITEME"""
def __init__(self, fn, tracks=None): def __init__(self, fn, tracks=None):
if tracks is None:
tracks = []
self.transform = fn self.transform = fn
self._tracks = tracks self._tracks = tracks
...@@ -791,9 +797,15 @@ class FromFunctionLocalOptimizer(LocalOptimizer): ...@@ -791,9 +797,15 @@ class FromFunctionLocalOptimizer(LocalOptimizer):
id(self)) id(self))
def local_optimizer(*tracks): def local_optimizer(tracks):
def decorator(f): def decorator(f):
"""WRITEME""" """WRITEME"""
if tracks is not None:
if len(tracks) is 0:
raise ValueError, ("Use None instead of an empty list to apply to all nodes.", f.__module__, f.__name__)
for t in tracks:
if not (isinstance(t, op.Op) or issubclass(t, op.PureOp)):
raise ValueError, ("Tracks are op classes or instances", f.__module__, f.__name__)
rval = FromFunctionLocalOptimizer(f, tracks) rval = FromFunctionLocalOptimizer(f, tracks)
rval.__name__ = f.__name__ rval.__name__ = f.__name__
return rval return rval
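
With this validation in place, callers of the decorator must pass either a non-empty list of op classes/instances to track, or None to be offered every node; an empty list is rejected. A hedged sketch of the resulting usage, assuming Theano is importable (MyOp and both optimizer bodies are hypothetical placeholders, not part of this commit):

from theano import gof

class MyOp(gof.Op):
    """Hypothetical op class, used here only as a track."""

@gof.local_optimizer([MyOp])   # offered only to nodes whose op is a MyOp
def local_simplify_myop(node):
    if not isinstance(node.op, MyOp):
        return False
    return False  # a real optimizer would return replacement outputs here

@gof.local_optimizer(None)     # None, not []: offered to every node
def local_catch_all(node):
    return False
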
...@@ -870,7 +882,7 @@ class OpSub(LocalOptimizer): ...@@ -870,7 +882,7 @@ class OpSub(LocalOptimizer):
return self.op1 return self.op1
def tracks(self): def tracks(self):
return [[self.op1]] return [self.op1]
def transform(self, node): def transform(self, node):
if node.op != self.op1: if node.op != self.op1:
...@@ -901,7 +913,7 @@ class OpRemove(LocalOptimizer): ...@@ -901,7 +913,7 @@ class OpRemove(LocalOptimizer):
return self.op return self.op
def tracks(self): def tracks(self):
return [[self.op]] return [self.op]
def transform(self, node): def transform(self, node):
if node.op != self.op: if node.op != self.op:
...@@ -1008,17 +1020,7 @@ class PatternSub(LocalOptimizer): ...@@ -1008,17 +1020,7 @@ class PatternSub(LocalOptimizer):
return self.op return self.op
def tracks(self): def tracks(self):
def helper(pattern, sofar): return [self.op]
if isinstance(pattern, (list, tuple)):
sofar = sofar + (pattern[0],)
return reduce(tuple.__add__,
tuple(helper(p, sofar) for p in pattern[1:]),
())
elif isinstance(pattern, dict):
return helper(pattern['pattern'], sofar)
else:
return (sofar,)
return set(helper(self.in_pattern, ()))
def transform(self, node): def transform(self, node):
""" """
...@@ -1500,12 +1502,17 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1500,12 +1502,17 @@ class EquilibriumOptimizer(NavigatorOptimizer):
None, None,
ignore_newtrees=True, ignore_newtrees=True,
failure_callback=failure_callback) failure_callback=failure_callback)
self.local_optimizers = [] self.local_optimizers_map = dict()
self.local_optimizers_all = []
self.global_optimizers = [] self.global_optimizers = []
for opt in optimizers: for opt in optimizers:
if isinstance(opt, LocalOptimizer): if isinstance(opt, LocalOptimizer):
self.local_optimizers.append(opt) if opt.tracks() is None:
self.local_optimizers_all.append(opt)
else:
for c in opt.tracks():
self.local_optimizers_map.setdefault(c, []).append(opt)
else: else:
self.global_optimizers.append(opt) self.global_optimizers.append(opt)
self.max_depth = max_depth self.max_depth = max_depth
...@@ -1513,10 +1520,21 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1513,10 +1520,21 @@ class EquilibriumOptimizer(NavigatorOptimizer):
assert self.max_use_ratio is not None, ( assert self.max_use_ratio is not None, (
'max_use_ratio has to be a number') 'max_use_ratio has to be a number')
def get_local_optimizers(self):
for opt in self.local_optimizers_all:
yield opt
# if repeat is not a problem we can drop the set
s = set()
for lopt in self.local_optimizers_map.values():
for opt in lopt:
if opt not in s:
yield opt
s.add(opt)
def add_requirements(self, fgraph): def add_requirements(self, fgraph):
super(EquilibriumOptimizer, self).add_requirements(fgraph) super(EquilibriumOptimizer, self).add_requirements(fgraph)
fgraph.attach_feature(ChangeTracker()) fgraph.attach_feature(ChangeTracker())
for opt in self.local_optimizers: for opt in self.get_local_optimizers():
opt.add_requirements(fgraph) opt.add_requirements(fgraph)
for opt in self.global_optimizers: for opt in self.global_optimizers:
opt.add_requirements(fgraph) opt.add_requirements(fgraph)
...@@ -1542,7 +1560,7 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1542,7 +1560,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
time_opts = {} time_opts = {}
io_toposort_timing = [] io_toposort_timing = []
nb_nodes = [] nb_nodes = []
for opt in self.global_optimizers + self.local_optimizers: for opt in self.global_optimizers + list(self.get_local_optimizers()):
global_process_count.setdefault(opt, 0) global_process_count.setdefault(opt, 0)
time_opts.setdefault(opt, 0) time_opts.setdefault(opt, 0)
...@@ -1595,7 +1613,9 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1595,7 +1613,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
node = q.pop() node = q.pop()
current_node = node current_node = node
for lopt in self.local_optimizers: for lopt in (self.local_optimizers_all +
self.local_optimizers_map.get(type(node.op), []) +
self.local_optimizers_map.get(node.op, [])):
t_opt = time.time() t_opt = time.time()
lopt_change = self.process_node(fgraph, node, lopt) lopt_change = self.process_node(fgraph, node, lopt)
time_opts[lopt] += time.time() - t_opt time_opts[lopt] += time.time() - t_opt
...@@ -1634,7 +1654,7 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1634,7 +1654,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
print >> stream, "%s%s %s id=%i" % ( print >> stream, "%s%s %s id=%i" % (
(' ' * level), self.__class__.__name__, name, id(self)) (' ' * level), self.__class__.__name__, name, id(self))
if depth != 0: if depth != 0:
for lopt in self.local_optimizers: for lopt in self.get_local_optimizers():
lopt.print_summary(stream, level=(level + 2), lopt.print_summary(stream, level=(level + 2),
depth=(depth - 1)) depth=(depth - 1))
...@@ -1654,7 +1674,7 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1654,7 +1674,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
start_nb_nodes, end_nb_nodes, max_nb_nodes) start_nb_nodes, end_nb_nodes, max_nb_nodes)
print >> stream, blanc, " time io_toposort %.3fs" % sum( print >> stream, blanc, " time io_toposort %.3fs" % sum(
io_toposort_timing) io_toposort_timing)
s = sum([time_opts[o] for o in opt.local_optimizers]) s = sum([time_opts[o] for o in opt.get_local_optimizers()])
print >> stream, blanc, " time in local optimizers %.3fs" % s print >> stream, blanc, " time in local optimizers %.3fs" % s
s = sum([time_opts[o] for o in opt.global_optimizers]) s = sum([time_opts[o] for o in opt.global_optimizers])
print >> stream, blanc, " time in global optimizers %.3fs" % s print >> stream, blanc, " time in global optimizers %.3fs" % s
...@@ -1679,7 +1699,7 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1679,7 +1699,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
not_used = 0 not_used = 0
not_used_time = 0 not_used_time = 0
process_count = {} process_count = {}
for o in opt.global_optimizers + opt.local_optimizers: for o in opt.global_optimizers + list(opt.get_local_optimizers()):
process_count.setdefault(o, 0) process_count.setdefault(o, 0)
for count in loop_process_count: for count in loop_process_count:
for o, v in count.iteritems(): for o, v in count.iteritems():
...@@ -1707,8 +1727,8 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1707,8 +1727,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
#(opt, loop_timing, loop_process_count, max_nb_nodes, #(opt, loop_timing, loop_process_count, max_nb_nodes,
# global_opt_timing, nb_nodes, time_opts, io_toposort_timing) = prof1 # global_opt_timing, nb_nodes, time_opts, io_toposort_timing) = prof1
local_optimizers = set(prof1[0].local_optimizers).union( local_optimizers = set(prof1[0].get_local_optimizers()).union(
prof2[0].local_optimizers) prof2[0].get_local_optimizers())
global_optimizers = set(prof1[0].global_optimizers).union( global_optimizers = set(prof1[0].global_optimizers).union(
prof2[0].global_optimizers) prof2[0].global_optimizers)
new_opt = EquilibriumOptimizer( new_opt = EquilibriumOptimizer(
......
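Where the profiling and printing code used to walk self.local_optimizers, it now goes through the get_local_optimizers() generator shown above, which yields each local optimizer once even when it is registered under several tracked ops. A standalone sketch of that deduplication, using toy names rather than the Theano objects:

def iter_local_optimizers(opts_all, opts_map):
    # Yield catch-all optimizers first, then each tracked optimizer once,
    # even if it appears under several ops in the map.
    for opt in opts_all:
        yield opt
    seen = set()
    for tracked in opts_map.values():
        for opt in tracked:
            if opt not in seen:
                seen.add(opt)
                yield opt

opts_all = ["constant_fold"]
opts_map = {"Add": ["fold_add", "fuse_addmul"],
            "Mul": ["fuse_addmul"]}  # fuse_addmul tracks two ops
print(sorted(iter_local_optimizers(opts_all, opts_map)))
# -> ['constant_fold', 'fold_add', 'fuse_addmul'] (each exactly once)
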
...@@ -384,7 +384,7 @@ def ifelse(condition, then_branch, else_branch, name=None): ...@@ -384,7 +384,7 @@ def ifelse(condition, then_branch, else_branch, name=None):
return tuple(rval) return tuple(rval)
@gof.local_optimizer([None]) @gof.local_optimizer([IfElse])
def cond_make_inplace(node): def cond_make_inplace(node):
op = node.op op = node.op
if isinstance(op, IfElse) and not op.as_view: if isinstance(op, IfElse) and not op.as_view:
...@@ -445,7 +445,7 @@ acceptable_ops = (theano.tensor.basic.Dot, ...@@ -445,7 +445,7 @@ acceptable_ops = (theano.tensor.basic.Dot,
theano.tensor.elemwise.DimShuffle) theano.tensor.elemwise.DimShuffle)
@gof.local_optimizer([None]) @gof.local_optimizer(acceptable_ops)
def ifelse_lift_single_if_through_acceptable_ops(main_node): def ifelse_lift_single_if_through_acceptable_ops(main_node):
"""This optimization lifts up certain ifelse instances. """This optimization lifts up certain ifelse instances.
...@@ -493,7 +493,7 @@ def ifelse_lift_single_if_through_acceptable_ops(main_node): ...@@ -493,7 +493,7 @@ def ifelse_lift_single_if_through_acceptable_ops(main_node):
return nw_outs return nw_outs
@gof.local_optimizer([None]) @gof.local_optimizer([IfElse])
def cond_merge_ifs_true(node): def cond_merge_ifs_true(node):
op = node.op op = node.op
if not isinstance(op, IfElse): if not isinstance(op, IfElse):
...@@ -517,7 +517,7 @@ def cond_merge_ifs_true(node): ...@@ -517,7 +517,7 @@ def cond_merge_ifs_true(node):
return op(*old_ins, **dict(return_list=True)) return op(*old_ins, **dict(return_list=True))
@gof.local_optimizer([None]) @gof.local_optimizer([IfElse])
def cond_merge_ifs_false(node): def cond_merge_ifs_false(node):
op = node.op op = node.op
if not isinstance(op, IfElse): if not isinstance(op, IfElse):
...@@ -592,7 +592,7 @@ class CondMerge(gof.Optimizer): ...@@ -592,7 +592,7 @@ class CondMerge(gof.Optimizer):
fgraph.replace_all_validate(pairs, reason='cond_merge') fgraph.replace_all_validate(pairs, reason='cond_merge')
@gof.local_optimizer([None]) @gof.local_optimizer([IfElse])
def cond_remove_identical(node): def cond_remove_identical(node):
op = node.op op = node.op
...@@ -643,7 +643,7 @@ def cond_remove_identical(node): ...@@ -643,7 +643,7 @@ def cond_remove_identical(node):
return rval return rval
@gof.local_optimizer([None]) @gof.local_optimizer([IfElse])
def cond_merge_random_op(main_node): def cond_merge_random_op(main_node):
if isinstance(main_node.op, IfElse): if isinstance(main_node.op, IfElse):
return False return False
......
...@@ -284,7 +284,7 @@ conv_rows_stack( float* img, float* kern, float* bias, float* out, ...@@ -284,7 +284,7 @@ conv_rows_stack( float* img, float* kern, float* bias, float* out,
gpu_convd = GpuConv3D() gpu_convd = GpuConv3D()
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([Conv3D])
def local_gpu_conv3d(node): def local_gpu_conv3d(node):
if isinstance(node.op, Conv3D): if isinstance(node.op, Conv3D):
if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]): if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]):
......
...@@ -341,7 +341,7 @@ convgrad_rows_stack( float* img, float* dCdH, float* dCdW, ...@@ -341,7 +341,7 @@ convgrad_rows_stack( float* img, float* dCdH, float* dCdW,
gpu_conv_grad3d = GpuConvGrad3D() gpu_conv_grad3d = GpuConvGrad3D()
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([ConvGrad3D])
def local_gpu_conv_gradd(node): def local_gpu_conv_gradd(node):
if isinstance(node.op, ConvGrad3D): if isinstance(node.op, ConvGrad3D):
if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]): if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]):
......
...@@ -348,7 +348,7 @@ conv_transp_rows_stack( float* H, float* kern, float* bias, float* R, ...@@ -348,7 +348,7 @@ conv_transp_rows_stack( float* H, float* kern, float* bias, float* R,
gpu_conv_transpd = GpuConvTransp3D() gpu_conv_transpd = GpuConvTransp3D()
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([ConvTransp3D])
def local_gpu_conv_transpd(node): def local_gpu_conv_transpd(node):
if isinstance(node.op, ConvTransp3D): if isinstance(node.op, ConvTransp3D):
if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]): if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]):
......
...@@ -405,7 +405,7 @@ def gpu_images2neibs(ten4, neib_shape, neib_step=None, mode='valid'): ...@@ -405,7 +405,7 @@ def gpu_images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
return GpuImages2Neibs(mode)(ten4, neib_shape, neib_step) return GpuImages2Neibs(mode)(ten4, neib_shape, neib_step)
@local_optimizer() @local_optimizer([Images2Neibs])
def use_gpu_images2neibs(node): def use_gpu_images2neibs(node):
if (type(node.op) is Images2Neibs and if (type(node.op) is Images2Neibs and
node.inputs[0].dtype == 'float32' and node.inputs[0].dtype == 'float32' and
......
...@@ -121,7 +121,7 @@ gpu_seqopt.register('InputToGpuOptimizer', InputToGpuOptimizer(), ...@@ -121,7 +121,7 @@ gpu_seqopt.register('InputToGpuOptimizer', InputToGpuOptimizer(),
'merge') # TODO: how to make it mandatory for gpu_seqopt? 'merge') # TODO: how to make it mandatory for gpu_seqopt?
@local_optimizer([]) @local_optimizer([gpu_from_host, host_from_gpu])
def local_cut_gpu_host_gpu(node): def local_cut_gpu_host_gpu(node):
if tensor.opt.opt.check_chain(node, gpu_from_host, host_from_gpu): if tensor.opt.opt.check_chain(node, gpu_from_host, host_from_gpu):
return [node.inputs[0].owner.inputs[0]] return [node.inputs[0].owner.inputs[0]]
...@@ -170,7 +170,7 @@ def dtype_in_elemwise_supported(op): ...@@ -170,7 +170,7 @@ def dtype_in_elemwise_supported(op):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([tensor.Elemwise])
def local_gpu_elemwise_0(node): def local_gpu_elemwise_0(node):
"""elemwise(..., host_from_gpu, ...) """elemwise(..., host_from_gpu, ...)
-> host_from_gpu(elemwise(gpu_from_host, ..., gpu_from_host) -> host_from_gpu(elemwise(gpu_from_host, ..., gpu_from_host)
...@@ -229,7 +229,7 @@ def local_gpu_elemwise_0(node): ...@@ -229,7 +229,7 @@ def local_gpu_elemwise_0(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([gpu_from_host])
def local_gpu_elemwise_1(node): def local_gpu_elemwise_1(node):
""" """
gpu_from_host(Elemwise)) -> GpuElemwise(gpu_from_host(...)) gpu_from_host(Elemwise)) -> GpuElemwise(gpu_from_host(...))
...@@ -265,7 +265,7 @@ def local_gpu_elemwise_1(node): ...@@ -265,7 +265,7 @@ def local_gpu_elemwise_1(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([tensor.DimShuffle, gpu_from_host])
def local_gpu_dimshuffle_0(node): def local_gpu_dimshuffle_0(node):
""" """
dimshuffle(host_from_gpu()) -> host_from_gpu(gpu_dimshuffle) dimshuffle(host_from_gpu()) -> host_from_gpu(gpu_dimshuffle)
...@@ -290,7 +290,7 @@ def local_gpu_dimshuffle_0(node): ...@@ -290,7 +290,7 @@ def local_gpu_dimshuffle_0(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([tensor.SpecifyShape, gpu_from_host])
def local_gpu_specifyShape_0(node): def local_gpu_specifyShape_0(node):
""" """
specify_shape(host_from_gpu()) -> host_from_gpu(specify_shape) specify_shape(host_from_gpu()) -> host_from_gpu(specify_shape)
...@@ -313,7 +313,7 @@ def local_gpu_specifyShape_0(node): ...@@ -313,7 +313,7 @@ def local_gpu_specifyShape_0(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([gpu_from_host]) # XXX: broken: tensor.basic.dot is not an op
def local_gpu_dot_to_dot22(node): def local_gpu_dot_to_dot22(node):
""" """
gpu_from_host(dot) -> gpudot(gpu_from_host) gpu_from_host(dot) -> gpudot(gpu_from_host)
...@@ -376,7 +376,7 @@ def local_gpu_dot_to_dot22(node): ...@@ -376,7 +376,7 @@ def local_gpu_dot_to_dot22(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([theano.ifelse.IfElse, gpu_from_host])
def local_gpu_lazy_ifelse(node): def local_gpu_lazy_ifelse(node):
""" """
gpu_from_host(ifelse) -> gpu_ifelse(gpu_from_host) gpu_from_host(ifelse) -> gpu_ifelse(gpu_from_host)
...@@ -434,7 +434,7 @@ def local_gpu_lazy_ifelse(node): ...@@ -434,7 +434,7 @@ def local_gpu_lazy_ifelse(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([gpu_from_host, tensor.blas._dot22])
def local_gpu_dot22(node): def local_gpu_dot22(node):
""" """
gpu_from_host(dot22) -> gpudot(gpu_from_host) gpu_from_host(dot22) -> gpudot(gpu_from_host)
...@@ -456,7 +456,7 @@ def local_gpu_dot22(node): ...@@ -456,7 +456,7 @@ def local_gpu_dot22(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([gpu_from_host, tensor.blas._dot22scalar])
def local_gpu_dot22scalar(node): def local_gpu_dot22scalar(node):
""" """
gpu_from_host(dot22scalar) -> gpudot(gpu_from_host) gpu_from_host(dot22scalar) -> gpudot(gpu_from_host)
...@@ -482,7 +482,7 @@ def local_gpu_dot22scalar(node): ...@@ -482,7 +482,7 @@ def local_gpu_dot22scalar(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([gpu_from_host, tensor.blas_c.CGemv, tensor.blas.Gemv])
def local_gpu_gemv(node): def local_gpu_gemv(node):
""" """
gpu_from_host(gemv) -> gpu_gemv(gpu_from_host) gpu_from_host(gemv) -> gpu_gemv(gpu_from_host)
...@@ -523,7 +523,8 @@ def local_gpu_gemv(node): ...@@ -523,7 +523,8 @@ def local_gpu_gemv(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([gpu_from_host, tensor.blas_c.CGer, tensor.blas.Ger,
tensor.blas_scipy.ScipyGer])
def local_gpu_ger(node): def local_gpu_ger(node):
""" """
gpu_from_host(ger) -> gpu_ger(gpu_from_host) gpu_from_host(ger) -> gpu_ger(gpu_from_host)
...@@ -566,7 +567,7 @@ def local_gpu_ger(node): ...@@ -566,7 +567,7 @@ def local_gpu_ger(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([tensor.blas.gemm_no_inplace, gpu_from_host])
def local_gpu_gemm(node): def local_gpu_gemm(node):
""" """
gpu_from_host(gemm) -> gpu_gemm(gpu_from_host) gpu_from_host(gemm) -> gpu_gemm(gpu_from_host)
...@@ -601,7 +602,13 @@ def local_gpu_gemm(node): ...@@ -601,7 +602,13 @@ def local_gpu_gemm(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([tensor.elemwise.CAReduce,
tensor.elemwise.All,
tensor.elemwise.Any,
tensor.elemwise.CAReduceDtype,
tensor.elemwise.Sum,
tensor.elemwise.Prod,
tensor.elemwise.ProdWithoutZeros])
def local_gpu_careduce(node): def local_gpu_careduce(node):
if isinstance(node.op, tensor.elemwise.CAReduce): if isinstance(node.op, tensor.elemwise.CAReduce):
scalar_op = node.op.scalar_op scalar_op = node.op.scalar_op
...@@ -671,7 +678,7 @@ def local_gpu_careduce(node): ...@@ -671,7 +678,7 @@ def local_gpu_careduce(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([gpu_from_host, tensor.Reshape])
def local_gpu_reshape(node): def local_gpu_reshape(node):
if node.op == gpu_from_host: if node.op == gpu_from_host:
host_input = node.inputs[0] host_input = node.inputs[0]
...@@ -705,7 +712,7 @@ def local_gpu_reshape(node): ...@@ -705,7 +712,7 @@ def local_gpu_reshape(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([gpu_from_host, tensor.Flatten])
def local_gpu_flatten(node): def local_gpu_flatten(node):
if node.op == gpu_from_host: if node.op == gpu_from_host:
host_input = node.inputs[0] host_input = node.inputs[0]
...@@ -724,7 +731,7 @@ def local_gpu_flatten(node): ...@@ -724,7 +731,7 @@ def local_gpu_flatten(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([gpu_from_host, tensor.Subtensor])
def local_gpu_subtensor(node): def local_gpu_subtensor(node):
if node.op == gpu_from_host: if node.op == gpu_from_host:
host_input = node.inputs[0] host_input = node.inputs[0]
...@@ -745,7 +752,7 @@ def local_gpu_subtensor(node): ...@@ -745,7 +752,7 @@ def local_gpu_subtensor(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([gpu_from_host, tensor.AdvancedSubtensor1])
def local_gpu_advanced_subtensor1(node): def local_gpu_advanced_subtensor1(node):
if node.op == gpu_from_host: if node.op == gpu_from_host:
host_input = node.inputs[0] host_input = node.inputs[0]
...@@ -764,7 +771,7 @@ def local_gpu_advanced_subtensor1(node): ...@@ -764,7 +771,7 @@ def local_gpu_advanced_subtensor1(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([gpu_from_host, tensor.AdvancedIncSubtensor1])
def local_gpu_advanced_incsubtensor1(node): def local_gpu_advanced_incsubtensor1(node):
if node.op == gpu_from_host: if node.op == gpu_from_host:
host_input = node.inputs[0] host_input = node.inputs[0]
...@@ -838,7 +845,7 @@ def local_gpu_advanced_incsubtensor1(node): ...@@ -838,7 +845,7 @@ def local_gpu_advanced_incsubtensor1(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([gpu_from_host, tensor.IncSubtensor])
def local_gpu_incsubtensor(node): def local_gpu_incsubtensor(node):
if node.op == gpu_from_host: if node.op == gpu_from_host:
host_output = node.inputs[0] host_output = node.inputs[0]
...@@ -885,7 +892,7 @@ def local_gpu_incsubtensor(node): ...@@ -885,7 +892,7 @@ def local_gpu_incsubtensor(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([tensor.Shape])
def local_gpu_shape(node): def local_gpu_shape(node):
if isinstance(node.op, tensor.Shape): if isinstance(node.op, tensor.Shape):
x, = node.inputs x, = node.inputs
...@@ -896,7 +903,7 @@ def local_gpu_shape(node): ...@@ -896,7 +903,7 @@ def local_gpu_shape(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([tensor.Rebroadcast])
def local_gpu_rebroadcast(node): def local_gpu_rebroadcast(node):
'''rebroadcast(host_from_gpu(x)) -> host_from_gpu(rebroadcast(x))''' '''rebroadcast(host_from_gpu(x)) -> host_from_gpu(rebroadcast(x))'''
if isinstance(node.op, tensor.Rebroadcast): if isinstance(node.op, tensor.Rebroadcast):
...@@ -911,7 +918,7 @@ def gpu_print_wrapper(op, cnda): ...@@ -911,7 +918,7 @@ def gpu_print_wrapper(op, cnda):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([tensor.printing.Print])
def local_gpu_print_op(node): def local_gpu_print_op(node):
if isinstance(node.op, tensor.printing.Print): if isinstance(node.op, tensor.printing.Print):
x, = node.inputs x, = node.inputs
...@@ -932,7 +939,7 @@ import theano.tensor.nnet ...@@ -932,7 +939,7 @@ import theano.tensor.nnet
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias])
def local_gpu_crossentorpy_softmax_argmax_1hot_with_bias(node): def local_gpu_crossentorpy_softmax_argmax_1hot_with_bias(node):
if isinstance(node.op, tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias): if isinstance(node.op, tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias):
x, b, y = node.inputs x, b, y = node.inputs
...@@ -962,7 +969,7 @@ def local_gpu_crossentorpy_softmax_argmax_1hot_with_bias(node): ...@@ -962,7 +969,7 @@ def local_gpu_crossentorpy_softmax_argmax_1hot_with_bias(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([tensor.nnet.CrossentropySoftmax1HotWithBiasDx])
def local_gpu_crossentorpy_softmax_1hot_with_bias_dx(node): def local_gpu_crossentorpy_softmax_1hot_with_bias_dx(node):
if isinstance(node.op, tensor.nnet.CrossentropySoftmax1HotWithBiasDx): if isinstance(node.op, tensor.nnet.CrossentropySoftmax1HotWithBiasDx):
dnll, sm, yidx = node.inputs dnll, sm, yidx = node.inputs
...@@ -977,7 +984,7 @@ def local_gpu_crossentorpy_softmax_1hot_with_bias_dx(node): ...@@ -977,7 +984,7 @@ def local_gpu_crossentorpy_softmax_1hot_with_bias_dx(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([tensor.nnet.Softmax])
def local_gpu_softmax(node): def local_gpu_softmax(node):
if isinstance(node.op, tensor.nnet.Softmax): if isinstance(node.op, tensor.nnet.Softmax):
x, = node.inputs x, = node.inputs
...@@ -989,7 +996,7 @@ def local_gpu_softmax(node): ...@@ -989,7 +996,7 @@ def local_gpu_softmax(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([tensor.nnet.SoftmaxWithBias])
def local_gpu_softmax_with_bias(node): def local_gpu_softmax_with_bias(node):
if isinstance(node.op, tensor.nnet.SoftmaxWithBias): if isinstance(node.op, tensor.nnet.SoftmaxWithBias):
x, b = node.inputs x, b = node.inputs
...@@ -1005,7 +1012,7 @@ from theano.tensor.nnet import conv ...@@ -1005,7 +1012,7 @@ from theano.tensor.nnet import conv
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([gpu_from_host, conv.ConvOp])
def local_gpu_conv(node): def local_gpu_conv(node):
""" """
gpu_from_host(conv) -> gpu_conv(gpu_from_host) gpu_from_host(conv) -> gpu_conv(gpu_from_host)
...@@ -1105,7 +1112,7 @@ import theano.tensor.signal.downsample as downsample ...@@ -1105,7 +1112,7 @@ import theano.tensor.signal.downsample as downsample
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([downsample.DownsampleFactorMax])
def local_gpu_downsample_factor_max(node): def local_gpu_downsample_factor_max(node):
if isinstance(node.op, downsample.DownsampleFactorMax): if isinstance(node.op, downsample.DownsampleFactorMax):
x, = node.inputs x, = node.inputs
...@@ -1115,7 +1122,7 @@ def local_gpu_downsample_factor_max(node): ...@@ -1115,7 +1122,7 @@ def local_gpu_downsample_factor_max(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([downsample.DownsampleFactorMaxGrad])
def local_gpu_downsample_factor_max_grad(node): def local_gpu_downsample_factor_max_grad(node):
if isinstance(node.op, downsample.DownsampleFactorMaxGrad): if isinstance(node.op, downsample.DownsampleFactorMaxGrad):
x, z, gz = node.inputs x, z, gz = node.inputs
...@@ -1127,11 +1134,11 @@ def local_gpu_downsample_factor_max_grad(node): ...@@ -1127,11 +1134,11 @@ def local_gpu_downsample_factor_max_grad(node):
gpu_from_host(gz)))] gpu_from_host(gz)))]
from theano.sandbox.cuda.basic_ops import gpu_join from theano.sandbox.cuda.basic_ops import gpu_join, GpuJoin
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([tensor.Join])
def local_gpu_join(node): def local_gpu_join(node):
""" """
Inspired by the opt for convop. Inspired by the opt for convop.
...@@ -1188,6 +1195,14 @@ def local_gpu_join(node): ...@@ -1188,6 +1195,14 @@ def local_gpu_join(node):
return [replacement_node] return [replacement_node]
# This is a copy of the same opt in tensor to make the tests happy,
# but I'm not convinced it is actually needed.
@register_opt()
@local_optimizer([GpuJoin])
def local_gpujoin_1(node):
tensors = node.inputs[1:]
if len(tensors) == 1:
return [tensors[0]]
# Commented out because it can result in # Commented out because it can result in
# shared = dimshuffle(gemm_inplace(dimshuffle(shared))) # shared = dimshuffle(gemm_inplace(dimshuffle(shared)))
...@@ -1205,7 +1220,7 @@ def local_inplace_gemv(node): ...@@ -1205,7 +1220,7 @@ def local_inplace_gemv(node):
return [gpu_gemv_inplace(*node.inputs)] return [gpu_gemv_inplace(*node.inputs)]
@local_optimizer([gpu_gemm_no_inplace]) @local_optimizer([gpu_ger_no_inplace])
def local_inplace_ger(node): def local_inplace_ger(node):
if node.op == gpu_ger_no_inplace: if node.op == gpu_ger_no_inplace:
return [gpu_ger_inplace(*node.inputs)] return [gpu_ger_inplace(*node.inputs)]
...@@ -1336,7 +1351,7 @@ optdb.register('gpu_inplace_elemwise_opt', gpu_inplace_elemwise_optimizer, 75, ...@@ -1336,7 +1351,7 @@ optdb.register('gpu_inplace_elemwise_opt', gpu_inplace_elemwise_optimizer, 75,
@register_opt() @register_opt()
@local_optimizer([tensor.Alloc]) @local_optimizer([tensor.alloc])
def local_gpualloc(node): def local_gpualloc(node):
replace = False replace = False
if node.op == tensor.alloc: if node.op == tensor.alloc:
...@@ -1383,7 +1398,7 @@ def local_gpualloc(node): ...@@ -1383,7 +1398,7 @@ def local_gpualloc(node):
@register_opt() @register_opt()
@local_optimizer([tensor.Alloc]) @local_optimizer([GpuAlloc])
def local_gpualloc_memset_0(node): def local_gpualloc_memset_0(node):
if isinstance(node.op, GpuAlloc) and not node.op.memset_0: if isinstance(node.op, GpuAlloc) and not node.op.memset_0:
inp = node.inputs[0] inp = node.inputs[0]
...@@ -1395,7 +1410,7 @@ def local_gpualloc_memset_0(node): ...@@ -1395,7 +1410,7 @@ def local_gpualloc_memset_0(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([gpu_from_host, tensor.Eye])
def local_gpu_eye(node): def local_gpu_eye(node):
""" """
gpu_from_host(eye) -> gpueye(gpu_from_host) gpu_from_host(eye) -> gpueye(gpu_from_host)
...@@ -1479,7 +1494,7 @@ def tensor_to_cuda(x): ...@@ -1479,7 +1494,7 @@ def tensor_to_cuda(x):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer(None) # XXX: linalg is in sandbox, so don't import it globally
def local_gpu_extract_diagonal(node): def local_gpu_extract_diagonal(node):
""" """
extract_diagonal(host_from_gpu()) -> host_from_gpu(extract_diagonal) extract_diagonal(host_from_gpu()) -> host_from_gpu(extract_diagonal)
...@@ -1505,7 +1520,7 @@ def local_gpu_extract_diagonal(node): ...@@ -1505,7 +1520,7 @@ def local_gpu_extract_diagonal(node):
@register_opt('scan') @register_opt('scan')
@local_optimizer([]) @local_optimizer([gpu_from_host, scan_op.Scan])
def gpuScanOptimization(node): def gpuScanOptimization(node):
""" """
scan(host_from_gpu) -> host_from_gpu(GPUscan) scan(host_from_gpu) -> host_from_gpu(GPUscan)
......
...@@ -346,7 +346,7 @@ class CURAND_RandomStreams(object): ...@@ -346,7 +346,7 @@ class CURAND_RandomStreams(object):
return rval return rval
@local_optimizer([None]) @local_optimizer([CURAND_Base])
def local_destructive(node): def local_destructive(node):
op = node.op op = node.op
if isinstance(op, CURAND_Base) and not op.destructive: if isinstance(op, CURAND_Base) and not op.destructive:
......
...@@ -112,7 +112,7 @@ gpu_seqopt.register('InputToGpuArrayOptimizer', InputToGpuOptimizer(), ...@@ -112,7 +112,7 @@ gpu_seqopt.register('InputToGpuArrayOptimizer', InputToGpuOptimizer(),
0, 'fast_run', 'fast_compile', 'merge') 0, 'fast_run', 'fast_compile', 'merge')
@local_optimizer([]) @local_optimizer([gpu_from_host, host_from_gpu])
def local_cut_gpu_host_gpu(node): def local_cut_gpu_host_gpu(node):
if tensor.opt.opt.check_chain(node, gpu_from_host, host_from_gpu): if tensor.opt.opt.check_chain(node, gpu_from_host, host_from_gpu):
return [node.inputs[0].owner.inputs[0]] return [node.inputs[0].owner.inputs[0]]
......
...@@ -72,7 +72,7 @@ def hints(variable): ...@@ -72,7 +72,7 @@ def hints(variable):
@register_canonicalize @register_canonicalize
@local_optimizer([]) @local_optimizer([Hint])
def remove_hint_nodes(node): def remove_hint_nodes(node):
if is_hint_node(node): if is_hint_node(node):
# transfer hints from graph to Feature # transfer hints from graph to Feature
...@@ -224,7 +224,7 @@ def is_positive(v): ...@@ -224,7 +224,7 @@ def is_positive(v):
@register_stabilize @register_stabilize
@local_optimizer([]) @local_optimizer([Dot, Dot22])
def inv_as_solve(node): def inv_as_solve(node):
if not imported_scipy: if not imported_scipy:
return False return False
...@@ -242,7 +242,7 @@ def inv_as_solve(node): ...@@ -242,7 +242,7 @@ def inv_as_solve(node):
@register_canonicalize @register_canonicalize
@register_stabilize @register_stabilize
@register_specialize @register_specialize
@local_optimizer([]) @local_optimizer([DimShuffle])
def no_transpose_symmetric(node): def no_transpose_symmetric(node):
if isinstance(node.op, DimShuffle): if isinstance(node.op, DimShuffle):
x = node.inputs[0] x = node.inputs[0]
...@@ -253,7 +253,7 @@ def no_transpose_symmetric(node): ...@@ -253,7 +253,7 @@ def no_transpose_symmetric(node):
@register_stabilize @register_stabilize
@local_optimizer([]) @local_optimizer(None) # XXX: solve is defined later and can't be used here
def psd_solve_with_chol(node): def psd_solve_with_chol(node):
if node.op == solve: if node.op == solve:
A, b = node.inputs # result is solution Ax=b A, b = node.inputs # result is solution Ax=b
...@@ -269,7 +269,7 @@ def psd_solve_with_chol(node): ...@@ -269,7 +269,7 @@ def psd_solve_with_chol(node):
@register_stabilize @register_stabilize
@register_specialize @register_specialize
@local_optimizer([]) @local_optimizer(None) # XXX: det is defined later and can't be used here
def local_det_chol(node): def local_det_chol(node):
""" """
If we have det(X) and there is already an L=cholesky(X) If we have det(X) and there is already an L=cholesky(X)
...@@ -287,7 +287,7 @@ def local_det_chol(node): ...@@ -287,7 +287,7 @@ def local_det_chol(node):
@register_canonicalize @register_canonicalize
@register_stabilize @register_stabilize
@register_specialize @register_specialize
@local_optimizer([]) @local_optimizer([tensor.log])
def local_log_prod_sqr(node): def local_log_prod_sqr(node):
if node.op == tensor.log: if node.op == tensor.log:
x, = node.inputs x, = node.inputs
...@@ -307,7 +307,7 @@ def local_log_prod_sqr(node): ...@@ -307,7 +307,7 @@ def local_log_prod_sqr(node):
@register_canonicalize @register_canonicalize
@register_stabilize @register_stabilize
@register_specialize @register_specialize
@local_optimizer([]) @local_optimizer([tensor.log])
def local_log_pow(node): def local_log_pow(node):
if node.op == tensor.log: if node.op == tensor.log:
x, = node.inputs x, = node.inputs
......
...@@ -337,7 +337,7 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp): ...@@ -337,7 +337,7 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
""" % locals() """ % locals()
@local_optimizer() @local_optimizer([MultinomialFromUniform])
def local_gpu_multinomial(node): def local_gpu_multinomial(node):
if type(node.op) is MultinomialFromUniform: if type(node.op) is MultinomialFromUniform:
p, u = node.inputs p, u = node.inputs
......
...@@ -941,7 +941,7 @@ class MRG_RandomStreams(object): ...@@ -941,7 +941,7 @@ class MRG_RandomStreams(object):
return final_samples return final_samples
@local_optimizer([None]) @local_optimizer([mrg_uniform])
def mrg_random_make_inplace(node): def mrg_random_make_inplace(node):
op = node.op op = node.op
if isinstance(op, mrg_uniform) and not op.inplace: if isinstance(op, mrg_uniform) and not op.inplace:
......
...@@ -49,7 +49,7 @@ def info(*msg): ...@@ -49,7 +49,7 @@ def info(*msg):
_logger.info('INFO theano.scan: ' + ' '.join(msg)) _logger.info('INFO theano.scan: ' + ' '.join(msg))
@gof.local_optimizer([None]) @gof.local_optimizer([scan_op.Scan])
def remove_constants_and_unused_inputs_scan(node): def remove_constants_and_unused_inputs_scan(node):
''' '''
Move constants into the inner graph, and remove unused inputs. Move constants into the inner graph, and remove unused inputs.
...@@ -1337,7 +1337,7 @@ def make_equiv(lo, li): ...@@ -1337,7 +1337,7 @@ def make_equiv(lo, li):
return left, right return left, right
@gof.local_optimizer([None]) @gof.local_optimizer([scan_op.Scan])
def scan_merge_inouts(node): def scan_merge_inouts(node):
if not isinstance(node.op, scan_op.Scan): if not isinstance(node.op, scan_op.Scan):
return False return False
......
...@@ -32,7 +32,7 @@ sparse.register_specialize(local_csm_properties_csm) ...@@ -32,7 +32,7 @@ sparse.register_specialize(local_csm_properties_csm)
# This is tested in tests/test_basic.py:test_remove0 # This is tested in tests/test_basic.py:test_remove0
@gof.local_optimizer([None]) @gof.local_optimizer([sparse.Remove0])
def local_inplace_remove0(node): def local_inplace_remove0(node):
""" """
Optimization to insert inplace versions of Remove0. Optimization to insert inplace versions of Remove0.
...@@ -49,7 +49,7 @@ theano.compile.optdb.register('local_inplace_remove0', ...@@ -49,7 +49,7 @@ theano.compile.optdb.register('local_inplace_remove0',
gof.TopoOptimizer(local_inplace_remove0, gof.TopoOptimizer(local_inplace_remove0,
failure_callback=gof.TopoOptimizer.warn_inplace), failure_callback=gof.TopoOptimizer.warn_inplace),
60, 'fast_run', 'inplace') 60, 'fast_run', 'inplace')
@gof.local_optimizer([None]) @gof.local_optimizer([sparse.AddSD])
def local_inplace_addsd(node): def local_inplace_addsd(node):
""" """
Optimization to insert inplace versions of AddSD. Optimization to insert inplace versions of AddSD.
......
...@@ -1645,7 +1645,7 @@ class Dot22(GemmRelated): ...@@ -1645,7 +1645,7 @@ class Dot22(GemmRelated):
_dot22 = Dot22() _dot22 = Dot22()
@local_optimizer([T._dot]) @local_optimizer([T.Dot])
def local_dot_to_dot22(node): def local_dot_to_dot22(node):
# This works for tensor.outer too because basic.outer is a macro that # This works for tensor.outer too because basic.outer is a macro that
# produces a dot(dimshuffle,dimshuffle) of form 4 below # produces a dot(dimshuffle,dimshuffle) of form 4 below
...@@ -2025,7 +2025,7 @@ blas_optdb.register('local_dot22_to_dot22scalar', ...@@ -2025,7 +2025,7 @@ blas_optdb.register('local_dot22_to_dot22scalar',
#from opt import register_specialize, register_canonicalize #from opt import register_specialize, register_canonicalize
#@register_specialize #@register_specialize
@local_optimizer([]) @local_optimizer([T.sub, T.add])
def local_print_as_we_go_along(node): def local_print_as_we_go_along(node):
if node.op in (T.sub, T.add): if node.op in (T.sub, T.add):
debugprint(node) debugprint(node)
...@@ -266,7 +266,7 @@ def make_gpu_optimizer(op, to_gpu): ...@@ -266,7 +266,7 @@ def make_gpu_optimizer(op, to_gpu):
:param to_gpu: a list of op inputs that are moved to the GPU. :param to_gpu: a list of op inputs that are moved to the GPU.
""" """
@theano.gof.local_optimizer([]) @theano.gof.local_optimizer([op, cuda.gpu_from_host])
def local_to_gpu(node): def local_to_gpu(node):
""" """
op(host_from_gpu()) -> host_from_gpu(op) op(host_from_gpu()) -> host_from_gpu(op)
...@@ -302,7 +302,7 @@ if cuda.cuda_available: ...@@ -302,7 +302,7 @@ if cuda.cuda_available:
make_gpu_optimizer(IncDiagonalSubtensor, [0, 3]) make_gpu_optimizer(IncDiagonalSubtensor, [0, 3])
@theano.gof.local_optimizer([None]) @theano.gof.local_optimizer([DiagonalSubtensor, IncDiagonalSubtensor])
def local_inplace_DiagonalSubtensor(node): def local_inplace_DiagonalSubtensor(node):
""" also work for IncDiagonalSubtensor """ """ also work for IncDiagonalSubtensor """
if (isinstance(node.op, (DiagonalSubtensor, IncDiagonalSubtensor)) and if (isinstance(node.op, (DiagonalSubtensor, IncDiagonalSubtensor)) and
......
...@@ -589,7 +589,7 @@ opt.local_mul_canonizer.add_simplifier(softmax_simplifier, ...@@ -589,7 +589,7 @@ opt.local_mul_canonizer.add_simplifier(softmax_simplifier,
if 0: if 0:
@opt.register_specialize @opt.register_specialize
@gof.local_optimizer([]) @gof.local_optimizer([tensor.add])
def local_softmax_grad(node): def local_softmax_grad(node):
'''dy*sm - DimShuffle{0,'x'}(sum{1}(dy*sm))*sm -> softmax_grad(dy,sm)''' '''dy*sm - DimShuffle{0,'x'}(sum{1}(dy*sm))*sm -> softmax_grad(dy,sm)'''
#TODO what if the signs are changed? #TODO what if the signs are changed?
...@@ -1417,7 +1417,7 @@ def _is_const(z, val, approx=False): ...@@ -1417,7 +1417,7 @@ def _is_const(z, val, approx=False):
@opt.register_specialize @opt.register_specialize
@gof.local_optimizer([]) @gof.local_optimizer([subtensor.AdvancedSubtensor, tensor.log])
def local_advanced_indexing_crossentropy_onehot(node): def local_advanced_indexing_crossentropy_onehot(node):
log = None log = None
sm = None sm = None
......
...@@ -347,7 +347,7 @@ compile.optdb['canonicalize'].register( ...@@ -347,7 +347,7 @@ compile.optdb['canonicalize'].register(
@register_canonicalize @register_canonicalize
@register_stabilize @register_stabilize
@gof.local_optimizer([None]) @gof.local_optimizer([T.Dot])
def local_0_dot_x(node): def local_0_dot_x(node):
if not isinstance(node.op, T.Dot): if not isinstance(node.op, T.Dot):
return False return False
...@@ -390,7 +390,7 @@ def local_0_dot_x(node): ...@@ -390,7 +390,7 @@ def local_0_dot_x(node):
###################### ######################
@gof.local_optimizer([None, None]) @gof.local_optimizer([DimShuffle])
def local_dimshuffle_lift(node): def local_dimshuffle_lift(node):
""" """
"Lifts" DimShuffle through Elemwise operations and merges "Lifts" DimShuffle through Elemwise operations and merges
...@@ -431,7 +431,7 @@ def local_dimshuffle_lift(node): ...@@ -431,7 +431,7 @@ def local_dimshuffle_lift(node):
@register_canonicalize @register_canonicalize
@gof.local_optimizer([]) @gof.local_optimizer([T.DimShuffle])
def local_lift_transpose_through_dot(node): def local_lift_transpose_through_dot(node):
""" """
dot(x,y).T -> dot(y.T, x.T) dot(x,y).T -> dot(y.T, x.T)
...@@ -456,7 +456,7 @@ def local_lift_transpose_through_dot(node): ...@@ -456,7 +456,7 @@ def local_lift_transpose_through_dot(node):
return [T.dot(y.T, x.T)] return [T.dot(y.T, x.T)]
@gof.local_optimizer([]) @gof.local_optimizer([DimShuffle])
def dimshuffle_as_view(node): def dimshuffle_as_view(node):
op = node.op op = node.op
if not isinstance(op, DimShuffle) or op.inplace: if not isinstance(op, DimShuffle) or op.inplace:
...@@ -476,7 +476,7 @@ register_specialize(local_dimshuffle_lift) ...@@ -476,7 +476,7 @@ register_specialize(local_dimshuffle_lift)
@register_canonicalize @register_canonicalize
@gof.local_optimizer([]) @gof.local_optimizer([T.DimShuffle])
def local_dimshuffle_no_inplace_at_canonicalize(node): def local_dimshuffle_no_inplace_at_canonicalize(node):
if isinstance(node.op, T.DimShuffle) and node.op.inplace: if isinstance(node.op, T.DimShuffle) and node.op.inplace:
return [T.DimShuffle(node.op.input_broadcastable, return [T.DimShuffle(node.op.input_broadcastable,
...@@ -1211,9 +1211,9 @@ def local_useless_alloc(node): ...@@ -1211,9 +1211,9 @@ def local_useless_alloc(node):
@register_specialize @register_specialize
@register_canonicalize @register_canonicalize
@gof.local_optimizer([T._shape]) @gof.local_optimizer([T.shape])
def local_shape_to_shape_i(node): def local_shape_to_shape_i(node):
if node.op == T._shape: if node.op == T.shape:
# This optimization needs ShapeOpt and fgraph.shape_feature # This optimization needs ShapeOpt and fgraph.shape_feature
if not hasattr(node.fgraph, 'shape_feature'): if not hasattr(node.fgraph, 'shape_feature'):
return return
...@@ -1221,9 +1221,10 @@ def local_shape_to_shape_i(node): ...@@ -1221,9 +1221,10 @@ def local_shape_to_shape_i(node):
return [shape_feature.make_vector_shape(node.inputs[0])] return [shape_feature.make_vector_shape(node.inputs[0])]
# TODO: Not sure what type of node we are expecting here
@register_specialize @register_specialize
@register_canonicalize @register_canonicalize
@gof.local_optimizer([T._shape]) @gof.local_optimizer(None)
def local_track_shape_i(node): def local_track_shape_i(node):
try: try:
shape_feature = node.fgraph.shape_feature shape_feature = node.fgraph.shape_feature
...@@ -1423,7 +1424,7 @@ def local_remove_useless_assert(node): ...@@ -1423,7 +1424,7 @@ def local_remove_useless_assert(node):
return [assert_(node.inputs[0], *cond)] return [assert_(node.inputs[0], *cond)]
@gof.local_optimizer([T.Alloc]) @gof.local_optimizer([T.Elemwise])
def local_alloc_elemwise(node): def local_alloc_elemwise(node):
""" """
elemwise(alloc(x, shp), ..., y.TensorType(BROADCAST CONDITION)) elemwise(alloc(x, shp), ..., y.TensorType(BROADCAST CONDITION))
...@@ -1542,7 +1543,7 @@ else: ...@@ -1542,7 +1543,7 @@ else:
@register_canonicalize @register_canonicalize
@gof.local_optimizer([]) @gof.local_optimizer([T.Elemwise])
def local_upcast_elemwise_constant_inputs(node): def local_upcast_elemwise_constant_inputs(node):
"""This explicitly upcasts constant inputs to elemwise Ops, when """This explicitly upcasts constant inputs to elemwise Ops, when
those Ops do implicit upcasting anyway. those Ops do implicit upcasting anyway.
...@@ -1690,7 +1691,7 @@ def local_useless_subtensor(node): ...@@ -1690,7 +1691,7 @@ def local_useless_subtensor(node):
@register_canonicalize @register_canonicalize
@gof.local_optimizer([]) @gof.local_optimizer([Subtensor])
def local_subtensor_lift(node): def local_subtensor_lift(node):
""" """
unary(x)[idx] -> unary(x[idx])#any broadcast pattern. unary(x)[idx] -> unary(x[idx])#any broadcast pattern.
...@@ -1900,7 +1901,7 @@ def merge_two_slices(slice1, len1, slice2, len2): ...@@ -1900,7 +1901,7 @@ def merge_two_slices(slice1, len1, slice2, len2):
@register_canonicalize @register_canonicalize
@register_specialize @register_specialize
@gof.local_optimizer([]) @gof.local_optimizer([Subtensor])
def local_subtensor_merge(node): def local_subtensor_merge(node):
""" """
Refactored optimization to deal with all cases of tensor merging. Refactored optimization to deal with all cases of tensor merging.
...@@ -1962,7 +1963,7 @@ def local_subtensor_merge(node): ...@@ -1962,7 +1963,7 @@ def local_subtensor_merge(node):
@register_canonicalize @register_canonicalize
@register_specialize @register_specialize
@gof.local_optimizer([]) @gof.local_optimizer([Subtensor])
def local_subtensor_of_alloc(node): def local_subtensor_of_alloc(node):
"""alloc[x:y] -> alloc""" """alloc[x:y] -> alloc"""
if not isinstance(node.op, Subtensor): if not isinstance(node.op, Subtensor):
...@@ -2015,7 +2016,7 @@ def local_subtensor_of_alloc(node): ...@@ -2015,7 +2016,7 @@ def local_subtensor_of_alloc(node):
@register_canonicalize @register_canonicalize
@gof.local_optimizer([None]) @gof.local_optimizer([T.add])
def local_IncSubtensor_serialize(node): def local_IncSubtensor_serialize(node):
""" """
When using Subtensor, gradient graphs can be ugly. When using Subtensor, gradient graphs can be ugly.
...@@ -2087,7 +2088,7 @@ compile.optdb.register('pre_local_IncSubtensor_serialize', ...@@ -2087,7 +2088,7 @@ compile.optdb.register('pre_local_IncSubtensor_serialize',
#after priority 50 Destructive inplace operations #after priority 50 Destructive inplace operations
#gemm is the first one now, at priority 70 #gemm is the first one now, at priority 70
@gof.local_optimizer([None]) @gof.local_optimizer([IncSubtensor]) # XXX: GPU
def local_inplace_setsubtensor(node): def local_inplace_setsubtensor(node):
""" """
Also work for GpuIncSubtensor Also work for GpuIncSubtensor
...@@ -2106,7 +2107,7 @@ compile.optdb.register('local_inplace_setsubtensor', ...@@ -2106,7 +2107,7 @@ compile.optdb.register('local_inplace_setsubtensor',
'fast_run', 'inplace') # DEBUG 'fast_run', 'inplace') # DEBUG
@gof.local_optimizer([None]) @gof.local_optimizer([AdvancedIncSubtensor1]) # XXX: GPU
def local_inplace_incsubtensor1(node): def local_inplace_incsubtensor1(node):
""" also work for GpuAdvancedIncSubtensor1 """ """ also work for GpuAdvancedIncSubtensor1 """
if isinstance(node.op, AdvancedIncSubtensor1) and not node.op.inplace: if isinstance(node.op, AdvancedIncSubtensor1) and not node.op.inplace:
...@@ -2124,7 +2125,7 @@ compile.optdb.register('local_inplace_incsubtensor1', ...@@ -2124,7 +2125,7 @@ compile.optdb.register('local_inplace_incsubtensor1',
@register_canonicalize @register_canonicalize
@register_stabilize @register_stabilize
@gof.local_optimizer([None]) @gof.local_optimizer([IncSubtensor])
def local_incsubtensor_of_allocs(node): def local_incsubtensor_of_allocs(node):
""" """
IncSubtensor(x, zeros, idx) -> x IncSubtensor(x, zeros, idx) -> x
...@@ -2147,7 +2148,7 @@ def local_incsubtensor_of_allocs(node): ...@@ -2147,7 +2148,7 @@ def local_incsubtensor_of_allocs(node):
@register_canonicalize @register_canonicalize
@register_stabilize @register_stabilize
@gof.local_optimizer([None]) @gof.local_optimizer([IncSubtensor])
def local_setsubtensor_of_allocs(node): def local_setsubtensor_of_allocs(node):
""" """
SetSubtensor(x, x[idx], idx) -> x SetSubtensor(x, x[idx], idx) -> x
...@@ -2294,7 +2295,7 @@ def local_join_1(node): ...@@ -2294,7 +2295,7 @@ def local_join_1(node):
############### ###############
@register_canonicalize @register_canonicalize
@gof.local_optimizer([]) @gof.local_optimizer([T.Elemwise])
def local_remove_switch_const_cond(node): def local_remove_switch_const_cond(node):
""" """
This optimization makes the following changes in the graph: This optimization makes the following changes in the graph:
...@@ -2377,7 +2378,7 @@ def local_mul_switch_sink(node): ...@@ -2377,7 +2378,7 @@ def local_mul_switch_sink(node):
@register_canonicalize @register_canonicalize
@gof.local_optimizer([T.true_div]) @gof.local_optimizer([T.true_div, T.int_div, T.floor_div])
def local_div_switch_sink(node): def local_div_switch_sink(node):
""" """
This optimization makes the folowing changes in the graph: This optimization makes the folowing changes in the graph:
...@@ -2421,7 +2422,7 @@ def local_div_switch_sink(node): ...@@ -2421,7 +2422,7 @@ def local_div_switch_sink(node):
################ ################
@register_canonicalize @register_canonicalize
@register_stabilize @register_stabilize
@gof.local_optimizer([]) @gof.local_optimizer([T.Flatten])
def local_flatten_lift(node): def local_flatten_lift(node):
""" """
Flatten(UnaryElemwise(x)) -> UnaryElemwise(Flatten(x)) Flatten(UnaryElemwise(x)) -> UnaryElemwise(Flatten(x))
...@@ -2442,7 +2443,7 @@ def local_flatten_lift(node): ...@@ -2442,7 +2443,7 @@ def local_flatten_lift(node):
################## ##################
@gof.local_optimizer([None, None]) @gof.local_optimizer([T.Reshape])
def local_reshape_chain(node): def local_reshape_chain(node):
""" """
Reshape(Reshape(shape1),shape2) -> Reshape(shape2) Reshape(Reshape(shape1),shape2) -> Reshape(shape2)
...@@ -2470,7 +2471,7 @@ register_canonicalize(local_reshape_chain) ...@@ -2470,7 +2471,7 @@ register_canonicalize(local_reshape_chain)
@register_canonicalize @register_canonicalize
@register_stabilize @register_stabilize
@gof.local_optimizer([]) @gof.local_optimizer([T.Reshape])
def local_reshape_lift(node): def local_reshape_lift(node):
""" """
Reshape(UnaryElemwise(x)) -> UnaryElemwise(Reshape(x)) Reshape(UnaryElemwise(x)) -> UnaryElemwise(Reshape(x))
...@@ -2490,7 +2491,7 @@ def local_reshape_lift(node): ...@@ -2490,7 +2491,7 @@ def local_reshape_lift(node):
if 0: if 0:
# TODO: Test that this optimziation works. # TODO: Test that this optimziation works.
@register_canonicalize @register_canonicalize
@gof.local_optimizer([]) @gof.local_optimizer([T.Reshape])
def local_scalar_reshape(node): def local_scalar_reshape(node):
"""Eliminate reshape Ops whose inputs and outputs are scalars """ """Eliminate reshape Ops whose inputs and outputs are scalars """
if isinstance(node.op, T.Reshape): if isinstance(node.op, T.Reshape):
...@@ -2506,7 +2507,7 @@ if 0: ...@@ -2506,7 +2507,7 @@ if 0:
# TODO: Remember to take into account the new sum dtype argument if this # TODO: Remember to take into account the new sum dtype argument if this
# optimization is enabled. # optimization is enabled.
@register_canonicalize @register_canonicalize
@gof.local_optimizer([]) @gof.local_optimizer([T.Sum])
def local_sum_over_empty(node): def local_sum_over_empty(node):
if isinstance(node.op, T.Sum): if isinstance(node.op, T.Sum):
# This optimization needs ShapeOpt and fgraph.shape_feature # This optimization needs ShapeOpt and fgraph.shape_feature
...@@ -2528,7 +2529,7 @@ if 0: ...@@ -2528,7 +2529,7 @@ if 0:
################## ##################
@gof.local_optimizer([None, T.fill]) @gof.local_optimizer([T.Elemwise])
def local_fill_cut(node): def local_fill_cut(node):
""" """
f(fill(a,b), c) -> f(b, c) f(fill(a,b), c) -> f(b, c)
...@@ -2582,7 +2583,7 @@ register_canonicalize(local_fill_cut) ...@@ -2582,7 +2583,7 @@ register_canonicalize(local_fill_cut)
register_canonicalize(gof.OpRemove(T.tensor_copy), name='remove_tensor_copy') register_canonicalize(gof.OpRemove(T.tensor_copy), name='remove_tensor_copy')
@gof.local_optimizer([None, T.fill]) @gof.local_optimizer([T.Elemwise])
def local_fill_sink(node): def local_fill_sink(node):
""" """
f(fill(a, b), fill(c, d), e) -> fill(a, fill(c, f(b, d, e))) f(fill(a, b), fill(c, d), e) -> fill(a, fill(c, f(b, d, e)))
...@@ -2670,8 +2671,7 @@ class Canonizer(gof.LocalOptimizer): ...@@ -2670,8 +2671,7 @@ class Canonizer(gof.LocalOptimizer):
self.external_simplifiers.append((reason, simplifier)) self.external_simplifiers.append((reason, simplifier))
def tracks(self): def tracks(self):
return [[self.main, None], [self.inverse, None], return [self.main, self.inverse, self.reciprocal]
[self.reciprocal, None]]
def get_num_denum(self, input): def get_num_denum(self, input):
""" """
...@@ -3059,7 +3059,7 @@ register_canonicalize(local_neg_to_mul) ...@@ -3059,7 +3059,7 @@ register_canonicalize(local_neg_to_mul)
@register_specialize @register_specialize
@gof.local_optimizer([]) @gof.local_optimizer([T.Sum])
def local_sum_mul_by_scalar(node): def local_sum_mul_by_scalar(node):
"""sum(scalar * smth) -> scalar * sum(smth) """sum(scalar * smth) -> scalar * sum(smth)
sum(-smth) -> -sum(smth) sum(-smth) -> -sum(smth)
...@@ -3096,7 +3096,7 @@ def local_sum_mul_by_scalar(node): ...@@ -3096,7 +3096,7 @@ def local_sum_mul_by_scalar(node):
@register_specialize @register_specialize
@gof.local_optimizer([]) @gof.local_optimizer([T.Elemwise])
def local_elemwise_sub_zeros(node): def local_elemwise_sub_zeros(node):
""" """
Elemwise{sub}(X,X) -> zeros_like(X) Elemwise{sub}(X,X) -> zeros_like(X)
...@@ -3110,7 +3110,7 @@ def local_elemwise_sub_zeros(node): ...@@ -3110,7 +3110,7 @@ def local_elemwise_sub_zeros(node):
@register_canonicalize @register_canonicalize
@register_specialize @register_specialize
@gof.local_optimizer([]) @gof.local_optimizer([T.Sum])
def local_sum_div_dimshuffle(node): def local_sum_div_dimshuffle(node):
'''sum(a / dimshuffle{...}(b), axis=l) -> sum(a, axis={...}) / b, '''sum(a / dimshuffle{...}(b), axis=l) -> sum(a, axis={...}) / b,
if dimension l of the DimShuffle is 'x'.''' if dimension l of the DimShuffle is 'x'.'''
...@@ -3199,7 +3199,7 @@ def local_sum_div_dimshuffle(node): ...@@ -3199,7 +3199,7 @@ def local_sum_div_dimshuffle(node):
@register_canonicalize @register_canonicalize
@gof.local_optimizer([]) @gof.local_optimizer([T.Sum])
def local_sum_all_to_none(node): def local_sum_all_to_none(node):
"""Sum{0,1,...N} -> Sum{}""" """Sum{0,1,...N} -> Sum{}"""
if isinstance(node.op, T.Sum): if isinstance(node.op, T.Sum):
...@@ -3212,7 +3212,7 @@ def local_sum_all_to_none(node): ...@@ -3212,7 +3212,7 @@ def local_sum_all_to_none(node):
@register_canonicalize @register_canonicalize
@gof.local_optimizer([]) @gof.local_optimizer([T.Sum])
def local_sum_sum(node): def local_sum_sum(node):
""" """
Sum(Sum()) -> Sum Sum(Sum()) -> Sum
...@@ -3278,9 +3278,12 @@ def local_sum_sum(node): ...@@ -3278,9 +3278,12 @@ def local_sum_sum(node):
combined_sum = T.Sum(newaxis, dtype=out_dtype) combined_sum = T.Sum(newaxis, dtype=out_dtype)
return [combined_sum(summed.owner.inputs[0])] return [combined_sum(summed.owner.inputs[0])]
ALL_REDUCE = [T.elemwise.CAReduce, T.elemwise.All, T.elemwise.Any,
T.elemwise.Sum, T.elemwise.Prod,
T.elemwise.ProdWithoutZeros]
@register_canonicalize @register_canonicalize
@gof.local_optimizer([]) @gof.local_optimizer(ALL_REDUCE)
def local_cut_useless_reduce(node): def local_cut_useless_reduce(node):
"""Sum(a, axis=[]) -> a """ """Sum(a, axis=[]) -> a """
if isinstance(node.op, T.CAReduce): if isinstance(node.op, T.CAReduce):
...@@ -3296,7 +3299,7 @@ def local_cut_useless_reduce(node): ...@@ -3296,7 +3299,7 @@ def local_cut_useless_reduce(node):
# #
#@register_canonicalize #@register_canonicalize
@register_specialize @register_specialize
@gof.local_optimizer([]) @gof.local_optimizer(ALL_REDUCE)
def local_reduce_broadcastable(node): def local_reduce_broadcastable(node):
"""Remove reduction over broadcastable dimensions""" """Remove reduction over broadcastable dimensions"""
if isinstance(node.op, T.CAReduce): if isinstance(node.op, T.CAReduce):
...@@ -3335,7 +3338,7 @@ def local_reduce_broadcastable(node): ...@@ -3335,7 +3338,7 @@ def local_reduce_broadcastable(node):
@register_specialize @register_specialize
@gof.local_optimizer([]) @gof.local_optimizer([T.Sum])
def local_sum_alloc(node): def local_sum_alloc(node):
""" sum(alloc(constant,shapes...)) => constant*prod(shapes)""" """ sum(alloc(constant,shapes...)) => constant*prod(shapes)"""
if isinstance(node.op, T.Sum): if isinstance(node.op, T.Sum):
...@@ -3742,7 +3745,7 @@ def local_abs_lift(node): ...@@ -3742,7 +3745,7 @@ def local_abs_lift(node):
@register_specialize @register_specialize
@gof.local_optimizer([]) @gof.local_optimizer([T.mul, T.true_div])
def local_abs_merge(node): def local_abs_merge(node):
""" """
merge abs generated by local_abs_lift when the canonizer don't merge abs generated by local_abs_lift when the canonizer don't
...@@ -3917,8 +3920,7 @@ def attempt_distribution(factor, num, denum): ...@@ -3917,8 +3920,7 @@ def attempt_distribution(factor, num, denum):
neg_pairs))), num, denum neg_pairs))), num, denum
@gof.local_optimizer([T.mul, T.add, T.mul], [T.mul, T.sub, T.mul], @gof.local_optimizer([T.mul])
[T.mul, T.add, T.true_div], [T.mul, T.sub, T.true_div])
def local_greedy_distributor(node): def local_greedy_distributor(node):
""" """
This optimization tries to apply distributivity of multiplication This optimization tries to apply distributivity of multiplication
...@@ -3984,7 +3986,7 @@ register_canonicalize(local_greedy_distributor) ...@@ -3984,7 +3986,7 @@ register_canonicalize(local_greedy_distributor)
register_stabilize(local_greedy_distributor) register_stabilize(local_greedy_distributor)
@gof.local_optimizer([None]) @gof.local_optimizer(None)
def constant_folding(node): def constant_folding(node):
for input in node.inputs: for input in node.inputs:
if not isinstance(input, Constant): if not isinstance(input, Constant):
......
...@@ -55,7 +55,7 @@ def local_max_and_argmax(node): ...@@ -55,7 +55,7 @@ def local_max_and_argmax(node):
return [new, None] return [new, None]
@register_uncanonicalize @register_uncanonicalize
@gof.local_optimizer([T._shape]) @gof.local_optimizer([T.neg])
def local_max_to_min(node): def local_max_to_min(node):
""" """
change -(max(-x)) to min change -(max(-x)) to min
......
...@@ -816,7 +816,7 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5], ...@@ -816,7 +816,7 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5],
return op(random_state, size, n, pvals) return op(random_state, size, n, pvals)
@gof.local_optimizer([None]) @gof.local_optimizer([RandomFunction])
def random_make_inplace(node): def random_make_inplace(node):
op = node.op op = node.op
if isinstance(op, RandomFunction) and not op.inplace: if isinstance(op, RandomFunction) and not op.inplace:
......
...@@ -3361,10 +3361,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3361,10 +3361,8 @@ class T_Join_and_Split(unittest.TestCase):
utt.verify_grad((lambda a, b: join(0, a, b)), [a_val, b_val], rng=rng) utt.verify_grad((lambda a, b: join(0, a, b)), [a_val, b_val], rng=rng)
def test_broadcastable_single_input_broadcastable_dimension(self): def test_broadcastable_single_input_broadcastable_dimension(self):
""" # Test that all broadcastable flags are preserved by a
Test that all broadcastable flags are preserved by a # single-input join.
single-input join.
"""
rng = numpy.random.RandomState(seed=utt.fetch_seed()) rng = numpy.random.RandomState(seed=utt.fetch_seed())
a_val = rng.rand(1, 4, 1).astype(self.floatX) a_val = rng.rand(1, 4, 1).astype(self.floatX)
a = self.shared(a_val, broadcastable=(True, False, True)) a = self.shared(a_val, broadcastable=(True, False, True))
...@@ -3387,10 +3385,8 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3387,10 +3385,8 @@ class T_Join_and_Split(unittest.TestCase):
#self.assertRaises(TypeError, f, bad_a_val) #self.assertRaises(TypeError, f, bad_a_val)
def test_broadcastable_flags_many_dims_and_inputs(self): def test_broadcastable_flags_many_dims_and_inputs(self):
""" # Test that the right broadcastable flags get set for a join
Test that the right broadcastable flags get set for a join # with many inputs and many input dimensions.
with many inputs and many input dimensions.
"""
a = TensorType(dtype=self.floatX, broadcastable=[1, 0, 1, 0, 0, 0])() a = TensorType(dtype=self.floatX, broadcastable=[1, 0, 1, 0, 0, 0])()
b = TensorType(dtype=self.floatX, broadcastable=[1, 1, 1, 0, 0, 0])() b = TensorType(dtype=self.floatX, broadcastable=[1, 1, 1, 0, 0, 0])()
c = TensorType(dtype=self.floatX, broadcastable=[1, 0, 0, 0, 0, 0])() c = TensorType(dtype=self.floatX, broadcastable=[1, 0, 0, 0, 0, 0])()
...@@ -3479,20 +3475,16 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3479,20 +3475,16 @@ class T_Join_and_Split(unittest.TestCase):
f(get_mat(3, 4), get_mat(3, 4), get_mat(2, 5)) f(get_mat(3, 4), get_mat(3, 4), get_mat(2, 5))
def test_rebroadcast(self): def test_rebroadcast(self):
""" # Regression test for a crash that used to happen when rebroadcasting.
Regression test for a crash that used to happen when rebroadcasting.
"""
x = tensor.TensorType(self.floatX, [False, False, True])() x = tensor.TensorType(self.floatX, [False, False, True])()
u = tensor.TensorType(self.floatX, [False, False, True])() u = tensor.TensorType(self.floatX, [False, False, True])()
# This line used to crash. # This line used to crash.
z = tensor.concatenate([x, -u], axis=2) z = tensor.concatenate([x, -u], axis=2)
def test_concatenate_same(self): def test_concatenate_same(self):
""" # Test that we can concatenate the same tensor multiple time.
Test that we can concatenate the same tensor multiple time.
In the past it was broken on the GPU. # In the past it was broken on the GPU.
"""
rng = numpy.random.RandomState(seed=utt.fetch_seed()) rng = numpy.random.RandomState(seed=utt.fetch_seed())
T_shared = self.shared(rng.rand(3, 4).astype(self.floatX)) T_shared = self.shared(rng.rand(3, 4).astype(self.floatX))
Tout = tensor.concatenate([T_shared, T_shared]) Tout = tensor.concatenate([T_shared, T_shared])
......