Commit b6b2c608 authored by James Bergstra


code in a mess, but gemm-optimization works on more systematic test cases including Joseph's NAACL graph
Parent 43291f46
@@ -63,11 +63,20 @@ def register_optimizer(name, opt):
         raise ValueError('Optimizer name already taken: %s' % name)
     predefined_optimizers[name] = opt
 
+class AddDestroyHandler(gof.Optimizer):
+    def apply(self, env):
+        pass
+    def add_requirements(self, env):
+        super(AddDestroyHandler, self).add_requirements(env)
+        env.extend(gof.DestroyHandler())
+
 optdb = gof.SequenceDB()
 optdb.register('merge1', gof.MergeOptimizer(), 0, 'fast_run', 'fast_compile')
 optdb.register('canonicalize', gof.EquilibriumDB(), 1, 'fast_run')
 optdb.register('specialize', gof.EquilibriumDB(), 2, 'fast_run')
-optdb.register('merge2', gof.EquilibriumDB(), 100, 'fast_run')
+optdb.register('merge2', gof.EquilibriumDB(), 49, 'fast_run')
+optdb.register('add_destroy_handler', AddDestroyHandler(), 49.5, 'fast_run', 'inplace')
+optdb.register('merge3', gof.EquilibriumDB(), 100, 'fast_run')
 
 class Mode(object):
...
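Note: SequenceDB runs its registered optimizations in ascending priority order, so the renumbering above slots the new AddDestroyHandler (49.5) between 'merge2' (now 49) and a final 'merge3' pass (100). A minimal sketch of that ordering mechanism, using toy stand-ins rather than the real gof classes:

    # Toy model of SequenceDB priority ordering (hypothetical classes).
    class ToySequenceDB(object):
        def __init__(self):
            self.priority = {}  # name -> numeric priority
            self.objs = {}      # name -> optimizer object

        def register(self, name, obj, priority, *tags):
            self.objs[name] = obj
            self.priority[name] = priority

        def query(self):
            # lower numbers run first, exactly as in optdb above
            return [self.objs[n] for n in sorted(self.objs, key=self.priority.get)]

    db = ToySequenceDB()
    db.register('merge2', 'MERGE2', 49)
    db.register('add_destroy_handler', 'DESTROY', 49.5)
    db.register('merge3', 'MERGE3', 100)
    db.register('merge1', 'MERGE1', 0)
    print(db.query())  # ['MERGE1', 'MERGE2', 'DESTROY', 'MERGE3']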
@@ -20,15 +20,14 @@ from link import \
 from op import \
     Op
-from opt import \
-    Optimizer, optimizer, SeqOptimizer, \
-    MergeOptimizer, MergeOptMerge, \
-    LocalOptimizer, local_optimizer, LocalOptGroup, \
-    OpSub, OpRemove, PatternSub, \
-    NavigatorOptimizer, TopoOptimizer, EquilibriumOptimizer, \
-    keep_going, warn, \
-    InplaceOptimizer, PureThenInplaceOptimizer
-#LocalOpKeyOptGroup, OpKeyOptimizer
+from opt import (Optimizer, optimizer, SeqOptimizer,
+    MergeOptimizer, MergeOptMerge,
+    LocalOptimizer, local_optimizer, LocalOptGroup,
+    OpSub, OpRemove, PatternSub,
+    NavigatorOptimizer, TopoOptimizer, EquilibriumOptimizer,
+    keep_going, warn,
+    InplaceOptimizer, PureThenInplaceOptimizer,
+    OpKeyOptimizer)
 from optdb import \
     DB, Query, \
...
@@ -265,6 +265,11 @@ class LocalOptimizer(object):
         raise utils.AbstractFunctionError()
 
+    def add_requirements(self, env):
+        """If this local optimization wants to add some requirements to the env,
+        this is the place to do it."""
+        env.extend(toolbox.ReplaceValidate())
+
 class FromFunctionLocalOptimizer(LocalOptimizer):
     """WRITEME"""
@@ -273,8 +278,6 @@ class FromFunctionLocalOptimizer(LocalOptimizer):
         self._tracks = tracks
 
     def tracks(self):
         return self._tracks
-    def add_requirements(self, env):
-        env.extend(toolbox.ReplaceValidate())
 
     def __str__(self):
         return getattr(self, 'name', '<FromFunctionLocalOptimizer instance>')
@@ -551,7 +554,7 @@ class NavigatorOptimizer(Optimizer):
     def __init__(self, local_opt, ignore_newtrees = 'auto', failure_callback = None):
         """
-        :param local_opt: a LocalOptimizer to apply over a Env.
+        :param local_opt: a LocalOptimizer to apply over a Env (or None is Ok too).
         :param ignore_newtrees:
             - True: new subgraphs returned by an optimization is not a candidate for optimization
             - False: new subgraphs returned by an optimization is a candidate for optimization
@@ -617,6 +620,24 @@ class NavigatorOptimizer(Optimizer):
             env.remove_feature(u)
 
     def process_node(self, env, node, lopt = None):
+        """
+        This function will use `lopt` to `transform` the `node`. The `transform` method will
+        return either False or a list of Results that are intended to replace `node.outputs`.
+
+        If the env accepts the replacement, then the optimization is successful, and this
+        function returns True.
+
+        If there are no replacement candidates or the env rejects the replacements, this
+        function returns False.
+
+        :param env: an Env
+        :param node: an Apply instance in `env`
+        :param lopt: a LocalOptimizer instance that may have a better idea for how to compute
+            node's outputs.
+
+        :rtype: Bool
+        :returns: True iff the `node`'s outputs were replaced in the `env`.
+        """
         lopt = lopt or self.local_opt
         try:
             replacements = lopt.transform(node)
@@ -633,23 +654,21 @@ class NavigatorOptimizer(Optimizer):
             env.replace_all_validate(repl_pairs)
             return True
         except Exception, e:
+            # This means the replacements were rejected by the env.
+            #
+            # This is not supposed to happen. The default failure_callback will print a
+            # traceback as a warning.
             if self.failure_callback is not None:
                 self.failure_callback(e, self, repl_pairs)
-                #DEBUG DONT PUSH
-                #print lopt
-                #print dir(lopt)
-                #raise
-                #END
                 return False
             else:
                 raise
 
     def add_requirements(self, env):
+        super(NavigatorOptimizer, self).add_requirements(env)
         env.extend(toolbox.ReplaceValidate())
+        if self.local_opt:
+            self.local_opt.add_requirements(env)
 
 class TopoOptimizer(NavigatorOptimizer):
     """WRITEME"""
@@ -722,7 +741,7 @@ class OpKeyOptimizer(NavigatorOptimizer):
          - NodeFinder
          - ReplaceValidate
        """
-        NavigatorOptimizer.add_requirements(self, env)
+        super(OpKeyOptimizer, self).add_requirements(env)
        env.extend(toolbox.NodeFinder())
...
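Note: the common theme of the opt.py changes is a cooperative add_requirements chain: the base LocalOptimizer now installs ReplaceValidate itself, NavigatorOptimizer calls super() and forwards to its local optimizer (which may be None), and OpKeyOptimizer replaces the hard-coded NavigatorOptimizer.add_requirements(self, env) call with super(). A toy sketch of the pattern, not the real gof API:

    # Cooperative add_requirements chain (hypothetical stand-in classes).
    class Optimizer(object):
        def add_requirements(self, env):
            pass  # base case: nothing to add

    class NavigatorOptimizer(Optimizer):
        def __init__(self, local_opt=None):
            self.local_opt = local_opt

        def add_requirements(self, env):
            super(NavigatorOptimizer, self).add_requirements(env)
            env.append('ReplaceValidate')  # stands in for env.extend(...)
            if self.local_opt:             # local_opt may be None
                self.local_opt.add_requirements(env)

    class OpKeyOptimizer(NavigatorOptimizer):
        def add_requirements(self, env):
            super(OpKeyOptimizer, self).add_requirements(env)
            env.append('NodeFinder')

    env = []
    OpKeyOptimizer().add_requirements(env)
    print(env)  # ['ReplaceValidate', 'NodeFinder']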
@@ -13,6 +13,8 @@ class DB(object):
     def __init__(self):
         self.__db__ = defaultdict(set)
         self._names = set()
+        self.name = None #will be reset by register
+        #(via obj.name by the thing doing the registering)
 
     def register(self, name, obj, *tags):
         # N.B. obj is not an instance of class Optimizer.
@@ -21,6 +23,8 @@ class DB(object):
         if not isinstance(obj, (DB, opt.Optimizer, opt.LocalOptimizer)):
             raise Exception('wtf', obj)
+        if self.name is not None:
+            tags = tags + (self.name,)
         obj.name = name
         if name in self.__db__:
             raise ValueError('The name of the object cannot be an existing tag or the name of another existing object.', obj, name)
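Note: the two added lines mean a DB's own name (set when it is registered inside another DB) gets appended as a tag on everything registered within it, so nested registrations become reachable by the enclosing name. A rough, simplified sketch of the effect (the isinstance guard below is an artifact of the toy version; the real code sets obj.name unconditionally):

    # Simplified model of DB's name-as-tag propagation.
    class ToyDB(object):
        def __init__(self):
            self.name = None    # reset when registered inside another DB
            self.entries = []   # (name, obj, tags) triples

        def register(self, name, obj, *tags):
            if self.name is not None:
                tags = tags + (self.name,)  # enclosing DB's name becomes a tag
            if isinstance(obj, ToyDB):
                obj.name = name
            self.entries.append((name, obj, tags))

    outer = ToyDB()
    inner = ToyDB()
    outer.register('canonicalize', inner, 'fast_run')
    inner.register('local_fill', 'SOME_OPT')
    print(inner.entries)  # [('local_fill', 'SOME_OPT', ('canonicalize',))]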
@@ -118,9 +122,10 @@ class EquilibriumDB(DB):
 
 class SequenceDB(DB):
-    def __init__(self):
+    def __init__(self, failure_callback = opt.warn):
         super(SequenceDB, self).__init__()
         self.__priority__ = {}
+        self.failure_callback = failure_callback
 
     def register(self, name, obj, priority, *tags):
         super(SequenceDB, self).register(name, obj, *tags)
@@ -130,6 +135,6 @@ class SequenceDB(DB):
         opts = super(SequenceDB, self).query(*tags, **kwtags)
         opts = list(opts)
         opts.sort(key = lambda obj: self.__priority__[obj.name])
-        return opt.SeqOptimizer(opts, failure_callback = opt.warn)
+        return opt.SeqOptimizer(opts, failure_callback = self.failure_callback)
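Note: SequenceDB.query used to hard-code opt.warn as the failure handler; making it a constructor argument (with the same default) lets each SequenceDB owner choose the failure policy. A short sketch of the injection idea, again with toy stand-ins:

    # Sketch: failure handling is injected instead of hard-coded.
    def warn(exc, *rest):
        print('warning, optimization failed: %s' % exc)

    def raise_instead(exc, *rest):
        raise exc  # a stricter policy a caller might prefer

    class ToySeqOptimizer(object):
        def __init__(self, opts, failure_callback=warn):
            self.opts = opts
            self.failure_callback = failure_callback

        def apply(self, env):
            for o in self.opts:
                try:
                    o(env)
                except Exception as e:
                    self.failure_callback(e, self, o)

    def bad_opt(env):
        raise ValueError('boom')

    ToySeqOptimizer([bad_opt]).apply({})                   # warns and continues
    # ToySeqOptimizer([bad_opt], raise_instead).apply({})  # would raise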
Diff collapsed.
@@ -316,7 +316,7 @@ class Elemwise(Op):
           scalars
         * inplace_pattern: a dictionary that maps the index of an output to the
           index of an input so the output is calculated inplace using
-          the input's storage.
+          the input's storage. (Just like destroymap, but without the lists.)
         """
         self.name = name
         self.scalar_op = scalar_op
@@ -357,16 +357,21 @@ class Elemwise(Op):
                 args.append(input)
             else:
                 # TODO: use LComplete instead
-                args.append(DimShuffle(input.type.broadcastable, ['x']*difference + range(length), inplace = True)(input))
+                args.append(DimShuffle(
+                    input.type.broadcastable,
+                    ['x']*difference + range(length),
+                    inplace = True)(input))
         inputs = args
-        # # Following conditions should always be true?
-        # try:
-        #     assert len(set([len(input.type.broadcastable) for input in inputs])) == 1
-        # except (AssertionError, AttributeError):
-        #     raise TypeError("All inputs to a Broadcast subclass must be Tensor instances and their broadcastable fields must all have the same length.", inputs)
+        #HERE: all the broadcast dims have the same length now
+
+        #cleverness: we iterate over the first, second, third broadcast flag of all inputs in
+        #parallel... the all() gives us each output broadcastable bit in turn.
+        #it is multiplied by nout because Elemwise supports multiple outputs (nout of them)
         out_broadcastables = [[all(bcast) for bcast in zip(*[input.type.broadcastable for input in inputs])]] * shadow.nout
+        #inplace_pattern maps output idx -> input idx
         inplace_pattern = self.inplace_pattern
         if inplace_pattern:
             for overwriter, overwritten in inplace_pattern.items():
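Note: several fixes in this file follow from the convention the new comment states explicitly: inplace_pattern maps an output index to the input index whose storage it reuses. The dtype check and the dmap construction below previously unpacked the pairs backwards. A tiny sketch of the corrected orientation, using plain dicts:

    # inplace_pattern: {output index: input index};
    # here output 0 reuses the storage of input 1.
    inplace_pattern = {0: 1}
    input_dtypes = ['float64', 'int32']
    out_dtypes = ['int32']

    # corrected unpacking: o is the output index, i the input index
    mismatch = any(input_dtypes[i] != out_dtypes[o]
                   for o, i in inplace_pattern.items())
    print(mismatch)  # False: output 0 (int32) matches input 1 (int32)

    # the old `for i, o in ...` unpacking would have read out_dtypes[1]
    # here, an IndexError, and in general checked the wrong pairs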
@@ -374,21 +379,32 @@ class Elemwise(Op):
                 if ib and not ob:
                     raise ValueError("Operation cannot be done inplace on an input with broadcasted dimensions.")
         out_dtypes = [o.type.dtype for o in shadow.outputs]
-        if any(inputs[i].type.dtype != out_dtypes[o] for i, o in inplace_pattern.items()):
-            raise TypeError("Cannot do an inplace operation on incompatible data types.", [i.type.dtype for i in inputs], out_dtypes)
+        if any(inputs[i].type.dtype != out_dtypes[o] for o, i in inplace_pattern.items()):
+            raise TypeError("Cannot do an inplace operation on incompatible data types.",
+                    ([i.type.dtype for i in inputs], out_dtypes, inplace_pattern))
         outputs = [Tensor(dtype = dtype, broadcastable = broadcastable)() for dtype, broadcastable in zip(out_dtypes, out_broadcastables)]
         return Apply(self, inputs, outputs)
 
     def __eq__(self, other):
-        return type(self) == type(other) and self.scalar_op == other.scalar_op and self.inplace_pattern == other.inplace_pattern
+        if type(self) == type(other):
+            items = self.inplace_pattern.items()
+            other_items = other.inplace_pattern.items()
+            items.sort()
+            other_items.sort()
+            return self.scalar_op == other.scalar_op and items == other_items
+        return False
 
     def __hash__(self):
-        return hash(self.scalar_op) ^ hash(tuple(self.inplace_pattern.items()))
+        items = self.inplace_pattern.items()
+        items.sort()
+        return hash(self.scalar_op) ^ hash(tuple(items))
 
     def __str__(self):
         if self.name is None:
             if self.inplace_pattern:
-                return "Elemwise{%s}%s" % (self.scalar_op, str(self.inplace_pattern))
+                items = self.inplace_pattern.items()
+                items.sort()
+                return "Elemwise{%s}%s" % (self.scalar_op, str(items))
             else:
                 return "Elemwise{%s}" % (self.scalar_op)
         else:
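Note: dict.items() returns pairs in no guaranteed order, so two Elemwise ops built from equal inplace_patterns could previously hash or print differently; sorting the items first makes __eq__, __hash__, and __str__ consistent with one another. A quick self-contained illustration of the invariant being enforced:

    # Equal dicts must contribute identical material to hashing and printing.
    a = {0: 1, 2: 3}
    b = {2: 3, 0: 1}   # same mapping, different insertion order
    assert a == b

    # sorted items give a canonical, hashable form
    items_a = tuple(sorted(a.items()))
    items_b = tuple(sorted(b.items()))
    assert items_a == items_b
    assert hash(items_a) == hash(items_b)
    print(items_a)  # ((0, 1), (2, 3)) -- the same string either way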
@@ -467,6 +483,7 @@ class Elemwise(Op):
                     storage[0] = odat
             else:
                 for i, (output, storage) in enumerate(zip(node.outputs, output_storage)):
+                    #i is an output idx
                     if i in self.inplace_pattern:
                         odat = inputs[self.inplace_pattern[i]]
                     else:
@@ -500,7 +517,7 @@ class Elemwise(Op):
         defines = ""
         undefs = ""
-        dmap = dict([(node.outputs[i], [node.inputs[o]]) for i, o in self.inplace_pattern.items()])
+        dmap = dict([(node.outputs[o], [node.inputs[i]]) for o, i in self.inplace_pattern.items()])
         idtypes = [input.type.dtype_specs()[1] for input in inputs]
...

Diff collapsed.
@@ -155,14 +155,14 @@ class QuadraticDenoisingAA(T.RModule):
         updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients))
 
         # INTERFACE METHODS
-        self.update = theano.Method(self.input, self.ncost, updates)
-        self.compute_cost = theano.Method(self.input, self.cost)
-        self.noisify = theano.Method(self.input, self.corrupted_input)
-        self.reconstruction = theano.Method(self.input, self.output)
-        self.representation = theano.Method(self.input, self.hidden)
-        self.reconstruction_through_noise = theano.Method(self.input, [self.corrupted_input, self.noutput])
-        self.validate = theano.Method(self.input, [self.cost, self.output])
+        #self.update = theano.Method(self.input, self.ncost, updates)
+        #self.compute_cost = theano.Method(self.input, self.cost)
+        #self.noisify = theano.Method(self.input, self.corrupted_input)
+        #self.reconstruction = theano.Method(self.input, self.output)
+        #self.representation = theano.Method(self.input, self.hidden)
+        #self.reconstruction_through_noise = theano.Method(self.input, [self.corrupted_input, self.noutput])
+        #self.validate = theano.Method(self.input, [self.cost, self.output])
 
     def _instance_initialize(self, obj, input_size, hidden_size, seed, lr, qfilter_relscale):
         """
@@ -291,16 +291,16 @@ class Module_Nclass(module.FancyModule):
         #define the apply method
         self.pred = T.argmax(linear_output, axis=1)
-        self.apply = module.Method([self.input], self.pred)
-        self.validate = module.Method([self.input, self.targ], [self.cost, self.argmax, self.max_pr])
-        self.softmax_output = module.Method([self.input], self.softmax_unsupervised)
+        #self.apply = module.Method([self.input], self.pred)
+        #self.validate = module.Method([self.input, self.targ], [self.cost, self.argmax, self.max_pr])
+        #self.softmax_output = module.Method([self.input], self.softmax_unsupervised)
 
         if self.params:
             gparams = T.grad(sum_xent, self.params)
-            self.update = module.Method([self.input, self.targ], sum_xent,
-                    updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams)))
+            #self.update = module.Method([self.input, self.targ], sum_xent,
+                    #updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams)))
 
 class ConvolutionalMLPInstance(module.FancyModuleInstance, Loss01):
     #initialize is called by Module.make
@@ -366,11 +366,6 @@ class ConvolutionalMLP(module.FancyModule):
                 )
             )
-        # to_update = []
-        # all_kits = []
-        # input_update = self.input_representations[0].update
-        # input_update.resolve_all()
         for i in self.inputs[1:]:
             self.input_representations.append(
                 QDAA(
@@ -411,11 +406,17 @@ class ConvolutionalMLP(module.FancyModule):
             ] + self.hidden.qfilters
         input_pretraining_cost = sum(i.ncost for i in self.input_representations)
         hidden_pretraining_cost = self.hidden.ncost
-        input_pretraining_gradients = T.grad(input_pretraining_cost, input_pretraining_params)
+        input_pretraining_gradients = T.grad(input_pretraining_cost,
+                input_pretraining_params)
         hidden_pretraining_gradients = T.grad(hidden_pretraining_cost, hidden_pretraining_params)
-        pretraining_updates = dict((p, p - self.lr * g) for p, g in zip(input_pretraining_params, input_pretraining_gradients) +
-                zip(hidden_pretraining_params, hidden_pretraining_gradients))
-        self.pretraining_update = module.Method(self.inputs, [input_pretraining_cost, hidden_pretraining_cost], pretraining_updates)
+        pretraining_updates = \
+                dict((p, p - self.lr * g) for p, g in \
+                    zip(input_pretraining_params, input_pretraining_gradients) \
+                    + zip(hidden_pretraining_params, hidden_pretraining_gradients))
+        self.pretraining_update = module.Method(self.inputs,
+                [input_pretraining_cost, hidden_pretraining_cost],
+                pretraining_updates)
 
         finetuning_params = \
             [self.input_representations[0].w1, self.input_representations[0].b1] + self.input_representations[0].qfilters + \
@@ -426,9 +427,8 @@ class ConvolutionalMLP(module.FancyModule):
         finetuning_updates = dict((p, p - self.lr * g) for p, g in zip(finetuning_params, finetuning_gradients))
         self.finetuning_update = module.Method(self.inputs + [self.targ], self.output.cost, finetuning_updates)
-        self.validate = module.Method(self.inputs + [self.targ], [self.output.cost, self.output.argmax, self.output.max_pr])
-        self.softmax_output = module.Method(self.inputs, self.output.softmax_unsupervised)
+        #self.validate = module.Method(self.inputs + [self.targ], [self.output.cost, self.output.argmax, self.output.max_pr])
+        #self.softmax_output = module.Method(self.inputs, self.output.softmax_unsupervised)
 
 def create(window_size=3,
            input_dimension=9,
@@ -462,15 +462,21 @@ JTEST = theano.compile.mode.optdb.query(*sys.argv[2:])
 print 'JTEST', JTEST
 theano.compile.register_optimizer('JTEST', JTEST)
 
 if __name__ == '__main__':
     optimizer = eval(sys.argv[1])
     m = create(compile_mode = theano.Mode(linker='c|py', optimizer=optimizer))
     prog_str = []
-    for i, node in enumerate(m.finetuning_update.maker.env.toposort()):
-        #print ' ', i, node
+    idx_of_node = {}
+    for i, node in enumerate(m.pretraining_update.maker.env.toposort()):
+        idx_of_node[node] = i
+        if False and i > -1:
+            print ' ', i, node, [(ii, idx_of_node.get(ii.owner, 'IN')) for ii in node.inputs]
         prog_str.append(str(node))
-    print "PROGRAM LEN %i HASH %i"% (len(m.finetuning_update.maker.env.nodes), reduce(lambda a, b: hash(a) ^ hash(b),prog_str))
+    #print input_pretraining_gradients[4].owner.inputs
+    #print input_pretraining_gradients[4].owner.inputs[1].owner.inputs
+    #sys.exit()
+    print "PROGRAM LEN %i HASH %i"% (len(m.pretraining_update.maker.env.nodes), reduce(lambda a, b: hash(a) ^ hash(b),prog_str))
 
     rng = N.random.RandomState(23904)
...