提交 c1124b59 authored 作者: Nicholas Leonard's avatar Nicholas Leonard

Merge branch 'master' of https://github.com/Theano/Theano

...@@ -607,6 +607,27 @@ dimensions, see :meth:`_tensor_py_operators.dimshuffle`. ...@@ -607,6 +607,27 @@ dimensions, see :meth:`_tensor_py_operators.dimshuffle`.
have shape (2, 60). have shape (2, 60).
.. function:: tile(x, reps, ndim=None)
Construct an array by repeating the input `x` according to `reps`
pattern.
Tiles its input according to `reps`. The length of `reps` is the
number of dimensions of `x` and contains the number of times to
tile `x` in each dimension.
:see: `numpy.tile
<http://docs.scipy.org/doc/numpy/reference/generated/numpy.tile.html>`_
documentation for examples.
:see: :func:`theano.tensor.extra_ops.repeat
<theano.tensor.extra_ops.repeat>`
:note: Currently, `reps` must be a constant, `x.ndim` and
`len(reps)` must be equal and, if specified, `ndim` must be
equal to both.
Creating Tensor Creating Tensor
=============== ===============
......
...@@ -57,7 +57,10 @@ from theano.gof.link import \ ...@@ -57,7 +57,10 @@ from theano.gof.link import \
from theano.gof.op import \ from theano.gof.op import \
Op, OpenMPOp, PureOp, ops_with_inner_function Op, OpenMPOp, PureOp, ops_with_inner_function
from theano.gof.opt import (Optimizer, optimizer, SeqOptimizer, from theano.gof.opt import (
Optimizer,
optimizer, inplace_optimizer,
SeqOptimizer,
MergeOptimizer, MergeOptMerge, MergeOptimizer, MergeOptMerge,
LocalOptimizer, local_optimizer, LocalOptGroup, LocalOptimizer, local_optimizer, LocalOptGroup,
OpSub, OpRemove, PatternSub, OpSub, OpRemove, PatternSub,
......
...@@ -114,13 +114,13 @@ class Optimizer(object): ...@@ -114,13 +114,13 @@ class Optimizer(object):
class FromFunctionOptimizer(Optimizer): class FromFunctionOptimizer(Optimizer):
"""WRITEME""" """WRITEME"""
def __init__(self, fn): def __init__(self, fn, requirements=()):
self.apply = fn self.apply = fn
self.requirements = requirements
def add_requirements(self, fgraph): def add_requirements(self, fgraph):
# Added by default for req in self.requirements:
#fgraph.attach_feature(toolbox.ReplaceValidate()) req(fgraph)
pass
def print_summary(self, stream=sys.stdout, level=0, depth=-1): def print_summary(self, stream=sys.stdout, level=0, depth=-1):
print >> stream, "%s%s id=%i" % ( print >> stream, "%s%s id=%i" % (
...@@ -142,6 +142,16 @@ def optimizer(f): ...@@ -142,6 +142,16 @@ def optimizer(f):
return rval return rval
def inplace_optimizer(f):
    """Decorator building a FromFunctionOptimizer for an inplace optimization.

    Wraps `f` exactly like :func:`optimizer`, but additionally registers a
    requirement that attaches a DestroyHandler feature to the fgraph, which
    inplace (destructive) optimizations need for validation.
    """
    # Bind the class once at decoration time, as the original did, so the
    # requirement does not re-resolve the attribute on every fgraph.
    handler_cls = dh.DestroyHandler

    def _attach_destroy_handler(fgraph):
        fgraph.attach_feature(handler_cls())

    wrapped = FromFunctionOptimizer(f, (_attach_destroy_handler,))
    wrapped.__name__ = f.__name__
    return wrapped
class SeqOptimizer(Optimizer, list): class SeqOptimizer(Optimizer, list):
#inherit from Optimizer first to get Optimizer.__hash__ #inherit from Optimizer first to get Optimizer.__hash__
"""WRITEME """WRITEME
...@@ -790,9 +800,14 @@ class LocalOptimizer(object): ...@@ -790,9 +800,14 @@ class LocalOptimizer(object):
class FromFunctionLocalOptimizer(LocalOptimizer): class FromFunctionLocalOptimizer(LocalOptimizer):
"""WRITEME""" """WRITEME"""
def __init__(self, fn, tracks=None): def __init__(self, fn, tracks=None, requirements=()):
self.transform = fn self.transform = fn
self._tracks = tracks self._tracks = tracks
self.requirements = requirements
def add_requirements(self, fgraph):
    """Run every registered requirement callable against `fgraph`.

    Each entry of ``self.requirements`` is a callable taking the fgraph;
    typically it attaches a feature the optimization depends on.
    """
    for requirement in self.requirements:
        requirement(fgraph)
def tracks(self): def tracks(self):
return self._tracks return self._tracks
...@@ -808,7 +823,7 @@ class FromFunctionLocalOptimizer(LocalOptimizer): ...@@ -808,7 +823,7 @@ class FromFunctionLocalOptimizer(LocalOptimizer):
id(self)) id(self))
def local_optimizer(tracks): def local_optimizer(tracks, inplace=False):
def decorator(f): def decorator(f):
"""WRITEME""" """WRITEME"""
if tracks is not None: if tracks is not None:
...@@ -817,7 +832,12 @@ def local_optimizer(tracks): ...@@ -817,7 +832,12 @@ def local_optimizer(tracks):
for t in tracks: for t in tracks:
if not (isinstance(t, op.Op) or issubclass(t, op.PureOp)): if not (isinstance(t, op.Op) or issubclass(t, op.PureOp)):
raise ValueError, ("Tracks are op classes or instances", f.__module__, f.__name__) raise ValueError, ("Tracks are op classes or instances", f.__module__, f.__name__)
rval = FromFunctionLocalOptimizer(f, tracks) requirements = ()
if inplace:
dh_handler = dh.DestroyHandler
requirements = (lambda fgraph:
fgraph.attach_feature(dh_handler()),)
rval = FromFunctionLocalOptimizer(f, tracks, requirements)
rval.__name__ = f.__name__ rval.__name__ = f.__name__
return rval return rval
return decorator return decorator
...@@ -852,6 +872,10 @@ class LocalOptGroup(LocalOptimizer): ...@@ -852,6 +872,10 @@ class LocalOptGroup(LocalOptimizer):
for lopt in self.opts: for lopt in self.opts:
lopt.print_summary(stream, level=(level + 2), depth=depth) lopt.print_summary(stream, level=(level + 2), depth=depth)
def add_requirements(self, fgraph):
    """Forward requirement registration to every optimizer in the group.

    Delegates to each sub-optimizer's own ``add_requirements`` so the
    group imposes exactly the union of its members' fgraph requirements.
    """
    for sub_opt in self.opts:
        sub_opt.add_requirements(fgraph)
class _LocalOpKeyOptGroup(LocalOptGroup): class _LocalOpKeyOptGroup(LocalOptGroup):
"""WRITEME""" """WRITEME"""
......
...@@ -1611,7 +1611,7 @@ class GpuCAReduce(GpuOp): ...@@ -1611,7 +1611,7 @@ class GpuCAReduce(GpuOp):
""" % locals() """ % locals()
def c_code_cache_version_apply(self, node): def c_code_cache_version_apply(self, node):
version = [8] # the version corresponding to the c code in this Op version = [9] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend... # now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op, scalar_node = Apply(self.scalar_op,
......
...@@ -1214,19 +1214,19 @@ def local_gpujoin_1(node): ...@@ -1214,19 +1214,19 @@ def local_gpujoin_1(node):
# shared = dimshuffle(gemm_inplace(dimshuffle(shared))) # shared = dimshuffle(gemm_inplace(dimshuffle(shared)))
# which causes memory leaks (long term fix is to make the above not leak # which causes memory leaks (long term fix is to make the above not leak
# memory) # memory)
@local_optimizer([gpu_gemm_no_inplace]) @local_optimizer([gpu_gemm_no_inplace], inplace=True)
def local_inplace_gemm(node): def local_inplace_gemm(node):
if node.op == gpu_gemm_no_inplace: if node.op == gpu_gemm_no_inplace:
return [gpu_gemm_inplace(*node.inputs)] return [gpu_gemm_inplace(*node.inputs)]
@local_optimizer([gpu_gemv_no_inplace]) @local_optimizer([gpu_gemv_no_inplace], inplace=True)
def local_inplace_gemv(node): def local_inplace_gemv(node):
if node.op == gpu_gemv_no_inplace: if node.op == gpu_gemv_no_inplace:
return [gpu_gemv_inplace(*node.inputs)] return [gpu_gemv_inplace(*node.inputs)]
@local_optimizer([gpu_ger_no_inplace]) @local_optimizer([gpu_ger_no_inplace], inplace=True)
def local_inplace_ger(node): def local_inplace_ger(node):
if node.op == gpu_ger_no_inplace: if node.op == gpu_ger_no_inplace:
return [gpu_ger_inplace(*node.inputs)] return [gpu_ger_inplace(*node.inputs)]
......
...@@ -1702,7 +1702,7 @@ class GpuCAReduceCuda(HideC, CAReduce): ...@@ -1702,7 +1702,7 @@ class GpuCAReduceCuda(HideC, CAReduce):
""" % locals() """ % locals()
def c_code_cache_version_apply(self, node): def c_code_cache_version_apply(self, node):
version = [8] # the version corresponding to the c code in this Op version = [9] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend... # now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op, scalar_node = Apply(self.scalar_op,
......
...@@ -341,17 +341,20 @@ def local_gpua_crossentropysoftmaxargmax1hotwithbias(node): ...@@ -341,17 +341,20 @@ def local_gpua_crossentropysoftmaxargmax1hotwithbias(node):
@op_lifter([tensor.nnet.CrossentropySoftmax1HotWithBiasDx]) @op_lifter([tensor.nnet.CrossentropySoftmax1HotWithBiasDx])
def local_gpua_crossentropysoftmax1hotwithbiasdx(node): def local_gpua_crossentropysoftmax1hotwithbiasdx(node):
return GpuCrossentropySoftmax1HotWithBiasDx() return GpuCrossentropySoftmax1HotWithBiasDx()
@register_opt() @register_opt()
@op_lifter([tensor.nnet.Softmax]) @op_lifter([tensor.nnet.Softmax])
def local_gpua_softmax(node): def local_gpua_softmax(node):
return GpuSoftmax() return GpuSoftmax()
@register_opt() @register_opt()
@op_lifter([tensor.nnet.SoftmaxWithBias]) @op_lifter([tensor.nnet.SoftmaxWithBias])
def local_gpua_softmaxwithbias(node): def local_gpua_softmaxwithbias(node):
return GpuSoftmaxWithBias() return GpuSoftmaxWithBias()
@register_opt() @register_opt()
@op_lifter([gpu_from_host, ConvOp]) @op_lifter([gpu_from_host, ConvOp])
def local_gpu_conv(node): def local_gpu_conv(node):
......
...@@ -1715,20 +1715,19 @@ def local_dot_to_dot22(node): ...@@ -1715,20 +1715,19 @@ def local_dot_to_dot22(node):
_logger.info('Not optimizing dot with inputs %s %s %s %s', _logger.info('Not optimizing dot with inputs %s %s %s %s',
x, y, x.type, y.type) x, y, x.type, y.type)
@local_optimizer([gemm_no_inplace], inplace=True)
@local_optimizer([gemm_no_inplace])
def local_inplace_gemm(node): def local_inplace_gemm(node):
if node.op == gemm_no_inplace: if node.op == gemm_no_inplace:
return [gemm_inplace(*node.inputs)] return [gemm_inplace(*node.inputs)]
@local_optimizer([gemv_no_inplace]) @local_optimizer([gemv_no_inplace], inplace=True)
def local_inplace_gemv(node): def local_inplace_gemv(node):
if node.op == gemv_no_inplace: if node.op == gemv_no_inplace:
return [gemv_inplace(*node.inputs)] return [gemv_inplace(*node.inputs)]
@local_optimizer([ger]) @local_optimizer([ger], inplace=True)
def local_inplace_ger(node): def local_inplace_ger(node):
if node.op == ger: if node.op == ger:
return [ger_destructive(*node.inputs)] return [ger_destructive(*node.inputs)]
......
...@@ -571,6 +571,8 @@ def repeat(x, repeats, axis=None): ...@@ -571,6 +571,8 @@ def repeat(x, repeats, axis=None):
:param axis: int, optional. :param axis: int, optional.
:see: :func:`tensor.tile <tensor.tile>`
.. versionadded:: 0.6 .. versionadded:: 0.6
""" """
return RepeatOp(axis=axis)(x, repeats) return RepeatOp(axis=axis)(x, repeats)
......
...@@ -174,7 +174,7 @@ def inplace_elemwise_optimizer_op(OP): ...@@ -174,7 +174,7 @@ def inplace_elemwise_optimizer_op(OP):
""" """
We parametrise it to make it work for Elemwise and GpuElemwise op. We parametrise it to make it work for Elemwise and GpuElemwise op.
""" """
@gof.optimizer @gof.inplace_optimizer
def inplace_elemwise_optimizer(fgraph): def inplace_elemwise_optimizer(fgraph):
""" """
Usage: inplace_elemwise_optimizer.optimize(fgraph) Usage: inplace_elemwise_optimizer.optimize(fgraph)
...@@ -2110,7 +2110,7 @@ compile.optdb.register('pre_local_IncSubtensor_serialize', ...@@ -2110,7 +2110,7 @@ compile.optdb.register('pre_local_IncSubtensor_serialize',
#after priority 50 Destructive inplace operations #after priority 50 Destructive inplace operations
#gemm is the first one now, at priority 70 #gemm is the first one now, at priority 70
@gof.local_optimizer([IncSubtensor]) # XXX: GPU @gof.local_optimizer([IncSubtensor], inplace=True)
def local_inplace_setsubtensor(node): def local_inplace_setsubtensor(node):
""" """
Also work for GpuIncSubtensor Also work for GpuIncSubtensor
...@@ -2129,7 +2129,7 @@ compile.optdb.register('local_inplace_setsubtensor', ...@@ -2129,7 +2129,7 @@ compile.optdb.register('local_inplace_setsubtensor',
'fast_run', 'inplace') # DEBUG 'fast_run', 'inplace') # DEBUG
@gof.local_optimizer([AdvancedIncSubtensor1]) # XXX: GPU @gof.local_optimizer([AdvancedIncSubtensor1], inplace=True)
def local_inplace_incsubtensor1(node): def local_inplace_incsubtensor1(node):
""" also work for GpuAdvancedIncSubtensor1 """ """ also work for GpuAdvancedIncSubtensor1 """
if isinstance(node.op, AdvancedIncSubtensor1) and not node.op.inplace: if isinstance(node.op, AdvancedIncSubtensor1) and not node.op.inplace:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论