Commit b0e70bd0, authored by abergeron

Merge pull request #1811 from nouiz/opt

Optimization: Reduce(Join()) -> Elemwise
......@@ -44,7 +44,7 @@ if pygpu:
init_dev(config.device)
import theano.compile
theano.compile.shared_constructor(gpuarray_shared_constructor)
optdb.add_tags('gpuarray_opt', 'fast_run', 'inplace')
optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile', 'inplace')
elif config.gpuarray.init_device != '':
init_dev(config.gpuarray.init_device)
except Exception:
......
......@@ -1650,7 +1650,7 @@ def min(x, axis=None, keepdims=False):
the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor.
"""
x = as_tensor_variable(x)
str_x_type = str(x.dtype)
if str_x_type.startswith('float') or str_x_type in int_dtypes:
return -max(-x, axis=axis, keepdims=keepdims)
......@@ -1671,7 +1671,7 @@ def argmin(x, axis=None, keepdims=False):
the result as dimensions with size one. With this option, the result
will broadcast correctly against the original tensor.
"""
x = as_tensor_variable(x)
str_x_type = str(x.dtype)
if str_x_type.startswith('float') or str_x_type in int_dtypes:
return argmax(-x, axis=axis, keepdims=keepdims)
......
......@@ -3308,6 +3308,41 @@ ALL_REDUCE = [T.elemwise.CAReduce, T.elemwise.All, T.elemwise.Any,
T.elemwise.Sum, T.elemwise.Prod,
T.elemwise.ProdWithoutZeros]
@register_canonicalize
@register_uncanonicalize  # Needed for MaxAndArgmax -> CAReduce
@gof.local_optimizer(ALL_REDUCE)
def local_reduce_join(node):
    """Rewrite a reduction over a join along axis 0 into one Elemwise op.

    Example: ``Max(Join(0, a, b), axis=0) -> Maximum(a, b)``.

    Applies only when every joined operand is a DimShuffle that merely
    prepends a broadcastable axis (i.e. the pattern produced by stacking),
    and only when the replacement keeps the output dtype unchanged.
    """
    if not isinstance(node.op, T.CAReduce):
        return
    join_apply = node.inputs[0].owner
    if join_apply is None or not isinstance(join_apply.op, T.Join):
        return
    # The rewrite is only valid when the join is along axis 0.
    if T.extract_constant(join_apply.inputs[0]) != 0:
        return
    reducer = node.op.scalar_op
    if isinstance(reducer, (scalar.Maximum, scalar.Minimum)):
        # Support only 2 joined tensors for now
        # (join_apply.inputs[0] is the axis, the rest are the tensors).
        if len(join_apply.inputs) != 3:
            return
    elif not isinstance(reducer, (scalar.Add, scalar.Mul)):
        return
    elemwise_args = []
    for joined in join_apply.inputs[1:]:
        producer = joined.owner
        if not producer:
            return
        stack_order = ('x',) + tuple(range(producer.inputs[0].ndim))
        if (not isinstance(producer.op, DimShuffle) or
                producer.op.new_order != stack_order):
            return
        elemwise_args.append(producer.inputs[0])
    replacement = Elemwise(reducer)(*elemwise_args)
    if replacement.dtype == node.outputs[0].dtype:
        return [replacement]
    # Otherwise the reduction changes the dtype (e.g. upcasting sums),
    # so the substitution would not be equivalent.
@register_canonicalize
@gof.local_optimizer(ALL_REDUCE)
def local_cut_useless_reduce(node):
......
......@@ -3776,8 +3776,10 @@ class T_local_sum(unittest.TestCase):
class T_local_reduce(unittest.TestCase):
def setUp(self):
    """Build the compilation mode used by the reduction-optimization tests.

    Includes 'uncanonicalize' and 'local_max_and_argmax' on top of the
    usual 'canonicalize'/'specialize' so that the Reduce(Join())
    rewrites under test actually fire.
    """
    # NOTE: the previous version assigned self.mode twice (diff residue);
    # the first assignment was a dead store and has been removed.
    self.mode = theano.compile.get_default_mode().including(
        'canonicalize',
        'specialize',
        'uncanonicalize', 'local_max_and_argmax')
def test_local_reduce_broadcast_all_0(self):
for fct in [tensor.sum, tensor.all, tensor.any, tensor.prod,
......@@ -3827,6 +3829,28 @@ class T_local_reduce(unittest.TestCase):
isinstance(node.op, T.CAReduce)
for node in f.maker.fgraph.toposort()])
def test_local_reduce_join(self):
    """Check that reduce(join(...), axis=0) compiles down to few nodes.

    Each compiled graph must end in an Elemwise and contain at most two
    apply nodes, while still computing the same values as numpy.
    """
    vx = matrix()
    vy = matrix()
    vz = matrix()
    x = numpy.asarray([[1, 0], [3, 4]], dtype=config.floatX)
    y = numpy.asarray([[4, 0], [2, 1]], dtype=config.floatX)
    z = numpy.asarray([[5, 0], [1, 2]], dtype=config.floatX)
    cases = [
        (T.max((vx, vy), 0), numpy.max((x, y), 0)),
        (T.min((vx, vy), 0), numpy.min((x, y), 0)),
        (T.sum((vx, vy, vz), 0), numpy.sum((x, y, z), 0)),
        (T.prod((vx, vy, vz), 0), numpy.prod((x, y, z), 0)),
        # Also exercise the rewrite with a transposed operand.
        (T.prod((vx, vy.T, vz), 0), numpy.prod((x, y.T, z), 0)),
    ]
    for sym_out, expected in cases:
        f = theano.function([vx, vy, vz], sym_out,
                            on_unused_input='ignore', mode=self.mode)
        assert (f(x, y, z) == expected).all(), sym_out
        nodes = f.maker.fgraph.toposort()
        assert len(nodes) <= 2, sym_out
        assert isinstance(nodes[-1].op, T.Elemwise), sym_out
class T_local_sum_dimshuffle(unittest.TestCase):
def setUp(self):
......
Markdown format
0%
You added 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment