Merge branch 'master' of https://github.com/Theano/Theano

f0fc09e6 · Peng Liu · 59cf621d · 99de36c8 · f0fc09e6 · f0fc09e6
--- a/NEWS.txt
+++ b/NEWS.txt
-.. _NEWS:
 =============
 Release Notes
 =============
@@ -12,10 +10,12 @@ We recommend that everybody update to this version.
 Highlights:
 - Python 2 and 3 support with the same code base
 - Faster optimization
- New GPU back-end
+- New GPU back-end:
-   - Float16 new back-end (need cuda 7.5)
-   - Multi dtypes
+   * Float16 new back-end (needs CUDA 7.5)
-   - Multi-GPU support in the same process
+   * Multi dtypes
+   * Multi-GPU support in the same process
 - Integration of CuDNN for better GPU performance
 - Many Scan improvements (execution speed up, ...)
 - optimizer=fast_compile moves computation to the GPU.

--- a/theano/compile/tests/test_function_name.py
+++ b/theano/compile/tests/test_function_name.py
-import unittest
+from __future__ import absolute_import, print_function, division
+import unittest
 import os
 import re
@@ -12,5 +13,5 @@ class FunctionName(unittest.TestCase):
        x = tensor.vector('x')
        func = theano.function([x], x + 1.)
-        regex = re.compile(os.path.basename('.*test_function_name.pyc?:13'))
+        regex = re.compile(os.path.basename('.*test_function_name.pyc?:14'))
        assert(regex.match(func.name) is not None)
--- a/theano/sandbox/cuda/blocksparse.py
+++ b/theano/sandbox/cuda/blocksparse.py
@@ -23,21 +23,13 @@ class GpuSparseBlockGemv(GpuOp):
    to change without notice.  Use the sandbox.blocksparse.sparse_block_dot()
    function for a stable interface.
    """
+    __props__ = ('inplace',)
    def __init__(self, inplace=False):
        self.inplace = inplace
        if self.inplace:
            self.destroy_map = {0: [0]}
-    def __eq__(self, other):
-        return type(self) == type(other) and self.inplace == other.inplace
-    def __hash__(self):
-        return hash(type(self)) ^ hash(self.inplace)
-    def __str__(self):
-        return "GpuSparseBlockGemv%s" % ("{inplace}" if self.inplace else "")
    def make_node(self, o, W, h, inputIdx, outputIdx):
        o = basic_ops.as_cuda_ndarray_variable(o)
        W = basic_ops.as_cuda_ndarray_variable(W)
@@ -350,28 +342,20 @@ gpu_sparse_block_gemv_inplace = GpuSparseBlockGemv(True)
 class GpuSparseBlockOuter(GpuOp):
    """
-    CPU version of SparseBlockOuter. See SparseBlockOuter's docstring for more
+    GPU version of SparseBlockOuter. See SparseBlockOuter's docstring for more
    information.
    This op should not be called directly since its interface is
    subject to change without notice.  It is involved in the gradient
    of GpuSparseBlockGemv. The gradient is not implemented.
    """
+    __props__ = ('inplace',)
    def __init__(self, inplace=False):
        self.inplace = inplace
        if self.inplace:
            self.destroy_map = {0: [0]}
-    def __eq__(self, other):
-        return type(self) == type(other) and self.inplace == other.inplace
-    def __hash__(self):
-        return hash(type(self)) ^ hash(self.inplace)
-    def __str__(self):
-        return "GpuSparseBlockOuter%s" % ("{inplace}" if self.inplace else "")
    def make_node(self, o, x, y, xIdx, yIdx, alpha=None):
        one = tensor.constant(numpy.asarray(1.0, dtype='float32'))
        o = basic_ops.as_cuda_ndarray_variable(o)

--- a/theano/sandbox/cuda/tests/test_blocksparse.py
+++ b/theano/sandbox/cuda/tests/test_blocksparse.py
@@ -7,7 +7,8 @@ import theano.tests.unittest_tools as utt
 import theano.tensor.nnet.tests.test_blocksparse
 import theano.sandbox.cuda as cuda_ndarray
-from theano.sandbox.cuda.blocksparse import (GpuSparseBlockOuter,
+from theano.sandbox.cuda.blocksparse import (GpuSparseBlockGemv,
+                                             GpuSparseBlockOuter,
                                             gpu_sparse_block_gemv,
                                             gpu_sparse_block_outer)
 from theano.sandbox.cuda.var import float32_shared_constructor
@@ -28,6 +29,8 @@ class BlockSparse_Gemv_and_Outer(
        self.mode = mode_with_gpu.excluding('constant_folding')
        self.gemv_op = gpu_sparse_block_gemv
        self.outer_op = gpu_sparse_block_outer
+        self.gemv_class = GpuSparseBlockGemv
+        self.outer_class = GpuSparseBlockOuter
    # This test is temporarily disabled since we disabled the output_merge
    # and alpha_merge optimizations for blocksparse due to brokeness.

--- a/theano/scalar/basic.py
+++ b/theano/scalar/basic.py
@@ -21,7 +21,7 @@ import numpy
 from six.moves import xrange
 import theano
-from theano.compat import PY3, imap, izip
+from theano.compat import imap, izip
 from theano import gof, printing
 from theano.gof import (Op, utils, Variable, Constant, Type, Apply,
                        FunctionGraph)
@@ -604,12 +604,11 @@ class _scalar_py_operators:
    def __mul__(self, other):
        return mul(self, other)
-    if PY3:
+    def __truediv__(self, other):
-        def __truediv__(self, other):
+        return div_proxy(self, other)
-            return div_proxy(self, other)
-    else:
+    def __div__(self, other):
-        def __div__(self, other):
+        return div_proxy(self, other)
-            return div_proxy(self, other)
    def __floordiv__(self, other):
        return int_div(self, other)

--- a/theano/scalar/tests/test_basic.py
+++ b/theano/scalar/tests/test_basic.py
@@ -9,6 +9,8 @@ If you do want to rewrite these tests, bear in mind:
  * FunctionGraph and DualLinker are old, use compile.function instead.
 """
+from __future__ import absolute_import, print_function, division
 import unittest
 import numpy as np

--- a/theano/scalar/tests/test_div_future.py
+++ b/theano/scalar/tests/test_div_future.py
+from __future__ import division
+import theano
+import theano.tensor as T
+import unittest
+class test_FutureDiv(unittest.TestCase):  # Division checks for a module that starts with `from __future__ import division`.
+    def test_divide_floats(self):  # Exercises both `/` and `//` on two dscalar variables.
+        a = T.dscalar('a')
+        b = T.dscalar('b')
+        c = theano.function([a, b], b / a)  # compiled `b / a`
+        d = theano.function([a, b], b // a)  # compiled `b // a`
+        assert c(6, 3) == 0.5  # a=6, b=3, so b / a is 3 / 6
+        assert d(6, 3) == 0.0  # a=6, b=3, so b // a is 3 // 6
--- a/theano/scalar/tests/test_div_no_future.py
+++ b/theano/scalar/tests/test_div_no_future.py
+import theano
+import theano.tensor as T
+import unittest
+class test_FutureDiv(unittest.TestCase):  # NOTE(review): same class name as in test_div_future.py, yet this module (as shown) has no `from __future__ import division` -- confirm the name is intended.
+    def test_divide_floats(self):  # Exercises both `/` and `//` on two dscalar variables.
+        a = T.dscalar('a')
+        b = T.dscalar('b')
+        c = theano.function([a, b], b / a)  # compiled `b / a`
+        d = theano.function([a, b], b // a)  # compiled `b // a`
+        assert c(6, 3) == 0.5  # a=6, b=3, so b / a is 3 / 6
+        assert d(6, 3) == 0.0  # a=6, b=3, so b // a is 3 // 6
--- a/theano/scan_module/tests/test_scan.py
+++ b/theano/scan_module/tests/test_scan.py
-from __future__ import print_function
+from __future__ import absolute_import, print_function, division
 import os
 import shutil
 import sys

--- a/theano/tensor/nnet/blocksparse.py
+++ b/theano/tensor/nnet/blocksparse.py
@@ -22,6 +22,7 @@ class SparseBlockGemv(Op):
        :scale: 50 %
    """
+    __props__ = ('inplace',)
    registered_opts = []
@@ -90,10 +91,7 @@ class SparseBlockGemv(Op):
        assert inputIdx.type.dtype in discrete_dtypes
        assert outputIdx.type.dtype in discrete_dtypes
-        output = o.type.__class__(dtype=o.type.dtype,
+        return Apply(self, [o, W, h, inputIdx, outputIdx], [o.type()])
-                                  broadcastable=(False,) * o.ndim)()
-        return Apply(self, [o, W, h, inputIdx, outputIdx], [output])
    def perform(self, node, inp, out_):
        o, W, h, iIdx, oIdx = inp[:5]
@@ -110,6 +108,9 @@ class SparseBlockGemv(Op):
                    o[b, j, :] += numpy.dot(h[b, i], w)
        out_[0][0] = o
+    def infer_shape(self, node, input_shapes):  # Reports one output shape: that of the first input (`o` in make_node).
+        return [input_shapes[0]]
    def grad(self, inputs, grads):
        o, W, h, inputIdx, outputIdx = inputs
        go = grads[0]
@@ -138,6 +139,7 @@ class SparseBlockOuter(Op):
    This op is involved in the gradient of SparseBlockGemv.
    """
+    __props__ = ('inplace',)
    registered_opts = []
@@ -190,11 +192,11 @@ class SparseBlockOuter(Op):
        if alpha is None:
            alpha = one
-        output = o.type.__class__(dtype=o.type.dtype,
-                                  broadcastable=(False,) * o.ndim)()
        return Apply(self, [o, x, y, xIdx, yIdx, alpha],
-                     [output])
+                     [o.type()])
+    def infer_shape(self, node, input_shapes):  # Reports one output shape: that of the first input (`o` in make_node).
+        return [input_shapes[0]]
    def perform(self, node, inp, out_):
        o, x, y, xIdx, yIdx, alpha = inp[:6]

--- a/theano/tensor/nnet/nnet.py
+++ b/theano/tensor/nnet/nnet.py
@@ -2302,7 +2302,7 @@ def h_softmax(x, batch_size, n_outputs, n_classes, n_outputs_per_class,
        output_probs = theano.tensor.nnet.softmax(
            activations.reshape((-1, n_outputs_per_class)))
        output_probs = output_probs.reshape((batch_size, n_classes, -1))
-        output_probs = class_probs[:, :, None] * output_probs
+        output_probs = class_probs.dimshuffle(0, 1, 'x') * output_probs
        output_probs = output_probs.reshape((batch_size, -1))
        # output_probs.shape[1] is n_classes * n_outputs_per_class, which might
        # be greater than n_outputs, so we ignore the potential irrelevant
@@ -2321,11 +2321,11 @@ def h_softmax(x, batch_size, n_outputs, n_classes, n_outputs_per_class,
        # Second softmax that computes the output probabilities
        activations = sparse_block_dot(
-            W2[None, :, :, :], x[:, None, :],
+            W2.dimshuffle('x', 0, 1, 2), x.dimshuffle(0, 'x', 1),
            tensor.zeros((batch_size, 1), dtype='int32'), b2,
-            target_classes[:, None])
+            target_classes.dimshuffle(0, 'x'))
-        output_probs = theano.tensor.nnet.softmax(activations[:, 0, :])
+        output_probs = theano.tensor.nnet.softmax(activations.dimshuffle(0, 2))
        target_class_probs = class_probs[tensor.arange(batch_size),
                                         target_classes]
        output_probs = output_probs[tensor.arange(batch_size),

--- a/theano/tensor/nnet/tests/test_blocksparse.py
+++ b/theano/tensor/nnet/tests/test_blocksparse.py
 """
    Tests for block sparse dot
 """
-import unittest
 import numpy
 from numpy.random import randn
@@ -10,15 +8,12 @@ import theano
 from theano import tensor
 import theano.tests.unittest_tools as utt
-from theano.tensor.nnet.blocksparse import sparse_block_dot, \
+from theano.tensor.nnet.blocksparse import (
-    sparse_block_gemv, sparse_block_outer
+    sparse_block_dot, sparse_block_gemv, sparse_block_outer,
+    SparseBlockGemv, SparseBlockOuter)
-class BlockSparse_Gemv_and_Outer(unittest.TestCase):
-    def runTest(self):
-        pass
+class BlockSparse_Gemv_and_Outer(utt.InferShapeTester):
    def setUp(self):
        utt.seed_rng()
        mode = None
@@ -29,6 +24,8 @@ class BlockSparse_Gemv_and_Outer(unittest.TestCase):
        )
        self.gemv_op = sparse_block_gemv
        self.outer_op = sparse_block_outer
+        self.gemv_class = SparseBlockGemv
+        self.outer_class = SparseBlockOuter
    @staticmethod
    def gemv_data():
@@ -280,3 +277,40 @@ class BlockSparse_Gemv_and_Outer(unittest.TestCase):
            o_val, x_val, y_val, xIdx_val, yIdx_val)
        utt.assert_allclose(ref_out, th_out)
+    def test_dot_infershape(self):  # Runs the inherited _compile_and_check helper on a sparse_block_dot graph.
+        b = tensor.fmatrix()
+        W = tensor.ftensor4()
+        h = tensor.ftensor3()
+        iIdx = tensor.imatrix()
+        oIdx = tensor.imatrix()
+        self._compile_and_check([W, h, iIdx, b, oIdx],
+                                [sparse_block_dot(W, h, iIdx, b, oIdx)],
+                                self.gemv_data(),  # presumably values ordered like the inputs list above -- TODO confirm
+                                self.gemv_class)  # op class assigned in setUp
+    def test_gemv_infershape(self):  # Runs the inherited _compile_and_check helper on a direct self.gemv_op graph.
+        b = tensor.fmatrix()
+        W = tensor.ftensor4()
+        h = tensor.ftensor3()
+        iIdx = tensor.imatrix()
+        oIdx = tensor.imatrix()
+        self._compile_and_check(
+            [W, h, iIdx, b, oIdx],
+            [self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)],  # first op argument is b gathered by oIdx along axis 0
+            self.gemv_data(),  # presumably values ordered like the inputs list above -- TODO confirm
+            self.gemv_class)  # op class assigned in setUp
+    def test_outer_infershape(self):  # Runs the inherited _compile_and_check helper on a self.outer_op graph.
+        o = tensor.ftensor4()
+        x = tensor.ftensor3()
+        y = tensor.ftensor3()
+        xIdx = tensor.imatrix()
+        yIdx = tensor.imatrix()
+        self._compile_and_check([o, x, y, xIdx, yIdx],
+                                [self.outer_op(o, x, y, xIdx, yIdx)],
+                                self.outer_data(),  # presumably values ordered like the inputs list above -- TODO confirm
+                                self.outer_class)  # op class assigned in setUp
--- a/theano/tensor/xlogx.py
+++ b/theano/tensor/xlogx.py
+from __future__ import absolute_import, print_function, division
 import numpy
 from theano.tensor.elemwise import Elemwise