Merge pull request #1522 from nouiz/small_stuff

Small stuff

Merge pull request #1522 from nouiz/small_stuff
3ca77162 · Pascal Lamblin · 999aeb92 · b0350903 · 3ca77162 · 3ca77162
--- a/theano/misc/do_nightly_build
+++ b/theano/misc/do_nightly_build
@@ -54,9 +54,10 @@ if [ "$RELEASE" ]; then
    echo
 fi
-echo "Executing tests with mode=FAST_COMPILE with --batch=1000"
+# with --batch=1000" # The buildbot freeze sometimes when collecting the tests to run
-echo "THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} --batch=1000 ${ARGS}"
+echo "Executing tests with mode=FAST_COMPILE"
-THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} --batch=1000 ${ARGS}
+echo "THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} ${ARGS}"
+THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} ${ARGS}
 echo "Number of elements in the compiledir:"
 ls ${COMPILEDIR}|wc -l
 echo

--- a/theano/misc/hooks/check_whitespace.py
+++ b/theano/misc/hooks/check_whitespace.py
@@ -12,7 +12,7 @@ import tokenize
 import argparse
 import reindent
-from theano.compat.six.StringIO import StringIO
+from theano.compat.six import StringIO
 SKIP_WHITESPACE_CHECK_FILENAME = ".hg/skip_whitespace_check"

--- a/theano/sandbox/multinomial.py
+++ b/theano/sandbox/multinomial.py
@@ -12,22 +12,28 @@ if cuda_available:
    from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host
    from theano.sandbox.cuda.opt import register_opt
 class MultinomialFromUniform(Op):
    '''Converts samples from a uniform into sample from a multinomial.'''
    def __init__(self, odtype):
-        self.odtype=odtype
+        self.odtype = odtype
    def __eq__(self, other):
-        return type(self) == type(other) and self.odtype==other.odtype
+        return type(self) == type(other) and self.odtype == other.odtype
    def __hash__(self):
        return hash((type(self), self.odtype))
    def __str__(self):
-        return '%s{%s}'%(self.__class__.__name__, self.odtype)
+        return '%s{%s}' % (self.__class__.__name__, self.odtype)
    def __setstate__(self, dct):
        self.__dict__.update(dct)
        try:
            self.odtype
        except AttributeError:
-            self.odtype='auto'
+            self.odtype = 'auto'
    def make_node(self, pvals, unis):
        pvals = T.as_tensor_variable(pvals)
        unis = T.as_tensor_variable(unis)
@@ -35,11 +41,12 @@ class MultinomialFromUniform(Op):
            raise NotImplementedError('pvals ndim should be 2', pvals.ndim)
        if unis.ndim != 1:
            raise NotImplementedError('unis ndim should be 1', unis.ndim)
-        if self.odtype=='auto':
+        if self.odtype == 'auto':
            odtype = pvals.dtype
        else:
            odtype = self.odtype
-        return Apply(self, [pvals, unis], [T.matrix(dtype=odtype)])
+        out = T.tensor(dtype=odtype, broadcastable=pvals.type.broadcastable)
+        return Apply(self, [pvals, unis], [out])
    def grad(self, ins, outgrads):
        pvals, unis = ins
@@ -121,6 +128,7 @@ class MultinomialFromUniform(Op):
        }
        } // END NESTED SCOPE
        """ % locals()
    def perform(self, node, ins, outs):
        (pvals, unis) = ins
        (z,) = outs
@@ -165,7 +173,7 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
            raise TypeError('pvals must be cudandarray', pvals)
        if not isinstance(unis.type, CudaNdarrayType):
            raise TypeError('unis must be cudandarray', unis)
-        if self.odtype=='auto':
+        if self.odtype == 'auto':
            odtype = pvals.dtype
        else:
            odtype = self.odtype
@@ -173,7 +181,9 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
            raise NotImplementedError(
                'GpuMultinomialFromUniform works only if '
                'self.odtype == pvals.dtype', odtype, pvals.dtype)
-        return Apply(self, [pvals, unis], [pvals.type()])
+        br = (pvals.broadcastable[1], pvals.broadcastable[0])
+        out = CudaNdarrayType(broadcastable=br)()
+        return Apply(self, [pvals, unis], [out])
    def perform(self, node, ins, outs):
        #The perform from parent don't work with CudaNdarray.  We
@@ -226,7 +236,6 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
        """ % locals()
    def c_code(self, node, name, ins, outs, sub):
        (pvals, unis) = ins
        (z,) = outs
@@ -327,25 +336,30 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
        } // END NESTED SCOPE
        """ % locals()
 @local_optimizer()
 def local_gpu_multinomial(node):
    if type(node.op) is MultinomialFromUniform:
        p, u = node.inputs
        m, = node.outputs
        if (p.dtype == u.dtype == m.dtype == 'float32' and
-            any([i.owner and isinstance(i.owner.op, theano.sandbox.cuda.HostFromGpu)
+            any([i.owner and isinstance(i.owner.op,
+                                        theano.sandbox.cuda.HostFromGpu)
                 for i in node.inputs])):
            gpu_op = GpuMultinomialFromUniform(node.op.odtype)
-            return [host_from_gpu(gpu_op(*[gpu_from_host(i) for i in node.inputs])).T]
+            return [host_from_gpu(gpu_op(*[gpu_from_host(i)
+                                           for i in node.inputs])).T]
    if (isinstance(node.op, theano.sandbox.cuda.GpuFromHost) and
-        node.inputs[0].owner and type(node.inputs[0].owner.op) is MultinomialFromUniform):
+        node.inputs[0].owner and type(node.inputs[0].owner.op)
+        is MultinomialFromUniform):
        multi = node.inputs[0].owner
        p, u = multi.inputs
        m, = multi.outputs
        if (p.dtype == u.dtype == m.dtype == 'float32'):
            gpu_op = GpuMultinomialFromUniform(multi.op.odtype)
            ret = gpu_op(*[gpu_from_host(i) for i in multi.inputs]).T
-            # The dimshuffle is on the cpu, but will be moved to the gpu by an opt.
+            # The dimshuffle is on the cpu, but will be moved to the
+            # gpu by an opt.
            return [gpu_from_host(ret)]
 if cuda_available:

--- a/theano/sandbox/test_multinomial.py
+++ b/theano/sandbox/test_multinomial.py
@@ -9,15 +9,19 @@ from theano.compile.mode import get_default_mode, predefined_linkers
 from theano.gof.python25 import any
 import theano.sandbox.cuda as cuda
 def get_mode(gpu):
    mode = get_default_mode()
    mode = copy.copy(mode)
    if gpu:
-        mode = mode.including('gpu', 'gpu_local_optimizations', 'local_cut_gpu_host_gpu', 'local_gpu_multinomial')
+        mode = mode.including('gpu', 'gpu_local_optimizations',
+                              'local_cut_gpu_host_gpu',
+                              'local_gpu_multinomial')
    if isinstance(mode.linker, theano.gof.PerformLinker):
        mode.linker = predefined_linkers['c|py']
    return mode
 def run_with_c(f, gpu=False):
    mode = get_mode(gpu)
    f(mode, gpu)
@@ -30,52 +34,54 @@ def test_multinomial_0():
    p = tensor.fmatrix()
    u = tensor.fvector()
-    m = multinomial.MultinomialFromUniform('auto')(p,u)
+    m = multinomial.MultinomialFromUniform('auto')(p, u)
    def body(mode, gpu):
        #the m*2 allows the multinomial to reuse output
-        f = function([p,u], m*2, allow_input_downcast=True, mode=mode)
+        f = function([p, u], m*2, allow_input_downcast=True, mode=mode)
        if gpu:
-            assert any([type(node.op) is multinomial.GpuMultinomialFromUniform for node in f.maker.fgraph.toposort()])
+            assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
+                        for node in f.maker.fgraph.toposort()])
        # test that both first and second samples can be drawn
-        assert numpy.allclose(f([[1,0], [0,1]], [.1, .1]),
+        assert numpy.allclose(f([[1, 0], [0, 1]], [.1, .1]),
-                [[2,0], [0,2]])
+                              [[2, 0], [0, 2]])
        # test that both second labels can be drawn
-        r = f([[.2,.8], [.3,.7]], [.31, .31])
+        r = f([[.2, .8], [.3, .7]], [.31, .31])
-        assert numpy.allclose(r, [[0,2], [0,2]]), r
+        assert numpy.allclose(r, [[0, 2], [0, 2]]), r
        # test that both first labels can be drawn
-        r = f([[.2,.8], [.3,.7]], [.21, .21])
+        r = f([[.2, .8], [.3, .7]], [.21, .21])
-        assert numpy.allclose(r, [[0,2], [2,0]]), r
+        assert numpy.allclose(r, [[0, 2], [2, 0]]), r
        #change the size to make sure output gets reallocated ok
        # and also make sure that the GPU version doesn't screw up the
        # transposed-ness
-        r = f([[.2,.8] ], [.25])
+        r = f([[.2, .8]], [.25])
-        assert numpy.allclose(r, [[0,2]]), r
+        assert numpy.allclose(r, [[0, 2]]), r
    run_with_c(body)
    if cuda.cuda_available:
        run_with_c(body, True)
 #TODO: check a bigger example (make sure blocking on GPU is handled correctly)
 def test_multinomial_large():
    # DEBUG_MODE will test this on GPU
    def body(mode, gpu):
        p = tensor.fmatrix()
        u = tensor.fvector()
-        m = multinomial.MultinomialFromUniform('auto')(p,u)
+        m = multinomial.MultinomialFromUniform('auto')(p, u)
-        f = function([p,u], m*2, allow_input_downcast=True, mode=mode)
+        f = function([p, u], m*2, allow_input_downcast=True, mode=mode)
        if gpu:
-            assert any([type(node.op) is multinomial.GpuMultinomialFromUniform for node in f.maker.fgraph.toposort()])
+            assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
+                        for node in f.maker.fgraph.toposort()])
        pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4))+0.1
-        pval = pval / pval.sum(axis=1)[:,None]
+        pval = pval / pval.sum(axis=1)[:, None]
-        uval = numpy.ones_like(pval[:,0]) * 0.5
+        uval = numpy.ones_like(pval[:, 0]) * 0.5
-        mval = f(pval,uval)
+        mval = f(pval, uval)
        assert mval.shape == pval.shape
        if config.cast_policy == 'custom':
@@ -88,7 +94,7 @@ def test_multinomial_large():
            raise NotImplementedError(config.cast_policy)
        assert numpy.allclose(mval.sum(axis=1), 2)
        asdf = numpy.asarray([0, 0, 2, 0])+0*pval
-        assert numpy.allclose(mval, asdf) #broadcast over all rows
+        assert numpy.allclose(mval, asdf)  # broadcast over all rows
    run_with_c(body)
    if cuda.cuda_available:
        run_with_c(body, True)
@@ -97,36 +103,52 @@ def test_multinomial_large():
 def test_multinomial_dtypes():
    p = tensor.dmatrix()
    u = tensor.dvector()
-    m = multinomial.MultinomialFromUniform('auto')(p,u)
+    m = multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float64', m.dtype
    p = tensor.fmatrix()
    u = tensor.fvector()
-    m = multinomial.MultinomialFromUniform('auto')(p,u)
+    m = multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float32', m.dtype
    p = tensor.fmatrix()
    u = tensor.fvector()
-    m = multinomial.MultinomialFromUniform('float64')(p,u)
+    m = multinomial.MultinomialFromUniform('float64')(p, u)
    assert m.dtype == 'float64', m.dtype
 def test_gpu_opt():
    if not cuda.cuda_available:
        # Skip test if cuda_ndarray is not available.
        from nose.plugins.skip import SkipTest
        raise SkipTest('Optional package cuda not available')
-    # We test the case where we put the op on the gpu when the output is moved to the gpu.
+    # We test the case where we put the op on the gpu when the output
+    # is moved to the gpu.
    p = tensor.fmatrix()
    u = tensor.fvector()
-    m = multinomial.MultinomialFromUniform('auto')(p,u)
+    m = multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float32', m.dtype
    m_gpu = cuda.gpu_from_host(m)
-    f = function([p,u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
+    f = function([p, u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
-    assert any([type(node.op) is multinomial.GpuMultinomialFromUniform for node in f.maker.fgraph.toposort()])
+    assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
+                for node in f.maker.fgraph.toposort()])
    pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4))+0.1
-    pval = pval / pval.sum(axis=1)[:,None]
+    pval = pval / pval.sum(axis=1)[:, None]
-    uval = numpy.ones_like(pval[:,0]) * 0.5
+    uval = numpy.ones_like(pval[:, 0]) * 0.5
-    mval = f(pval,uval)
+    mval = f(pval, uval)
+    # Test with a row, it was failing in the past.
+    r = tensor.frow()
+    m = multinomial.MultinomialFromUniform('auto')(r, u)
+    assert m.dtype == 'float32', m.dtype
+    m_gpu = cuda.gpu_from_host(m)
+    f = function([r, u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
+    assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
+                for node in f.maker.fgraph.toposort()])
+    pval = numpy.arange(1 * 4, dtype='float32').reshape((1, 4))+0.1
+    pval = pval / pval.sum(axis=1)[:, None]
+    uval = numpy.ones_like(pval[:, 0]) * 0.5
+    mval2 = f(pval, uval)
--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -4656,9 +4656,10 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 1024):
                else:
                    s = scalar.Scalar(i.dtype).make_variable()
                    try:
+                        if theano.config.compute_test_value != 'off':
                            v = gof.op.get_test_value(i)
                            if v.size > 0:
-                            s.tag.test_value = gof.op.get_test_value(i).flatten()[0]
+                                s.tag.test_value = v.flatten()[0]
                    except AttributeError:
                        pass

--- a/theano/tensor/tests/test_subtensor.py
+++ b/theano/tensor/tests/test_subtensor.py
@@ -1146,7 +1146,7 @@ class TestAdvancedSubtensor(unittest.TestCase):
        subt = self.m[self.ix1, self.ix12]
        a = inc_subtensor(subt, subt)
-        typ = TensorType(self.m.type.dtype, self.ix2.type.broadcastable)
+        typ = tensor.TensorType(self.m.type.dtype, self.ix2.type.broadcastable)
        assert a.type == typ, (a.type, typ)
        f = theano.function([self.m, self.ix1, self.ix12], a,
                            allow_input_downcast=True)

--- a/theano/tests/diverse_tests.py
+++ b/theano/tests/diverse_tests.py
-from nose.plugins.skip import SkipTest
 import unittest
-import theano
 import numpy
-import random
 import numpy.random
+import theano
 from theano.tests import unittest_tools as utt
 '''
-  Different tests that are not connected to any particular Op, or functionality of
+  Different tests that are not connected to any particular Op, or
-  Theano. Here will go for example code that we will publish in papers, that we
+  functionality of Theano. Here will go for example code that we will
-  should ensure that it will remain operational
+  publish in papers, that we should ensure that it will remain
+  operational
 '''
 class T_scipy(unittest.TestCase):
    def setUp(self):
        utt.seed_rng()
        self.orig_floatX = theano.config.floatX
    def tearDown(self):
        theano.config.floatX = self.orig_floatX
    def test_scipy_paper_example1(self):
        a = theano.tensor.vector('a')  # declare variable
        b = a + a**10                  # build expression
        f = theano.function([a], b)    # compile function
-        assert numpy.all(f([0,1,2]) == numpy.array([0,2,1026]))
+        assert numpy.all(f([0, 1, 2]) == numpy.array([0, 2, 1026]))
    def test_scipy_paper_example2(self):
        ''' This just sees if things compile well and if they run '''
@@ -34,7 +36,7 @@ class T_scipy(unittest.TestCase):
        shared = theano.shared
        function = theano.function
        rng = numpy.random
-        theano.config.floatX='float64'
+        theano.config.floatX = 'float64'
        #
        # ACTUAL SCRIPT FROM PAPER
@@ -49,18 +51,18 @@ class T_scipy(unittest.TestCase):
        xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1)
        prediction = p_1 > 0.5
        cost = xent.mean() + 0.01*(w**2).sum()
-        gw,gb = T.grad(cost, [w,b])
+        gw, gb = T.grad(cost, [w, b])
        # Compile expressions to functions
        train = function(
-            inputs=[x,y],
+            inputs=[x, y],
            outputs=[prediction, xent],
            updates=[(w, w-0.1*gw), (b, b-0.1*gb)])
        predict = function(inputs=[x], outputs=prediction)
        N = 4
        feats = 100
-        D = (rng.randn(N, feats), rng.randint(size=4,low=0, high=2))
+        D = (rng.randn(N, feats), rng.randint(size=4, low=0, high=2))
        training_steps = 10
        for i in range(training_steps):
            pred, err = train(D[0], D[1])
@@ -68,4 +70,3 @@ class T_scipy(unittest.TestCase):
 if __name__ == '__main__':
    unittest.main()