Merge pull request #1522 from nouiz/small_stuff

Small stuff

Merge pull request #1522 from nouiz/small_stuff
3ca77162 · Pascal Lamblin · 999aeb92 · b0350903 · 3ca77162 · 3ca77162
--- a/theano/misc/do_nightly_build
+++ b/theano/misc/do_nightly_build
@@ -54,9 +54,10 @@ if [ "$RELEASE" ]; then
    echo
 fi

-echo "Executing tests with mode=FAST_COMPILE with --batch=1000"
-echo "THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} --batch=1000 ${ARGS}"
-THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} --batch=1000 ${ARGS}
+# --batch=1000 is no longer used: the buildbot sometimes freezes when collecting the tests to run
+echo "Executing tests with mode=FAST_COMPILE"
+echo "THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} ${ARGS}"
+THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} ${ARGS}
 echo "Number of elements in the compiledir:"
 ls ${COMPILEDIR}|wc -l
 echo

--- a/theano/misc/hooks/check_whitespace.py
+++ b/theano/misc/hooks/check_whitespace.py
@@ -12,7 +12,7 @@ import tokenize

 import argparse
 import reindent
-from theano.compat.six.StringIO import StringIO
+from theano.compat.six import StringIO

 SKIP_WHITESPACE_CHECK_FILENAME = ".hg/skip_whitespace_check"


--- a/theano/sandbox/multinomial.py
+++ b/theano/sandbox/multinomial.py
@@ -12,22 +12,28 @@ if cuda_available:
    from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host
    from theano.sandbox.cuda.opt import register_opt

+
 class MultinomialFromUniform(Op):
    '''Converts samples from a uniform into sample from a multinomial.'''
    def __init__(self, odtype):
-        self.odtype=odtype
+        self.odtype = odtype
+
    def __eq__(self, other):
-        return type(self) == type(other) and self.odtype==other.odtype
+        return type(self) == type(other) and self.odtype == other.odtype
+
    def __hash__(self):
        return hash((type(self), self.odtype))
+
    def __str__(self):
-        return '%s{%s}'%(self.__class__.__name__, self.odtype)
+        return '%s{%s}' % (self.__class__.__name__, self.odtype)
+
    def __setstate__(self, dct):
        self.__dict__.update(dct)
        try:
            self.odtype
        except AttributeError:
-            self.odtype='auto'
+            self.odtype = 'auto'
+
    def make_node(self, pvals, unis):
        pvals = T.as_tensor_variable(pvals)
        unis = T.as_tensor_variable(unis)
@@ -35,11 +41,12 @@ class MultinomialFromUniform(Op):
            raise NotImplementedError('pvals ndim should be 2', pvals.ndim)
        if unis.ndim != 1:
            raise NotImplementedError('unis ndim should be 1', unis.ndim)
-        if self.odtype=='auto':
+        if self.odtype == 'auto':
            odtype = pvals.dtype
        else:
            odtype = self.odtype
-        return Apply(self, [pvals, unis], [T.matrix(dtype=odtype)])
+        out = T.tensor(dtype=odtype, broadcastable=pvals.type.broadcastable)
+        return Apply(self, [pvals, unis], [out])

    def grad(self, ins, outgrads):
        pvals, unis = ins
@@ -121,6 +128,7 @@ class MultinomialFromUniform(Op):
        }
        } // END NESTED SCOPE
        """ % locals()
+
    def perform(self, node, ins, outs):
        (pvals, unis) = ins
        (z,) = outs
@@ -165,15 +173,17 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
            raise TypeError('pvals must be cudandarray', pvals)
        if not isinstance(unis.type, CudaNdarrayType):
            raise TypeError('unis must be cudandarray', unis)
-        if self.odtype=='auto':
+        if self.odtype == 'auto':
            odtype = pvals.dtype
        else:
            odtype = self.odtype
        if odtype != pvals.dtype:
            raise NotImplementedError(
-                    'GpuMultinomialFromUniform works only if '
-                    'self.odtype == pvals.dtype', odtype, pvals.dtype)
-        return Apply(self, [pvals, unis], [pvals.type()])
+                'GpuMultinomialFromUniform works only if '
+                'self.odtype == pvals.dtype', odtype, pvals.dtype)
+        br = (pvals.broadcastable[1], pvals.broadcastable[0])
+        out = CudaNdarrayType(broadcastable=br)()
+        return Apply(self, [pvals, unis], [out])

    def perform(self, node, ins, outs):
        #The perform from parent don't work with CudaNdarray.  We
@@ -226,7 +236,6 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):

        """ % locals()

-
    def c_code(self, node, name, ins, outs, sub):
        (pvals, unis) = ins
        (z,) = outs
@@ -327,25 +336,30 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
        } // END NESTED SCOPE
        """ % locals()

+
 @local_optimizer()
 def local_gpu_multinomial(node):
    if type(node.op) is MultinomialFromUniform:
        p, u = node.inputs
        m, = node.outputs
        if (p.dtype == u.dtype == m.dtype == 'float32' and
-            any([i.owner and isinstance(i.owner.op, theano.sandbox.cuda.HostFromGpu)
+            any([i.owner and isinstance(i.owner.op,
+                                        theano.sandbox.cuda.HostFromGpu)
                 for i in node.inputs])):
            gpu_op = GpuMultinomialFromUniform(node.op.odtype)
-            return [host_from_gpu(gpu_op(*[gpu_from_host(i) for i in node.inputs])).T]
+            return [host_from_gpu(gpu_op(*[gpu_from_host(i)
+                                           for i in node.inputs])).T]
    if (isinstance(node.op, theano.sandbox.cuda.GpuFromHost) and
-        node.inputs[0].owner and type(node.inputs[0].owner.op) is MultinomialFromUniform):
+        node.inputs[0].owner and type(node.inputs[0].owner.op)
+        is MultinomialFromUniform):
        multi = node.inputs[0].owner
        p, u = multi.inputs
        m, = multi.outputs
        if (p.dtype == u.dtype == m.dtype == 'float32'):
            gpu_op = GpuMultinomialFromUniform(multi.op.odtype)
            ret = gpu_op(*[gpu_from_host(i) for i in multi.inputs]).T
-            # The dimshuffle is on the cpu, but will be moved to the gpu by an opt.
+            # The dimshuffle is on the cpu, but will be moved to the
+            # gpu by an opt.
            return [gpu_from_host(ret)]

 if cuda_available:

--- a/theano/sandbox/test_multinomial.py
+++ b/theano/sandbox/test_multinomial.py
@@ -9,15 +9,19 @@ from theano.compile.mode import get_default_mode, predefined_linkers
 from theano.gof.python25 import any
 import theano.sandbox.cuda as cuda

+
 def get_mode(gpu):
    mode = get_default_mode()
    mode = copy.copy(mode)
    if gpu:
-        mode = mode.including('gpu', 'gpu_local_optimizations', 'local_cut_gpu_host_gpu', 'local_gpu_multinomial')
+        mode = mode.including('gpu', 'gpu_local_optimizations',
+                              'local_cut_gpu_host_gpu',
+                              'local_gpu_multinomial')
    if isinstance(mode.linker, theano.gof.PerformLinker):
        mode.linker = predefined_linkers['c|py']
    return mode

+
 def run_with_c(f, gpu=False):
    mode = get_mode(gpu)
    f(mode, gpu)
@@ -30,52 +34,54 @@ def test_multinomial_0():
    p = tensor.fmatrix()
    u = tensor.fvector()

-    m = multinomial.MultinomialFromUniform('auto')(p,u)
+    m = multinomial.MultinomialFromUniform('auto')(p, u)

    def body(mode, gpu):
        #the m*2 allows the multinomial to reuse output
-        f = function([p,u], m*2, allow_input_downcast=True, mode=mode)
+        f = function([p, u], m*2, allow_input_downcast=True, mode=mode)
        if gpu:
-            assert any([type(node.op) is multinomial.GpuMultinomialFromUniform for node in f.maker.fgraph.toposort()])
+            assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
+                        for node in f.maker.fgraph.toposort()])

        # test that both first and second samples can be drawn
-        assert numpy.allclose(f([[1,0], [0,1]], [.1, .1]),
-                [[2,0], [0,2]])
+        assert numpy.allclose(f([[1, 0], [0, 1]], [.1, .1]),
+                              [[2, 0], [0, 2]])

        # test that both second labels can be drawn
-        r = f([[.2,.8], [.3,.7]], [.31, .31])
-        assert numpy.allclose(r, [[0,2], [0,2]]), r
-
+        r = f([[.2, .8], [.3, .7]], [.31, .31])
+        assert numpy.allclose(r, [[0, 2], [0, 2]]), r

        # test that both first labels can be drawn
-        r = f([[.2,.8], [.3,.7]], [.21, .21])
-        assert numpy.allclose(r, [[0,2], [2,0]]), r
+        r = f([[.2, .8], [.3, .7]], [.21, .21])
+        assert numpy.allclose(r, [[0, 2], [2, 0]]), r

        #change the size to make sure output gets reallocated ok
        # and also make sure that the GPU version doesn't screw up the
        # transposed-ness
-        r = f([[.2,.8] ], [.25])
-        assert numpy.allclose(r, [[0,2]]), r
+        r = f([[.2, .8]], [.25])
+        assert numpy.allclose(r, [[0, 2]]), r

    run_with_c(body)
    if cuda.cuda_available:
        run_with_c(body, True)

+
 #TODO: check a bigger example (make sure blocking on GPU is handled correctly)
 def test_multinomial_large():
    # DEBUG_MODE will test this on GPU
    def body(mode, gpu):
        p = tensor.fmatrix()
        u = tensor.fvector()
-        m = multinomial.MultinomialFromUniform('auto')(p,u)
-        f = function([p,u], m*2, allow_input_downcast=True, mode=mode)
+        m = multinomial.MultinomialFromUniform('auto')(p, u)
+        f = function([p, u], m*2, allow_input_downcast=True, mode=mode)
        if gpu:
-            assert any([type(node.op) is multinomial.GpuMultinomialFromUniform for node in f.maker.fgraph.toposort()])
+            assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
+                        for node in f.maker.fgraph.toposort()])

        pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4))+0.1
-        pval = pval / pval.sum(axis=1)[:,None]
-        uval = numpy.ones_like(pval[:,0]) * 0.5
-        mval = f(pval,uval)
+        pval = pval / pval.sum(axis=1)[:, None]
+        uval = numpy.ones_like(pval[:, 0]) * 0.5
+        mval = f(pval, uval)

        assert mval.shape == pval.shape
        if config.cast_policy == 'custom':
@@ -88,7 +94,7 @@ def test_multinomial_large():
            raise NotImplementedError(config.cast_policy)
        assert numpy.allclose(mval.sum(axis=1), 2)
        asdf = numpy.asarray([0, 0, 2, 0])+0*pval
-        assert numpy.allclose(mval, asdf) #broadcast over all rows
+        assert numpy.allclose(mval, asdf)  # broadcast over all rows
    run_with_c(body)
    if cuda.cuda_available:
        run_with_c(body, True)
@@ -97,36 +103,52 @@ def test_multinomial_large():
 def test_multinomial_dtypes():
    p = tensor.dmatrix()
    u = tensor.dvector()
-    m = multinomial.MultinomialFromUniform('auto')(p,u)
+    m = multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float64', m.dtype

    p = tensor.fmatrix()
    u = tensor.fvector()
-    m = multinomial.MultinomialFromUniform('auto')(p,u)
+    m = multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float32', m.dtype

-
    p = tensor.fmatrix()
    u = tensor.fvector()
-    m = multinomial.MultinomialFromUniform('float64')(p,u)
+    m = multinomial.MultinomialFromUniform('float64')(p, u)
    assert m.dtype == 'float64', m.dtype

+
 def test_gpu_opt():
    if not cuda.cuda_available:
        # Skip test if cuda_ndarray is not available.
        from nose.plugins.skip import SkipTest
        raise SkipTest('Optional package cuda not available')

-    # We test the case where we put the op on the gpu when the output is moved to the gpu.
+    # We test the case where we put the op on the gpu when the output
+    # is moved to the gpu.
    p = tensor.fmatrix()
    u = tensor.fvector()
-    m = multinomial.MultinomialFromUniform('auto')(p,u)
+    m = multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float32', m.dtype
    m_gpu = cuda.gpu_from_host(m)

-    f = function([p,u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
-    assert any([type(node.op) is multinomial.GpuMultinomialFromUniform for node in f.maker.fgraph.toposort()])
+    f = function([p, u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
+    assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
+                for node in f.maker.fgraph.toposort()])
    pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4))+0.1
-    pval = pval / pval.sum(axis=1)[:,None]
-    uval = numpy.ones_like(pval[:,0]) * 0.5
-    mval = f(pval,uval)
+    pval = pval / pval.sum(axis=1)[:, None]
+    uval = numpy.ones_like(pval[:, 0]) * 0.5
+    mval = f(pval, uval)
+
+    # Test with a row; this was failing in the past.
+    r = tensor.frow()
+    m = multinomial.MultinomialFromUniform('auto')(r, u)
+    assert m.dtype == 'float32', m.dtype
+    m_gpu = cuda.gpu_from_host(m)
+
+    f = function([r, u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
+    assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
+                for node in f.maker.fgraph.toposort()])
+    pval = numpy.arange(1 * 4, dtype='float32').reshape((1, 4))+0.1
+    pval = pval / pval.sum(axis=1)[:, None]
+    uval = numpy.ones_like(pval[:, 0]) * 0.5
+    mval2 = f(pval, uval)
--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -4656,9 +4656,10 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 1024):
                else:
                    s = scalar.Scalar(i.dtype).make_variable()
                    try:
-                        v = gof.op.get_test_value(i)
-                        if v.size > 0:
-                            s.tag.test_value = gof.op.get_test_value(i).flatten()[0]
+                        if theano.config.compute_test_value != 'off':
+                            v = gof.op.get_test_value(i)
+                            if v.size > 0:
+                                s.tag.test_value = v.flatten()[0]
                    except AttributeError:
                        pass


--- a/theano/tensor/tests/test_subtensor.py
+++ b/theano/tensor/tests/test_subtensor.py
@@ -1146,7 +1146,7 @@ class TestAdvancedSubtensor(unittest.TestCase):
        subt = self.m[self.ix1, self.ix12]
        a = inc_subtensor(subt, subt)

-        typ = TensorType(self.m.type.dtype, self.ix2.type.broadcastable)
+        typ = tensor.TensorType(self.m.type.dtype, self.ix2.type.broadcastable)
        assert a.type == typ, (a.type, typ)
        f = theano.function([self.m, self.ix1, self.ix12], a,
                            allow_input_downcast=True)

--- a/theano/tests/diverse_tests.py
+++ b/theano/tests/diverse_tests.py
-from nose.plugins.skip import SkipTest
-
 import unittest
-import theano
+
 import numpy
-import random
 import numpy.random
-from theano.tests  import unittest_tools as utt
+
+import theano
+from theano.tests import unittest_tools as utt

 '''
-  Different tests that are not connected to any particular Op, or functionality of
-  Theano. Here will go for example code that we will publish in papers, that we
-  should ensure that it will remain operational
+  Different tests that are not connected to any particular Op or
+  functionality of Theano. For example, code that we publish in
+  papers goes here, so that we can ensure that it remains
+  operational.
+
 '''

+
 class T_scipy(unittest.TestCase):
    def setUp(self):
        utt.seed_rng()
        self.orig_floatX = theano.config.floatX
+
    def tearDown(self):
        theano.config.floatX = self.orig_floatX

-
    def test_scipy_paper_example1(self):
-        a = theano.tensor.vector('a') # declare variable
-        b = a + a**10                 # build expression
-        f = theano.function([a], b)   # compile function
-        assert numpy.all(f([0,1,2]) == numpy.array([0,2,1026]))
+        a = theano.tensor.vector('a')  # declare variable
+        b = a + a**10                  # build expression
+        f = theano.function([a], b)    # compile function
+        assert numpy.all(f([0, 1, 2]) == numpy.array([0, 2, 1026]))

    def test_scipy_paper_example2(self):
        ''' This just sees if things compile well and if they run '''
@@ -34,7 +36,7 @@ class T_scipy(unittest.TestCase):
        shared = theano.shared
        function = theano.function
        rng = numpy.random
-        theano.config.floatX='float64'
+        theano.config.floatX = 'float64'

        #
        # ACTUAL SCRIPT FROM PAPER
@@ -49,18 +51,18 @@ class T_scipy(unittest.TestCase):
        xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1)
        prediction = p_1 > 0.5
        cost = xent.mean() + 0.01*(w**2).sum()
-        gw,gb = T.grad(cost, [w,b])
+        gw, gb = T.grad(cost, [w, b])

        # Compile expressions to functions
        train = function(
-            inputs=[x,y],
+            inputs=[x, y],
            outputs=[prediction, xent],
            updates=[(w, w-0.1*gw), (b, b-0.1*gb)])
        predict = function(inputs=[x], outputs=prediction)

        N = 4
        feats = 100
-        D = (rng.randn(N, feats), rng.randint(size=4,low=0, high=2))
+        D = (rng.randn(N, feats), rng.randint(size=4, low=0, high=2))
        training_steps = 10
        for i in range(training_steps):
            pred, err = train(D[0], D[1])
@@ -68,4 +70,3 @@ class T_scipy(unittest.TestCase):

 if __name__ == '__main__':
    unittest.main()
-