Merge pull request #2214 from nouiz/blocksparse

Test fixes, pydotprint improvements, and better error messages.

Merge pull request #2214 from nouiz/blocksparse
e9c1d577 · Pascal Lamblin · 3a74c6ae · 35aff174 · e9c1d577 · e9c1d577
--- a/.travis.yml
+++ b/.travis.yml
@@ -22,6 +22,7 @@ install:
 # So we test with 0.11. Our internal buildbot have 0.7.2.
  - conda create --yes -q -n py26 python=2.6 numpy=1.6 scipy=0.11 nose=1.1 pip
  - source activate py26
+  - pip install pydot
  - pip install . --no-deps --use-mirrors
 # command to run tests

--- a/theano/printing.py
+++ b/theano/printing.py
@@ -7,6 +7,7 @@ from copy import copy
 import logging
 import os
 import sys
+import warnings
 # Not available on all platforms
 hashlib = None
@@ -27,7 +28,6 @@ from theano import gof
 from theano import config
 from theano.compat.six import StringIO
 from theano.gof import Op, Apply
-from theano.gof.python25 import any
 from theano.compile import Function, debugmode
 from theano.compile.profilemode import ProfileMode
@@ -523,12 +523,14 @@ def pydotprint(fct, outfile=None,
               max_label_size=70, scan_graphs=False,
               var_with_name_simple=False,
               print_output_file=True,
-               assert_nb_all_strings=-1
+               assert_nb_all_strings=-1,
+               return_image=False,
               ):
    """
    Print to a file (png format) the graph of a compiled theano function's ops.
-    :param fct: the theano fct returned by theano.function.
+    :param fct: a compiled Theano function, a Variable, an Apply or
+                a list of Variable.
    :param outfile: the output file where to put the graph.
    :param compact: if True, will remove intermediate var that don't have name.
    :param format: the file format of the output.
@@ -557,6 +559,16 @@ def pydotprint(fct, outfile=None,
                the number of unique string nodes in the dot graph is equal to
                this number. This is used in tests to verify that dot won't
                merge Theano nodes.
+    :param return_image: If True, return the rendered image instead of
+        writing it to outfile. Useful for display in an IPython notebook.
+        .. code-block:: python
+            import theano
+            v = theano.tensor.vector()
+            from IPython.display import SVG
+            SVG(theano.printing.pydotprint(v*2, return_image=True,
+                                           format='svg'))
    In the graph, ellipses are Apply Nodes (the execution of an op)
    and boxes are variables.  If variables have names they are used as
@@ -589,27 +601,39 @@ def pydotprint(fct, outfile=None,
        if (not isinstance(mode, ProfileMode)
            or not fct in mode.profile_stats):
            mode = None
-        fct_fgraph = fct.maker.fgraph
+        outputs = fct.maker.fgraph.outputs
+        topo = fct.maker.fgraph.toposort()
    elif isinstance(fct, gof.FunctionGraph):
        mode = None
        profile = None
-        fct_fgraph = fct
+        outputs = fct.outputs
+        topo = fct.toposort()
    else:
-        raise ValueError(('pydotprint expects as input a theano.function or '
+        if isinstance(fct, gof.Variable):
-                         'the FunctionGraph of a function!'), fct)
+            fct = [fct]
+        elif isinstance(fct, gof.Apply):
+            fct = fct.outputs
+        assert isinstance(fct, (list, tuple))
+        assert all(isinstance(v, gof.Variable) for v in fct)
+        fct = gof.FunctionGraph(inputs=gof.graph.inputs(fct),
+                                outputs=fct)
+        mode = None
+        profile = None
+        outputs = fct.outputs
+        topo = fct.toposort()
    if not pydot_imported:
        raise RuntimeError("Failed to import pydot. You must install pydot"
                            " for `pydotprint` to work.")
        return
    g = pd.Dot()
    if cond_highlight is not None:
        c1 = pd.Cluster('Left')
        c2 = pd.Cluster('Right')
        c3 = pd.Cluster('Middle')
        cond = None
-        for node in fct_fgraph.toposort():
+        for node in topo:
            if (node.op.__class__.__name__ == 'IfElse'
                and node.op.name == cond_highlight):
                cond = node
@@ -684,7 +708,6 @@ def pydotprint(fct, outfile=None,
        all_strings.add(varstr)
        return varstr
-    topo = fct_fgraph.toposort()
    apply_name_cache = {}
    def apply_name(node):
@@ -736,7 +759,6 @@ def pydotprint(fct, outfile=None,
    # Update the inputs that have an update function
    input_update = {}
-    outputs = list(fct_fgraph.outputs)
    if isinstance(fct, Function):
        for i in reversed(fct.maker.expanded_inputs):
            if i.update is not None:
@@ -792,7 +814,7 @@ def pydotprint(fct, outfile=None,
        for id, var in enumerate(node.outputs):
            varstr = var_name(var)
-            out = any([x[0] == 'output' for x in var.clients])
+            out = var in outputs
            label = str(var.type)
            if len(node.outputs) > 1:
                label = str(id) + ' ' + label
@@ -825,15 +847,11 @@ def pydotprint(fct, outfile=None,
    if not outfile.endswith('.' + format):
        outfile += '.' + format
-    g.write(outfile, prog='dot', format=format)
-    if print_output_file:
-        print 'The output file is available at', outfile
    if assert_nb_all_strings != -1:
-        assert len(all_strings) == assert_nb_all_strings
+        assert len(all_strings) == assert_nb_all_strings, len(all_strings)
    if scan_graphs:
-        scan_ops = [(idx, x) for idx, x in enumerate(fct_fgraph.toposort())
+        scan_ops = [(idx, x) for idx, x in enumerate(topo)
                    if isinstance(x.op, theano.scan_module.scan_op.Scan)]
        path, fn = os.path.split(outfile)
        basename = '.'.join(fn.split('.')[:-1])
@@ -851,6 +869,13 @@ def pydotprint(fct, outfile=None,
                       high_contrast, cond_highlight, colorCodes,
                       max_label_size, scan_graphs)
+    if return_image:
+        return g.create(prog='dot', format=format)
+    else:
+        g.write(outfile, prog='dot', format=format)
+        if print_output_file:
+            print 'The output file is available at', outfile
 def pydotprint_variables(vars,
                         outfile=None,
@@ -859,8 +884,15 @@ def pydotprint_variables(vars,
                         high_contrast=True, colorCodes=None,
                         max_label_size=50,
                         var_with_name_simple=False):
-    ''' Identical to pydotprint just that it starts from a variable instead
+    '''DEPRECATED: use pydotprint() instead.
-    of a compiled function. Could be useful ? '''
+    Identical to pydotprint just that it starts from a variable
+    instead of a compiled function. Could be useful ?
+    '''
+    warnings.warn("pydotprint_variables() is deprecated."
+                 " Use pydotprint() instead.")
    if colorCodes is None:
        colorCodes = default_colorCodes
@@ -949,7 +981,7 @@ def pydotprint_variables(vars,
                    g.add_node(pd.Node(varastr, color='green'))
            else:
                varastr = my_list[nd]
-            label = ''
+            label = None
            if len(app.inputs) > 1:
                label = str(i)
            g.add_edge(pd.Edge(varastr, astr, label=label))
@@ -974,7 +1006,7 @@ def pydotprint_variables(vars,
                    g.add_node(pd.Node(varastr, color=color))
            else:
                varastr = my_list[nd]
-            label = ''
+            label = None
            if len(app.outputs) > 1:
                label = str(i)
            g.add_edge(pd.Edge(astr, varastr, label=label))

--- a/theano/sandbox/cuda/basic_ops.py
+++ b/theano/sandbox/cuda/basic_ops.py
@@ -327,10 +327,19 @@ class GpuDimShuffle(GpuOp):
    def make_node(self, input):
        ib = tuple(input.type.broadcastable)
        if not ib == self.input_broadcastable:
-            raise TypeError(
+            if len(ib) != len(self.input_broadcastable):
-                "The number of dimensions and/or broadcastable pattern of the"
+                raise TypeError((
-                " input is incorrect for this op. Expected %s, got %s." %
+                    "The number of dimensions of the "
-                (self.input_broadcastable, ib))
+                    "input is incorrect for this op. Expected %s, got %s."
+                    % (self.input_broadcastable, ib)))
+            for expected, b in zip(self.input_broadcastable, ib):
+                if expected is True and b is False:
+                    raise TypeError((
+                        "The broadcastable pattern of the "
+                        "input is incorrect for this op. Expected %s, got %s."
+                        % (self.input_broadcastable, ib)))
+                # Otherwise expected == b, or expected is False and b is
+                # True; both cases are fine.
        ob = []
        if not isinstance(input.type, CudaNdarrayType):
            raise TypeError("The input of a GpuDimshuffle must"

--- a/theano/sandbox/cuda/blas.py
+++ b/theano/sandbox/cuda/blas.py
@@ -1362,7 +1362,8 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
    """Gradient wrt. filters for `GpuCorr3dMM`.
    :note: You will not want to use this directly, but rely on Theano's
-    automatic differentiation or graph optimization to use it as needed."""
+        automatic differentiation or graph optimization to use it as needed.
+    """
    def __init__(self, border_mode="valid",
                 subsample=(1, 1, 1),
@@ -1417,7 +1418,8 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
    """Gradient wrt. inputs for `GpuCorr3dMM`.
    :note: You will not want to use this directly, but rely on Theano's
-    automatic differentiation or graph optimization to use it as needed."""
+        automatic differentiation or graph optimization to use it as needed.
+    """
    def __init__(self, border_mode="valid",
                 subsample=(1, 1, 1),

--- a/theano/sandbox/cuda/blocksparse.py
+++ b/theano/sandbox/cuda/blocksparse.py
@@ -3,16 +3,17 @@ import theano
 from theano import Apply, tensor, scalar, Constant
 from theano.tensor import DimShuffle, discrete_dtypes
-from theano.gradient import grad_undefined, grad_not_implemented
+from theano.gradient import grad_undefined
 from theano.sandbox.cuda import cuda_available, GpuOp, GpuElemwise
 if cuda_available:
-    from theano.sandbox.cuda import (basic_ops, CudaNdarrayType,
+    from theano.sandbox.cuda import (basic_ops,
-                                     CudaNdarray, opt, GpuFromHost,
+                                     opt, GpuFromHost,
                                     HostFromGpu, host_from_gpu,
                                     GpuDimShuffle)
 class SparseBlockGemvSS(GpuOp):
    """
    This op computes the dot product of specified pieces of vectors
@@ -183,7 +184,8 @@ static int SparseBlockGemv_copy(PyArrayObject *a, npy_intp *b) {
                        cudaMemcpyHostToDevice);
  Py_DECREF(aa);
  if (err != cudaSuccess) {
-    PyErr_SetString(PyExc_RuntimeError, "Cannot copy index data to GPU");
+    PyErr_Format(PyExc_RuntimeError, "Cannot copy index data to GPU (%s)",
+                 cudaGetErrorString(err));
    return -1;
  }
  return 0;
@@ -241,11 +243,11 @@ Py_INCREF(%(out)s);
            res = """
 if (CudaNdarray_prep_output(&%(out)s, 3, CudaNdarray_HOST_DIMS(%(o)s)))
 {
-  PyErr_SetString(PyExc_RuntimeError, "Cannot allocate output");
+  // Error already set
  %(fail)s
 }
 if (CudaNdarray_CopyFromCudaNdarray(%(out)s, %(o)s)) {
-  PyErr_SetString(PyExc_RuntimeError, "Cannot copy data to output");
+  // Error already set
  %(fail)s
 }
 """ % dict(out=out, o=o, fail=sub['fail'])
@@ -313,7 +315,8 @@ CudaNdarray_HOST_STRIDES(%(out)s)[0], CudaNdarray_HOST_STRIDES(%(out)s)[1],
                             CudaNdarray_HOST_DIMS(%(h)s)[1] *
                             CudaNdarray_HOST_DIMS(%(o)s)[0]);
          if (err != CUBLAS_STATUS_SUCCESS) {
-            PyErr_SetString(PyExc_RuntimeError, "SgemvBatched failed");
+            PyErr_Format(PyExc_RuntimeError, "SgemvBatched failed(%%s)",
+                         cublasGetErrorString(err));
            %(fail)s
          }
        }
@@ -322,7 +325,7 @@ CudaNdarray_HOST_STRIDES(%(out)s)[0], CudaNdarray_HOST_STRIDES(%(out)s)[1],
                   W=W, fail=sub['fail'], name=nodename)
    def c_code_cache_version(self):
-        return (10,)
+        return (11,)
    def grad(self, inputs, grads):
        o, W, h, inputIdx, outputIdx = inputs
@@ -482,7 +485,8 @@ static int SparseBlockOuter_copy(PyArrayObject *a, npy_intp *b) {
                        cudaMemcpyHostToDevice);
  Py_DECREF(aa);
  if (err != cudaSuccess) {
-    PyErr_SetString(PyExc_RuntimeError, "Cannot copy index data to GPU");
+    PyErr_Format(PyExc_RuntimeError, "Cannot copy index data to GPU(%s)",
+                 cudaGetErrorString(err));
    return -1;
  }
  return 0;
@@ -541,11 +545,11 @@ Py_INCREF(%(out)s);
            res = """
 if (CudaNdarray_prep_output(&%(out)s, 4, CudaNdarray_HOST_DIMS(%(o)s)))
 {
-  PyErr_SetString(PyExc_RuntimeError, "Cannot allocate output");
+  // Python error already set
  %(fail)s
 }
 if (CudaNdarray_CopyFromCudaNdarray(%(out)s, %(o)s)) {
-  PyErr_SetString(PyExc_RuntimeError, "Cannot copy data to output");
+  //Error message already set
  %(fail)s
 }
 """ % dict(out=out, o=o, fail=sub['fail'])
@@ -612,7 +616,8 @@ CudaNdarray_HOST_STRIDES(%(out)s)[0], CudaNdarray_HOST_STRIDES(%(out)s)[1],
                       "block size too big. The current limit is 65535 for "
                       "iSize * oSize.");
    } else {
-      PyErr_SetString(PyExc_RuntimeError, "SgerBatched failed");
+      PyErr_Format(PyExc_RuntimeError, "SgerBatched failed(%%s)",
+                   cublasGetErrorString(err));
    }
    %(fail)s
  }
@@ -620,7 +625,7 @@ CudaNdarray_HOST_STRIDES(%(out)s)[0], CudaNdarray_HOST_STRIDES(%(out)s)[1],
            alpha=alpha, fail=sub['fail'])
    def c_code_cache_version(self):
-        return (9,)
+        return (10,)
 sparse_block_outer_ss = SparseBlockOuterSS(False)

--- a/theano/sandbox/cuda/cuda_ndarray.cuh
+++ b/theano/sandbox/cuda/cuda_ndarray.cuh
@@ -91,6 +91,8 @@ extern DllExport cublasHandle_t handle;
 *
 * device_malloc will set the Python error message before returning None.
 * device_free will return nonzero on failure (after setting the python error message)
+ *
+ * Set the Python error
 */
 DllExport void * device_malloc(size_t size);
 DllExport void * device_malloc(size_t size, int verbose);
@@ -148,6 +150,8 @@ enum operator_t
 /*
 * Return a CudaNdarray whose 'nd' dimensions are all 0.
 * if nd==-1, it is not initialized.
+ *
+ * Set the Python error
 */
 DllExport PyObject *
 CudaNdarray_New(int nd=-1);
@@ -286,6 +290,8 @@ static PyObject *CudaNdarray_SIZE_Object(const CudaNdarray *self, void *closure)
 * Allocate a new CudaNdarray with room for given number of dimensions
 *
 * No Storage space is allocated (and all dimensions are 0)
+ *
+ * Set the Python error
 */
 DllExport PyObject * CudaNdarray_new_nd(const int nd);
@@ -294,6 +300,8 @@ DllExport PyObject * CudaNdarray_new_nd(const int nd);
 *
 * Note: This does not allocate storage for data, or free
 *       pre-existing storage.
+ *
+ * Set the Python error
 */
 DllExport inline int ALWAYS_INLINE
 CudaNdarray_set_nd(CudaNdarray * self, const int nd)
@@ -505,6 +513,8 @@ DllExport int CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj);
 *               e.g. suppose self and other are 2D matrices and other
 *               has only one row. Then we need to copy this row several
 *               times when copying to self.
+ *
+ * Set the Python error
 */
 DllExport int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self,
        const CudaNdarray * other, bool unbroadcast = false);
@@ -575,6 +585,7 @@ DllExport int CudaNdarray_dimshuffle(CudaNdarray * self, unsigned int len, const
 DllExport PyObject*
 CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args);
+// Set the Python error
 int fprint_CudaNdarray(FILE * fd, const CudaNdarray *self);
@@ -589,6 +600,8 @@ DllExport int CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_othe
 // or a pointer to an ndarray of the right size. In the last case it will
 // not change.
 // If fortran is non-zero, a fortran order is expected/created
+//
+// Set the Python error
 DllExport int CudaNdarray_prep_output(CudaNdarray ** arr, int nd,
                                      const int * dims, int fortran = 0);

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -1846,7 +1846,8 @@ def local_assert(node):
        node.inputs[0].owner and
        isinstance(node.inputs[0].owner.op,
                   HostFromGpu)):
-        return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0]))]
+        return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0],
+                                      *node.inputs[1:]))]
 @register_opt()

--- a/theano/sandbox/cuda/tests/test_opt.py
+++ b/theano/sandbox/cuda/tests/test_opt.py
@@ -80,7 +80,8 @@ def test_gpualloc():
    x = theano.shared(numpy.ones(3, dtype='float32'), 'x')
    m = (x).dimshuffle(['x', 0])
    v = tensor.alloc(1., *m.shape)
-    f = theano.function([], v + x, mode=mode_with_gpu)
+    f = theano.function([], v + x,
+                        mode=mode_with_gpu.excluding("local_alloc_elemwise"))
    l = f.maker.fgraph.toposort()
    assert numpy.any([isinstance(x.op, cuda.GpuAlloc) for x in l])

--- a/theano/sandbox/gpuarray/opt.py
+++ b/theano/sandbox/gpuarray/opt.py
@@ -514,7 +514,8 @@ def local_gpua_softmaxwithbias(node):
 @register_opt('fast_compile')
 @op_lifter([theano.tensor.opt.Assert])
 def local_assert(node):
-    return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0]))]
+    return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0],
+                                  *node.inputs[1:]))]
 @register_opt('fast_compile')

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -5198,7 +5198,17 @@ class Choose(Op):
    def infer_shape(self, node, shapes):
        if isinstance(node.inputs[1], TensorVariable):
-            return[(shapes[0])]
+            # We have padded node.inputs[0] to the right number of
+            # dimensions for the output
+            l = []
+            for sh1, sh2, b1 in zip(shapes[0],
+                                        shapes[1][1:],
+                                        node.inputs[0].broadcastable):
+                if b1:
+                    l.append(sh2)
+                else:
+                    l.append(sh1)
+            return [tuple(l)]
        else:
            import theano.typed_list
            assert isinstance(node.inputs[1],
@@ -5214,11 +5224,47 @@ class Choose(Op):
        # import at the top as it would cause circular import.
        import theano.typed_list
        a = as_tensor_variable(a)
-        if isinstance(choices, (tuple, list)):
+        if a.dtype not in theano.tensor.discrete_dtypes:
+            raise TypeError(
+                'choose first argument must have an [u]int* dtype. Got %s.'
+                % a.dtype)
+        if isinstance(choices, (tuple, list,
+                                theano.typed_list.TypedListVariable)):
            choice = theano.typed_list.make_list(choices)
+            choice_ndim = choice.ttype.ndim
+            choice_bcast = choice.ttype.broadcastable
        else:
            choice = as_tensor_variable(choices)
-        return Apply(self, [a, choice], [a.type()])
+            choice_ndim = choice.ndim - 1
+            choice_bcast = choice.broadcastable[1:]
+        out_ndim = numpy.max([a.ndim, choice_ndim])
+        # Make explicit all added broadcastable dimensions.
+        a = shape_padleft(a, out_ndim - a.ndim)
+        if len(choice_bcast) != out_ndim:
+            if isinstance(choice.type, TensorType):
+                choice = choice.dimshuffle(0,
+                                           *(('x',) *(out_ndim - choice_ndim) +
+                                             tuple(range(1, choice.ndim))))
+                choice_ndim = choice.ndim - 1
+                choice_bcast = choice.broadcastable[1:]
+            else:
+                raise NotImplementedError(
+                    "We currently didn't implemented that case. "
+                    "To make it work, explicitly add dimensions "
+                    "of size one for dimensions that will be broadcasted")
+                assert isinstance(node.inputs[1],
+                                  theano.typed_list.TypedListVariable)
+        bcast = [False] * out_ndim
+        for idx, (b1, b2) in enumerate(
+            zip(a.broadcastable,
+                (True,) * (out_ndim - choice_ndim) + choice_bcast)):
+            if b1 and b2:
+                bcast[idx] = True
+        o = TensorType(choice.dtype, bcast)
+        return Apply(self, [a, choice], [o()])
    def perform(self, node, inputs, (z, )):
        a = inputs[0]

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -7045,74 +7045,119 @@ class T_Power(unittest.TestCase):
 class T_Choose(utt.InferShapeTester):
    op = staticmethod(choose)
    op_class = Choose
+    modes = ['raise', 'wrap', 'clip']
    def test_numpy_compare(self):
-        a = tensor.vector(dtype='int64')
+        a = tensor.vector(dtype='int32')
-        b = tensor.matrix(dtype='int64')
+        b = tensor.matrix(dtype='float32')
-        A = numpy.asarray(numpy.random.rand(4), dtype='int64')
-        B = numpy.asarray(numpy.random.rand(4, 4), dtype='int64')
-        modes = ['raise', 'wrap', 'clip']
+        A = numpy.asarray(numpy.random.random_integers(0, 3, 4),
+                          dtype='int32')
+        B = numpy.asarray(numpy.random.rand(4, 4), dtype='float32')
-        for m in modes:
+        for m in self.modes:
            f = function([a, b], choose(a, b, mode=m))
            t_c = f(A, B)
            n_c = numpy.choose(A, B, mode=m)
            assert numpy.allclose(t_c, n_c)
-    def test_numpy_compare_tuple(self):
+    def test_broadcasted(self):
+        a = tensor.scalar(dtype='int32')
+        b = tensor.matrix(dtype='float32')
-        a = tensor.tensor3(dtype='int64')
+        # Test when a is broadcastable
-        b = tensor.tensor3(dtype='int64')
+        A = 3
-        c = tensor.tensor3(dtype='int64')
+        B = numpy.asarray(numpy.random.rand(4, 4), dtype='float32')
-        A = numpy.asarray(numpy.random.rand(2, 1, 1), dtype='int64')
+        for m in self.modes:
-        B = numpy.asarray(numpy.random.rand(1, 6, 1), dtype='int64')
+            f = function([a, b], choose(a, b, mode=m))
-        C = numpy.asarray(numpy.random.rand(1, 1, 5), dtype='int64')
+            t_c = f(A, B)
+            n_c = numpy.choose(A, B, mode=m)
+            assert numpy.allclose(t_c, n_c)
-        f = function([a, b, c], choose(a, (b, c)))
+        # Test when the result should be broadcastable
-        t_c = f(A, B, C)
+        b = theano.tensor.col(dtype='float32')
-        n_c = numpy.choose(A, (B, C))
+        B = numpy.asarray(numpy.random.rand(4, 1), dtype='float32')
+        for m in self.modes:
+            f = function([a, b], choose(a, b, mode=m))
+            assert choose(a, b, mode=m).broadcastable[0]
+            t_c = f(A, B)
+            n_c = numpy.choose(A, B, mode=m)
            assert numpy.allclose(t_c, n_c)
-    def test_infer_shape(self):
+    def test_dtype_error(self):
+        a = tensor.scalar(dtype='float32')
+        b = tensor.matrix(dtype='float32')
+        A = 3
+        B = numpy.asarray(numpy.random.rand(4, 4), dtype='float32')
+        self.assertRaises(TypeError, choose, a, b)
+    def test_numpy_compare_tuple(self):
-        a = tensor.matrix(dtype='int64')
+        a = tensor.tensor3(dtype='int32')
-        b = tensor.vector(dtype='int64')
+        b = tensor.tensor3(dtype='float32')
-        c = tensor.matrix(dtype='int64')
+        c = tensor.tensor3(dtype='float32')
-        d = tensor.vector(dtype='int64')
-        A = numpy.asarray(numpy.random.rand(5, 4), dtype='int64')
+        A = numpy.asarray(numpy.random.random_integers(0, 1, (2, 1, 1)),
-        B = numpy.asarray(numpy.random.rand(4), dtype='int64')
+                          dtype='int32')
-        C = numpy.asarray(numpy.random.rand(7, 4), dtype='int64')
+        B = numpy.asarray(numpy.random.rand(1, 6, 1), dtype='float32')
-        D = numpy.asarray(numpy.random.rand(4), dtype='int64')
+        C = numpy.asarray(numpy.random.rand(1, 1, 5), dtype='float32')
-        var1 = [a, b, a, b]
+        for m in self.modes:
-        var2 = [c, d, b, a]
+            f = function([a, b, c], choose(a, (b, c), mode=m))
-        mat1 = [A, B, A, B]
+            t_c = f(A, B, C)
-        mat2 = [C, D, B, A]
+            n_c = numpy.choose(A, (B, C), mode=m)
+            assert numpy.allclose(t_c, n_c)
-        for v, m, w, n in zip(var1, mat1, var2, mat2):
+    def test_infer_shape(self):
-            self._compile_and_check([v, w],  # theano.function inputs
+        for shp1, shp2 in [
-                                        [self.op(v, w)],  # theano.function outputs
+            ((5, 4), (7, 4)),
+            ((1, 4), (7, 4)),
+            ((5, 1), (7, 4)),
+            ((5, 4), (1, 4)),
+            ((5, 4), (7, 1)),
+            ((5, 4), (4,)),
+            ((1, 4), (4,)),
+            ((5, 1), (4,)),
+            ((5, 4), (1,)),
+            ((4,), (5, 4)),
+            ((1,), (5, 4)),
+            ((4,), (1, 4)),
+            ((4,), (3, 1)),
+            ((4,), (4,)),
+            ((1,), (4,)),
+            ((4,), (1,)),
+            ((1,), (1,)),
+        ]:
+            a = tensor.tensor(dtype='int32',
+                              broadcastable=[n == 1 for n in shp1])
+            c = tensor.tensor(dtype='float32',
+                              broadcastable=[n == 1 for n in shp2])
+            A = numpy.asarray(numpy.random.rand(*shp1) * shp2[0], dtype='int32')
+            C = numpy.asarray(numpy.random.rand(*shp2) * shp2[0], dtype='float32')
+            self._compile_and_check([a, c],  # theano.function inputs
+                                    [self.op(a, c)],  # theano.function outputs
                                    # Always use not square matrix!
                                    # inputs data
-                                        [m, n],
+                                    [A, C],
                                    # Op that should be removed from the graph.
                                    self.op_class)
 # Disabled as it isn't implemented.
    def ___test_infer_shape_tuple(self):
-        a = tensor.tensor3(dtype='int64')
+        a = tensor.tensor3(dtype='int32')
-        b = tensor.tensor3(dtype='int64')
+        b = tensor.tensor3(dtype='int32')
-        c = tensor.tensor3(dtype='int64')
+        c = tensor.tensor3(dtype='int32')
-        A = numpy.asarray([1, 0], dtype='int64').reshape((2, 1, 1))
+        A = numpy.asarray([1, 0], dtype='int32').reshape((2, 1, 1))
-        B = numpy.asarray(numpy.random.rand(1, 4, 1), dtype='int64')
+        B = numpy.asarray(numpy.random.rand(1, 4, 1), dtype='int32')
-        C = numpy.asarray(numpy.random.rand(1, 1, 7), dtype='int64')
+        C = numpy.asarray(numpy.random.rand(1, 1, 7), dtype='int32')
        f = function([a, b, c], choose(a, (b, c)))
        shape = (2, 4, 7)

--- a/theano/tensor/tests/test_opt.py
+++ b/theano/tensor/tests/test_opt.py
@@ -2491,6 +2491,7 @@ def test_local_IncSubtensor_serialize():
    cost = T.sqr(t - y)
    dW = theano.grad(cost, W)
    mode = theano.compile.mode.get_default_mode().excluding('fusion')
+    mode = mode.including("local_IncSubtensor_serialize")
    f = theano.function([i, j, t], updates=[(W, W - 0.01 * dW)], mode=mode)
    topo = f.maker.fgraph.toposort()
    adds = [n for n in topo if isinstance(n.op, T.Elemwise) and

--- a/theano/tests/test_printing.py
+++ b/theano/tests/test_printing.py
@@ -45,6 +45,16 @@ def test_pydotprint_cond_highlight():
            ' is no IfElse node in the graph\n')
+def test_pydotprint_return_image():
+    # Skip test if pydot is not available.
+    if not theano.printing.pydot_imported:
+        raise SkipTest('pydot not available')
+    x = tensor.dvector()
+    ret = theano.printing.pydotprint(x * 2, return_image=True)
+    assert isinstance(ret, str)
 def test_pydotprint_variables():
    """
    This is a REALLY PARTIAL TEST.
@@ -65,11 +75,10 @@ def test_pydotprint_variables():
    new_handler.setLevel(logging.DEBUG)
    orig_handler = theano.logging_default_handler
-    theano.theano_logger.removeHandler(orig_handler)
-    theano.theano_logger.addHandler(new_handler)
    theano.theano_logger.removeHandler(orig_handler)
    theano.theano_logger.addHandler(new_handler)
    try:
+        theano.printing.pydotprint(x * 2)
        theano.printing.pydotprint_variables(x * 2)
    finally:
        theano.theano_logger.addHandler(orig_handler)
@@ -94,14 +103,13 @@ def test_pydotprint_long_name():
    f = theano.function([x], [x * 2, x + x], mode=mode)
    f([1, 2, 3, 4])
-    s = StringIO()
-    new_handler = logging.StreamHandler(s)
-    new_handler.setLevel(logging.DEBUG)
-    orig_handler = theano.logging_default_handler
    theano.printing.pydotprint(f, max_label_size=5,
                               print_output_file=False,
                               assert_nb_all_strings=6)
+    theano.printing.pydotprint([x * 2, x + x],
+                               max_label_size=5,
+                               print_output_file=False,
+                               assert_nb_all_strings=8)
 def test_pydotprint_profile():

--- a/theano/typed_list/basic.py
+++ b/theano/typed_list/basic.py
@@ -41,6 +41,7 @@ class _typed_list_py_operators:
        return index_(self, elem)
    ttype = property(lambda self: self.type.ttype)
+    dtype = property(lambda self: self.type.ttype.dtype)
 class TypedListVariable(_typed_list_py_operators, Variable):