Commit c326cc22 authored by Olivier Delalleau

Merged

...@@ -144,7 +144,7 @@ import theano and print the config variable, as in:

.. attribute:: floatX

    String value: either 'float64' or 'float32'

    Default: 'float64'

...@@ -152,6 +152,41 @@ import theano and print the config variable, as in:

    and similar functions. It also sets the default theano bit width for
    arguments passed as Python floating-point numbers.
.. attribute:: cast_policy

    String value: either 'numpy+floatX', 'numpy' or 'custom'

    Default: 'custom'

    This specifies how data types are implicitly figured out in Theano, e.g.
    for constants or in the result of arithmetic operations. The recommended
    value is 'numpy+floatX', which mimics numpy's behavior except for floats:
    when ``config.floatX`` is set to 'float32', we use float32 instead of
    float64, unless the user is explicitly using data typed as float64. When
    'numpy' is used, this specific floatX behavior is discarded. The current
    default value is 'custom' for backward compatibility reasons, and
    corresponds to a set of custom rules originally used in Theano (which can
    be partially customized, see e.g. the in-code help of
    ``tensor.NumpyAutocaster``). The 'custom' option will be deprecated in a
    future release of Theano.

    **Until further notice, it is strongly advised to never change this option
    within a script, and to always clean your Theano cache whenever you modify
    its value**.
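
    For example, under 'numpy+floatX' with ``config.floatX`` set to 'float32',
    a plain Python float becomes a float32 constant while an explicitly typed
    float64 scalar keeps its type. A sketch (the options are switched inline
    here only for illustration; as noted above, do not do this in real
    scripts)::

        import numpy
        import theano
        import theano.tensor as T

        theano.config.cast_policy = 'numpy+floatX'
        theano.config.floatX = 'float32'
        # A plain Python float follows floatX...
        print T.constant(1.1).dtype                 # 'float32'
        # ...but an explicitly typed float64 scalar is left alone.
        print T.constant(numpy.float64(1.1)).dtype  # 'float64'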
.. attribute:: int_division

    String value: either 'int', 'floatX' or 'raise'

    Default: 'int'

    Specifies what to do when one tries to compute `x / y`, where both `x` and
    `y` are of integer types (possibly unsigned). 'int' means an integer is
    returned (as in Python 2.x), but this behavior is deprecated. 'floatX'
    returns a number whose type is given by ``config.floatX``. 'raise' is the
    safest choice (and will become the default in a future release of Theano):
    it raises an error when one tries such an operation, enforcing the use of
    the integer division operator (``//``). If a float result is intended,
    either cast one of the arguments to a float, or use `x.__truediv__(y)`.
.. attribute:: mode

    String value: 'Mode', 'ProfileMode', 'DebugMode', 'FAST_RUN', 'FAST_COMPILE'

...
...@@ -15,11 +15,16 @@ AddConfigVar('floatX',

        EnumStr('float64', 'float32'),
        )

AddConfigVar('cast_policy',
        "Rules for implicit type casting (until further notice, do not modify "
        "within a script, and clear your Theano cache whenever it is modified)",
        EnumStr('custom', 'numpy+floatX', 'numpy'),
        )

AddConfigVar('int_division',
        "What to do when one computes x / y, where both x and y are of "
        "integer types",
        EnumStr('int', 'raise', 'floatX'),
        )
# gpu means let the driver select the gpu. Needed in case of gpus in exclusive mode.
# gpuX means use gpu number X.

...
...@@ -7,6 +7,8 @@ import ConfigParser

import logging
import warnings

import theano

_logger = logging.getLogger('theano.config')
class TheanoConfigWarning(Warning):

...@@ -103,6 +105,17 @@ def _config_print(thing, buf):

        print >> buf, "  Value: ", cv.val
        print >> buf, ""

def get_config_md5():
    """
    Return an md5 digest (as a string) of the current config options. It
    should be such that we can safely assume two different config setups will
    lead to two different strings.
    """
    all_opts = sorted(_config_var_list, key=lambda cv: cv.fullname)
    return theano.gof.cc.hash_from_code('\n'.join(
        ['%s = %s' % (cv.fullname, cv.val) for cv in all_opts]))
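
# This digest feeds into the compilation cache key (see the CLinker change
# below), so any config change invalidates previously cached modules. A quick
# sketch of the intended property (hypothetical session):
#
#     from theano import config, configparser
#     key1 = configparser.get_config_md5()
#     config.floatX = 'float32'            # any option change...
#     key2 = configparser.get_config_md5()
#     assert key1 != key2                  # ...yields a different digest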

class TheanoConfigParser(object):
    # properties are installed by AddConfigVar
    _i_am_a_config_class = True

...@@ -110,6 +123,7 @@ class TheanoConfigParser(object):

        sio = StringIO.StringIO()
        _config_print(self.__class__, sio)
        return sio.getvalue()


# N.B. all instances of TheanoConfigParser give access to the same properties.
config = TheanoConfigParser()

...
...@@ -4,6 +4,7 @@ This is not currently very used. It appears in some cases, but I'm not sure i

It could help the current system detect problems earlier, when constructing the graph instead of during optimization.
"""
import sys

import theano
from theano import gof

def ishape(v):

...@@ -35,7 +36,7 @@ class Apply(gof.Apply):

        try:
            oshapes = infer_shape(self, ishapes)
        except theano.tensor.ShapeError:
            return
        for o, oshp in zip(outputs, oshapes):

...
...@@ -16,6 +16,7 @@ else:

    def hash_from_code(msg):
        return md5.new(msg).hexdigest()

import theano
from theano.gof.python25 import all
from theano import config

...@@ -791,7 +792,7 @@ class CLinker(link.Linker):

        The key returned by this function is of the form (version, signature)
        The signature has the following form:
        {{{
        'CLinker.cmodule_key', compilation args, libraries, config md5,
        (op0, input_signature0, output_signature0),
        (op1, input_signature1, output_signature1),
        ...

...@@ -858,10 +859,12 @@ class CLinker(link.Linker):

        constant_ids = dict()
        op_pos = {}  # Apply -> topological position

        # First we put the header, compile_args, library names and config md5
        # into the signature.
        sig = ['CLinker.cmodule_key']  # will be cast to tuple on return
        if compile_args is not None: sig.append(tuple(compile_args))
        if libraries is not None: sig.append(tuple(libraries))
        sig.append(theano.configparser.get_config_md5())

        # technically this should only be appended for gcc-compiled Ops
        # and the flags of other compilers should be inserted here... but it's not clear how to

...
...@@ -246,13 +246,13 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):

    neib_shape = T.as_tensor_variable(neib_shape)
    original_shape = T.as_tensor_variable(original_shape)

    new_neib_shape = T.stack(original_shape[-1] // neib_shape[1], neib_shape[1])
    output_2d = images2neibs(neibs.dimshuffle('x','x',0,1), new_neib_shape, mode=mode)

    if mode == 'ignore_borders':
        valid_shape = list(original_shape)
        valid_shape[2] = (valid_shape[2] // neib_shape[0]) * neib_shape[0]
        valid_shape[3] = (valid_shape[3] // neib_shape[1]) * neib_shape[1]
        output_4d = output_2d.reshape(valid_shape)
        # padding the borders with zeros
        for d in [2,3]:

...
...@@ -263,7 +263,7 @@ class mrg_uniform(mrg_uniform_base):

        if (%(size)s->dimensions[0] != %(ndim)s)
        {
            PyErr_Format(PyExc_ValueError, "size must have length %%i (not %%i)",
                %(ndim)s, int(%(size)s->dimensions[0]));
            %(fail)s
        }
        if (%(size)s->descr->type_num != PyArray_INT32)

...@@ -589,6 +589,35 @@ class GPU_mrg_uniform(mrg_uniform_base):

    def c_code_cache_version(self):
        return (4,)

def guess_n_streams(size, warn=True):
    """
    Return a guess at a good number of streams.

    :param warn: If True, warn when a guess cannot be made (in which case
    we return 30 * 256).
    """
    # TODO: a smart way of choosing the number of streams, see #612.
    # Note that this code was moved out of `MRG_RandomStreams` so that it can
    # be easily accessed from tests, where we want to disable the warning.
    if (isinstance(size, (tuple, list)) and
        all([isinstance(i, int) for i in size])):
        # We can make a guess.
        r = 1
        for s in size:
            r *= s
        if r > 6:
            r = r // 6  # chosen as fastest for rbm_benchmark
        return r
    else:
        if warn:
            print >> sys.stderr, (
                "MRG_RandomStreams Can't determine #streams from "
                "size (%s), guessing 30*256") % str(size)
        return 30 * 256
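
# A quick illustration of the guess (hypothetical session):
#
#     print guess_n_streams((2, 3, 4))  # fully-known shape: 24 // 6 = 4
#     print guess_n_streams((2,))       # small sizes are returned as-is: 2
#     print guess_n_streams(some_symbolic_shape, warn=False)  # fallback: 7680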

class MRG_RandomStreams(object):
    """Module component with similar interface to numpy.random (numpy.random.RandomState)"""

...@@ -654,18 +683,7 @@ class MRG_RandomStreams(object):

        return rval

    def n_streams(self, size):
        return guess_n_streams(size, warn=True)

    def pretty_return(self, node_rstate, new_rstate, sample):
        sample.rstate = node_rstate

...@@ -674,7 +692,8 @@ class MRG_RandomStreams(object):

        node_rstate.default_update = new_rstate
        return sample

    def uniform(self, size, low=0.0, high=1.0, ndim=None, dtype='floatX',
                nstreams=None):
        """
        Sample a tensor of given size whose elements are drawn from a uniform
        distribution between low and high.

...@@ -683,10 +702,14 @@ class MRG_RandomStreams(object):

        ndim may be a plain integer to supplement the missing
        information.

        :param size: Can be a list of integers or Theano variables (ex: the
            shape of another Theano Variable)
            TODO: can size be None?

        :param dtype: The output data type.
        """
        if dtype == 'floatX':
            dtype = config.floatX
        if isinstance(size, tuple):
            msg = "size must be a tuple of int or a Theano variable"
            assert all([isinstance(i,int) or isinstance(i,Variable)

...@@ -728,16 +751,19 @@ class MRG_RandomStreams(object):

            raise NotImplementedError('Increase the size to match the broadcasting pattern of `low` and `high` arguments')
        return r

    def binomial(self, size=None, n=1, p=0.5, ndim=None, dtype='int64',
                 nstreams=None):
        if n == 1:
            if dtype == 'float32' and self.use_cuda:
                x = self.uniform(size=size, dtype=dtype, nstreams=nstreams)
            else:
                x = self.uniform(size=size, nstreams=nstreams)
            return cast(x < p, dtype)
        else:
            raise NotImplementedError("MRG_RandomStreams.binomial with n > 1")

    def multinomial(self, size=None, n=1, pvals=None, ndim=None, dtype='int64',
                    nstreams=None):
        """
        Sample `n` (currently `n` needs to be 1) times from a multinomial
        distribution defined by probabilities pvals.

...@@ -758,22 +784,31 @@ class MRG_RandomStreams(object):

                    ndim, size, pvals[:,0])
            assert ndim == 1
            bcast = bcast + (pvals.type.broadcastable[-1],)
            unis = self.uniform(size=size, ndim=1, nstreams=nstreams)
            op = multinomial.MultinomialFromUniform(dtype)
            return op(pvals, unis)
        else:
            raise NotImplementedError(("MRG_RandomStreams.multinomial only"
                " implemented with n == 1 and pvals.ndim = 2"))

    def normal(self, size=None, avg=0.0, std=1.0, ndim=None,
               dtype='floatX', nstreams=None):
        """
        :param size: Can be a list of integers or Theano variables (ex: the
            shape of another Theano Variable)

        :param dtype: The output data type.

        :param nstreams: Number of streams.
        """
        # We need an even number of ]0,1[ samples. Then we split them
        # in two halves. First half becomes our U1's for Box-Muller,
        # second half our U2's. See Wikipedia page:
        # http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
        if dtype == 'floatX':
            dtype = config.floatX
        evened = False
        constant = False
        if isinstance(size, tuple) and all([isinstance(i,int) for i in size]):

...@@ -786,14 +821,15 @@ class MRG_RandomStreams(object):

        else:
            # if even, don't change, if odd, +1
            n_samples = prod(size) + (prod(size) % 2)
        flattened = self.uniform(size=(n_samples,), dtype=dtype,
                                 nstreams=nstreams)

        if constant:
            U1 = flattened[:n_samples // 2]
            U2 = flattened[n_samples // 2:]
        else:
            U1 = flattened[:prod(flattened.shape) // 2]
            U2 = flattened[prod(flattened.shape) // 2:]

        #normal_samples = zeros_like(flattened)
        sqrt_ln_U1 = sqrt(-2.0*log(U1))

...
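For reference, a plain-numpy sketch of the Box-Muller transform used above
(independent of Theano; `box_muller` is a name made up for this note):

    import numpy

    def box_muller(u1, u2):
        # Map two uniform ]0,1[ samples to two independent standard normals.
        r = numpy.sqrt(-2.0 * numpy.log(u1))
        theta = 2.0 * numpy.pi * u2
        return r * numpy.cos(theta), r * numpy.sin(theta)

    u = numpy.random.uniform(1e-7, 1 - 1e-7, size=1000)
    z1, z2 = box_muller(u[:500], u[500:])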
...@@ -350,7 +350,9 @@ def test_uniform():

        print 'ON CPU with size=(%s):' % str(size)
        x = tensor.matrix()
        R = MRG_RandomStreams(234, use_cuda=False)
        # Note: we specify `nstreams` to avoid a warning.
        u = R.uniform(size=size,
                      nstreams=rng_mrg.guess_n_streams(size, warn=False))
        f = theano.function(var_input, u, mode=mode)
        assert any([isinstance(node.op, theano.sandbox.rng_mrg.mrg_uniform)
                    for node in f.maker.env.toposort()])

...@@ -366,7 +368,8 @@ def test_uniform():

            print ''
            print 'ON GPU with size=(%s):' % str(size)
            R = MRG_RandomStreams(234, use_cuda=True)
            u = R.uniform(size=size, dtype='float32',
                          nstreams=rng_mrg.guess_n_streams(size, warn=False))
            assert u.dtype == 'float32'  # well, it's really that this test w GPU doesn't make sense otw
            f = theano.function(var_input, theano.Out(
                theano.sandbox.cuda.basic_ops.gpu_from_host(u),

...@@ -421,7 +424,9 @@ def test_binomial():

        print ''
        print 'ON CPU with size=(%s) and mean(%d):' % (str(size), mean)
        R = MRG_RandomStreams(234, use_cuda=False)
        # Note: we specify `nstreams` to avoid a warning.
        u = R.binomial(size=size, p=mean,
                       nstreams=rng_mrg.guess_n_streams(size, warn=False))
        f = theano.function(var_input, u, mode=mode)
        theano.printing.debugprint(f)
        out = f(*input)

...@@ -433,7 +438,9 @@ def test_binomial():

            print ''
            print 'ON GPU with size=(%s) and mean(%d):' % (str(size), mean)
            R = MRG_RandomStreams(234, use_cuda=True)
            u = R.binomial(size=size, p=mean, dtype='float32',
                           nstreams=rng_mrg.guess_n_streams(size,
                                                            warn=False))
            assert u.dtype == 'float32'  # well, it's really that this test w GPU doesn't make sense otw
            f = theano.function(var_input, theano.Out(
                theano.sandbox.cuda.basic_ops.gpu_from_host(u),

...@@ -478,7 +485,9 @@ def test_normal0():

        print 'ON CPU:'
        R = MRG_RandomStreams(234, use_cuda=False)
        # Note: we specify `nstreams` to avoid a warning.
        n = R.normal(size=size, avg=avg, std=std,
                     nstreams=rng_mrg.guess_n_streams(size, warn=False))
        f = theano.function(var_input, n, mode=mode)
        theano.printing.debugprint(f)
        out = f(*input)

...@@ -491,7 +500,8 @@ def test_normal0():

            print ''
            print 'ON GPU:'
            R = MRG_RandomStreams(234, use_cuda=True)
            n = R.normal(size=size, avg=avg, std=std, dtype='float32',
                         nstreams=rng_mrg.guess_n_streams(size, warn=False))
            assert n.dtype == 'float32'  # well, it's really that this test w GPU doesn't make sense otw
            f = theano.function(var_input, theano.Out(
                theano.sandbox.cuda.basic_ops.gpu_from_host(n),

...@@ -557,7 +567,8 @@ def test_multinomial():

    pvals = numpy.asarray(numpy.random.uniform(size=sample_size))
    pvals = numpy.apply_along_axis(lambda row: row / numpy.sum(row), 1, pvals)
    R = MRG_RandomStreams(234, use_cuda=False)
    # Note: we specify `nstreams` to avoid a warning.
    m = R.multinomial(pvals=pvals, dtype=config.floatX, nstreams=30 * 256)
    f = theano.function([], m, mode=mode_)
    theano.printing.debugprint(f)
    out = f()

...
...@@ -12,8 +12,9 @@ If you want to use a scalar variable in a Theano graph,

you probably want to use theano.tensor.[c,z,f,d,b,w,i,l,]scalar!
"""

import math, warnings
from copy import copy
from itertools import imap

import numpy, theano

...@@ -26,11 +27,37 @@ builtin_complex = complex

builtin_int = int
builtin_float = float

class ComplexError(Exception):
    """Raised if complex numbers are used in an unsupported operation."""
    pass

class IntegerDivisionError(Exception):
    """Raised if someone tries to divide integers with '/' instead of '//'."""
    pass


def upcast(dtype, *dtypes):
    # Should we try to keep float32 instead of float64? This is used so that,
    # for instance, mixing int64 with float32 yields float32 instead of float64.
    # Note that we store this boolean as a one-element list so that it can be
    # modified within `make_array`.
    keep_float32 = [(config.cast_policy == 'numpy+floatX' and
                     config.floatX == 'float32')]
    def make_array(dt):
        if dt == 'float64':
            # There is an explicit float64 dtype: we cannot keep float32.
            keep_float32[0] = False
        return numpy.zeros((), dtype=dt)
    z = make_array(dtype)
    for dt in dtypes:
        z = z + make_array(dt=dt)
    rval = str(z.dtype)
    if rval == 'float64' and keep_float32[0]:
        return 'float32'
    else:
        return rval
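
# For example, assuming `config.cast_policy == 'numpy+floatX'` and
# `config.floatX == 'float32'`:
#
#     print upcast('int64', 'float32')    # 'float32' (float64 avoided)
#     print upcast('float64', 'float32')  # 'float64' (explicit float64 wins)
#     print upcast('int8', 'int32')       # 'int32'   (integers unaffected)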

def as_scalar(x, name = None):
    if isinstance(x, gof.Apply):

...@@ -47,6 +74,7 @@ def as_scalar(x, name = None):

    except TypeError:
        raise TypeError("Cannot convert %s to Scalar" % x, type(x))


def constant(x):
    # pass through numpy scalars, since they are already typed on purpose typically.
    if hasattr(x,'dtype'):

...@@ -383,8 +411,9 @@ uint_types = uint8, uint16, uint32, uint64

float_types = float32, float64
complex_types = complex64, complex128

discrete_types = int_types + uint_types
continuous_types = float_types + complex_types


class _scalar_py_operators:
    #UNARY

...@@ -416,7 +445,8 @@ class _scalar_py_operators:

    def __sub__(self,other): return sub(self,other)
    def __mul__(self,other): return mul(self,other)
    def __div__(self,other): return div_proxy(self,other)
    def __floordiv__(self, other): return int_div(self, other)
    def __mod__(self, other): return mod_check(self, other)
    def __pow__(self,other): return pow(self,other)

    #ARITHMETIC - RIGHT-OPERAND

...@@ -995,32 +1025,74 @@ class Sub(BinaryScalarOp):

        return first_part, second_part

sub = Sub(upcast_out, name = 'sub')

def int_or_true_div(x_discrete, y_discrete):
    """
    Return 'int' or 'true' depending on the type of division used for x / y.

    :param x_discrete: True if `x` is discrete ([unsigned] integer).

    :param y_discrete: True if `y` is discrete ([unsigned] integer).

    :returns: 'int' if `x / y` should be an integer division, or 'true' if it
    should be a true division.

    Raises an IntegerDivisionError if both `x_discrete` and `y_discrete` are
    True and `config.int_division` is set to 'raise'.

    This function is used by both scalar/basic.py and tensor/basic.py.
    """
    if (x_discrete and y_discrete):
        if config.int_division == 'raise':
            raise IntegerDivisionError(
                "With `config.int_division` set to 'raise', dividing two "
                "integer types with '/' is forbidden to avoid confusion "
                "between integer and floating point divisions. Please "
                "use // for integer division, or if you want a float result "
                "either cast one of the arguments to a float or directly call "
                "`x.__truediv__(y)`.")
        elif config.int_division == 'int':
            warnings.warn(
                "Division of two integer types with x / y is deprecated, "
                "please use x // y for an integer division "
                "(set `config.int_division = raise` to track the origin "
                "of this warning)",
                DeprecationWarning)
            return 'int'
        elif config.int_division == 'floatX':
            return 'true'
        else:
            raise NotImplementedError(config.int_division)
    else:
        return 'true'


def div_proxy(x, y):
    """Proxy for either true_div or int_div, depending on types of x, y."""
    f = eval('%s_div' % int_or_true_div(as_scalar(x).type in discrete_types,
                                        as_scalar(y).type in discrete_types))
    return f(x, y)
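
# For instance, with `config.int_division` left at its default 'int':
#
#     print int_or_true_div(True, True)   # 'int'  (and a DeprecationWarning)
#     print int_or_true_div(True, False)  # 'true' (a float is involved)
#     print int_or_true_div(False, False) # 'true'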

class TrueDiv(BinaryScalarOp):
    def output_types(self, types):
        if all(t in discrete_types for t in types):
            return [Scalar(config.floatX)]
        else:
            return super(TrueDiv, self).output_types(types)
    def impl(self, x, y):
        x = numpy.asarray(x)
        y = numpy.asarray(y)
        if all(a.dtype in discrete_types for a in (x, y)):
            return numpy.array(float(x) / y, dtype=config.floatX)
        else:
            return x / y
    def c_code(self, node, name, (x, y), (z, ), sub):
        # we generate good c code only when both are complex!
        if sum([node.inputs[0].type in complex_types, node.inputs[1].type in complex_types]) == 1:
            raise NotImplementedError('type not supported', type)
        if (node.inputs[0].type in discrete_types and
            node.inputs[1].type in discrete_types):
            return "%(z)s = ((double)%(x)s) / %(y)s;" % locals()
        return "%(z)s = %(x)s / %(y)s;" % locals()
    def grad(self, (x, y), (gz, )):

...@@ -1029,11 +1101,15 @@ class TrueDiv(BinaryScalarOp):

        if x.type in float_types:
            first_part = cast(gz / y, x.type.dtype)
        else:
            assert x.type in discrete_types
            first_part = None

        if y.type in complex_types:
            raise NotImplementedError()
        if y.type in float_types:
            second_part = cast(-(gz * x) / (y * y), y.type.dtype)
        else:
            assert y.type in discrete_types
            second_part = None
        return first_part, second_part

true_div = TrueDiv(upcast_out, name = 'true_div')

...@@ -1049,9 +1125,29 @@ int_div = IntDiv(upcast_out, name = 'int_div')

floor_div = int_div


def raise_complex_error():
    raise ComplexError(
        "Theano does not support the mod operator (%) on "
        "complex numbers, since numpy deprecated it.")


def mod_check(x, y):
    if (as_scalar(x).type in complex_types or
        as_scalar(y).type in complex_types):
        # Currently forbidden.
        raise_complex_error()
    else:
        return mod(x, y)


class Mod(BinaryScalarOp):
    def impl(self, x, y):
        if isinstance(x, numpy.complex) or isinstance(y, numpy.complex):
            raise_complex_error()
        return x % y
    def c_code_cache_version(self):
        return (5,)

...@@ -1061,20 +1157,34 @@ class Mod(BinaryScalarOp):

    def c_code(self, node, name, (x, y), (z, ), sub):
        """
        We want the result to have the same sign as python, not the other implementation of mod.
        """
        #raise NotImplementedError("Unlike Python, C's modulo returns negative modulo on negative dividend (to implement)")
        t = node.inputs[0].type.upcast(*[ i.type for i in node.inputs[1:]])
        if (str(t) in imap(str, discrete_types) or
            t in ['uint8','int8','uint16','int16','uint32','int32','uint64','int64'] or
            t in discrete_types):
            # The above or's should not be needed anymore. However, for now we
            # keep them for safety, and verify they are useless with an
            # assert.
            assert str(t) in imap(str, discrete_types)
            x_mod_y = "THEANO_MACRO_MOD(%(x)s, %(y)s)" % locals()
            x_mod_ymm = "THEANO_MACRO_MOD(-%(x)s, -%(y)s)" % locals()
            x_mod_ypm = "THEANO_MACRO_MOD(%(x)s, -%(y)s)" % locals()
            x_mod_ymp = "THEANO_MACRO_MOD(-%(x)s, %(y)s)" % locals()
        elif (str(t) in imap(str, float_types) or
              t in ['float32','float64'] or
              t in float_types):
            # The above or's should not be needed anymore. However, for now we
            # keep them for safety, and verify they are useless with an
            # assert.
            assert str(t) in imap(str, float_types)
            x_mod_y = "fmod(%(x)s,%(y)s)" % locals()
            x_mod_ymm = "fmod(-%(x)s,-%(y)s)" % locals()
            x_mod_ypm = "fmod(%(x)s,-%(y)s)" % locals()
            x_mod_ymp = "fmod(-%(x)s,%(y)s)" % locals()
        elif str(t) in imap(str, complex_types):
            raise_complex_error()
        else:
            raise NotImplementedError('type not supported', type)

...
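For reference, the sign convention the THEANO_MACRO_MOD branch above enforces
is Python's, where the result follows the divisor's sign, unlike C's built-in %:

    print (-5) % 3   # 1 in Python (and in Theano's Mod)
    print 5 % (-3)   # -1 in Python
    # a C-style truncated % would give -2 and 2, respectively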
...@@ -37,6 +37,7 @@ class test_ScalarOps(unittest.TestCase):

    #As we use theano.scalar normally, but we use theano.tensor.scalar
    #that is not important. Also this makes the theano fct fail at call time,
    #so this is not a silent bug.
    # --> This is why it is purposely named 'tes_mod' instead of 'test_mod'.
    def tes_mod(self):
        """
        We add this test as not all languages and C implementations give the same

...@@ -174,6 +175,19 @@ class test_logical(unittest.TestCase):

        self.assertTrue(fn(a,b) == ~a, (a,))

class test_complex_mod(unittest.TestCase):
    """Make sure % fails on complex numbers."""
    def test_fail(self):
        x = complex64()
        y = int32()
        try:
            x % y
            assert False
        except ComplexError:
            pass


class test_div(unittest.TestCase):
    def test_0(self):
        a = int8()

...@@ -182,9 +196,9 @@ class test_div(unittest.TestCase):

        d = float64()
        f = float32()

        print (a//b).owner.op
        assert isinstance((a//b).owner.op, IntDiv)
        assert isinstance((b//a).owner.op, IntDiv)
        assert isinstance((b/d).owner.op, TrueDiv)
        assert isinstance((b/f).owner.op, TrueDiv)
        assert isinstance((f/a).owner.op, TrueDiv)

...
...@@ -7,6 +7,7 @@ import sys # for sys.maxint

from theano.configparser import config, AddConfigVar, BoolParam
import traceback  #for overriding Op.__call__
import warnings
from itertools import izip

import numpy, theano
#from copy import copy as python_copy

...@@ -23,6 +24,9 @@ from theano.gof.python25 import partial, any, all

from theano import compile, printing
from theano.printing import pprint

# We use these exceptions as well.
from theano.scalar import ComplexError, IntegerDivisionError

### set up the external interface
from elemwise import Elemwise, DimShuffle, CAReduce, Sum

...@@ -36,6 +40,17 @@ def _warn(*msg):

#This is needed as we will hide it later
python_complex=complex

# Define common subsets of dtypes (as strings).
int_dtypes = map(str, scal.int_types)
discrete_dtypes = map(str, scal.discrete_types)
complex_dtypes = map(str, scal.complex_types)


class ShapeError(Exception):
    """Raised when the shape cannot be computed."""
    pass

def check_equal_numpy(x, y):
    """
    Returns True iff x and y are equal (checks the dtype and

...@@ -162,36 +177,64 @@ class NumpyAutocaster(object):
""" """
This class is used to cast python ints and floats to numpy arrays. This class is used to cast python ints and floats to numpy arrays.
The behaviour for numpy scalars is a bit tricky... but tends to work in The behavior when called on scalar `x` depends on `config.cast_policy`:
practice. - 'numpy' will simply use the same type as found by `numpy.asarray(x)`.
If the dtype of a numpy scalar is in the self.dtypes list, then this 'cast' - 'numpy+floatX' will do the same, except it will use float32 instead
is a no-op. of float64 if `x` is a Python float and `config.floatX` is set to
'float32' (note that if `x` is a numpy scalar whose data type is
When config.floatX is float32 (at the time of calling), then this function float64, it is not modified since we assume the user is purposedly
downcasts float and numpy.float arguments to numpy.float32, if float32 is using float64).
in the self.dtypes list. - 'custom' lets one define a tuple of data types such that:
- if `x` is already a numpy scalar and its data type is in this
Python ints are always 64bit and floats are always double precision. tuple, then it is returned unchanged;
This class uses the algorithm in __call__ to use a narrower dtype when no - otherwise, the first data type in this tuple that can represent
precision would be lost, and to even lose precision when this is demanded `x` without loss of precision will be used, unless `x` is a float
by the list of dtypes (e.g. to automatically cast all floats to and 'float32' is in the tuple (in which case `x` is cast as a
single-precision if self.dtypes does not include full precision floats). float32);
- if no data type can represent `x` without loss of precision, then
the last data type in the tuple will be used.
""" """
def __init__(self, dtypes): def __init__(self, dtypes):
"""
Constructor.
:type dtypes: Tuple of strings.
:param dtypes: The ordered list of preferred data types (only used when
`config.cast_policy` is set to 'custom', see the `NumpyAutocaster` help
for details).
"""
self.dtypes = tuple(dtypes) self.dtypes = tuple(dtypes)
def __call__(self, x): def __call__(self, x):
# Change the default casting behaviour for python floats to always cast # Make sure we only deal with scalars.
# to float32 assert (isinstance(x, int) or
dtype = None isinstance(x, float) or
(isinstance(x, numpy.ndarray) and x.ndim == 0))
if config.cast_policy == 'numpy':
return numpy.asarray(x)
elif config.cast_policy == 'numpy+floatX':
rval = numpy.asarray(x)
if (rval.dtype == 'float64' and # numpy wants float64
config.floatX == 'float32' and # but we prefer float32
not hasattr(x, 'dtype')): # and `x` was not typed
rval = theano._asarray(rval, dtype='float32')
return rval
# The following is the original code, corresponding to the 'custom'
# option for `config.cast_policy`.
assert config.cast_policy == 'custom'
try: try:
# Pass through numpy scalars, since they are already typed on # Pass through numpy scalars, since they are already typed on
# purpose typically. # purpose typically.
if str(x.dtype) in self.dtypes: if str(x.dtype) in self.dtypes:
return theano._asarray(x, dtype=x.dtype) #leave dtype alone # No need to cast `x` into a new dtype. Note that we still
# need to convert it into an array, because it may not be
# one already (e.g. if x == numpy.float64(1.1)).
return numpy.asarray(x)
except AttributeError: except AttributeError:
# Means `x` has no 'dtype' attribute.
pass pass
# unsafe downcast of float64 variables when config.floatX == 'float32' # unsafe downcast of float64 variables when config.floatX == 'float32'
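
# To illustrate the first two policies on a plain Python float (a sketch;
# `config.cast_policy` is switched on the fly here only for illustration):
#
#     caster = NumpyAutocaster(('float32', 'float64'))
#     config.cast_policy = 'numpy'
#     print caster(1.1).dtype                 # float64: numpy's own choice
#     config.cast_policy = 'numpy+floatX'
#     config.floatX = 'float32'
#     print caster(1.1).dtype                 # float32: follows floatX
#     print caster(numpy.float64(1.1)).dtype  # float64: explicit dtype kept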

...@@ -223,7 +266,10 @@ autocast_float = NumpyAutocaster(('float32', 'float64'))

# have the same type as the xmatrix().
#
class autocast_float_as(object):
    """
    This class makes it possible to temporarily and locally adjust autocasting
    behavior when `config.cast_policy` is set to 'custom'.
    If `config.cast_policy` is not 'custom', an exception is raised.

    For example:
    >>> with autocast_float_as('float32') as _dummy:

...@@ -235,10 +281,13 @@ class autocast_float_as(object):

    """
    def __init__(self, *dtypes):
        self.dtypes = dtypes
        assert config.cast_policy == 'custom'
    def __enter__(self):
        assert config.cast_policy == 'custom'
        self.old_dtypes = autocast_float.dtypes
        autocast_float.dtypes = self.dtypes
    def __exit__(self, *args):
        assert config.cast_policy == 'custom'
        autocast_float.dtypes = self.old_dtypes

def constant_or_value(x, rtype, name=None, ndim=None, dtype=None):

...@@ -260,6 +309,11 @@ def constant_or_value(x, rtype, name=None, ndim=None, dtype=None):

            x_ = autocast_int(x)
        elif rtype is TensorConstant and isinstance(x, float):
            x_ = autocast_float(x)
        elif rtype is TensorConstant and isinstance(x, long):
            # It is not clear what would happen if one were to use a `long`
            # number as a constant in a Theano graph. As a result, we raise
            # an exception in this situation.
            raise NotImplementedError('Constants of type `long` not supported')
        elif isinstance(x, numpy.ndarray):
            x_ = x
            # Currently we do not have a bool dtype in Theano.

...@@ -352,7 +406,7 @@ def _allclose(a, b):

        rtol = float64_rtol

    # Work around bug in Numpy, see http://projects.scipy.org/numpy/ticket/1684
    if str(b.dtype) in int_dtypes and (numpy.absolute(b) < 0).any():
        b = theano._asarray(b, dtype='float64')

    return numpy.allclose(a, b, atol=atol, rtol=rtol)

...@@ -1094,6 +1148,10 @@ class _tensor_py_operators:

    def __div__(self, other):
        try:
            return div_proxy(self, other)
        except IntegerDivisionError:
            # This is to raise the exception that occurs when trying to divide
            # two integer arrays (currently forbidden).
            raise
        except Exception, e:
            return NotImplemented
    def __pow__(self, other):

...@@ -1103,7 +1161,11 @@ class _tensor_py_operators:

            return NotImplemented
    def __mod__(self, other):
        try:
            return mod_check(self, other)
        except ComplexError:
            # This is to raise the exception that occurs when trying to
            # compute x % y with either x or y a complex number.
            raise
        except Exception, e:
            return NotImplemented

...@@ -1852,7 +1914,7 @@ def min(x, axis='DEFAULT'):

                "flatten the tensor before calling min()."),
                stacklevel=2)
    str_x_type = str(x.dtype)
    if str_x_type.startswith('float') or str_x_type in int_dtypes:
        return -max(-x, axis=axis)
    else:
        #Be careful about unsigned integers, complex

...@@ -1882,7 +1944,7 @@ def argmin(x, axis='DEFAULT'):

                "axis before calling argmin."),
                stacklevel=2)
    str_x_type = str(x.dtype)
    if str_x_type.startswith('float') or str_x_type in int_dtypes:
        return argmax(-x, axis=axis)
    else:
        #Be careful about unsigned integers, complex

...@@ -2385,7 +2447,7 @@ def mean(input, axis = None, op = False):

    if op:
        return Mean(axis)(input)

    if str(input.dtype) in discrete_dtypes:
        # we need to cast eventually anyway, and this helps
        # to prevent overflow
        input = cast(input, 'float64')

...@@ -2529,12 +2591,11 @@ def minimum(x,y):

    # see decorator for function body

def div_proxy(x, y):
    """Proxy for either true_div or int_div, depending on types of x, y."""
    f = eval('%s_div' % scal.int_or_true_div(
        as_tensor_variable(x).dtype in discrete_dtypes,
        as_tensor_variable(y).dtype in discrete_dtypes))
    return f(x, y)

@_scal_elemwise_with_nfunc('add', 2, 1)
def add(a, *other_terms):

...@@ -2566,6 +2627,15 @@ def int_div(a, b):

    """elementwise integer-division"""
    # see decorator for function body


def mod_check(x, y):
    """Make sure we do not try to use complex numbers."""
    if (as_tensor_variable(x).dtype in complex_dtypes or
        as_tensor_variable(y).dtype in complex_dtypes):
        # Currently forbidden.
        scal.raise_complex_error()
    else:
        return mod(x, y)


@_scal_elemwise_with_nfunc('mod', 2, 1)
def mod(a, b):
    """elementwise modulo"""

...@@ -2868,7 +2938,7 @@ class Subtensor(Op):

        padded = ( actual_idx_list +
            [slice(None, None, None)] * (len(xshp) - len(self.idx_list)))
        i = 0
        for idx, xl in izip(padded, xshp):
            if isinstance(idx, slice):
                # If it is the default (None, None, None) slice, or a variant,
                # the shape will be xl

...@@ -2878,7 +2948,7 @@ class Subtensor(Op):

                    outshp.append(xl)
                else:
                    cnf = get_canonical_form_slice(idx, xl)
                    length = (cnf[0].stop - cnf[0].start - 1) // cnf[0].step + 1
                    length = switch(lt(length, 0), 0, length)
                    outshp.append(length)
            i += 1

...@@ -2978,15 +3048,28 @@ class SubtensorPrinter:

pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Subtensor), SubtensorPrinter())


def setsubtensor(x, y, idx_list, inplace=False, show_warning=True):
    # Note that `show_warning` should only be set to False by tests, in order
    # to make sure this old code is still working.
    if show_warning:
        print >> sys.stderr, (
            "tensor.setsubtensor is deprecated - please use set_subtensor")
    the_op = IncSubtensor(idx_list, inplace, set_instead_of_inc=True)
    return the_op(x, y, *Subtensor.collapse(
        idx_list,
        lambda entry: isinstance(entry, Variable)))


def incsubtensor(x, y, idx_list, inplace=False, show_warning=True):
    # Note that `show_warning` should only be set to False by tests, in order
    # to make sure this old code is still working.
    if show_warning:
        print >> sys.stderr, "tensor.incsubtensor is deprecated - please use inc_subtensor"
    the_op = IncSubtensor(idx_list, inplace, set_instead_of_inc=False)
    return the_op(x, y, *Subtensor.collapse(idx_list, lambda entry: isinstance(entry, Variable)))

def set_subtensor(x, y, inplace=False):
    """Return x with the given subtensor overwritten by y.

...@@ -3519,14 +3602,14 @@ class Join(Op):

        # that whenever I get a None. Should we just remove gof/apply_shape
        # if it is deprecated ??
        if ishapes[1] is None:
            raise ShapeError()

        n_dim = len(ishapes[1])
        for shape in ishapes[1:]:
            if shape is None:
                raise ShapeError()
            for shape_i in shape:
                if shape_i is None:
                    raise ShapeError()

        # at this point the inputs have been broadcasted so they should
        # all have the same shape
        assert len(shape) == n_dim

...@@ -4025,6 +4108,31 @@ def arange(start, stop=None, step=1, dtype=None):

    # If dtype is not provided, infer it from the other arguments
    if dtype is None:
        dtype = scal.upcast(start.type.dtype, stop.type.dtype, step.type.dtype)
        if config.cast_policy in ('numpy', 'numpy+floatX'):
            # We enforce numpy semantics, except in the special case where
            # `config.cast_policy` is 'numpy+floatX' and we want to use float32
            # rather than float64.
            # As an example, if `start`, `stop` and `step` are all int32,
            # `numpy.arange` returns an int64 array (on 64-bit platforms),
            # while the upcast above returns int32.
            numpy_dtype = numpy.arange(
                start=numpy.array(0, dtype=start.dtype),
                stop=numpy.array(1, dtype=stop.dtype),
                step=numpy.array(1, dtype=step.dtype)).dtype
            if numpy_dtype != dtype:
                if (config.cast_policy == 'numpy+floatX' and
                    config.floatX == 'float32' and
                    numpy_dtype == 'float64' and
                    # No explicit float64 in the three arguments?
                    all(dt != 'float64'
                        for dt in [s.dtype for s in (start, stop, step)])):
                    # We use float32 instead.
                    assert dtype != 'float64'
                    dtype = 'float32'
                else:
                    # We use the same dtype as numpy instead of the result of
                    # the upcast.
                    dtype = str(numpy_dtype)

    if dtype not in _arange:
        _arange[dtype] = ARange(dtype)

...
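The numpy behavior matched above can be checked directly (on a 64-bit
platform):

    import numpy
    i32 = numpy.array(0, dtype='int32')
    # int32 bounds, yet numpy picks int64, while scal.upcast would say int32:
    print numpy.arange(i32, i32 + 5).dtype   # int64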
...@@ -454,7 +454,73 @@ class Elemwise(Op): ...@@ -454,7 +454,73 @@ class Elemwise(Op):
""" """
inputs = map(as_tensor_variable, inputs) inputs = map(as_tensor_variable, inputs)
shadow = self.scalar_op.make_node(*[Scalar(dtype = t.type.dtype)() for t in inputs]) input_dtypes = [i.dtype for i in inputs]
scalar_inputs = []
array_inputs = []
for input_idx, input in enumerate(inputs):
if input.ndim == 0:
scalar_inputs.append((input_idx, input))
else:
array_inputs.append((input_idx, input))
shadow = self.scalar_op.make_node(*[Scalar(dtype=dtype)() for dtype in input_dtypes])
out_dtypes = [o.type.dtype for o in shadow.outputs]
if (scalar_inputs and
array_inputs and
theano.config.cast_policy in ('numpy', 'numpy+floatX')):
# We need to make sure that scalars do not upcast arrays unless
# they are fundamentally different. This is specified in
# http://docs.scipy.org/doc/numpy/reference/ufuncs.html
# in the 'casting rules' section.
# It seems difficult to find a generic mechanism that would work
# for any elemwise Op. In the following we use a heuristic that
# should work for simple Ops, but may break in the future for more
# complex Ops (in which case we may need to implement a way for
# these Ops to override this heuristic).
# The heuristic consists in detecting a situation where we suspect
# some scalar input upcasted an array, by comparing the highest
# type of the outputs with the highest type of the input arrays.
# If it happens that the former is of higher type than the latter,
# then we go through all scalar inputs and if they are of a higher
# type than the highest type of the input arrays, we pretend they
# actually are of the same type (the idea is that we suspect they
# are responsible for the upcasting, so by downcasting them we hope
# to get rid of this upcasting).
array_dtype = scalar.upcast(*[a[1].dtype for a in array_inputs])
out_dtype = scalar.upcast(*out_dtypes)
def is_higher(dtype_a, dtype_b):
return (dtype_a != dtype_b and
scalar.upcast(dtype_a, dtype_b) == dtype_a)
if is_higher(out_dtype, array_dtype):
# We are in the situation described above.
modified_scalar_inputs = False
for input_idx, input in scalar_inputs:
if scalar.upcast(input.dtype, array_dtype) == out_dtype:
# This scalar may be responsible for the upcasting.
input_dtypes[input_idx] = array_dtype
modified_scalar_inputs = True
if modified_scalar_inputs:
# Update 'shadow' and 'out_dtypes'.
shadow = self.scalar_op.make_node(
*[Scalar(dtype=dtype)() for dtype in input_dtypes])
out_dtypes = [o.type.dtype for o in shadow.outputs]
# The whole point of all this is to try to avoid upcasting
# the dtype of the input arrays. The following assert makes
# sure this goal was achieved. Note however that it might
# fail for some Ops that purposedly upcast arrays, in which
# case it would probably be better to use a different
# mechanism for such Ops.
out_dtype = scalar.upcast(*out_dtypes)
assert not is_higher(out_dtype, array_dtype)
else:
# Same as above: safety assert to make sure our heuristics
# did its job. It may fail in the future for some Ops that
# would require a different mechanism.
import pdb; pdb.set_trace()
raise AssertionError(
'Heuristic failure - see Elemwise.make_node')
target_length = max([input.type.ndim for input in inputs])
@@ -487,7 +553,6 @@ class Elemwise(Op):
for ob, ib in zip(out_broadcastables[overwriter], inputs[overwritten].type.broadcastable):
    if ib and not ob:
        raise ValueError("Operation cannot be done inplace on an input with broadcasted dimensions.")
if any(inputs[i].type.dtype != out_dtypes[o] for o, i in inplace_pattern.items()):
    raise TypeError("Cannot do an inplace operation on incompatible data types.",
                    ([i.type.dtype for i in inputs], out_dtypes, inplace_pattern))
...
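# A minimal sketch (illustration only, assuming numpy 1.x value-based
# casting, per the 'casting rules' section linked above): scalars do not
# upcast arrays of the same kind, while arrays do.
import numpy

a = numpy.ones(3, dtype='float32')
assert (a + numpy.float64(1.0)).dtype == numpy.dtype('float32')               # scalar: no upcast
assert (a + numpy.ones(3, dtype='float64')).dtype == numpy.dtype('float64')   # array: upcast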
@@ -135,9 +135,9 @@ class Conv3D(theano.Op):
vidDur = V_shape[3]
filterDur = W_shape[3]

output_height = T.floor((vidHeight - filterHeight) // dr) + 1
output_width = T.floor((vidWidth - filterWidth) // dc) + 1
output_dur = T.floor((vidDur - filterDur) // dt) + 1

rval = (batch_size, output_height, output_width, output_dur, output_channels)
...
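# A quick sanity check of the floor-division output-shape formula above,
# with made-up sizes (illustration only, not from the source):
#     out = (in_size - filter_size) // stride + 1
vidHeight, filterHeight, dr = 32, 5, 2
assert (vidHeight - filterHeight) // dr + 1 == 14  # 27 // 2 + 1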
@@ -575,14 +575,15 @@ class ConvOp(Op):
try:
    fmshp = ConvOp.getOutputShape(imshp[1:], kshp, (self.dx,self.dy), self.out_mode)
except TypeError:
    raise theano.tensor.ShapeError()
outshp = (batch_size,fmo) + tuple(fmshp)
return [outshp]
else:
    # Haven't implemented this case. imshp and kshp may be symbolic
    # and ConvOp.getOutputShape doesn't handle this. In this case
    # we simply let the default function do its work.
    raise theano.tensor.ShapeError()

def perform(self,node, inp, out):
    """
...
@@ -879,6 +879,7 @@ def test_argmax_pushdown():
[x],
[out])
config.warn.argmax_pushdown_bug = False
theano.compile.mode.optdb.query(
    theano.compile.mode.OPT_FAST_RUN).optimize(env)
@@ -922,6 +923,7 @@ def test_argmax_pushdown_bias():
[x,b],
[out])
config.warn.argmax_pushdown_bug = False
theano.compile.mode.optdb.query(
    theano.compile.mode.OPT_FAST_RUN).optimize(env)
...
@@ -27,11 +27,12 @@ from theano import compile #to register the optimizer built by this file
from theano.gof.python25 import any, all
from theano.gof.opt import Optimizer, pre_constant_merge, pre_greedy_local_optimizer
from theano.gof import toolbox, DestroyHandler
from basic import get_constant_value, ShapeError

# Utilities
def out2in(*local_opts):
    """WRITEME """
    return opt.TopoOptimizer(opt.LocalOptGroup(*local_opts),
@@ -528,7 +529,7 @@ class ShapeFeature(object):
the cost of many Ops accurately, and generate c-code that is specific [e.g. unrolled] to
particular sizes.

In cases where you cannot figure out the shape, raise a ShapeError.

.. note::
@@ -714,13 +715,22 @@ class ShapeFeature(object):
try:
    o_shapes = shape_infer(node, [self.shape_of[r] for r in node.inputs])
except ShapeError:
    o_shapes = self.default_infer_shape(node, [self.shape_of[r] for r in node.inputs])
except NotImplementedError, e:
raise NotImplementedError(
'Code called by infer_shape failed raising a '
'NotImplementedError. Raising NotImplementedError to '
'indicate that a shape cannot be computed is no longer '
'supported, and one should now use tensor.ShapeError '
'instead. The original exception message is: %s' % e)
except Exception, e:
    _logger.error('Failed to infer_shape from Op %s (i_shapes=%s): %s %s'% (node.op,
        [self.shape_of[r] for r in node.inputs],
        type(e), str(e)))
    # We raise the exception to make sure the user knows something bad
# is going on.
raise
# this is packed information
# an element of o_shapes is either None or a tuple
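# A sketch of the new convention for a hypothetical Op's infer_shape
# (the Op and its "unknown" condition are made up for illustration):
def infer_shape(self, node, input_shapes):
    # Output has the same shape as the first input when that shape is known.
    if input_shapes[0] is None:  # hypothetical unknown-shape case
        # Let ShapeFeature fall back to default_infer_shape.
        raise theano.tensor.ShapeError()
    return [input_shapes[0]]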
@@ -3410,11 +3420,12 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 1024):
    """
    def local_fuse(node):
        """
As part of specialization, we fuse two consecutive elemwise Ops of the
same shape.

For mixed dtypes, we let the Composite Op do the cast; it lets the C
compiler do the cast.

The number of dimensions is validated at call time by Theano itself.
""" """
# META TODO: PUT THESE THINGS IN TRAC, NOT TODO NOTES!! # META TODO: PUT THESE THINGS IN TRAC, NOT TODO NOTES!!
# TODO: use broadcast flag? # TODO: use broadcast flag?
......
...@@ -47,6 +47,75 @@ def eval_outputs(outputs): ...@@ -47,6 +47,75 @@ def eval_outputs(outputs):
return variables[0] return variables[0]
return variables return variables
def get_numeric_subclasses(cls=numpy.number, ignore=None):
"""
Return subclasses of `cls` in the numpy scalar hierarchy.
We only return subclasses that correspond to unique data types.
The hierarchy can be seen here:
http://docs.scipy.org/doc/numpy/reference/arrays.scalars.html
"""
if ignore is None:
ignore = []
rval = []
dtype = numpy.dtype(cls)
dtype_num = dtype.num
if dtype_num not in ignore:
# Safety check: we should be able to represent 0 with this data type.
numpy.array(0, dtype=dtype)
rval.append(cls)
ignore.append(dtype_num)
for sub in cls.__subclasses__():
rval += [c for c in get_numeric_subclasses(sub, ignore=ignore)]
return rval
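# Usage sketch: list the unique scalar classes found (output depends on
# the installed numpy build).
for cls in get_numeric_subclasses():
    print cls.__name__, numpy.dtype(cls).num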
def get_numeric_types(with_int=True, with_float=True, with_complex=False,
with_128_bit=False):
"""
Return numpy numeric data types.
:param with_int: Whether to include integer types.
:param with_float: Whether to include floating point types.
:param with_complex: Whether to include complex types.
:param with_128_bit: Whether to include 128/256-bit types.
:returns: A list of unique data type objects. Note that multiple data types
may share the same string representation, but can be differentiated through
their `num` attribute.
Note that we could probably rely on the lists of types defined in the
`scalar` module. However with this function we can test more unique dtype
objects, and possibly detect defects in dtypes that may be introduced in
numpy in the future.
"""
rval = []
def is_within(cls1, cls2):
# Return True if scalars defined from `cls1` are within the hierarchy
# starting from `cls2`.
# The third test below is to catch for instance the fact that
# one can use ``dtype=numpy.number`` and obtain a float64 scalar, even
# though `numpy.number` is not under `numpy.floating` in the class
# hierarchy.
return (cls1 is cls2 or
issubclass(cls1, cls2) or
isinstance(numpy.array([0], dtype=cls1)[0], cls2))
for cls in get_numeric_subclasses():
dtype = numpy.dtype(cls)
if ((not with_complex and is_within(cls, numpy.complexfloating)) or
(not with_int and is_within(cls, numpy.integer)) or
(not with_float and is_within(cls, numpy.floating)) or
(not with_128_bit and ('128' in str(dtype) or
'256' in str(dtype)))):
# Ignore this class.
continue
rval.append([str(dtype), dtype, dtype.num])
# We sort it to be deterministic, then remove the string and num elements.
return [x[1] for x in sorted(rval, key=str)]
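# Usage sketch: restrict to integer dtypes only (floats and complex
# excluded via the keyword flags).
for dtype in get_numeric_types(with_float=False, with_complex=False):
    print str(dtype)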
def _numpy_checker(x, y):
    """
    Checks if x.data and y.data have the same contents.
@@ -374,6 +443,18 @@ _good_broadcast_div_mod_normal_float_inplace = dict(same_shapes = (rand(2, 3), r
_good_broadcast_div_mod_normal_float = dict(empty2 = (numpy.asarray([0]), numpy.asarray([])),
                                            **_good_broadcast_div_mod_normal_float_inplace
                                            )
def no_complex(d):
"""Remove pairs from dictionary d when the value contains complex data."""
return dict((k, v) for k, v in d.iteritems()
if all(str(x.dtype) not in tensor.complex_dtypes for x in v))
# 'No-complex' versions.
_good_broadcast_div_mod_normal_float_no_complex = no_complex(
_good_broadcast_div_mod_normal_float)
_good_broadcast_div_mod_normal_float_inplace_no_complex = no_complex(
_good_broadcast_div_mod_normal_float_inplace)
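# Illustration (a sketch, assuming the `rand` helper above accepts a
# single dimension): entries whose values contain complex data are dropped.
d = dict(real_pair=(rand(2), rand(2)),
         cplx_pair=(rand(2).astype('complex64'), rand(2)))
assert no_complex(d).keys() == ['real_pair']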
_grad_broadcast_div_mod_normal = dict(same_shapes = (rand(2, 3), rand(2, 3)),
                                      scalar = (rand(2, 3), rand(1, 1)),
                                      row = (rand(2, 3), rand(1, 3)),
@@ -389,8 +470,9 @@ _grad_broadcast_div_mod_normal = dict(same_shapes = (rand(2, 3), rand(2, 3)),
div_grad_rtol=None
if config.floatX=='float32':
    # We raise the relative tolerance for the grad as there can be errors in
    # float32.
    # This is probably caused by our way of computing the gradient error.
    div_grad_rtol=0.025
DivTester = makeBroadcastTester(op = true_div,
                                expected = lambda x, y: x / y,
@@ -410,14 +492,14 @@ DivInplaceTester = makeBroadcastTester(op = inplace.true_div_inplace,
ModTester = makeBroadcastTester(op = mod,
                                expected = lambda x, y: numpy.asarray(x % y, dtype=theano.scalar.basic.upcast(x.dtype, y.dtype)),
                                good = _good_broadcast_div_mod_normal_float_no_complex,
                                # integers = (randint(2, 3), randint_nonzero(2, 3)),
                                # dtype_mixup_1 = (rand(2, 3), randint_nonzero(2, 3)),
                                # dtype_mixup_2 = (randint_nonzero(2, 3), rand(2, 3))),
                                )

ModInplaceTester = makeBroadcastTester(op = inplace.mod_inplace,
                                       expected = lambda x, y: numpy.asarray(x % y, dtype=theano.scalar.basic.upcast(x.dtype, y.dtype)),
                                       good = _good_broadcast_div_mod_normal_float_inplace_no_complex,
                                       inplace = True)

_good_broadcast_pow_normal_float = dict(same_shapes = (rand_ranged(1, 5, (2, 3)), rand_ranged(-3, 3, (2, 3))),
@@ -2180,7 +2262,7 @@ class T_Join_and_Split(unittest.TestCase):
def test_stack_scalar_make_vector(self):
    '''Test that calling stack() on scalars instantiates MakeVector,
    not Join. Test that the floatX dtype stays floatX, not downcast to int64'''
    a = tensor.scalar('a')
    b = tensor.scalar('b')
    s = stack(a, b, a, b)
@@ -2665,9 +2747,9 @@ class T_divimpl(unittest.TestCase):
                      (5.0/11.0))
assert numpy.allclose(function([i, ii, d, f, c], f/i)(5, 3, 7.0, 11.0, numpy.complex(5,3)),
                      (11.0/5.0))
assert numpy.allclose(function([i, ii, d, f, c], i//ii)(5, 3, 7.0, 11.0, numpy.complex(5,3)),
                      (5/3))
assert numpy.allclose(function([i, ii, d, f, c], ii//i)(5, 3, 7.0, 11.0, numpy.complex(5,3)),
                      (3/5))
assert numpy.allclose(function([i, ii, d, f, c], true_div(i,ii))(5, 3, 7.0, 11.0, numpy.complex(5,3)),
                      (5./3.))
@@ -3056,7 +3138,13 @@ class T_scalarfromtensor(unittest.TestCase):
v = eval_outputs([ss])
self.assertTrue(v == 56, v)
if config.cast_policy == 'custom':
self.assertTrue(isinstance(v, numpy.int8))
elif config.cast_policy in ('numpy', 'numpy+floatX'):
self.assertTrue(isinstance(
v, getattr(numpy, str(numpy.asarray(56).dtype))))
else:
raise NotImplementedError(config.cast_policy)
self.assertTrue(v.shape == (), v.shape)
tt = lscalar()
ss = scalar_from_tensor(tt)
@@ -3538,7 +3626,13 @@ class TestARange(unittest.TestCase):
out = arange(start, stop)
f = function([start, stop], out)
if config.cast_policy == 'custom':
assert out.dtype == start.type.dtype
elif config.cast_policy in ('numpy', 'numpy+floatX'):
assert out.dtype == numpy.arange(numpy.int32(0),
numpy.int32(1)).dtype
else:
raise NotImplementedError(config.cast_policy)
assert numpy.all(f(0,5) == numpy.arange(0,5))
assert numpy.all(f(-5,1) == numpy.arange(-5,1))
assert numpy.all(f(0,0) == numpy.arange(0,0))
@@ -3560,7 +3654,12 @@ class TestARange(unittest.TestCase):
out = arange(stop)
f = function([stop], out)
if config.cast_policy == 'custom':
assert out.dtype == stop.type.dtype
elif config.cast_policy in ('numpy', 'numpy+floatX'):
assert out.dtype == numpy.arange(numpy.int32(1)).dtype
else:
raise NotImplementedError(config.cast_policy)
assert numpy.all(f(8) == numpy.arange(8))
assert numpy.all(f(-2) == numpy.arange(-2))
@@ -3568,24 +3667,93 @@ class TestARange(unittest.TestCase):
fout = arange(fstop)
ff = function([fstop], fout)
if config.cast_policy == 'custom':
assert fout.dtype == fstop.type.dtype
elif config.cast_policy == 'numpy':
assert fout.dtype == numpy.arange(numpy.float32(1)).dtype
elif config.cast_policy == 'numpy+floatX':
if config.floatX == 'float32':
assert fout.dtype == 'float32'
else:
assert fout.dtype == numpy.arange(numpy.float32(1)).dtype
else:
raise NotImplementedError(config.cast_policy)
fstop_values = [0.2, -0.7, 8.5]
for fstop_v in fstop_values:
    fstop_v32 = numpy.float32(fstop_v)
    assert numpy.all(ff(fstop_v32) == numpy.arange(fstop_v))
def test_upcast(self):
    """Test that arange computes output type adequately"""
if config.cast_policy == 'custom':
    assert arange(iscalar()).dtype == iscalar().dtype
    assert arange(fscalar()).dtype == fscalar().dtype
    assert arange(dscalar()).dtype == dscalar().dtype

    # int32 + float32 -> float64
    assert arange(iscalar(), fscalar()).dtype == dscalar().dtype
    assert arange(iscalar(), dscalar()).dtype == dscalar().dtype
    assert arange(fscalar(), dscalar()).dtype == dscalar().dtype

    assert arange(iscalar(), fscalar(), dscalar()).dtype == dscalar().dtype
elif config.cast_policy in ('numpy', 'numpy+floatX'):
for dtype in get_numeric_types():
# Test with a single argument.
arange_dtype = arange(scalar(dtype=str(dtype))).dtype
numpy_dtype = numpy.arange(numpy.array(1, dtype=dtype)).dtype
if (dtype != 'float64' and
numpy_dtype == 'float64' and
config.cast_policy == 'numpy+floatX' and
config.floatX == 'float32'):
# We want a float32 arange.
assert arange_dtype == 'float32'
else:
# Follow numpy.
assert arange_dtype == numpy_dtype
# Test with two arguments.
for stop_dtype in get_numeric_types():
arange_dtype = arange(
start=scalar(dtype=str(dtype)),
stop=scalar(dtype=str(stop_dtype))).dtype
numpy_dtype = numpy.arange(
start=numpy.array(0, dtype=dtype),
stop=numpy.array(1, dtype=stop_dtype)).dtype
if (dtype != 'float64' and
stop_dtype != 'float64' and
numpy_dtype == 'float64' and
config.cast_policy == 'numpy+floatX' and
config.floatX == 'float32'):
# We want a float32 arange.
assert arange_dtype == 'float32'
else:
# Follow numpy.
assert arange_dtype == numpy_dtype
# Test with three arguments.
for step_dtype in get_numeric_types():
arange_dtype = arange(
start=scalar(dtype=str(dtype)),
stop=scalar(dtype=str(stop_dtype)),
step=scalar(dtype=str(step_dtype))).dtype
numpy_dtype = numpy.arange(
start=numpy.array(0, dtype=dtype),
stop=numpy.array(1, dtype=stop_dtype),
step=numpy.array(1, dtype=step_dtype)).dtype
if (dtype != 'float64' and
stop_dtype != 'float64' and
step_dtype != 'float64' and
numpy_dtype == 'float64' and
config.cast_policy == 'numpy+floatX' and
config.floatX == 'float32'):
# We want a float32 arange.
assert arange_dtype == 'float32'
else:
# Follow numpy.
assert arange_dtype == numpy_dtype
else:
raise NotImplementedError(config.cast_policy)
def test_dtype_cache(self):
    """Checks that the same Op is returned on repeated calls to arange
@@ -3624,7 +3792,13 @@ class TestARange(unittest.TestCase):
f = function([start, stop], out.shape, mode=mode)
assert len(f.maker.env.toposort())==4
#4 [Elemwise{sub,no_inplace}(stop, start), Elemwise{Cast{int64}}(Elemwise{sub,no_inplace}.0), Elemwise{Maximum{output_types_preference=transfer_type{0}}}[(0, 0)](Elemwise{Cast{int64}}.0, 0), MakeVector(Elemwise{Maximum{output_types_preference=transfer_type{0}}}[(0, 0)].0)]
if config.cast_policy == 'custom':
assert out.dtype == start.type.dtype
elif config.cast_policy in ('numpy', 'numpy+floatX'):
assert out.dtype == numpy.arange(
numpy.int32(0), numpy.int32(1), numpy.int32(1)).dtype
else:
raise NotImplementedError(config.cast_policy)
assert numpy.all(f(0,5) == len(numpy.arange(0,5)))
assert numpy.all(f(2,11) == len(numpy.arange(2,11)))
assert numpy.all(f(-5,1) == len(numpy.arange(-5,1)))
@@ -4074,6 +4248,22 @@ def test_default_state():
assert numpy.allclose(f(numpy.asarray(2.2, dtype=config.floatX)), 7)

def test_autocast():
backup_config = config.cast_policy
# Call test functions for all possible values of `config.cast_policy`.
for autocast_cfg in (
'custom',
'numpy',
'numpy+floatX',
):
config.cast_policy = autocast_cfg
try:
eval('_test_autocast_' + autocast_cfg.replace('+', '_'))()
finally:
config.cast_policy = backup_config
def _test_autocast_custom():
"""Called from `test_autocast`."""
assert config.cast_policy == 'custom'
orig_autocast = autocast_float.dtypes

# Test that autocast_float_as sets the autocast dtype correctly
@@ -4165,6 +4355,131 @@ def test_autocast():
finally:
    ac.__exit__()
def _test_autocast_numpy():
"""Called from `test_autocast`."""
assert config.cast_policy == 'numpy'
# Go through some typical scalar values.
def ok(z):
assert tensor.constant(z).dtype == numpy.asarray(z).dtype
for x in ([2**i for i in xrange(63)] +
[0] +
[0., 1., 1.1, 1.5]):
n_x = numpy.asarray(x)
# Make sure the data type is the same as the one found by numpy.
ok(x)
ok(-x)
ok(x - 1)
ok(-x + 1)
ok(n_x)
def _test_autocast_numpy_floatX():
"""Called from `test_autocast`."""
assert config.cast_policy == 'numpy+floatX'
backup_floatX = config.floatX
def ok(z, floatX):
if (isinstance(z, float) and
floatX == 'float32' and
not hasattr(z, 'dtype')):
# Special case where we use 'float32' instead of 'float64'.
assert tensor.constant(z).dtype == 'float32'
else:
assert tensor.constant(z).dtype == numpy.asarray(z).dtype
try:
# Test with various values of `config.floatX`.
for floatX in ('float32', 'float64'):
config.floatX = floatX
# Go through some typical scalar values.
for x in ([2**i for i in xrange(63)] +
[0] +
[0., 1., 1.1, 1.5]):
ok(x, floatX)
ok(-x, floatX)
ok(x - 1, floatX)
ok(-x + 1, floatX)
ok(numpy.asarray(x), floatX)
ok(numpy.float64(x), floatX)
finally:
config.floatX = backup_floatX
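# The special case exercised by `ok` above, stated directly (a sketch
# assuming config.cast_policy == 'numpy+floatX' and floatX == 'float32'):
assert tensor.constant(1.1).dtype == 'float32'                  # plain Python float
assert tensor.constant(numpy.float64(1.1)).dtype == 'float64'   # explicit float64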
class test_arithmetic_cast(unittest.TestCase):
"""
Test output types of typical arithmetic operations (* / + - //).
We only test the behavior for `config.cast_policy` set to either 'numpy' or
'numpy+floatX': the 'custom' behavior is (at least partially) tested in
`_test_autocast_custom`.
"""
def test_arithmetic_cast(self):
backup_config = config.cast_policy
dtypes = get_numeric_types(with_complex=True)
# Here:
# scalar == scalar stored as a 0d array
# array == 1d array
# i_scalar == scalar type used internally by Theano
theano_scalar = lambda dtype: tensor.scalar(dtype=str(dtype))
numpy_scalar = lambda dtype: numpy.array(1, dtype=dtype)
theano_array = lambda dtype: tensor.vector(dtype=str(dtype))
numpy_array = lambda dtype: numpy.array([1], dtype=dtype)
theano_i_scalar = lambda dtype: theano.scalar.Scalar(str(dtype))()
numpy_i_scalar = numpy_scalar
try:
for cfg in ('numpy', 'numpy+floatX'):
config.cast_policy = cfg
for op in (operator.add, operator.sub, operator.mul,
operator.div, operator.floordiv):
for a_type in dtypes:
for b_type in dtypes:
# Note that we do not test division between
# integers as this is currently forbidden.
if (op is operator.div and
a_type in tensor.discrete_dtypes and
b_type in tensor.discrete_dtypes):
continue
# We will test all meaningful combinations of
# scalar and array operations.
for combo in (
('scalar', 'scalar'),
('array', 'array'),
('scalar', 'array'),
('array', 'scalar'),
('i_scalar', 'i_scalar'),
):
theano_args = map(eval,
['theano_%s' % c for c in combo])
numpy_args = map(eval,
['numpy_%s' % c for c in combo])
theano_dtype = op(
theano_args[0](a_type),
theano_args[1](b_type)).type.dtype
# For numpy we have a problem:
# http://projects.scipy.org/numpy/ticket/1827
# The current expected behavior is to use
# the highest data type that numpy may return.
numpy_dtypes = [
op(numpy_args[0](a_type),
numpy_args[1](b_type)).dtype,
op(numpy_args[1](b_type),
numpy_args[0](a_type)).dtype]
numpy_dtype = theano.scalar.upcast(
*map(str, numpy_dtypes))
if (cfg == 'numpy+floatX' and
config.floatX == 'float32' and
a_type != 'float64' and
b_type != 'float64' and
numpy_dtype == 'float64'):
# We should keep float32.
assert theano_dtype == 'float32'
else:
assert theano_dtype == numpy_dtype
finally:
config.cast_policy = backup_config
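# The workaround above collapses both argument orders into a single
# expected dtype via upcast; for instance:
assert theano.scalar.upcast('int8', 'int16') == 'int16'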
class test_broadcast(unittest.TestCase):
    def test_broadcast_bigdim(self):
        def f():
@@ -4373,6 +4688,18 @@ class T_as_tensor_variable(unittest.TestCase):
assert ten.type.dtype == 'uint8'
class test_complex_mod(unittest.TestCase):
"""Make sure % fails on complex numbers."""
def test_fail(self):
x = vector(dtype='complex64')
try:
x % 5
assert False
except ComplexError:
pass
if __name__ == '__main__':
    if 1:
        unittest.main()
...
@@ -30,9 +30,11 @@ class Test_incsubtensor(unittest.TestCase):
for do_set in [False,True]:
    if do_set:
        result = T.setsubtensor(a, increment, [sl1, sl2],
                                show_warning=False)
    else:
        result = T.incsubtensor(a, increment, [sl1, sl2],
                                show_warning=False)

f = theano.function([a, increment, sl2_end], result)
@@ -59,7 +61,7 @@ class Test_incsubtensor(unittest.TestCase):
def inc_slice(*s):
    def just_numeric_args(a,b):
        return T.incsubtensor(a, b, s, show_warning=False)
    return just_numeric_args

# vector
...
@@ -647,10 +647,14 @@ def test_local_merge_abs():
def test_mixeddiv():
    """Test that int division raises an exception."""
    i = iscalar()
    d = dscalar()
    try:
        function([i,d], d*(i/(i+1)))(3, 1.0)
assert False
except theano.scalar.IntegerDivisionError:
pass
def test_const_type_in_mul_canonizer():
    input = dmatrix()
@@ -2487,6 +2491,7 @@ class T_local_sum(unittest.TestCase):
assert numpy.allclose(f(input),input.sum())
config.warn.sum_sum_bug = False
f = theano.function([a],a.sum(0).sum(0).sum(0),mode=self.mode)
assert len(f.maker.env.nodes)==1
assert numpy.allclose(f(input),input.sum())
@@ -2496,6 +2501,7 @@ class T_local_sum(unittest.TestCase):
input=numpy.arange(3*3*3, dtype=config.floatX).reshape(3,3,3)
dims=[(0,0),(1,0),(2,0),(0,1),(1,1),(2,1)]
config.warn.sum_sum_bug = False
for d,dd in dims:
    f = theano.function([a],a.sum(d).sum(dd),mode=self.mode)
    assert numpy.allclose(f(input),input.sum(d).sum(dd))
@@ -2541,6 +2547,7 @@ class T_local_sum(unittest.TestCase):
assert len(f.maker.env.nodes)==nb_nodes[2]
assert f.maker.env.toposort()[-1].op==T.alloc
config.warn.sum_sum_bug = False
for d, dd in [(0,0),(1,0),(2,0),(0,1),(1,1),(2,1)]:
    f = theano.function([a],t_like(a).sum(d).sum(dd),mode=mode)
    print f.maker.env.toposort()
@@ -2600,6 +2607,8 @@ class T_local_sum_dimshuffle(unittest.TestCase):
c_val = rng.randn(2,2,2).astype(config.floatX)
d_val = numpy.asarray(rng.randn(), config.floatX)
config.warn.sum_sum_bug = False
config.warn.sum_div_dimshuffle_bug = False
for i,s in enumerate(sums):
    print i
    f = theano.function([a,b,c,d], s, mode=self.mode)
...
""" test code snippet in the Theano tutorials.
"""
import os, unittest
import theano
import theano.tensor as T
from theano import function
@@ -722,6 +722,15 @@ class T_loading_and_saving(unittest.TestCase):
mode_instance = theano.compile.mode.get_mode(None)
if not isinstance(mode_instance, theano.compile.debugmode.DebugMode):
if os.path.exists('obj.save') or os.path.exists('objects.save'):
# We do not want to delete these files silently, in case for
# some reason they would be something else than test-generated
# files.
# Ideally we would save those files in a temporary directory...
raise AssertionError(
'Please get rid of files obj.save and '
'objects.save in directory %s' % os.getcwd())
f = file('obj.save', 'wb')
cPickle.dump(my_obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
f.close()
@@ -746,6 +755,9 @@ class T_loading_and_saving(unittest.TestCase):
loaded_objects.append(cPickle.load(f))
f.close()
# Cleanup created files.
os.remove('obj.save')
os.remove('objects.save')
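# A sketch of the temporary-directory approach suggested in the comment
# above (illustration only, not what the test currently does):
import os, tempfile

tmpdir = tempfile.mkdtemp()
save_path = os.path.join(tmpdir, 'obj.save')
f = file(save_path, 'wb')  # Python 2 idiom, as in the test
cPickle.dump(my_obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
f.close()
# ... after loading it back ...
os.remove(save_path)
os.rmdir(tmpdir)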
class T_modes(unittest.TestCase):
    ## All tests here belong to
...