提交 b5843aa7 authored 作者: Olivier Delalleau's avatar Olivier Delalleau

Added new behaviors for data type casts (enabled through config.cast_policy)

Additional changes: - Implemented floor div (//) for the Scalar type - Trying to use a true div (/) on integer arguments will raise an exception - True div on unsigned integers is now coherent with the behavior on signed integers - True div on integers now results in floatX instead of systematically being float64 - Added a few sanity checks in TrueDiv.grad - An exception is raised if one tries to use a constant of type 'long', because it is not clear what is going to happen - Fixed mean() of unsigned integer arrays
上级 c4f512af
......@@ -144,7 +144,7 @@ import theano and print the config variable, as in:
.. attribute:: floatX
String value: either 'float64' or 'float32'.
String value: either 'float64' or 'float32'
Default: 'float64'
......@@ -152,6 +152,23 @@ import theano and print the config variable, as in:
and similar functions. It also sets the default theano bit width for
arguments passed as Python floating-point numbers.
.. attribute:: cast_policy
String value: either 'numpy+floatX', 'numpy' or 'custom'
Default: 'custom'
This specifies how data types are implicitly figured out in Theano, e.g. for
constants or in the result of arithmetic operations. The recommended value is
'numpy+floatX', that mimics numpy's behavior except for floats when
``config.floatX`` is set to 'float32', for which we use float32 instead of
float64 unless the user is explicitly using data typed as float64. When
'numpy' is used, this specific floatX behavior is discarded. The current
default value is 'custom' for backward compatibility reasons, and corresponds
to a set of custom rules originally used in Theano (which can be partially
customized, see e.g. the in-code help of ``tensor.NumpyAutocaster``). The
'custom' option will be deprecated in a future release of Theano.
.. attribute:: mode
String value: 'Mode', 'ProfileMode', 'DebugMode', 'FAST_RUN', 'FAST_COMPILE'
......
......@@ -15,11 +15,10 @@ AddConfigVar('floatX',
EnumStr('float64', 'float32'),
)
# TODO Work-in-progress
#AddConfigVar('casting_policy',
# "Rules for implicit casts of constants in arithmetic operations",
# EnumStr('theano_0.3', 'numpy'),
# )
AddConfigVar('cast_policy',
"Rules for implicit type casting.",
EnumStr('custom', 'numpy+floatX', 'numpy'),
)
#gpu mean let the driver select the gpu. Needed in case of gpu in exclusive mode.
#gpuX mean use the gpu number X.
......
......@@ -26,11 +26,28 @@ builtin_complex = complex
builtin_int = int
builtin_float = float
def upcast(dtype, *dtypes):
    """Return the dtype resulting from upcasting `dtype` with all `dtypes`.

    The result is computed by adding together zero-dimensional numpy arrays
    of the given dtypes and reading the dtype numpy chooses. When
    `config.cast_policy` is 'numpy+floatX' and `config.floatX` is 'float32',
    a float64 result is downgraded to float32 unless one of the input dtypes
    was explicitly float64.

    NOTE(review): this span contained both the pre- and post-commit bodies
    merged together by the diff view; only the new implementation is kept.
    """
    # Should we try to keep float32 instead of float64? This is used so that
    # for instance mixing int64 with float32 yields float32 instead of
    # float64. Note that we store this boolean as a one-element list so that
    # it can be modified within `make_array`.
    keep_float32 = [(config.cast_policy == 'numpy+floatX' and
                     config.floatX == 'float32')]
    def make_array(dt):
        if dt == 'float64':
            # There is an explicit float64 dtype: we cannot keep float32.
            keep_float32[0] = False
        return numpy.zeros((), dtype=dt)
    z = make_array(dtype)
    for dt in dtypes:
        z = z + make_array(dt=dt)
    rval = str(z.dtype)
    if rval == 'float64' and keep_float32[0]:
        return 'float32'
    else:
        return rval
def as_scalar(x, name = None):
if isinstance(x, gof.Apply):
......@@ -47,6 +64,7 @@ def as_scalar(x, name = None):
except TypeError:
raise TypeError("Cannot convert %s to Scalar" % x, type(x))
def constant(x):
# pass through numpy scalars, since they are already typed on purpose typically.
if hasattr(x,'dtype'):
......@@ -383,6 +401,7 @@ uint_types = uint8, uint16, uint32, uint64
float_types = float32, float64
complex_types = complex64, complex128
discrete_types = int_types + uint_types
continuous_types = float_types + complex_types
class _scalar_py_operators:
......@@ -416,6 +435,7 @@ class _scalar_py_operators:
def __sub__(self,other): return sub(self,other)
def __mul__(self,other): return mul(self,other)
def __div__(self,other): return div_proxy(self,other)
def __floordiv__(self,other): return int_div(self,other)
def __mod__(self,other): return mod(self,other)
def __pow__(self,other): return pow(self,other)
......@@ -995,32 +1015,48 @@ class Sub(BinaryScalarOp):
return first_part, second_part
sub = Sub(upcast_out, name = 'sub')
def div_proxy(x, y):
    """Proxy for either true_div or int_div, depending on types of x, y.

    Currently used as a check to ensure we are not trying to divide integers.
    In 0.4 we will get rid of this function to always use true_div:
        http://trac-hg.assembla.com/theano/ticket/669

    NOTE(review): this span contained both the pre- and post-commit bodies
    merged together by the diff view; only the new implementation is kept.

    :raises NotImplementedError: when both arguments have discrete
        (integer) types.
    """
    if (as_scalar(x).type in discrete_types and
        as_scalar(y).type in discrete_types):
        # Following discussion on theano-dev ("Inconsistent behavior in integer
        # division"), we will change the semantics of "/" on integer types in
        # Theano 0.4. Until then, it is forbidden to use "/" on integers.
        raise NotImplementedError(
                "Dividing two integers with '/' is forbidden until Theano v0.4"
                " is released (where the result will be a floating point "
                "number). In the meantime, please either use '//' for integer "
                "division, or cast one of the arguments to a floating point "
                "type for float division.")
    else:
        return true_div(x, y)
class TrueDiv(BinaryScalarOp):
def output_types(self, types):
if all(t.dtype.startswith('int') for t in types):
return [float64]
if all(t in discrete_types for t in types):
return [Scalar(config.floatX)]
else:
return super(TrueDiv, self).output_types(types)
def impl(self, x, y):
x = numpy.asarray(x)
y = numpy.asarray(y)
if str(x.dtype).startswith('int') and str(y.dtype).startswith('int'):
return float(x) / y
if all(a.dtype in discrete_types for a in (x, y)):
return numpy.array(float(x) / y, dtype=config.floatX)
else:
return x / y
def c_code(self, node, name, (x, y), (z, ), sub):
#we generate good c code only when both are complex!
if sum([node.inputs[0].type in complex_types, node.inputs[1].type in complex_types])==1:
raise NotImplementedError('type not supported', type)
if node.inputs[0].type in int_types and node.inputs[1].type in int_types:
if (node.inputs[0].type in discrete_types and
node.inputs[1].type in discrete_types):
return "%(z)s = ((double)%(x)s) / %(y)s;" % locals()
return "%(z)s = %(x)s / %(y)s;" % locals()
def grad(self, (x, y), (gz, )):
......@@ -1029,11 +1065,15 @@ class TrueDiv(BinaryScalarOp):
if x.type in float_types:
first_part = cast(gz / y, x.type.dtype)
else:
assert x.type in discrete_types
first_part = None
if y.type in complex_types:
raise NotImplementedError()
if y.type in float_types:
second_part = cast(-(gz * x) / (y * y), y.type.dtype)
else:
assert y.type in discrete_types
second_part = None
return first_part, second_part
true_div = TrueDiv(upcast_out, name = 'true_div')
......
......@@ -36,6 +36,11 @@ def _warn(*msg):
#This is needed as we will hide it later
python_complex=complex
# Define common subsets of dtypes (as strings).
int_dtypes = map(str, scal.int_types)
discrete_dtypes = map(str, scal.discrete_types)
def check_equal_numpy(x, y):
"""
Returns True iff x and y are equal (checks the dtype and
......@@ -162,36 +167,64 @@ class NumpyAutocaster(object):
"""
This class is used to cast python ints and floats to numpy arrays.
The behaviour for numpy scalars is a bit tricky... but tends to work in
practice.
If the dtype of a numpy scalar is in the self.dtypes list, then this 'cast'
is a no-op.
When config.floatX is float32 (at the time of calling), then this function
downcasts float and numpy.float arguments to numpy.float32, if float32 is
in the self.dtypes list.
Python ints are always 64bit and floats are always double precision.
This class uses the algorithm in __call__ to use a narrower dtype when no
precision would be lost, and to even lose precision when this is demanded
by the list of dtypes (e.g. to automatically cast all floats to
single-precision if self.dtypes does not include full precision floats).
The behavior when called on scalar `x` depends on `config.cast_policy`:
- 'numpy' will simply use the same type as found by `numpy.asarray(x)`.
- 'numpy+floatX' will do the same, except it will use float32 instead
of float64 if `x` is a Python float and `config.floatX` is set to
'float32' (note that if `x` is a numpy scalar whose data type is
float64, it is not modified since we assume the user is purposely
using float64).
- 'custom' lets one define a tuple of data types such that:
- if `x` is already a numpy scalar and its data type is in this
tuple, then it is returned unchanged;
- otherwise, the first data type in this tuple that can represent
`x` without loss of precision will be used, unless `x` is a float
and 'float32' is in the tuple (in which case `x` is cast as a
float32);
- if no data type can represent `x` without loss of precision, then
the last data type in the tuple will be used.
"""
def __init__(self, dtypes):
"""
Constructor.
:type dtypes: Tuple of strings.
:param dtypes: The ordered list of preferred data types (only used when
`config.cast_policy` is set to 'custom', see the `NumpyAutocaster` help
for details).
"""
self.dtypes = tuple(dtypes)
def __call__(self, x):
# Change the default casting behaviour for python floats to always cast
# to float32
dtype = None
# Make sure we only deal with scalars.
assert (isinstance(x, int) or
isinstance(x, float) or
(isinstance(x, numpy.ndarray) and x.ndim == 0))
if config.cast_policy == 'numpy':
return numpy.asarray(x)
elif config.cast_policy == 'numpy+floatX':
rval = numpy.asarray(x)
if (rval.dtype == 'float64' and # numpy wants float64
config.floatX == 'float32' and # but we prefer float32
not hasattr(x, 'dtype')): # and `x` was not typed
rval = theano._asarray(rval, dtype='float32')
return rval
# The following is the original code, corresponding to the 'custom'
# option for `config.cast_policy`.
assert config.cast_policy == 'custom'
try:
# Pass through numpy scalars, since they are already typed on
# purpose typically.
if str(x.dtype) in self.dtypes:
return theano._asarray(x, dtype=x.dtype) #leave dtype alone
# No need to cast `x` into a new dtype. Note that we still
# need to convert it into an array, because it may not be
# one already (e.g. if x == numpy.float64(1.1)).
return numpy.asarray(x)
except AttributeError:
# Means `x` has no 'dtype' attribute.
pass
# unsafe downcast of float64 variables when config.floatX == 'float32'
......@@ -223,7 +256,10 @@ autocast_float = NumpyAutocaster(('float32', 'float64'))
# have the same type as the xmatrix().
#
class autocast_float_as(object):
"""This class makes it possible to temporarily and locally adjust autocasting behaviour.
"""
This class makes it possible to temporarily and locally adjust autocasting
behavior when `config.cast_policy` is set to 'custom'.
If `config.cast_policy` is not 'custom', an exception is raised.
For example:
>>> with autocast_float_as('float32') as _dummy:
......@@ -235,10 +271,13 @@ class autocast_float_as(object):
"""
def __init__(self, *dtypes):
self.dtypes = dtypes
assert config.cast_policy == 'custom'
def __enter__(self):
assert config.cast_policy == 'custom'
self.old_dtypes = autocast_float.dtypes
autocast_float.dtypes = self.dtypes
def __exit__(self, *args):
assert config.cast_policy == 'custom'
autocast_float.dtypes = self.old_dtypes
def constant_or_value(x, rtype, name=None, ndim=None, dtype=None):
......@@ -260,6 +299,11 @@ def constant_or_value(x, rtype, name=None, ndim=None, dtype=None):
x_ = autocast_int(x)
elif rtype is TensorConstant and isinstance(x, float):
x_ = autocast_float(x)
elif rtype is TensorConstant and isinstance(x, long):
# It is not clear what would happen if one was to use a `long`
# number as a constant in a Theano graph. As a result, we throw
# an exception in this situation.
raise NotImplementedError('Constants of type `long` not supported')
elif isinstance(x, numpy.ndarray):
x_ = x
# Currently we do not have a bool dtype in Theano.
......@@ -352,7 +396,7 @@ def _allclose(a, b):
rtol = float64_rtol
# Work around bug in Numpy, see http://projects.scipy.org/numpy/ticket/1684
if str(b.dtype).startswith('int') and (numpy.absolute(b) < 0).any():
if str(b.dtype) in int_dtypes and (numpy.absolute(b) < 0).any():
b = theano._asarray(b, dtype='float64')
return numpy.allclose(a,b, atol=atol, rtol=rtol)
......@@ -1094,6 +1138,10 @@ class _tensor_py_operators:
def __div__(self,other):
try:
return div_proxy(self,other)
except NotImplementedError:
# This is to raise the exception that occurs when trying to divide
# two integer arrays (currently forbidden).
raise
except Exception, e:
return NotImplemented
def __pow__(self,other):
......@@ -1848,7 +1896,7 @@ def min(x, axis='DEFAULT'):
"flatten the tensor before calling min()."),
stacklevel=2)
str_x_type = str(x.dtype)
if str_x_type.startswith('float') or str_x_type.startswith('int'):
if str_x_type.startswith('float') or str_x_type in int_dtypes:
return -max(-x, axis=axis)
else:
#Be careful about unsigned integers, complex
......@@ -1878,7 +1926,7 @@ def argmin(x, axis='DEFAULT'):
"axis before calling argmin."),
stacklevel=2)
str_x_type = str(x.dtype)
if str_x_type.startswith('float') or str_x_type.startswith('int'):
if str_x_type.startswith('float') or str_x_type in int_dtypes:
return argmax(-x, axis=axis)
else:
#Be careful about unsigned integers, complex
......@@ -2381,7 +2429,7 @@ def mean(input, axis = None, op = False):
if op:
return Mean(axis)(input)
if str(input.dtype).startswith('int'):
if str(input.dtype) in discrete_dtypes:
# we need to cast eventually anyway, and this helps
# to prevent overflow
input = cast(input, 'float64')
......@@ -2524,14 +2572,23 @@ def minimum(x,y):
"""
# see decorator for function body
def div_proxy(x, y):
    """Proxy for either true_div or int_div, depending on types of x, y.

    NOTE(review): this span contained both the pre- and post-commit bodies
    merged together by the diff view; only the new implementation is kept.

    :raises NotImplementedError: when both arguments have discrete
        (integer) dtypes, mirroring the equivalent check in scalar/basic.py.
    """
    if (as_tensor_variable(x).dtype in discrete_dtypes and
        as_tensor_variable(y).dtype in discrete_dtypes):
        # See the same check in scalar/basic.py.
        raise NotImplementedError(
                "Dividing two integer arrays with '/' is forbidden until "
                "Theano v0.4 is released (where the result will be a floating "
                "point number). In the meantime, please either use '//' for "
                "integer division, or cast one of the arguments to a floating "
                "point type for float division.")
    else:
        return true_div(x, y)
@_scal_elemwise_with_nfunc('add', 2, 1)
def add(a, *other_terms):
"""elementwise addition"""
......@@ -4021,6 +4078,31 @@ def arange(start, stop=None, step=1, dtype=None):
# If dtype is not provided, infer it from the other arguments
if dtype is None:
dtype = scal.upcast(start.type.dtype, stop.type.dtype, step.type.dtype)
if config.cast_policy in ('numpy', 'numpy+floatX'):
# We enforce numpy semantics, except in the special case where
# `config.cast_policy` is 'numpy+floatX' and we want to use float32
# rather than float64.
# As an example, if `start`, `stop` and `step` are all int32,
# `numpy.arange` returns an int64 array (on 64-bit platforms),
# while the upcast above returns int32.
numpy_dtype = numpy.arange(
start=numpy.array(0, dtype=start.dtype),
stop=numpy.array(1, dtype=stop.dtype),
step=numpy.array(1, dtype=step.dtype)).dtype
if numpy_dtype != dtype:
if (config.cast_policy == 'numpy+floatX' and
config.floatX == 'float32' and
numpy_dtype == 'float64' and
# No explicit float64 in the three arguments?
all(dt != 'float64'
for dt in [s.dtype for s in (start, stop, step)])):
# We use float32 instead.
assert dtype != 'float64'
dtype = 'float32'
else:
# We use the same dtype as numpy instead of the result of
# the upcast.
dtype = str(numpy_dtype)
if dtype not in _arange:
_arange[dtype] = ARange(dtype)
......
......@@ -454,7 +454,49 @@ class Elemwise(Op):
"""
inputs = map(as_tensor_variable, inputs)
shadow = self.scalar_op.make_node(*[Scalar(dtype = t.type.dtype)() for t in inputs])
input_dtypes = [i.dtype for i in inputs]
scalar_inputs = []
array_inputs = []
for input_idx, input in enumerate(inputs):
if input.ndim == 0:
scalar_inputs.append((input_idx, input))
else:
array_inputs.append((input_idx, input))
if (scalar_inputs and
array_inputs and
theano.config.cast_policy in ('numpy', 'numpy+floatX')):
# We need to make sure that scalars do not upcast arrays unless
# they are fundamentally different. This is specified in
# http://docs.scipy.org/doc/numpy/reference/ufuncs.html
# in the 'casting rules' section.
array_dtype = scalar.upcast(*[a[1].dtype for a in array_inputs])
for input_idx, input in scalar_inputs:
# Replace this scalar input's type with the one that numpy
# would use when adding this scalar to the array.
# Note that currently numpy's behavior is not consistent, which
# is a bug (will be fixed in 1.6). See for details
# http://projects.scipy.org/numpy/ticket/1827
# As a result, we pick the highest precision data type that
# numpy may decide to use (although we may prefer float32 over
# float64).
n_inputs = [
numpy.array(0, dtype=input_dtypes[input_idx]),
numpy.array([0], dtype=array_dtype)]
n_types = [(n_inputs[0] + n_inputs[1]).dtype,
(n_inputs[1] + n_inputs[0]).dtype]
n_highest_type = scalar.upcast(*map(str, n_types))
if (n_highest_type == 'float64' and
theano.config.cast_policy == 'numpy+floatX' and
theano.config.floatX == 'float32' and
array_dtype != 'float64' and
input_dtypes[input_idx] != 'float64'):
# Prefer float 32 instead.
n_highest_type = 'float32'
input_dtypes[input_idx] = n_highest_type
shadow = self.scalar_op.make_node(*[Scalar(dtype=dtype)() for dtype in input_dtypes])
target_length = max([input.type.ndim for input in inputs])
......
......@@ -47,6 +47,75 @@ def eval_outputs(outputs):
return variables[0]
return variables
def get_numeric_subclasses(cls=numpy.number, ignore=None):
    """
    Return subclasses of `cls` in the numpy scalar hierarchy.

    Only subclasses corresponding to unique data types are returned; the
    `ignore` list accumulates the dtype numbers already collected across
    the recursive calls. The hierarchy can be seen here:
        http://docs.scipy.org/doc/numpy/reference/arrays.scalars.html
    """
    seen = [] if ignore is None else ignore
    found = []
    num = numpy.dtype(cls).num
    if num not in seen:
        # Safety check: this data type must be able to represent 0.
        numpy.array(0, dtype=numpy.dtype(cls))
        found.append(cls)
        seen.append(num)
    for subclass in cls.__subclasses__():
        found.extend(get_numeric_subclasses(subclass, ignore=seen))
    return found
def get_numeric_types(with_int=True, with_float=True, with_complex=False,
                      with_128_bit=False):
    """
    Return numpy numeric data types.

    :param with_int: Whether to include integer types.

    :param with_float: Whether to include floating point types.

    :param with_complex: Whether to include complex types.

    :param with_128_bit: Whether to include 128/256-bit types.

    :returns: A list of unique data type objects. Note that multiple data
    types may share the same string representation, but can be differentiated
    through their `num` attribute.

    Note that we could probably rely on the lists of types defined in the
    `scalar` module. However with this function we can test more unique dtype
    objects, and possibly detect defects in dtypes that may be introduced in
    numpy in the future.
    """
    def is_within(cls1, cls2):
        # True iff scalars defined from `cls1` are within the hierarchy
        # starting from `cls2`. The instance test below catches for instance
        # the fact that one can use ``dtype=numpy.number`` and obtain a
        # float64 scalar, even though `numpy.number` is not under
        # `numpy.floating` in the class hierarchy.
        if cls1 is cls2 or issubclass(cls1, cls2):
            return True
        return isinstance(numpy.array([0], dtype=cls1)[0], cls2)

    collected = []
    for candidate in get_numeric_subclasses():
        dtype = numpy.dtype(candidate)
        name = str(dtype)
        skip = (
            (not with_complex and is_within(candidate, numpy.complexfloating))
            or (not with_int and is_within(candidate, numpy.integer))
            or (not with_float and is_within(candidate, numpy.floating))
            or (not with_128_bit and ('128' in name or '256' in name)))
        if not skip:
            collected.append((name, dtype, dtype.num))
    # Sort on the string form for determinism, then keep only the dtype
    # objects (dropping the string and num elements).
    return [entry[1] for entry in sorted(collected, key=str)]
def _numpy_checker(x, y):
"""
Checks if x.data and y.data have the same contents.
......@@ -2180,7 +2249,7 @@ class T_Join_and_Split(unittest.TestCase):
def test_stack_scalar_make_vector(self):
'''Test that calling stack() on scalars instantiates MakeVector,
not Join. Test that the floatX dtype stay floatX, not down casted to int64'''
not Join. Test that the floatX dtype stay floatX, not downcasted to int64'''
a = tensor.scalar('a')
b = tensor.scalar('b')
s = stack(a, b, a, b)
......@@ -3056,7 +3125,13 @@ class T_scalarfromtensor(unittest.TestCase):
v = eval_outputs([ss])
self.assertTrue(v == 56, v)
self.assertTrue(isinstance(v, numpy.int8))
if config.cast_policy == 'custom':
self.assertTrue(isinstance(v, numpy.int8))
elif config.cast_policy in ('numpy', 'numpy+floatX'):
self.assertTrue(isinstance(
v, getattr(numpy, str(numpy.asarray(56).dtype))))
else:
raise NotImplementedError(config.cast_policy)
self.assertTrue(v.shape == (), v.shape)
tt = lscalar()
ss = scalar_from_tensor(tt)
......@@ -3538,7 +3613,13 @@ class TestARange(unittest.TestCase):
out = arange(start, stop)
f = function([start, stop], out)
assert out.dtype == start.type.dtype
if config.cast_policy == 'custom':
assert out.dtype == start.type.dtype
elif config.cast_policy in ('numpy', 'numpy+floatX'):
assert out.dtype == numpy.arange(numpy.int32(0),
numpy.int32(1)).dtype
else:
raise NotImplementedError(config.cast_policy)
assert numpy.all(f(0,5) == numpy.arange(0,5))
assert numpy.all(f(-5,1) == numpy.arange(-5,1))
assert numpy.all(f(0,0) == numpy.arange(0,0))
......@@ -3560,7 +3641,12 @@ class TestARange(unittest.TestCase):
out = arange(stop)
f = function([stop], out)
assert out.dtype == stop.type.dtype
if config.cast_policy == 'custom':
assert out.dtype == stop.type.dtype
elif config.cast_policy in ('numpy', 'numpy+floatX'):
assert out.dtype == numpy.arange(numpy.int32(1)).dtype
else:
raise NotImplementedError(config.cast_policy)
assert numpy.all(f(8) == numpy.arange(8))
assert numpy.all(f(-2) == numpy.arange(-2))
......@@ -3568,24 +3654,93 @@ class TestARange(unittest.TestCase):
fout = arange(fstop)
ff = function([fstop], fout)
assert fout.dtype == fstop.type.dtype
if config.cast_policy == 'custom':
assert fout.dtype == fstop.type.dtype
elif config.cast_policy == 'numpy':
assert fout.dtype == numpy.arange(numpy.float32(1)).dtype
elif config.cast_policy == 'numpy+floatX':
if config.floatX == 'float32':
assert fout.dtype == 'float32'
else:
assert fout.dtype == numpy.arange(numpy.float32(1)).dtype
else:
raise NotImplementedError(config.cast_policy)
fstop_values = [0.2, -0.7, 8.5]
for fstop_v in fstop_values:
fstop_v32 = numpy.float32(fstop_v)
assert numpy.all(ff(fstop_v32) == numpy.arange(fstop_v))
def test_upcast(self):
"""Test that arange compute output type adequately"""
assert arange(iscalar()).dtype == iscalar().dtype
assert arange(fscalar()).dtype == fscalar().dtype
assert arange(dscalar()).dtype == dscalar().dtype
# int32 + float32 -> float64
assert arange(iscalar(), fscalar()).dtype == dscalar().dtype
assert arange(iscalar(), dscalar()).dtype == dscalar().dtype
assert arange(fscalar(), dscalar()).dtype == dscalar().dtype
assert arange(iscalar(), fscalar(), dscalar()).dtype == dscalar().dtype
"""Test that arange computes output type adequately"""
if config.cast_policy == 'custom':
assert arange(iscalar()).dtype == iscalar().dtype
assert arange(fscalar()).dtype == fscalar().dtype
assert arange(dscalar()).dtype == dscalar().dtype
# int32 + float32 -> float64
assert arange(iscalar(), fscalar()).dtype == dscalar().dtype
assert arange(iscalar(), dscalar()).dtype == dscalar().dtype
assert arange(fscalar(), dscalar()).dtype == dscalar().dtype
assert arange(iscalar(), fscalar(), dscalar()).dtype == dscalar().dtype
elif config.cast_policy in ('numpy', 'numpy+floatX'):
for dtype in get_numeric_types():
# Test with a single argument.
arange_dtype = arange(scalar(dtype=str(dtype))).dtype
numpy_dtype = numpy.arange(numpy.array(1, dtype=dtype)).dtype
if (dtype != 'float64' and
numpy_dtype == 'float64' and
config.cast_policy == 'numpy+floatX' and
config.floatX == 'float32'):
# We want a float32 arange.
assert arange_dtype == 'float32'
else:
# Follow numpy.
assert arange_dtype == numpy_dtype
# Test with two arguments.
for stop_dtype in get_numeric_types():
arange_dtype = arange(
start=scalar(dtype=str(dtype)),
stop=scalar(dtype=str(stop_dtype))).dtype
numpy_dtype = numpy.arange(
start=numpy.array(0, dtype=dtype),
stop=numpy.array(1, dtype=stop_dtype)).dtype
if (dtype != 'float64' and
stop_dtype != 'float64' and
numpy_dtype == 'float64' and
config.cast_policy == 'numpy+floatX' and
config.floatX == 'float32'):
# We want a float32 arange.
assert arange_dtype == 'float32'
else:
# Follow numpy.
assert arange_dtype == numpy_dtype
# Test with three arguments.
for step_dtype in get_numeric_types():
arange_dtype = arange(
start=scalar(dtype=str(dtype)),
stop=scalar(dtype=str(stop_dtype)),
step=scalar(dtype=str(step_dtype))).dtype
numpy_dtype = numpy.arange(
start=numpy.array(0, dtype=dtype),
stop=numpy.array(1, dtype=stop_dtype),
step=numpy.array(1, dtype=step_dtype)).dtype
if (dtype != 'float64' and
stop_dtype != 'float64' and
step_dtype != 'float64' and
numpy_dtype == 'float64' and
config.cast_policy == 'numpy+floatX' and
config.floatX == 'float32'):
# We want a float32 arange.
assert arange_dtype == 'float32'
else:
# Follow numpy.
assert arange_dtype == numpy_dtype
else:
raise NotImplementedError(config.cast_policy)
def test_dtype_cache(self):
"""Checks that the same Op is returned on repeated calls to arange
......@@ -3624,7 +3779,13 @@ class TestARange(unittest.TestCase):
f = function([start, stop], out.shape, mode=mode)
assert len(f.maker.env.toposort())==4
#4 [Elemwise{sub,no_inplace}(stop, start), Elemwise{Cast{int64}}(Elemwise{sub,no_inplace}.0), Elemwise{Maximum{output_types_preference=transfer_type{0}}}[(0, 0)](Elemwise{Cast{int64}}.0, 0), MakeVector(Elemwise{Maximum{output_types_preference=transfer_type{0}}}[(0, 0)].0)]
assert out.dtype == start.type.dtype
if config.cast_policy == 'custom':
assert out.dtype == start.type.dtype
elif config.cast_policy in ('numpy', 'numpy+floatX'):
assert out.dtype == numpy.arange(
numpy.int32(0), numpy.int32(1), numpy.int32(1)).dtype
else:
raise NotImplementedError(config.cast_policy)
assert numpy.all(f(0,5) == len(numpy.arange(0,5)))
assert numpy.all(f(2,11) == len(numpy.arange(2,11)))
assert numpy.all(f(-5,1) == len(numpy.arange(-5,1)))
......@@ -4074,6 +4235,22 @@ def test_default_state():
assert numpy.allclose(f(numpy.asarray(2.2, dtype=config.floatX)), 7)
def test_autocast():
backup_config = config.cast_policy
# Call test functions for all possible values of `config.cast_policy`.
for autocast_cfg in (
'custom',
'numpy',
'numpy+floatX',
):
config.cast_policy = autocast_cfg
try:
eval('_test_autocast_' + autocast_cfg.replace('+', '_'))()
finally:
config.cast_policy = backup_config
def _test_autocast_custom():
"""Called from `test_autocast`."""
assert config.cast_policy == 'custom'
orig_autocast = autocast_float.dtypes
# Test that autocast_float_as sets the autocast dtype correctly
......@@ -4165,6 +4342,131 @@ def test_autocast():
finally:
ac.__exit__()
def _test_autocast_numpy():
    """Called from `test_autocast`.

    Checks that, under the 'numpy' cast policy, a Theano constant gets
    exactly the dtype that `numpy.asarray` chooses for the same value.
    """
    assert config.cast_policy == 'numpy'
    # Go through some typical scalar values.
    def ok(z):
        assert tensor.constant(z).dtype == numpy.asarray(z).dtype
    # Powers of two exercise every integer bit width, plus zero and a few
    # representative floats.
    for x in ([2**i for i in xrange(63)] +
              [0] +
              [0., 1., 1.1, 1.5]):
        n_x = numpy.asarray(x)
        # Make sure the data type is the same as the one found by numpy.
        ok(x)
        ok(-x)
        ok(x - 1)
        ok(-x + 1)
        ok(n_x)
def _test_autocast_numpy_floatX():
    """Called from `test_autocast`.

    Checks the 'numpy+floatX' cast policy: identical to numpy's behavior,
    except that a plain Python float becomes float32 when `config.floatX`
    is 'float32'.
    """
    assert config.cast_policy == 'numpy+floatX'
    backup_floatX = config.floatX
    def ok(z, floatX):
        if (isinstance(z, float) and
            floatX == 'float32' and
            not hasattr(z, 'dtype')):
            # Special case where we use 'float32' instead of 'float64'.
            assert tensor.constant(z).dtype == 'float32'
        else:
            # Otherwise we must match numpy exactly.
            assert tensor.constant(z).dtype == numpy.asarray(z).dtype
    try:
        # Test with various values of `config.floatX`.
        for floatX in ('float32', 'float64'):
            config.floatX = floatX
            # Go through some typical scalar values.
            for x in ([2**i for i in xrange(63)] +
                      [0] +
                      [0., 1., 1.1, 1.5]):
                ok(x, floatX)
                ok(-x, floatX)
                ok(x - 1, floatX)
                ok(-x + 1, floatX)
                # Typed numpy scalars must never be downcast.
                ok(numpy.asarray(x), floatX)
                ok(numpy.float64(x), floatX)
    finally:
        # Restore floatX so later tests are unaffected.
        config.floatX = backup_floatX
class test_arithmetic_cast(unittest.TestCase):
    """
    Test output types of typical arithmetic operations (* / + - //).

    We only test the behavior for `config.cast_policy` set to either 'numpy' or
    'numpy+floatX': the 'custom' behavior is (at least partially) tested in
    `_test_autocast_custom`.
    """
    def test_arithmetic_cast(self):
        backup_config = config.cast_policy
        dtypes = get_numeric_types(with_complex=True)
        # Here:
        # scalar == scalar stored as a 0d array
        # array == 1d array
        # i_scalar == scalar type used internally by Theano
        theano_scalar = lambda dtype: tensor.scalar(dtype=str(dtype))
        numpy_scalar = lambda dtype: numpy.array(1, dtype=dtype)
        theano_array = lambda dtype: tensor.vector(dtype=str(dtype))
        numpy_array = lambda dtype: numpy.array([1], dtype=dtype)
        theano_i_scalar = lambda dtype: theano.scalar.Scalar(str(dtype))()
        # Internal scalars are compared against numpy 0d arrays as well.
        numpy_i_scalar = numpy_scalar
        try:
            for cfg in ('numpy', 'numpy+floatX'):
                config.cast_policy = cfg
                for op in (operator.add, operator.sub, operator.mul,
                           operator.div, operator.floordiv):
                    for a_type in dtypes:
                        for b_type in dtypes:
                            # Note that we do not test division between
                            # integers as this is currently forbidden.
                            if (op is operator.div and
                                a_type in tensor.discrete_dtypes and
                                b_type in tensor.discrete_dtypes):
                                continue
                            # We will test all meaningful combinations of
                            # scalar and array operations.
                            for combo in (
                                    ('scalar', 'scalar'),
                                    ('array', 'array'),
                                    ('scalar', 'array'),
                                    ('array', 'scalar'),
                                    ('i_scalar', 'i_scalar'),
                                    ):
                                # Map the combo names to the constructor
                                # lambdas defined above.
                                theano_args = map(eval,
                                        ['theano_%s' % c for c in combo])
                                numpy_args = map(eval,
                                        ['numpy_%s' % c for c in combo])
                                theano_dtype = op(
                                        theano_args[0](a_type),
                                        theano_args[1](b_type)).type.dtype
                                # For numpy we have a problem:
                                #   http://projects.scipy.org/numpy/ticket/1827
                                # The current expected behavior is to use
                                # the highest data type that numpy may return.
                                # We therefore compute the op in both argument
                                # orders and upcast the two results.
                                numpy_dtypes = [
                                        op(numpy_args[0](a_type),
                                           numpy_args[1](b_type)).dtype,
                                        op(numpy_args[1](b_type),
                                           numpy_args[0](a_type)).dtype]
                                numpy_dtype = theano.scalar.upcast(
                                        *map(str, numpy_dtypes))
                                if (cfg == 'numpy+floatX' and
                                    config.floatX == 'float32' and
                                    a_type != 'float64' and
                                    b_type != 'float64' and
                                    numpy_dtype == 'float64'):
                                    # We should keep float32.
                                    assert theano_dtype == 'float32'
                                else:
                                    assert theano_dtype == numpy_dtype
        finally:
            # Restore the cast policy regardless of test outcome.
            config.cast_policy = backup_config
class test_broadcast(unittest.TestCase):
def test_broadcast_bigdim(self):
def f():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论