Merge pull request #5091 from abergeron/bool

Add bool dtype in scalar and tensor.

Merge pull request #5091 from abergeron/bool
bf9413c8 · Pascal Lamblin · GitHub · 58164835 · 46a30357 · bf9413c8
--- a/doc/tutorial/gradients.txt
+++ b/doc/tutorial/gradients.txt
@@ -124,7 +124,7 @@ do is to loop over the entries in *y* and compute the gradient of
 >>> import theano.tensor as T
 >>> x = T.dvector('x')
 >>> y = x ** 2
->>> J, updates = theano.scan(lambda i, y,x : T.grad(y[i], x), sequences=T.arange(y.shape[0]), non_sequences=[y,x])
+>>> J, updates = theano.scan(lambda i, y, x : T.grad(y[i], x), sequences=T.arange(y.shape[0]), non_sequences=[y, x])
 >>> f = theano.function([x], J, updates=updates)
 >>> f([4, 4])
 array([[ 8.,  0.],

--- a/setup.py
+++ b/setup.py
@@ -53,7 +53,7 @@ PLATFORMS           = ["Windows", "Linux", "Solaris", "Mac OS-X", "Unix"]
 MAJOR               = 0
 MINOR               = 9
 MICRO               = 0
-SUFFIX              = "dev3"  # Should be blank except for rc's, betas, etc.
+SUFFIX              = "dev4"  # Should be blank except for rc's, betas, etc.
 ISRELEASED          = False
 VERSION             = '%d.%d.%d%s' % (MAJOR, MINOR, MICRO, SUFFIX)

--- a/theano/gof/tests/test_fg.py
+++ b/theano/gof/tests/test_fg.py
 from __future__ import absolute_import, print_function, division
 import os
 import pickle
-import sys
 import unittest
-from nose.plugins.skip import SkipTest
 import theano
 from theano.compat import PY3
 from theano.gof import CachedConstantError, FunctionGraph
@@ -32,14 +29,11 @@ class TFunctionGraph(unittest.TestCase):
        pickle.loads(s)
    def test_node_outputs_not_used(self):
-        """In the past, we where removing some not used variable from
+        # In the past, we were removing some unused variables from
-        fgraph.variables event if the apply had other output used in
+        # fgraph.variables even if the apply had other outputs used in
-        the graph. This caused a crash.
+        # the graph. This caused a crash.
-        This test run the pickle that reproduce this case.
+        # This test runs the pickle that reproduces this case.
-        """
-        if sys.version_info[:2] < (2, 7):
-            raise SkipTest("This test need python 2.7 or more recent.")
        with open(os.path.join(os.path.dirname(__file__),
                               'test_fg_old_crash.pkl'),
                  'rb') as f:

--- a/theano/gpuarray/elemwise.py
+++ b/theano/gpuarray/elemwise.py
@@ -129,7 +129,8 @@ class GpuElemwise(HideC, Elemwise):
            support_code += fake_node.op.c_support_code()
        except MethodNotDefined:
            pass
-        for npy, ga in [("npy_uint8", "ga_ubyte"),
+        for npy, ga in [("npy_bool", "ga_bool"),
+                        ("npy_uint8", "ga_ubyte"),
                        ("npy_uint16", "ga_ushort"),
                        ("npy_uint32", "ga_uint"),
                        ("npy_uint64", "ga_ulong"),

--- a/theano/gpuarray/type.py
+++ b/theano/gpuarray/type.py
@@ -408,6 +408,7 @@ class GpuArrayType(Type):
                'float16': (float, 'npy_float16', 'NPY_FLOAT16'),
                'float32': (float, 'npy_float32', 'NPY_FLOAT32'),
                'float64': (float, 'npy_float64', 'NPY_FLOAT64'),
+                'bool': (int, 'npy_bool', 'NPY_BOOL'),
                'uint8': (int, 'npy_uint8', 'NPY_UINT8'),
                'int8': (int, 'npy_int8', 'NPY_INT8'),
                'uint16': (int, 'npy_uint16', 'NPY_UINT16'),

--- a/theano/gradient.py
+++ b/theano/gradient.py
@@ -477,7 +477,7 @@ def grad(cost, wrt, consider_constant=None,
        # function, sure, but nonetheless one we can and should support.
        # So before we try to cast it make sure it even has a dtype
        if (hasattr(g_cost.type, 'dtype') and
-                cost.type.dtype not in tensor.discrete_dtypes):
+                cost.type.dtype in tensor.continuous_dtypes):
                # Here we enforce the constraint that floating point variables
                # have the same dtype as their gradient.
                g_cost = g_cost.astype(cost.type.dtype)
@@ -485,7 +485,7 @@ def grad(cost, wrt, consider_constant=None,
        # This is to be enforced by the Op.grad method for the
        # Op that outputs cost.
        if hasattr(g_cost.type, 'dtype'):
-            assert g_cost.type.dtype not in tensor.discrete_dtypes
+            assert g_cost.type.dtype in tensor.continuous_dtypes
        grad_dict[cost] = g_cost
@@ -1335,12 +1335,11 @@ def _float_ones_like(x):
    """ Like ones_like, but forces the object to have a
    floating point dtype """
-    rval = tensor.ones_like(x)
+    dtype = x.type.dtype
+    if dtype not in tensor.float_dtypes:
+        dtype = theano.config.floatX
-    if rval.type.dtype.find('float') != -1:
+    return tensor.ones_like(x, dtype=dtype)
-        return rval
-    return rval.astype(theano.config.floatX)
 class numeric_grad(object):

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -237,8 +237,11 @@ optdb['canonicalize'].register('local_cut_gpu_host_gpu',
 # 'float64', 'complex128' and 'complex64' are not supported in elemwise
 # on the gpu.
-elemwise_cuda_dtype_supported = ['float32', 'uint8', 'int8', 'uint16', 'int16',
+elemwise_cuda_dtype_supported = ['float32', 'bool',
-                                 'uint32', 'int32', 'uint64', 'int64']
+                                 'uint8', 'int8',
+                                 'uint16', 'int16',
+                                 'uint32', 'int32',
+                                 'uint64', 'int64']
 def dtype_in_elemwise_supported(op):
@@ -298,8 +301,8 @@ def local_gpu_elemwise_0(node):
                        return False
                #   first establish that float32 can store all inputs
-                upcastable = set(['float32', 'int8', 'int16', 'uint8',
+                upcastable = set(['float32', 'bool', 'int8', 'int16',
-                                  'uint16'])
+                                  'uint8', 'uint16'])
                # case 1 - all inputs are already float32
                if all([i.type.dtype == 'float32' for i in node.inputs]):
                    # TODO: change this when fusion makes Elemwise with

--- a/theano/sandbox/cuda/rng_curand.py
+++ b/theano/sandbox/cuda/rng_curand.py
@@ -28,7 +28,7 @@ class CURAND_Base(GpuOp):
    CURAND.  This Op uses a generic-typed shared variable to point to a CObject
    that encapsulates this opaque reference.
-    Each random variable is created with a generator of False.
+    Each random variable is created with a generator of None.
    The actual random number generator is allocated from the seed, on the first
    call to allocate random numbers (see c_code).
@@ -210,7 +210,7 @@ class CURAND_Base(GpuOp):
                %(fail)s;
            }
            %(o_generator)s = PyCObject_FromVoidPtr(gen, &free_generator);
-            assert (%(i_generator)s == Py_False);
+            assert (%(i_generator)s == Py_None);
        }
        else if (%(destructive)s)
        {
@@ -244,7 +244,7 @@ class CURAND_Base(GpuOp):
        return code
    def c_code_cache_version(self):
-        return (4,)
+        return (5,)
 class CURAND_Normal(CURAND_Base):
@@ -328,7 +328,7 @@ class CURAND_RandomStreams(object):
        else:
            msg = "size must be a tuple of int or a Theano variable"
            assert isinstance(size, Variable) and size.ndim == 1, msg
-        generator = theano.shared(False)  # makes a generic
+        generator = theano.shared(None)  # makes a generic
        s_size = theano.tensor.as_tensor_variable(size)
        u = CURAND_Uniform.new_auto_update(generator, ndim, dtype, s_size,
                                           self.next_seed())
@@ -360,7 +360,7 @@ class CURAND_RandomStreams(object):
        else:
            msg = "size must be a tuple of int or a Theano variable"
            assert isinstance(size, Variable) and size.ndim == 1, msg
-        generator = theano.shared(False)  # makes a generic
+        generator = theano.shared(None)  # makes a generic
        s_size = theano.tensor.as_tensor_variable(size)
        u = CURAND_Normal.new_auto_update(generator, ndim, dtype, s_size,
                                          self.next_seed())

--- a/theano/scalar/basic.py
+++ b/theano/scalar/basic.py
@@ -34,6 +34,7 @@ from theano.gradient import grad_undefined
 from theano.printing import pprint
 import collections
+builtin_bool = bool
 builtin_complex = complex
 builtin_int = int
 builtin_float = float
@@ -161,7 +162,7 @@ class Scalar(Type):
    TODO: refactor to be named ScalarType for consistency with TensorType.
    """
+    __props__ = ('dtype',)
    ndim = 0
    def __init__(self, dtype):
@@ -200,6 +201,8 @@ class Scalar(Type):
    def values_eq_approx(self, a, b, tolerance=1e-4):
        # The addition have risk of overflow especially with [u]int8
+        if self.dtype == 'bool':
+            return a == b
        diff = a - b
        if diff == 0:
            return True
@@ -227,12 +230,6 @@ class Scalar(Type):
        else:
            return []
-    def __eq__(self, other):
-        return type(self) == type(other) and other.dtype == self.dtype
-    def __hash__(self):
-        return hash('theano.scalar.Scalar') ^ hash(self.dtype)
    def dtype_specs(self):
        try:
            # To help debug dtype/typenum problem, here is code to get
@@ -244,7 +241,8 @@ class Scalar(Type):
            #     now, as Theano always expect the exact typenum that
            #     correspond to our supported dtype.
            """
-            for dtype in ['int8', 'uint8', 'short', 'ushort', 'intc', 'uintc',
+            for dtype in ['bool', 'int8', 'uint8', 'short', 'ushort', 'intc',
+                          'uintc',
                          'longlong', 'ulonglong', 'single', 'double',
                          'longdouble', 'csingle', 'cdouble', 'clongdouble',
                          'float32', 'float64', 'int8', 'int16', 'int32',
@@ -260,6 +258,7 @@ class Scalar(Type):
                'complex128': (numpy.complex128, 'theano_complex128',
                               'Complex128'),
                'complex64': (numpy.complex64, 'theano_complex64', 'Complex64'),
+                'bool': (numpy.bool_, 'npy_bool', 'Bool'),
                'uint8': (numpy.uint8, 'npy_uint8', 'UInt8'),
                'int8': (numpy.int8, 'npy_int8', 'Int8'),
                'uint16': (numpy.uint16, 'npy_uint16', 'UInt16'),
@@ -288,12 +287,13 @@ class Scalar(Type):
    def c_literal(self, data):
        if 'complex' in self.dtype:
            raise NotImplementedError("No literal for complex values.")
+        if self.dtype == 'bool':
+            return '1' if data else '0'
        return str(data)
    def c_declare(self, name, sub, check_input=True):
        if(check_input):
            pre = """
-                typedef %(dtype)s %(name)s_dtype; // Deprecated use dtype_%(name)s instead.
                typedef %(dtype)s dtype_%(name)s;
            """ % dict(name=name, dtype=self.dtype_specs()[1])
        else:
@@ -309,6 +309,7 @@ class Scalar(Type):
    def c_extract(self, name, sub, check_input=True):
        if self.dtype == 'float16':
+            # This doesn't work at the numpy level
            raise NotImplementedError('float16')
        specs = self.dtype_specs()
        if(check_input):
@@ -517,6 +518,7 @@ theano.compile.register_view_op_c_code(
    1)
+bool = get_scalar_type('bool')
 int8 = get_scalar_type('int8')
 int16 = get_scalar_type('int16')
 int32 = get_scalar_type('int32')
@@ -536,7 +538,8 @@ uint_types = uint8, uint16, uint32, uint64
 float_types = float16, float32, float64
 complex_types = complex64, complex128
-discrete_types = int_types + uint_types
+integer_types = int_types + uint_types
+discrete_types = (bool,) + integer_types
 continuous_types = float_types + complex_types
 all_types = discrete_types + continuous_types
@@ -681,38 +684,57 @@ complexs64 = _multi(complex64)
 complexs128 = _multi(complex128)
-# Using a class instead of a function makes it possible to deep-copy it in
+def upcast_out(*types):
-# Python 2.4.
+    dtype = Scalar.upcast(*types)
-# Note that currently only a few functions use this mechanism, because it is
+    return get_scalar_type(dtype),
-# enough to make the test-suite pass with Python 2.4. However, it may prove
-# necessary to use this same mechanism in other places as well in the future.
-class upcast_out(object):
-    def __new__(self, *types):
-        dtype = Scalar.upcast(*types)
-        return get_scalar_type(dtype),
-class upgrade_to_float(object):
+def upcast_out_nobool(*types):
-    def __new__(self, *types):
+    type = upcast_out(*types)
-        """
+    if type[0] == bool:
-        Upgrade any int types to float32 or float64 to avoid losing precision.
+        raise TypeError("bool output not supported")
+    return type
-        """
-        conv = {int8: float32,
-                int16: float32,
-                int32: float64,
-                int64: float64,
-                uint8: float32,
-                uint16: float32,
-                uint32: float64,
-                uint64: float64}
-        return get_scalar_type(Scalar.upcast(*[conv.get(type, type)
-                               for type in types])),
+def upcast_out_min8(*types):
+    type = upcast_out(*types)
+    if type[0] == bool:
+        return int8,
+    return type
-class same_out(object):
-    def __new__(self, type):
+def upgrade_to_float(*types):
-        return type,
+    """
+    Upgrade any int types to float32 or float64 to avoid losing precision.
+    """
+    conv = {bool: float32,
+            int8: float32,
+            int16: float32,
+            int32: float64,
+            int64: float64,
+            uint8: float32,
+            uint16: float32,
+            uint32: float64,
+            uint64: float64}
+    return get_scalar_type(Scalar.upcast(*[conv.get(type, type)
+                                           for type in types])),
+def same_out(type):
+    return type,
+def same_out_nobool(type):
+    if type == bool:
+        raise TypeError("bool input not supported")
+    return type,
+def same_out_min8(type):
+    if type == bool:
+        return int8,
+    return type,
 def upcast_out_no_complex(*types):
@@ -728,6 +750,8 @@ def same_out_float_only(type):
 class transfer_type(gof.utils.object2):
+    __props__ = ('transfer',)
    def __init__(self, *transfer):
        assert all(type(x) in [int, str] or x is None for x in transfer)
        self.transfer = transfer
@@ -748,26 +772,16 @@ class transfer_type(gof.utils.object2):
        return retval
        # return [upcast if i is None else types[i] for i in self.transfer]
-    def __eq__(self, other):
-        return type(self) == type(other) and self.transfer == other.transfer
-    def __hash__(self):
-        return hash(self.transfer)
 class specific_out(gof.utils.object2):
+    __props__ = ('spec',)
    def __init__(self, *spec):
        self.spec = spec
    def __call__(self, *types):
        return self.spec
-    def __eq__(self, other):
-        return type(self) == type(other) and self.spec == other.spec
-    def __hash__(self):
-        return hash(self.spec)
 def int_out(*types):
    return int64,
@@ -914,15 +928,15 @@ class ScalarOp(Op):
        return test
    def __hash__(self):
-        return hash(type(self).__name__) ^ hash(
+        return hash((type(self),
-            getattr(self, 'output_types_preference', 0))
+                     getattr(self, 'output_types_preference', 0)))
    def __str__(self):
        if hasattr(self, 'name') and self.name:
            return self.name
        else:
            param = [(k, v) for k, v in self.__dict__.items()
-                     if k not in ["name", "_op_use_c_code",
+                     if k not in ["name", "_op_use_c_code", "bool",
                                  "output_types_preference"]]
            if param:
                return "%s{%s}" % (self.__class__.__name__,
@@ -1006,31 +1020,65 @@ class BinaryScalarOp(ScalarOp):
 ###############
 class LogicalComparison(BinaryScalarOp):
+    def __init__(self, *args, **kwargs):
+        BinaryScalarOp.__init__(self, *args, **kwargs)
+        # This is for compat with old pickles.
+        self.bool = True
+    def __eq__(self, other):
+        # Two comparisons are equal only if the base-class check passes
+        # and both carry the same `bool` flag. The flag is read with
+        # getattr because instances restored from old pickles may not
+        # have it; a missing flag is treated as False, matching
+        # output_types().
+        return (BinaryScalarOp.__eq__(self, other) and
+                getattr(self, 'bool', False) == getattr(other, 'bool', False))
+    def __hash__(self):
+        # bool should always be True
+        return BinaryScalarOp.__hash__(self)
    def output_types(self, *input_dtypes):
-        return [int8]
+        return [bool] if getattr(self, 'bool', False) else [int8]
    def grad(self, inputs, output_gradients):
        x, y = inputs
        out = self(x, y)
-        assert str(out.type.dtype).find('int') != -1
+        assert out.type == bool
        return [x.zeros_like().astype(theano.config.floatX),
                y.zeros_like().astype(theano.config.floatX)]
+    def c_code_cache_version(self):
+        super_version = super(LogicalComparison, self).c_code_cache_version()
+        return super_version + (0,)
 class FixedLogicalComparison(UnaryScalarOp):
    """
    Comparison to a fixed value.
    """
+    def __init__(self, *args, **kwargs):
+        UnaryScalarOp.__init__(self, *args, **kwargs)
+        # This is for compat with old pickles
+        self.bool = True
+    def __eq__(self, other):
+        # Two fixed comparisons are equal only if the base-class check
+        # passes and both carry the same `bool` flag. The flag is read
+        # with getattr because instances restored from old pickles may
+        # not have it; a missing flag is treated as False, matching
+        # output_types().
+        return (UnaryScalarOp.__eq__(self, other) and
+                getattr(self, 'bool', False) == getattr(other, 'bool', False))
+    def __hash__(self):
+        # bool should always be True
+        return UnaryScalarOp.__hash__(self)
    def output_types(self, *input_dtypes):
-        return [int8]
+        return [bool] if getattr(self, 'bool', False) else [int8]
    def grad(self, inputs, output_gradients):
        x, = inputs
        out = self(x)
-        assert str(out.type.dtype).find('int') != -1
+        assert out.type == bool
        return [x.zeros_like().astype(theano.config.floatX)]
+    def c_code_cache_version(self):
+        super_version = super(FixedLogicalComparison, self).c_code_cache_version()
+        return super_version + (0,)
 class LT(LogicalComparison):
    identity = False
@@ -1202,21 +1250,10 @@ class InRange(LogicalComparison):
    def c_code(self, node, name, inputs, outputs, sub):
        (x, low, hi) = inputs
        (z,) = outputs
-        if self.openlow:
-            cmp1 = '>'
-        else:
-            cmp1 = '>='
-        # backport
-        # cmp1 = '>' if self.openlow else '>='
-        if self.openhi:
+        cmp1 = '>' if self.openlow else '>='
-            cmp2 = '<'
+        cmp2 = '<' if self.openhi else '<='
-        else:
-            cmp2 = '<='
-        # backport
-        # cmp2 = '<' if self.openhi else '<='
        return ("%(z)s = %(x)s %(cmp1)s %(low)s &&"
                " %(x)s %(cmp2)s %(hi)s;" % locals())
@@ -1247,13 +1284,8 @@ class Switch(ScalarOp):
    nfunc_spec = ('where', 3, 1)
    def impl(self, cond, ift, iff):
-        if cond:
+        return ift if cond else iff
-            return ift
-        else:
-            return iff
-            # backport
-            # return ift if cond else iff
    def c_code(self, node, name, inputs, outputs, sub):
        (cond, ift, iff) = inputs
        (z,) = outputs
@@ -1290,9 +1322,9 @@ switch = Switch()
 class UnaryBitOp(UnaryScalarOp):
    def output_types(self, *input_types):
        for i in input_types[0]:
-            if i not in (int8, int16, int32, int64):
+            if i not in discrete_types:
-                raise TypeError('input to a BitOp must have type int8,'
+                raise TypeError('input to a BitOp must have type (u)int8, '
-                                ' int16, int32 or int64... not %s' % i)
+                                '(u)int16, (u)int32 or (u)int64 or bool not %s' % i)
        return upcast_out(*input_types[0])
    def grad(self, inputs, output_gradients):
@@ -1302,10 +1334,13 @@ class UnaryBitOp(UnaryScalarOp):
 class BinaryBitOp(BinaryScalarOp):
    def output_types(self, *input_types):
        t0, t1 = input_types[0]
+        if t0 == bool and t1 == bool:
+            return [bool]
        for i in input_types[0]:
-            if i not in (int8, int16, int32, int64):
+            if i not in integer_types:
-                raise TypeError('input to a BitOp must have type int8,'
+                raise TypeError('input to a BitOp must have type (u)int8, '
-                                ' int16, int32 or int64... not %s' % i)
+                                '(u)int16, (u)int32 or (u)int64 or '
+                                'be all bools not %s' % i)
        return upcast_out(*input_types[0])
    def grad(self, inputs, output_gradients):
@@ -1371,6 +1406,8 @@ class Invert(UnaryBitOp):
    def c_code(self, node, name, inputs, outputs, sub):
        (x,) = inputs
        (z,) = outputs
+        if node.outputs[0].type == bool:
+            return "%(z)s = (!%(x)s);" % locals()
        return "%(z)s = (~%(x)s);" % locals()
 invert = Invert()
@@ -1463,10 +1500,13 @@ class Add(ScalarOp):
    def c_code(self, node, name, inputs, outputs, sub):
        (z,) = outputs
+        op = " + "
+        if node.outputs[0].type == bool:
+            op = " || "
        if not inputs:
            return z + " = 0;"
        else:
-            return z + " = " + " + ".join(inputs) + ";"
+            return z + " = " + op.join(inputs) + ";"
    def grad(self, inputs, gout):
        (gz,) = gout
@@ -1502,10 +1542,13 @@ class Mul(ScalarOp):
    def c_code(self, node, name, inputs, outputs, sub):
        (z,) = outputs
+        op = " * "
+        if node.outputs[0].type == bool:
+            op = " && "
        if not inputs:
            return z + " = 1;"
        else:
-            return z + " = " + " * ".join(inputs) + ";"
+            return z + " = " + op.join(inputs) + ";"
    def grad(self, inputs, gout):
        (gz,) = gout
@@ -1571,7 +1614,7 @@ class Sub(BinaryScalarOp):
        second_part = -gz
        return first_part, second_part
-sub = Sub(upcast_out, name='sub')
+sub = Sub(upcast_out_nobool, name='sub')
 def int_or_true_div(x_discrete, y_discrete):
@@ -1937,7 +1980,7 @@ class Pow(BinaryScalarOp):
        raise theano.gof.utils.MethodNotDefined()
-pow = Pow(upcast_out, name='pow')
+pow = Pow(upcast_out_min8, name='pow')
 class Clip(ScalarOp):
@@ -2062,6 +2105,8 @@ class Cast(UnaryScalarOp):
    def c_code(self, node, name, inputs, outputs, sub):
        (x,) = inputs
        (z,) = outputs
+        if node.outputs[0].type == bool:
+            return "%s = (%s) ? 1 : 0;" % (z, x)
        return "%s = (%s)%s;" % (z, node.outputs[0].type.dtype_specs()[1], x)
    def grad(self, inputs, gout):
@@ -2075,10 +2120,11 @@ class Cast(UnaryScalarOp):
    def c_code_cache_version(self):
        s = super(Cast, self).c_code_cache_version()
        if s:
-            return (3,) + s
+            return (4,) + s
        else:
            return s
+convert_to_bool = Cast(bool, name='convert_to_bool')
 convert_to_int8 = Cast(int8, name='convert_to_int8')
 convert_to_int16 = Cast(int16, name='convert_to_int16')
 convert_to_int32 = Cast(int32, name='convert_to_int32')
@@ -2094,6 +2140,7 @@ convert_to_complex64 = Cast(complex64, name='convert_to_complex64')
 convert_to_complex128 = Cast(complex128, name='convert_to_complex128')
 _cast_mapping = {
+    'bool': convert_to_bool,
    'int8': convert_to_int8,
    'int16': convert_to_int16,
    'int32': convert_to_int32,
@@ -2173,6 +2220,12 @@ abs_ = Abs(same_out)
 class Sgn(UnaryScalarOp):
    nfunc_spec = ('sign', 1, 1)
+    @staticmethod
+    def output_types_preference(x):
+        if x == bool:
+            raise TypeError(x)
+        return same_out_nocomplex(x)
    def impl(self, x):
        # casting to output type is handled by filter
        return numpy.sign(x)
@@ -2205,7 +2258,7 @@ class Sgn(UnaryScalarOp):
            return (4,) + s
        else:  # if parent is unversioned, we are too
            return s
-sgn = Sgn(same_out_nocomplex, name='sgn')
+sgn = Sgn(name='sgn')
 class Ceil(UnaryScalarOp):
@@ -2228,7 +2281,7 @@ class Ceil(UnaryScalarOp):
        (x,) = inputs
        (z,) = outputs
        return "%(z)s = ceil(%(x)s);" % locals()
-ceil = Ceil(same_out_nocomplex, name='ceil')
+ceil = Ceil(upgrade_to_float_no_complex, name='ceil')
 class Floor(UnaryScalarOp):
@@ -2251,7 +2304,7 @@ class Floor(UnaryScalarOp):
        (x,) = inputs
        (z,) = outputs
        return "%(z)s = floor(%(x)s);" % locals()
-floor = Floor(same_out_nocomplex, name='floor')
+floor = Floor(upgrade_to_float_no_complex, name='floor')
 class Trunc(UnaryScalarOp):
@@ -2269,7 +2322,7 @@ class Trunc(UnaryScalarOp):
        (x,) = inputs
        (z,) = outputs
        return "%(z)s = %(x)s >= 0? floor(%(x)s): -floor(-%(x)s);" % locals()
-trunc = Trunc(same_out_nocomplex, name='trunc')
+trunc = Trunc(upgrade_to_float_no_complex, name='trunc')
 class RoundHalfToEven(UnaryScalarOp):
@@ -2409,13 +2462,6 @@ class Neg(UnaryScalarOp):
    nfunc_spec = ('negative', 1, 1)
    def impl(self, x):
-        # We have to make sure x is not a numpy.bool_, because
-        # `-numpy.bool_(True)` is `False` (we want 0), and
-        # `-numpy.bool_(False)` is `True` (we want 1).
-        # This happens for Composite, as the intermediate results are not
-        # casted in the dtype of the intermediate variable in general.
-        if isinstance(x, numpy.bool_):
-            x = numpy.int8(x)
        return -x
    def grad(self, inputs, gout):
@@ -2433,7 +2479,7 @@ class Neg(UnaryScalarOp):
        (x,) = inputs
        (z,) = outputs
        return "%(z)s = -%(x)s;" % locals()
-neg = Neg(same_out, name='neg')
+neg = Neg(same_out_nobool, name='neg')
 pprint.assign(add, printing.OperatorPrinter('+', -2, 'either'))
 pprint.assign(mul, printing.OperatorPrinter('*', -1, 'either'))
@@ -3423,7 +3469,7 @@ class Conj(UnaryScalarOp):
    def impl(self, x):
        return numpy.conj(x)
-conj = Conj(same_out, name='conj')
+conj = Conj(same_out_min8, name='conj')
 class ComplexFromPolar(BinaryScalarOp):

--- a/theano/scalar/tests/test_basic.py
+++ b/theano/scalar/tests/test_basic.py
@@ -155,17 +155,6 @@ class test_composite(unittest.TestCase):
        si3 = theano.scalar.float32()
        sop.make_node(si0 * si3, si1, si2)
-    def test_composite_neg_bool(self):
-        # Check that taking the negation of a Boolean intermediate value
-        # works correctly with Python code. It used to be an issue because
-        # `-numpy.bool_(True)` is False and `-numpy.bool_(False)` is True.
-        x = floats('x')
-        y = - (x > 0)
-        z = Composite([x], [y]).make_node(x).outputs[0]
-        f = theano.function([x], z, mode=theano.Mode(linker='py'))
-        for inp, out in zip([-1, 0, 1], [0, 0, -1]):
-            self.assertTrue(f(inp) == out)
 class test_logical(unittest.TestCase):
    def test_gt(self):

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -52,6 +52,7 @@ python_all = all
 complex_dtypes = list(map(str, scal.complex_types))
 continuous_dtypes = list(map(str, scal.continuous_types))
 float_dtypes = list(map(str, scal.float_types))
+integer_dtypes = list(map(str, scal.integer_types))
 discrete_dtypes = list(map(str, scal.discrete_types))
 all_dtypes = list(map(str, scal.all_types))
 int_dtypes = list(map(str, scal.int_types))
@@ -302,7 +303,7 @@ class NumpyAutocaster(object):
        # returns either an exact x_==x, or the last cast x_
        return x_
-autocast_int = NumpyAutocaster(('int16', 'int32', 'int64'))
+autocast_int = NumpyAutocaster(('int8', 'int16', 'int32', 'int64'))
 autocast_float = NumpyAutocaster(('float16', 'float32', 'float64'))
@@ -379,16 +380,10 @@ def constant_or_value(x, rtype, name=None, ndim=None, dtype=None):
            x_ = autocast_float(x)
        elif isinstance(x, numpy.ndarray):
            x_ = x
-            # Currently we do not have a bool dtype in Theano.
-            # So we upcast it to uint8 to avoid breaking our interface for
-            # constant.
-            if x.dtype == 'bool':
-                x_ = numpy.asarray(x_, dtype='uint8')
        else:
-            # Here x is probably a list or a tuple. If it contains a long,
+            # Here x is probably a list or a tuple. If it contains a
-            # we will behave like the current NumPy version: 1.7 and below,
+            # long, we will behave like the current NumPy version: it
-            # it will only work if the long fits in int64. For NumPy 1.7.1+,
+            # will work if the long fits in int64 or uint64.
-            # it will work if the long fits in int64 or uint64.
            x_ = numpy.asarray(x)
            if x_.size == 0 and not hasattr(x, 'dtype'):
                x_ = numpy.asarray(x, dtype=config.floatX)
@@ -521,11 +516,6 @@ def _allclose(a, b, rtol=None, atol=None):
    if atol is not None:
        atol_ = atol
-    # Work around bug in Numpy, see
-    # http://projects.scipy.org/numpy/ticket/1684
-    if str(b.dtype) in int_dtypes and (numpy.absolute(b) < 0).any():
-        b = theano._asarray(b, dtype='float64')
    return numpy.allclose(a, b, atol=atol_, rtol=rtol_)
@@ -1247,6 +1237,10 @@ def _conversion(real_value, name):
 # what types you are casting to what.  That logic is implemented by the
 # `cast()` function below.
+_convert_to_bool = _conversion(
+    elemwise.Elemwise(scal.convert_to_bool), 'bool')
+"""Cast to boolean"""
 _convert_to_int8 = _conversion(
    elemwise.Elemwise(scal.convert_to_int8), 'int8')
 """Cast to 8-bit integer"""
@@ -1300,6 +1294,7 @@ _convert_to_complex128 = _conversion(
 """Cast to double-precision complex"""
 _cast_mapping = {
+    'bool': _convert_to_bool,
    'int8': _convert_to_int8,
    'int16': _convert_to_int16,
    'int32': _convert_to_int32,
@@ -3191,6 +3186,10 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
    for i in axis:
        s = true_div(s, shp[i])
+    # This can happen when axis is an empty list/tuple
+    if s.dtype != shp.dtype and s.dtype in discrete_dtypes:
+        s = cast(s, shp.dtype)
    if dtype == 'float16' or (dtype is None and input.dtype == 'float16'):
        s = cast(s, 'float16')
    s.name = 'mean'

--- a/theano/tensor/blas.py
+++ b/theano/tensor/blas.py
@@ -1189,7 +1189,7 @@ def _gemm_canonicalize(r, scale, rval, maxclients):
    def scaled(thing):
        if scale == 1:
            return thing
-        if scale == -1:
+        if scale == -1 and thing.type.dtype != 'bool':
            return -thing
        else:
            return scale * thing

--- a/theano/tensor/elemwise.py
+++ b/theano/tensor/elemwise.py
@@ -20,12 +20,6 @@ from theano.tensor import elemwise_cgen as cgen
 config = theano.config
-# We cannot import discrete_dtypes or float_dtypes from tensor.basic yet,
-# so we redefine them here
-discrete_dtypes = list(map(str, scalar.discrete_types))
-float_dtypes = list(map(str, scalar.float_types))
-int_dtypes = list(map(str, scalar.int_types))
 # tensor depends on elemwise to provide definitions for several ops
 # but elemwise needs to make TensorType instances, so we have these as
@@ -818,9 +812,9 @@ second dimension
        # NumPy 1.10.1 raise an error when giving the signature
        # when the input is complex. So add it only when inputs is int.
        out_dtype = node.outputs[0].dtype
-        if (out_dtype in float_dtypes and
+        if (out_dtype in theano.tensor.float_dtypes and
                isinstance(self.nfunc, numpy.ufunc) and
-                node.inputs[0].dtype in discrete_dtypes):
+                node.inputs[0].dtype in theano.tensor.discrete_dtypes):
            char = numpy.sctype2char(out_dtype)
            sig = char * node.nin + '->' + char * node.nout
            node.tag.sig = sig
@@ -1076,7 +1070,7 @@ second dimension
        # We loop over the "aliased" outputs, i.e., those that are
        # inplace (overwrite the contents of one of the inputs) and
-        # make the output pointers point to theur corresponding input
+        # make the output pointers point to their corresponding input
        # pointers.
        for output, oname in izip(aliased_outputs, aliased_onames):
            olv_index = inputs.index(dmap[output][0])
@@ -1641,10 +1635,10 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
        task1_code = self.scalar_op.c_code(
            Apply(self.scalar_op,
-                  [get_scalar_type(dtype=input.type.dtype).make_variable()
+                  [get_scalar_type(dtype=iv.type.dtype).make_variable()
-                   for input in (node.inputs * 2)],
+                   for iv in (node.inputs * 2)],
-                  [get_scalar_type(dtype=output.type.dtype).make_variable()
+                  [get_scalar_type(dtype=ov.type.dtype).make_variable()
-                   for input in node.outputs]),
+                   for ov in node.outputs]),
            None,
            ["%s_i" % aname, "%s_i" % inames[0]],
            ["%s_i" % aname],
@@ -1708,18 +1702,16 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
 class All(CAReduce):
-    """ Applies `bitwise and` to all the values of a tensor along the
+    """ Applies `logical and` to all the values of a tensor along the
    specified axis(es).
-    Equivalent to `CAReduce(scalar.and\_, axis=axis)`.
    """
    def __init__(self, axis=None):
        CAReduce.__init__(self, scalar.and_, axis)
    def _output_dtype(self, idtype):
-        return "int8"
+        return "bool"
    def __str__(self):
        if self.axis is None:
@@ -1729,7 +1721,7 @@ class All(CAReduce):
    def make_node(self, input):
        input = as_tensor_variable(input)
-        if input.dtype not in ["int8", "uint8"]:
+        if input.dtype != "bool":
            input = theano.tensor.neq(input, 0)
        ret = super(All, self).make_node(input)
        return ret
@@ -1743,15 +1735,13 @@ class Any(CAReduce):
    """ Applies `logical or` to all the values of a tensor along the
    specified axis(es).
-    Equivalent to `CAReduce(scalar.or\_, axis=axis)`.
    """
    def __init__(self, axis=None):
        CAReduce.__init__(self, scalar.or_, axis)
    def _output_dtype(self, idtype):
-        return "int8"
+        return "bool"
    def __str__(self):
        if self.axis is None:
@@ -1761,7 +1751,7 @@ class Any(CAReduce):
    def make_node(self, input):
        input = as_tensor_variable(input)
-        if input.dtype not in ["int8", "uint8"]:
+        if input.dtype != "bool":
            input = theano.tensor.neq(input, 0)
        ret = super(Any, self).make_node(input)
        return ret
@@ -1863,6 +1853,7 @@ class CAReduceDtype(CAReduce):
        if dtype is None:
            # If input has a discrete dtype, upcast it to 64
            return dict(
+                bool='int64',
                int8='int64',
                int16='int64',
                int32='int64',
@@ -1878,6 +1869,7 @@ class CAReduceDtype(CAReduce):
        acc_dtype = self.acc_dtype
        if acc_dtype is None:
            return dict(
+                bool='int64',
                int8='int64',
                int16='int64',
                int32='int64',
@@ -1990,7 +1982,7 @@ class Sum(CAReduceDtype):
        out = self(*inp)
-        if out.dtype.find('int') != -1:
+        if out.dtype not in theano.tensor.continuous_dtypes:
            return [x.zeros_like(dtype=theano.config.floatX)]
        gz, = grads
@@ -2101,8 +2093,8 @@ class Prod(CAReduceDtype):
        out = self(*inp)
-        if (out.dtype in discrete_dtypes or
+        if (out.dtype in theano.tensor.discrete_dtypes or
-                self.acc_dtype in discrete_dtypes):
+                self.acc_dtype in theano.tensor.discrete_dtypes):
            # There is an int conversion in the way
            return [prod_in.zeros_like(dtype=theano.config.floatX)]

--- a/theano/tensor/extra_ops.py
+++ b/theano/tensor/extra_ops.py
 from __future__ import absolute_import, print_function, division
 import numpy as np
 import numpy
-import warnings
 from six.moves import xrange
 import theano
@@ -561,103 +560,6 @@ def diff(x, n=1, axis=-1):
    return DiffOp(n=n, axis=axis)(x)
-class BinCountOp(theano.Op):
-    """
-    .. note:: Deprecated
-              Use bincount() instead.
-              See function bincount for docstring.
-    """
-    compatible_type = ('int8', 'int16', 'int32', 'int64',
-                       'uint8', 'uint16', 'uint32', 'uint64')
-    """Tuple of all compatible dtype for the parameter of this op."""
-    __props__ = ("minlength",)
-    def __init__(self, minlength=None):
-        self.minlength = minlength
-        if minlength is not None:
-            numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
-            if not bool(numpy_ver >= [1, 6]):
-                raise NotImplementedError(
-                    "BinCountOp with minlength attribute"
-                    " requires NumPy 1.6 or higher.")
-    def make_node(self, x, weights):
-        warnings.warn((
-            "Tile op is deprecated, use tile function instead."),
-            stacklevel=3)
-        x = basic.as_tensor_variable(x)
-        if x.dtype not in BinCountOp.compatible_type:
-            raise TypeError("Inputs dtype must be an integer.")
-        # Some dtypes are not supported by numpy's implementation of bincount.
-        # Until another one is available, we should fail at graph construction
-        # time, not wait for execution.
-        int_bitwidth = theano.configdefaults.python_int_bitwidth()
-        if int_bitwidth == 64:
-            numpy_unsupported_dtypes = ('uint64',)
-        if int_bitwidth == 32:
-            numpy_unsupported_dtypes = ('uint32', 'int64', 'uint64')
-        intp_bitwidth = theano.configdefaults.local_bitwidth()
-        if intp_bitwidth == 32:
-            out_type = basic.ivector()
-        elif intp_bitwidth == 64:
-            out_type = basic.lvector()
-        if x.dtype in numpy_unsupported_dtypes:
-            raise TypeError(
-                ("Input dtypes %s are not supported by numpy.bincount, "
-                 % numpy_unsupported_dtypes), x.dtype)
-        if x.ndim != 1:
-            raise TypeError("Inputs must be of dimension 1.")
-        if weights is None:
-            weights = theano.gof.Constant(theano.gof.Generic(), None)
-        else:
-            weights = basic.as_tensor_variable(weights)
-            out_type = basic.dvector()
-            if weights.ndim != 1:
-                raise TypeError("Weights cannot have a number of"
-                                "dimension different of 1.")
-        return theano.Apply(self, [x, weights], [out_type])
-    def perform(self, node, inputs, output_storage):
-        x = inputs[0]
-        weights = inputs[1]
-        z = output_storage[0]
-        if weights is not None and weights.shape != x.shape:
-            raise TypeError("All inputs must have the same shape.")
-        # Needed for numpy 1.4.1 compatibility
-        if self.minlength:
-            out = np.bincount(x, weights=weights, minlength=self.minlength)
-        else:
-            out = np.bincount(x, weights=weights)
-        z[0] = theano._asarray(out, dtype=node.outputs[0].dtype)
-    def grad(self, inputs, outputs_gradients):
-        output = self(*inputs)
-        if output.dtype.find('int') != -1:
-            return [inp.zeros_like().astype(theano.config.floatX)
-                    for inp in inputs]
-        raise NotImplementedError()
-    def infer_shape(self, node, ins_shapes):
-        x = node.inputs[0]
-        m = basic.max(x) + 1
-        if self.minlength is not None:
-            m = basic.maximum(m, self.minlength)
-        return [[m]]
 def bincount(x, weights=None, minlength=None, assert_nonneg=False):
    """Count number of occurrences of each value in array of ints.
@@ -773,7 +675,7 @@ class RepeatOp(theano.Op):
        x = basic.as_tensor_variable(x)
        repeats = basic.as_tensor_variable(repeats)
-        if repeats.dtype not in tensor.discrete_dtypes:
+        if repeats.dtype not in tensor.integer_dtypes:
            raise TypeError("repeats.dtype must be an integer.")
        # Some dtypes are not supported by numpy's implementation of repeat.

--- a/theano/tensor/nnet/sigm.py
+++ b/theano/tensor/nnet/sigm.py
@@ -75,7 +75,7 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
        # float16 limits: -11.0, 7.0f
        # We use the float32 limits for float16 for now as the
-        # computation will happend in float32 anyway.
+        # computation will happen in float32 anyway.
        if (node.inputs[0].type == scalar.float32 or
                node.inputs[0].type == scalar.float16):
            return """%(z)s = %(x)s < -88.0f ? 0.0 : %(x)s > 15.0f ? 1.0f : 1.0f /(1.0f + exp(-%(x)s));""" % locals()

--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -5906,7 +5906,10 @@ def local_mul_specialize(node):
            if new_inputs:
                if len(new_inputs) == 1:
                    if neg:
-                        rval = -new_inputs[0]
+                        if new_inputs[0].dtype in (T.uint_dtypes + ['bool']):
+                            return
+                        else:
+                            rval = -new_inputs[0]
                    else:
                        rval = new_inputs[0]
                else:

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -1206,9 +1206,7 @@ IntDivInplaceTester = makeBroadcastTester(
 CeilTester = makeBroadcastTester(op=tensor.ceil,
-        expected=lambda a: numpy.asarray(
+        expected=upcast_float16_ufunc(numpy.ceil),
-            numpy.ceil(a),
-            a.dtype),
        good=_good_broadcast_unary_normal_no_complex,
        grad=copymod(_grad_broadcast_unary_normal,
            without=['corner_case'],
@@ -1217,7 +1215,7 @@ CeilTester = makeBroadcastTester(op=tensor.ceil,
                dtype=floatX)]))
 CeilInplaceTester = makeBroadcastTester(op=inplace.ceil_inplace,
-        expected=lambda a: numpy.asarray(numpy.ceil(a), a.dtype),
+        expected=upcast_float16_ufunc(numpy.ceil),
        good=_good_broadcast_unary_normal_no_complex,
        # corner cases include a lot of integers: points where Ceil is not
        # continuous (not differentiable)
@@ -1229,7 +1227,7 @@ CeilInplaceTester = makeBroadcastTester(op=inplace.ceil_inplace,
        inplace=True)
 FloorTester = makeBroadcastTester(op=tensor.floor,
-        expected=lambda a: numpy.asarray(numpy.floor(a), a.dtype),
+        expected=upcast_float16_ufunc(numpy.floor),
        good=_good_broadcast_unary_normal_no_complex,
        # XXX: why does grad of floor not give huge values at
        #      the integer points in the 'corner_case' in
@@ -1238,20 +1236,20 @@ FloorTester = makeBroadcastTester(op=tensor.floor,
        grad=_grad_broadcast_unary_normal)
 FloorInplaceTester = makeBroadcastTester(op=inplace.floor_inplace,
-        expected=lambda a: numpy.asarray(numpy.floor(a), a.dtype),
+        expected=upcast_float16_ufunc(numpy.floor),
        good=_good_broadcast_unary_normal_no_complex,
        grad=_grad_broadcast_unary_normal,
        inplace=True)
 TruncInplaceTester = makeBroadcastTester(
    op=inplace.trunc_inplace,
-    expected=lambda a: numpy.asarray(numpy.trunc(a), a.dtype),
+    expected=upcast_float16_ufunc(numpy.trunc),
    good=_good_broadcast_unary_normal_no_complex,
    inplace=True)
 TruncTester = makeBroadcastTester(
    op=tensor.trunc,
-    expected=lambda a: numpy.asarray(numpy.trunc(a), a.dtype),
+    expected=upcast_float16_ufunc(numpy.trunc),
    good=_good_broadcast_unary_normal_no_complex)
 RoundHalfToEvenTester = makeBroadcastTester(
@@ -5005,7 +5003,7 @@ class T_scalarfromtensor(unittest.TestCase):
        self.assertTrue(v == 56, v)
        if config.cast_policy == 'custom':
-            self.assertTrue(isinstance(v, numpy.int16))
+            self.assertTrue(isinstance(v, numpy.int8))
        elif config.cast_policy in ('numpy', 'numpy+floatX'):
            self.assertTrue(isinstance(
                v, getattr(numpy, str(numpy.asarray(56).dtype))))
@@ -7120,7 +7118,7 @@ class T_as_tensor_variable(unittest.TestCase):
    def test_ndarray_bool(self):
        ten = as_tensor_variable(numpy.array([True, False, False, True, True]))
-        assert ten.type.dtype == 'uint8'
+        assert ten.type.dtype == 'bool'
    def test_memmap(self):
        inp = numpy.random.rand(4, 3)
@@ -8192,25 +8190,3 @@ def test_symbolic_slice():
    a, b = x.shape[:2]
    output = a.eval({x: numpy.zeros((5, 4, 3, 2), dtype=theano.config.floatX)})
    assert output == numpy.array(5)
-def test_composite_neg_bool():
-    # Check that taking the negation of a Boolean intermediate value
-    # works correctly with Python code. It used to be an issue because
-    # `-numpy.bool_(True)` is False and `-numpy.bool_(False)` is True.
-    x = theano.tensor.vector()
-    f = theano.function([x], - (x > 0), mode=theano.Mode(linker='py'))
-    utt.assert_allclose(f([-1, 0, 1]), [0, 0, -1])
-"""
-if __name__ == '__main__':
-    if 0:
-        unittest.main()
-    else:
-        testcase = FloorInplaceTester
-        suite = unittest.TestLoader()
-        suite = suite.loadTestsFromTestCase(testcase)
-        unittest.TextTestRunner(verbosity=2).run(suite)
-"""
--- a/theano/tensor/tests/test_elemwise.py
+++ b/theano/tensor/tests/test_elemwise.py
@@ -113,42 +113,42 @@ class test_reduce_axes(unittest.TestCase):
                [numpy.array(0), numpy.array(1)]]
        for a in axes:
            x = tensor.matrix()
-            m = x.sum(a)
+            x.sum(a)
    def test_mean_axes(self):
        axes = [None, 0, 1, [0, 1], numpy.array(1),
                [numpy.array(0), numpy.array(1)]]
        for a in axes:
            x = tensor.matrix()
-            m = x.mean(a)
+            x.mean(a)
    def test_max_axes(self):
        axes = [None, 0, 1, [0, 1], numpy.array(1),
                [numpy.array(0), numpy.array(1)]]
        for a in axes:
            x = tensor.matrix()
-            m = x.max(a)
+            x.max(a)
    def test_min_axes(self):
        axes = [None, 0, 1, [0, 1], numpy.array(1),
                [numpy.array(0), numpy.array(1)]]
        for a in axes:
            x = tensor.matrix()
-            m = x.min(a)
+            x.min(a)
    def test_argmax_axes(self):
        axes = [None, 0, 1, [0, 1], numpy.array(1),
                [numpy.array(0), numpy.array(1)]]
        for a in axes:
            x = tensor.matrix()
-            m = x.argmax(a)
+            x.argmax(a)
    def test_var_axes(self):
        axes = [None, 0, 1, [0, 1], numpy.array(1),
                [numpy.array(0), numpy.array(1)]]
        for a in axes:
            x = tensor.matrix()
-            m = x.var(a)
+            x.var(a)
 class test_Broadcast(unittest.TestCase):
@@ -159,7 +159,7 @@ class test_Broadcast(unittest.TestCase):
    ctype = TensorType
    cop = Elemwise
-    openmp_minsize = 2*config.openmp_elemwise_minsize
+    openmp_minsize = 2 * config.openmp_elemwise_minsize
    openmp_minsize_sqrt = int(math.ceil(math.sqrt(openmp_minsize)))
    # The order is important if you change them.
@@ -346,8 +346,7 @@ class test_CAReduce(unittest_tools.InferShapeTester):
             ((5, 0), (1, )),
             ((5, 0), ()),
             ((), None),
-             ((), ())
+             ((), ())]
-    ]
    type = TensorType
    def with_linker(self, linker, scalar_op=scalar.add, dtype="floatX",
@@ -371,7 +370,7 @@ class test_CAReduce(unittest_tools.InferShapeTester):
            f = copy(linker).accept(FunctionGraph([x], [e])).make_function()
            xv = numpy.asarray(numpy.random.rand(*xsh))
-            if not "int" in dtype:
+            if "int" not in dtype:
                xv = numpy.asarray(xv, dtype=dtype)
            else:
                xv = numpy.asarray(xv < 0.5, dtype=dtype)
@@ -452,10 +451,6 @@ class test_CAReduce(unittest_tools.InferShapeTester):
                else:
                    self.fail()
            else:
-                # numpy.{all,any} return bool type,
-                # but theano ops return an int8 array instead
-                if scalar_op in [scalar.and_, scalar.or_]:
-                    zv = numpy.asarray(zv, dtype='int8')
                if test_nan:
                    try:
                        self.assertTrue(
@@ -614,18 +609,20 @@ class test_Prod(unittest.TestCase):
        x = theano.tensor.dmatrix()
        # sanity check
-        x2 = theano.tensor.dmatrix()
        p = Prod(axis=1)(x)
-        p2 = Prod(axis=1)(x2)
-        fn = theano.function([x, x2], [p - p2], mode=self.mode)
+        # Uncomment this for debugging if needed
-        # print "hand computed diff for each row"
+        # x2 = theano.tensor.dmatrix()
-        x2_val = numpy.asarray([[1., 2., 3.003], [0.003, 5., 6], [
+        # p2 = Prod(axis=1)(x2)
-            0., 0., 9.01]])
+        # fn = theano.function([x, x2], [p - p2], mode=self.mode)
-        # print fn(x_val, x2_val)
+        # print("hand computed diff for each row")
-        fn2 = theano.function([x], [theano.tensor.grad(p.sum(), x)],
+        # x2_val = numpy.asarray([[1., 2., 3.003], [0.003, 5., 6], [
-             mode=self.mode)
+        #     0., 0., 9.01]])
-        # print "real grad"
+        # print(fn(x_val, x2_val))
-        # print fn2(x_val)
+        # fn2 = theano.function([x], [theano.tensor.grad(p.sum(), x)],
+        #                       mode=self.mode)
+        # print("real grad")
+        # print(fn2(x_val))
        fn3 = theano.function([x], [p], mode=self.mode)
        assert numpy.allclose(fn3(x_val), [6., 0., 0.])
@@ -637,14 +634,14 @@ class test_Prod(unittest.TestCase):
        # def fn5(x5):
        #    return theano.tensor.sqr(Prod(axis=1)(x5))
-        #x4 = theano.tensor.dmatrix()
+        # x4 = theano.tensor.dmatrix()
-        #p4 = theano.tensor.sqr(Prod(axis=1)(x4))
+        # p4 = theano.tensor.sqr(Prod(axis=1)(x4))
-        #fn4 = theano.function([x4], p4)
+        # fn4 = theano.function([x4], p4)
-        # print "with sqr"
+        # print("with sqr")
-        # print fn4(x_val)
+        # print(fn4(x_val))
-        # print fn4(x2_val)
+        # print(fn4(x2_val))
-        #unittest_tools.verify_grad(fn5, [x_val])
+        # unittest_tools.verify_grad(fn5, [x_val])
    @attr('slow')
    def test_prod_no_zeros_in_input(self):
@@ -695,30 +692,33 @@ class test_Prod(unittest.TestCase):
        x = theano.tensor.dmatrix()
        pwz_a1 = ProdWithoutZeros(axis=0)(x)
        pwz_grad = theano.grad(theano.tensor.sum(pwz_a1), x)
-        fn_a1 = theano.function([x], pwz_grad, mode=self.mode)
+        theano.function([x], pwz_grad, mode=self.mode)
    @attr('slow')
    def test_other_grad_tests(self):
        x = theano.tensor.dmatrix()
        x_val1 = numpy.array([[1, 2, 3], [0, 5, 6], [0, 0, 9]],
-             dtype='float32')
+                             dtype='float32')
        x_val2 = numpy.array([[1, 2, 0], [0, 5, 6], [7, 8, 9], [9, 10, 0]],
-             dtype='float32')
+                             dtype='float32')
        rng = rng = numpy.random.RandomState(43)
        p = Prod(axis=1)
        grad_p = theano.tensor.grad(p(x).sum(), x)
        grad_fn = theano.function([x], grad_p, mode=self.mode)
-        assert numpy.allclose(grad_fn(x_val1), [[6., 3., 2.], [30., 0.,
+        assert numpy.allclose(
-            0.], [0., 0., 0.]])
+            grad_fn(x_val1),
-        assert numpy.allclose(grad_fn(x_val2), [[0., 0., 2.], [30.,
+            [[6., 3., 2.], [30., 0., 0.], [0., 0., 0.]])
-             0., 0.], [72., 63., 56.], [0., 0., 90.]])
+        assert numpy.allclose(
+            grad_fn(x_val2),
+            [[0., 0., 2.], [30., 0., 0.], [72., 63., 56.], [0., 0., 90.]])
        p_axis0 = Prod(axis=0)
        grad_p_axis0 = theano.tensor.grad(p_axis0(x).sum(), x)
        grad_fn_axis0 = theano.function([x], grad_p_axis0, mode=self.mode)
-        assert numpy.allclose(grad_fn_axis0(x_val2), [[0., 400.,
+        assert numpy.allclose(
-             0.], [63., 160., 0.], [0., 100., 0.], [0., 80., 0.]])
+            grad_fn_axis0(x_val2),
+            [[0., 400., 0.], [63., 160., 0.], [0., 100., 0.], [0., 80., 0.]])
        tensor.verify_grad(p, [x_val1], rng=rng, mode=self.mode)
@@ -768,7 +768,7 @@ class test_IsInf_IsNan(unittest.TestCase):
            numpy_isfunc = getattr(numpy, isfunc)
            for x in self.test_vals:
                if ((x.ndim == 0 and input is not self.scalar) or
-                    (x.ndim == 1 and input is not self.vector)):
+                        (x.ndim == 1 and input is not self.vector)):
                    # We only test with the appropriate input type.
                    continue
                t_out = theano_isfunc(x)
@@ -788,12 +788,10 @@ class T_reduce_dtype(unittest.TestCase):
    op = CAReduce
    axes = [None, 0, 1, [], [0], [1], [0, 1]]
    methods = ['sum', 'prod']
-    dtypes = imap(str, theano.scalar.all_types)
+    dtypes = list(imap(str, theano.scalar.all_types))
+    # Test the default dtype of a method().
    def test_reduce_default_dtype(self):
-        """
-        Test the default dtype of a method().
-        """
        # We try multiple axis combinations even though axis should not matter.
        for method in self.methods:
            for idx, dtype in enumerate(self.dtypes):
@@ -801,6 +799,7 @@ class T_reduce_dtype(unittest.TestCase):
                x = tensor.matrix(dtype=dtype)
                s = getattr(x, method)(axis=axis)
                assert s.dtype == dict(
+                    bool='int64',
                    int8='int64',
                    int16='int64',
                    int32='int64',
@@ -818,6 +817,7 @@ class T_reduce_dtype(unittest.TestCase):
    def test_reduce_default_acc_dtype(self):
        # Test the default acc_dtype of a reduce().
        # We try multiple axis combinations even though axis should not matter.
        for method in self.methods:
            for idx, dtype in enumerate(self.dtypes):
@@ -825,12 +825,14 @@ class T_reduce_dtype(unittest.TestCase):
                x = tensor.matrix(dtype=dtype)
                s = getattr(x, method)(axis=axis)
                assert s.owner.op.acc_dtype == dict(
+                    bool='int64',
                    int8='int64',
                    int16='int64',
                    int32='int64',
                    uint8='uint64',
                    uint16='uint64',
                    uint32='uint64',
+                    float16='float32',
                    float32='float64',
                    complex64='complex128',
                ).get(dtype, dtype)
@@ -844,20 +846,18 @@ class T_reduce_dtype(unittest.TestCase):
    @attr('slow')
    def test_reduce_custom_dtype(self):
-        """
+        # Test the ability to provide your own output dtype for a reduce.
-        Test the ability to provide your own output dtype for a reduce.
-        """
        # We try multiple axis combinations even though axis should not matter.
        idx = 0
        for method in self.methods:
            for input_dtype in self.dtypes:
                x = tensor.matrix(dtype=input_dtype)
                for output_dtype in self.dtypes:
-                # If the output is a complex, the gradient of the reduce will
+                    # Skip cases where only one of input and output is complex.
-                # cast the complex to the input dtype. We can't call the normal
+                    icomplex = input_dtype.startswith('complex')
-                # cast on a complex to a not complex as this is ambiguous.
+                    ocomplex = output_dtype.startswith('complex')
-                    if (not input_dtype.startswith('complex') and
+                    if icomplex != ocomplex:
-                        output_dtype.startswith('complex')):
                        continue
                    axis = self.axes[idx % len(self.axes)]
@@ -866,8 +866,8 @@ class T_reduce_dtype(unittest.TestCase):
                    f = theano.function([x], var, mode=self.mode)
                    topo = f.maker.fgraph.toposort()
-                    assert [n for n in topo if isinstance(n.op, self.op)], (topo,
+                    assert [n for n in topo if isinstance(n.op, self.op)], \
-                                                                            dtype)
+                        (topo, output_dtype)
                    data = numpy.random.rand(3, 4) * 10
                    data = data.astype(input_dtype)
                    f(data)
@@ -879,30 +879,28 @@ class T_reduce_dtype(unittest.TestCase):
                    idx += 1
    def test_reduce_custom_acc_dtype(self):
-        """
+        # Test the ability to provide your own accumulator dtype for a reduce.
-        Test the ability to provide your own accumulator dtype for a reduce.
-        """
        # We try multiple axis combinations even though axis should not matter.
        idx = 0
        for method in self.methods:
            for input_dtype in self.dtypes:
                x = tensor.matrix(dtype=input_dtype)
                for acc_dtype in self.dtypes:
-                # If the accumulator is a complex, the gradient of the reduce will
+                    # If the accumulator is a complex, the gradient of the reduce will
-                # cast the complex to the input dtype. We can't call the normal
+                    # cast the complex to the input dtype. We can't call the normal
-                # cast on a complex to a not complex as this is ambiguous.
+                    # cast on a complex to a not complex as this is ambiguous.
                    if (not input_dtype.startswith('complex') and
-                        acc_dtype.startswith('complex')):
+                            acc_dtype.startswith('complex')):
                        continue
                    axis = self.axes[idx % len(self.axes)]
-                # If output_dtype would force a downcast, we expect a TypeError
+                    # If output_dtype would force a downcast, we expect a TypeError
-                # We always allow int/uint inputs with float/complex outputs.
+                    # We always allow int/uint inputs with float/complex outputs.
                    upcasted_dtype = scalar.upcast(input_dtype, acc_dtype)
                    if (acc_dtype == upcasted_dtype or
                        (input_dtype in tensor.discrete_dtypes and
-                            acc_dtype in tensor.continuous_dtypes)
+                            acc_dtype in tensor.continuous_dtypes)):
-                        ):
                        var = getattr(x, method)(acc_dtype=acc_dtype,
                                                 axis=axis)
                        assert var.owner.op.acc_dtype == acc_dtype
@@ -927,8 +925,7 @@ class T_reduce_dtype(unittest.TestCase):
            s = getattr(x, method)()
            f = theano.function([], s, mode=self.mode)
            topo = f.maker.fgraph.toposort()
-            assert [n for n in topo if isinstance(n.op, self.op)], (topo,
+            assert [n for n in topo if isinstance(n.op, self.op)], topo
-                                                                    dtype)
            s_val = f()
            # Use extra precision in NumPy to compute the good answer.
            ret = getattr(numpy.asarray([1e8, 1, -1e8], dtype='float64'),
@@ -938,16 +935,15 @@ class T_reduce_dtype(unittest.TestCase):
 class T_mean_dtype(unittest.TestCase):
    def test_mean_default_dtype(self):
-        """
+        # Test the default dtype of a mean().
-        Test the default dtype of a mean().
-        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [], [0], [1], [0, 1]]
        for idx, dtype in enumerate(imap(str, theano.scalar.all_types)):
            axis = axes[idx % len(axes)]
            x = tensor.matrix(dtype=dtype)
            m = x.mean(axis=axis)
-            if dtype in tensor.discrete_dtypes and axis != []:
+            if dtype in tensor.discrete_dtypes:
                assert m.dtype == 'float64'
            else:
                assert m.dtype == dtype, (m, m.dtype, dtype)
@@ -958,9 +954,8 @@ class T_mean_dtype(unittest.TestCase):
    @attr('slow')
    def test_mean_custom_dtype(self):
-        """
+        # Test the ability to provide your own output dtype for a mean.
-        Test the ability to provide your own output dtype for a mean.
-        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [], [0], [1], [0, 1]]
        idx = 0
@@ -976,7 +971,7 @@ class T_mean_dtype(unittest.TestCase):
                    pass
                else:
                    # Executed if no TypeError was raised
-                    if sum_dtype in tensor.discrete_dtypes and axis != []:
+                    if sum_dtype in tensor.discrete_dtypes:
                        assert mean_var.dtype == 'float64', (
                            (mean_var.dtype, sum_dtype))
                    else:
@@ -984,7 +979,7 @@ class T_mean_dtype(unittest.TestCase):
                            (mean_var.dtype, sum_dtype))
                    if (('complex' in input_dtype or
                         'complex' in sum_dtype) and
-                        input_dtype != sum_dtype):
+                            input_dtype != sum_dtype):
                        continue
                    f = theano.function([x], mean_var)
                    data = numpy.random.rand(3, 4) * 10
@@ -1017,15 +1012,15 @@ class T_mean_dtype(unittest.TestCase):
 class T_prod_without_zeros_dtype(unittest.TestCase):
    def test_prod_without_zeros_default_dtype(self):
-        """
+        # Test the default dtype of a ProdWithoutZeros().
-        Test the default dtype of a ProdWithoutZeros().
-        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [], [0], [1], [0, 1]]
        for idx, dtype in enumerate(imap(str, theano.scalar.all_types)):
            axis = axes[idx % len(axes)]
            x = ProdWithoutZeros(axis=axis)(tensor.matrix(dtype=dtype))
            assert x.dtype == dict(
+                bool='int64',
                int8='int64',
                int16='int64',
                int32='int64',
@@ -1035,9 +1030,8 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
            ).get(dtype, dtype)
    def test_prod_without_zeros_default_acc_dtype(self):
-        """
+        # Test the default acc_dtype of a ProdWithoutZeros().
-        Test the default dtype of a ProdWithoutZeros().
-        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [], [0], [1], [0, 1]]
        for idx, dtype in enumerate(imap(str, theano.scalar.all_types)):
@@ -1045,16 +1039,17 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
            x = tensor.matrix(dtype=dtype)
            p = ProdWithoutZeros(axis=axis)(x)
            assert p.owner.op.acc_dtype == dict(
-                    int8='int64',
+                bool='int64',
-                    int16='int64',
+                int8='int64',
-                    int32='int64',
+                int16='int64',
-                    uint8='uint64',
+                int32='int64',
-                    uint16='uint64',
+                uint8='uint64',
-                    uint32='uint64',
+                uint16='uint64',
-                    float16='float32',
+                uint32='uint64',
-                    float32='float64',
+                float16='float32',
-                    complex64='complex128'
+                float32='float64',
-                    ).get(dtype, dtype)
+                complex64='complex128'
+                ).get(dtype, dtype)
            if 'complex' in dtype:
                continue
@@ -1065,9 +1060,8 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
    @attr('slow')
    def test_prod_without_zeros_custom_dtype(self):
-        """
+        # Test ability to provide your own output dtype for a ProdWithoutZeros().
-        Test ability to provide your own output dtype for a ProdWithoutZeros().
-        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [], [0], [1], [0, 1]]
        idx = 0
@@ -1076,11 +1070,11 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
            for output_dtype in imap(str, theano.scalar.all_types):
                axis = axes[idx % len(axes)]
                prod_woz_var = ProdWithoutZeros(
-                        axis=axis, dtype=output_dtype)(x)
+                    axis=axis, dtype=output_dtype)(x)
                assert prod_woz_var.dtype == output_dtype
                idx += 1
                if ('complex' in output_dtype or
-                    'complex' in input_dtype):
+                        'complex' in input_dtype):
                    continue
                f = theano.function([x], prod_woz_var)
                data = numpy.random.rand(2, 3) * 3
@@ -1089,9 +1083,8 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
    @attr('slow')
    def test_prod_without_zeros_custom_acc_dtype(self):
-        """
+        # Test ability to provide your own acc_dtype for a ProdWithoutZeros().
-        Test ability to provide your own acc_dtype for a ProdWithoutZeros().
-        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [], [0], [1], [0, 1]]
        idx = 0
@@ -1104,23 +1097,23 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
                upcasted_dtype = scalar.upcast(input_dtype, acc_dtype)
                if (acc_dtype == upcasted_dtype or
                        (input_dtype in tensor.discrete_dtypes and
-                            acc_dtype in tensor.continuous_dtypes)
+                            acc_dtype in tensor.continuous_dtypes)):
-                        ):
                    prod_woz_var = ProdWithoutZeros(
-                            axis=axis, acc_dtype=acc_dtype)(x)
+                        axis=axis, acc_dtype=acc_dtype)(x)
                    assert prod_woz_var.owner.op.acc_dtype == acc_dtype
                    if (acc_dtype.startswith('complex') and
-                        input_dtype != acc_dtype):
+                            input_dtype != acc_dtype):
                        continue
                    f = theano.function([x], prod_woz_var)
                    data = numpy.random.rand(2, 3) * 3
                    data = data.astype(input_dtype)
                    f(data)
                else:
-                    self.assertRaises(TypeError,
+                    self.assertRaises(
-                            ProdWithoutZeros(axis=axis, acc_dtype=acc_dtype),
+                        TypeError,
-                            x)
+                        ProdWithoutZeros(axis=axis, acc_dtype=acc_dtype),
+                        x)
                idx += 1
@@ -1160,24 +1153,26 @@ class TestElemwise(unittest_tools.InferShapeTester):
    def test_infer_shape(self):
-        for s_left, s_right in [((5, 6), (5, 6)),
+        for s_left, s_right in [
-                           ((5, 6), (5, 1)),
+                ((5, 6), (5, 6)),
-                           ((5, 6), (1, 6)),
+                ((5, 6), (5, 1)),
-                           ((5, 1), (5, 6)),
+                ((5, 6), (1, 6)),
-                           ((1, 6), (5, 6)),
+                ((5, 1), (5, 6)),
-                           ((2, 3, 4, 5), (2, 3, 4, 5)),
+                ((1, 6), (5, 6)),
-                           ((2, 3, 4, 5), (2, 3, 1, 5)),
+                ((2, 3, 4, 5), (2, 3, 4, 5)),
-                            ((2, 3, 4, 5), (1, 3, 4, 5)),
+                ((2, 3, 4, 5), (2, 3, 1, 5)),
-                            ((2, 1, 4, 5), (2, 3, 4, 5)),
+                ((2, 3, 4, 5), (1, 3, 4, 5)),
-                            ((2, 3, 4, 1), (2, 3, 4, 5))]:
+                ((2, 1, 4, 5), (2, 3, 4, 5)),
+                ((2, 3, 4, 1), (2, 3, 4, 5))]:
            dtype = theano.config.floatX
            t_left = TensorType(dtype, [(entry == 1) for entry in s_left])()
            t_right = TensorType(dtype, [(entry == 1) for entry in s_right])()
            t_left_val = numpy.zeros(s_left, dtype=dtype)
            t_right_val = numpy.zeros(s_right, dtype=dtype)
-            self._compile_and_check([t_left, t_right],
+            self._compile_and_check(
-                            [Elemwise(scalar.add)(t_left, t_right)],
+                [t_left, t_right],
-                            [t_left_val, t_right_val], Elemwise)
+                [Elemwise(scalar.add)(t_left, t_right)],
+                [t_left_val, t_right_val], Elemwise)
    def test_input_dimensions_overflow(self):
        # Elemwise.perform used to compute the product
@@ -1186,7 +1181,7 @@ class TestElemwise(unittest_tools.InferShapeTester):
        a, b, c, d, e, f = tensor.vectors('abcdef')
        s = a + b + c + d + e + f
        g = theano.function([a, b, c, d, e, f], s,
-                             mode=theano.compile.Mode(linker='py'))
+                            mode=theano.compile.Mode(linker='py'))
        g(*[numpy.zeros(2 ** 11, config.floatX) for i in xrange(6)])
@@ -1204,7 +1199,7 @@ def test_gt_grad():
    input_ = T.vector(dtype=floatX)
    random_values = numpy.random.RandomState(1234).uniform(
-                                                low=-1, high=1, size=(2, 2))
+        low=-1, high=1, size=(2, 2))
    W_values = numpy.asarray(random_values, dtype=floatX)
    W = theano.shared(value=W_values, name='weights')
    correct_score = T.dot(input_, W)
@@ -1235,7 +1230,7 @@ def test_clip_grad():
    # use an x value less than y, an x value between y and z, and an x value
    # greater than z
    unittest_tools.verify_grad(func,
-            [numpy.asarray([-1., 0.5, 2.]), 0., 1.])
+                               [numpy.asarray([-1., 0.5, 2.]), 0., 1.])
 def test_clip_grad_int():

--- a/theano/tensor/tests/test_extra_ops.py
+++ b/theano/tensor/tests/test_extra_ops.py
@@ -8,7 +8,7 @@ from theano.tests import unittest_tools as utt
 from theano.tensor.extra_ops import (SearchsortedOp, searchsorted,
                                     CumsumOp, cumsum, CumprodOp, cumprod,
-                                     CpuContiguous, cpu_contiguous, BinCountOp,
+                                     CpuContiguous, cpu_contiguous,
                                     bincount, DiffOp, diff, squeeze, compress,
                                     RepeatOp, repeat, Bartlett, bartlett,
                                     FillDiagonal, fill_diagonal,
@@ -18,9 +18,6 @@ from theano import tensor as T
 from theano import config, tensor, function
 from theano.tests.unittest_tools import attr
-numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
-numpy_16 = bool(numpy_ver >= [1, 6])
 def test_cpu_contiguous():
    a = T.fmatrix('a')
@@ -218,12 +215,7 @@ class TestCumprodOp(utt.InferShapeTester):
            utt.verify_grad(self.op_class(axis=axis), [a])
-class TestBinCountOp(utt.InferShapeTester):
+class TestBinCount(utt.InferShapeTester):
-    def setUp(self):
-        super(TestBinCountOp, self).setUp()
-        self.op_class = BinCountOp
-        self.op = BinCountOp()
    def test_bincountFn(self):
        w = T.vector('w')
@@ -263,84 +255,6 @@ class TestBinCountOp(utt.InferShapeTester):
                f5 = theano.function([x], bincount(x, assert_nonneg=True))
                self.assertRaises(AssertionError, f5, a)
-    def test_bincountOp(self):
-        w = T.vector('w')
-        for dtype in ('int8', 'int16', 'int32', 'int64',
-                      'uint8', 'uint16', 'uint32', 'uint64'):
-            # uint64 always fails
-            # int64 and uint32 also fail if python int are 32-bit
-            int_bitwidth = theano.configdefaults.python_int_bitwidth()
-            if int_bitwidth == 64:
-                numpy_unsupported_dtypes = ('uint64',)
-            if int_bitwidth == 32:
-                numpy_unsupported_dtypes = ('uint32', 'int64', 'uint64')
-            x = T.vector('x', dtype=dtype)
-            if dtype in numpy_unsupported_dtypes:
-                self.assertRaises(TypeError, BinCountOp(), x)
-            else:
-                a = np.random.randint(1, 51, size=(25)).astype(dtype)
-                weights = np.random.random((25,)).astype(config.floatX)
-                f1 = theano.function([x], BinCountOp()(x, weights=None))
-                f2 = theano.function([x, w], BinCountOp()(x, weights=w))
-                assert (np.bincount(a) == f1(a)).all()
-                assert np.allclose(np.bincount(a, weights=weights),
-                                   f2(a, weights))
-                if not numpy_16:
-                    continue
-                f3 = theano.function([x], BinCountOp(minlength=23)(x, weights=None))
-                f4 = theano.function([x], BinCountOp(minlength=5)(x, weights=None))
-                assert (np.bincount(a, minlength=23) == f3(a)).all()
-                assert (np.bincount(a, minlength=5) == f4(a)).all()
-    @attr('slow')
-    def test_infer_shape(self):
-        for dtype in tensor.discrete_dtypes:
-            # uint64 always fails
-            # int64 and uint32 also fail if python int are 32-bit
-            int_bitwidth = theano.configdefaults.python_int_bitwidth()
-            if int_bitwidth == 64:
-                numpy_unsupported_dtypes = ('uint64',)
-            if int_bitwidth == 32:
-                numpy_unsupported_dtypes = ('uint32', 'int64', 'uint64')
-            x = T.vector('x', dtype=dtype)
-            if dtype in numpy_unsupported_dtypes:
-                self.assertRaises(TypeError, BinCountOp(), x)
-            else:
-                self._compile_and_check([x],
-                                        [BinCountOp()(x, None)],
-                                        [np.random.randint(
-                                             1, 51, size=(25,)).astype(dtype)],
-                                        self.op_class)
-                weights = np.random.random((25,)).astype(config.floatX)
-                self._compile_and_check([x],
-                                        [BinCountOp()(x, weights=weights)],
-                                        [np.random.randint(
-                                            1, 51, size=(25,)).astype(dtype)],
-                                        self.op_class)
-                if not numpy_16:
-                    continue
-                self._compile_and_check([x],
-                                        [BinCountOp(minlength=60)(x, weights=weights)],
-                                        [np.random.randint(
-                                            1, 51, size=(25,)).astype(dtype)],
-                                        self.op_class)
-                self._compile_and_check([x],
-                                        [BinCountOp(minlength=5)(x, weights=weights)],
-                                        [np.random.randint(
-                                            1, 51, size=(25,)).astype(dtype)],
-                                        self.op_class)
 class TestDiffOp(utt.InferShapeTester):
    nb = 10  # Number of time iterating for n
@@ -510,7 +424,7 @@ class TestRepeatOp(utt.InferShapeTester):
            a = np.random.random((10, ) * ndim).astype(config.floatX)
            for axis in self._possible_axis(ndim):
-                for dtype in tensor.discrete_dtypes:
+                for dtype in tensor.integer_dtypes:
                    r_var = T.scalar(dtype=dtype)
                    r = numpy.asarray(3, dtype=dtype)
                    if (dtype == 'uint64' or
@@ -569,7 +483,7 @@ class TestRepeatOp(utt.InferShapeTester):
            a = np.random.random(shp).astype(config.floatX)
            for axis in self._possible_axis(ndim):
-                for dtype in tensor.discrete_dtypes:
+                for dtype in tensor.integer_dtypes:
                    r_var = T.scalar(dtype=dtype)
                    r = numpy.asarray(3, dtype=dtype)
                    if dtype in self.numpy_unsupported_dtypes:
@@ -798,13 +712,11 @@ class test_Unique(utt.InferShapeTester):
        self.ops = [Unique(),
                    Unique(True),
                    Unique(False, True),
-                    Unique(True, True)]
+                    Unique(True, True),
-        if bool(numpy_ver >= [1, 9]):
+                    Unique(False, False, True),
-            self.ops.extend([
+                    Unique(True, False, True),
-                Unique(False, False, True),
+                    Unique(False, True, True),
-                Unique(True, False, True),
+                    Unique(True, True, True)]
-                Unique(False, True, True),
-                Unique(True, True, True)])
    def test_basic_vector(self):
        """
@@ -816,13 +728,11 @@ class test_Unique(utt.InferShapeTester):
        list_outs_expected = [[np.unique(inp)],
                              np.unique(inp, True),
                              np.unique(inp, False, True),
-                              np.unique(inp, True, True)]
+                              np.unique(inp, True, True),
-        if bool(numpy_ver >= [1, 9]):
+                              np.unique(inp, False, False, True),
-            list_outs_expected.extend([
+                              np.unique(inp, True, False, True),
-                np.unique(inp, False, False, True),
+                              np.unique(inp, False, True, True),
-                np.unique(inp, True, False, True),
+                              np.unique(inp, True, True, True)]
-                np.unique(inp, False, True, True),
-                np.unique(inp, True, True, True)])
        for op, outs_expected in zip(self.ops, list_outs_expected):
            f = theano.function(inputs=[x], outputs=op(x, return_list=True))
            outs = f(inp)
@@ -839,13 +749,11 @@ class test_Unique(utt.InferShapeTester):
        list_outs_expected = [[np.unique(inp)],
                              np.unique(inp, True),
                              np.unique(inp, False, True),
-                              np.unique(inp, True, True)]
+                              np.unique(inp, True, True),
-        if bool(numpy_ver >= [1, 9]):
+                              np.unique(inp, False, False, True),
-            list_outs_expected.extend([
+                              np.unique(inp, True, False, True),
-                np.unique(inp, False, False, True),
+                              np.unique(inp, False, True, True),
-                np.unique(inp, True, False, True),
+                              np.unique(inp, True, True, True)]
-                np.unique(inp, False, True, True),
-                np.unique(inp, True, True, True)])
        for op, outs_expected in zip(self.ops, list_outs_expected):
            f = theano.function(inputs=[x], outputs=op(x, return_list=True))
            outs = f(inp)

--- a/theano/tensor/type.py
+++ b/theano/tensor/type.py
@@ -255,6 +255,7 @@ class TensorType(Type):
                'float16': (float, 'npy_float16', 'NPY_FLOAT16'),
                'float32': (float, 'npy_float32', 'NPY_FLOAT32'),
                'float64': (float, 'npy_float64', 'NPY_FLOAT64'),
+                'bool': (bool, 'npy_bool', 'NPY_BOOL'),
                'uint8': (int, 'npy_uint8', 'NPY_UINT8'),
                'int8': (int, 'npy_int8', 'NPY_INT8'),
                'uint16': (int, 'npy_uint16', 'NPY_UINT16'),
@@ -340,15 +341,9 @@ class TensorType(Type):
                return False
            if a.dtype != b.dtype:
                return False
-            if 'int' in str(a.dtype):
+            if str(a.dtype) not in theano.tensor.continuous_dtypes:
                return numpy.all(a == b)
            else:
-                # work around a numpy.allclose bug:
-                # http://projects.scipy.org/numpy/ticket/1672
-                if a.ndim == 0 and numpy.isinf(a):
-                    a = a.reshape(1)
-                    b = b.reshape(1)
                cmp = theano.tensor.basic._allclose(a, b, rtol=rtol, atol=atol)
                if cmp:
                    # Numpy claims they are close, this is good enough for us.

--- a/theano/tensor/var.py
+++ b/theano/tensor/var.py
@@ -470,19 +470,17 @@ class _tensor_py_operators(object):
        def check_bool(args_el):
            try:
-                if isinstance(args_el, (numpy.bool_, bool)) or \
+                if (isinstance(args_el, (numpy.bool_, bool)) or
-                   args_el.dtype == 'int8' or args_el.dtype == 'uint8':
+                        args_el.dtype == 'bool'):
-                    raise TypeError(('TensorType does not support boolean '
+                    raise TypeError('TensorType does not support boolean '
-                                     'mask for indexing such as tensor[x==0]. '
+                                    'mask for indexing such as tensor[x==0]. '
-                                     'Instead you can use non_zeros() such as '
+                                    'Instead you can use non_zeros() such as '
-                                     'tensor[(x == 0).nonzeros()]. '
+                                    'tensor[(x == 0).nonzeros()]. ')
-                                     'If you are indexing on purpose with an '
-                                     'int8, please cast it to int16.'))
            except AttributeError:
                pass
-            if not isinstance(args_el, theano.tensor.Variable) and \
+            if (not isinstance(args_el, theano.tensor.Variable) and
-               isinstance(args_el, collections.Iterable):
+                    isinstance(args_el, collections.Iterable)):
                for el in args_el:
                    check_bool(el)

--- a/theano/tests/test_flake8.py
+++ b/theano/tests/test_flake8.py
@@ -56,7 +56,6 @@ whitelist_flake8 = [
    "tensor/tests/test_opt.py",
    "tensor/tests/test_basic.py",
    "tensor/tests/test_blas.py",
-    "tensor/tests/test_elemwise.py",
    "tensor/tests/test_merge.py",
    "tensor/tests/test_gc.py",
    "tensor/tests/test_complex.py",