提交 a5c029dc authored 作者: Pascal Lamblin 提交者: GitHub

Merge pull request #5842 from nouiz/unsigned_char

Unsigned char: fix: theano/scalar/basic.py
......@@ -2274,8 +2274,8 @@ class GCC_compiler(Compiler):
# improved loading times on most platforms (win32 is
# different, as usual).
cmd.append('-fvisibility=hidden')
cmd.extend(['-o', lib_filename])
cmd.append(cppfilename)
cmd.extend(['-o', '%s%s%s' % (path_wrapper, lib_filename, path_wrapper)])
cmd.append('%s%s%s' % (path_wrapper, cppfilename, path_wrapper))
cmd.extend(['-l%s' % l for l in libs])
# print >> sys.stderr, 'COMPILING W CMD', cmd
_logger.debug('Running cmd: %s', ' '.join(cmd))
......
......@@ -319,7 +319,9 @@ class GpuKernelBase(object):
def _generate_kernel_code(self, k):
code = '\\n'.join(l for l in k.code.split('\n'))
code = code.replace('"', '\\"')
return ("""static const char *%(cname)s = "%(code)s";""" %
return ("""static const char *%(cname)s_unsigned = "%(code)s";
static const char *%(cname)s = (char *)%(cname)s_unsigned;
""" %
dict(cname=k.codevar, code=code))
def _generate_kernel_vars(self, k):
......
......@@ -13,7 +13,7 @@ from .config import mode_with_gpu, mode_without_gpu, test_ctx_name
from .test_basic_ops import rand_gpuarray
from ..elemwise import (GpuElemwise, GpuDimShuffle,
GpuCAReduceCuda, GpuCAReduceCPY, GpuErfinv, GpuErfcinv)
from ..type import GpuArrayType, get_context
from ..type import GpuArrayType, get_context, gpuarray_shared_constructor
from pygpu import ndgpuarray as gpuarray
......@@ -40,16 +40,22 @@ def test_elemwise_pow():
for dtype_exp in dtypes:
# Compile a gpu function with the specified dtypes
base = theano.tensor.vector(dtype=dtype_base)
exp = theano.tensor.vector(dtype=dtype_exp)
output = base ** exp
f = theano.function([base, exp], output)
base_val = np.random.randint(0, 5, size=10).astype(dtype_base)
exp_val = np.random.randint(0, 3, size=10).astype(dtype_exp)
base = theano.tensor.vector(dtype=dtype_base)
exp = gpuarray_shared_constructor(exp_val)
assert exp.dtype == dtype_exp
output = base ** exp
f = theano.function([base], output, mode=mode_with_gpu)
theano.printing.debugprint(f)
# We don't transfer to the GPU when the output dtype is int*
n = len([n for n in f.maker.fgraph.apply_nodes
if isinstance(n.op, GpuElemwise)])
assert n == (output.dtype in tensor.float_dtypes)
# Call the function to make sure the output is valid
out = f(base_val, exp_val)
out = f(base_val)
expected_out = base_val ** exp_val
assert_allclose(out, expected_out)
......
......@@ -398,9 +398,6 @@ class GpuArrayType(Type):
return pygpu.gpuarray.zeros(shape, dtype=self.typecode,
context=self.context)
def make_variable(self, name=None):
    """Return a new Variable of this Type.

    Parameters
    ----------
    name : str, optional
        Optional name for the new variable (useful for debugging /
        graph printing).

    Returns
    -------
    The result of ``self.Variable(self, name=name)`` — presumably the
    Variable class associated with this Type (the enclosing class header
    is not visible here; confirm against the full class definition).
    """
    return self.Variable(self, name=name)
def __eq__(self, other):
return (type(self) == type(other) and
self.typecode == other.typecode and
......
......@@ -4025,7 +4025,6 @@ class Composite(ScalarOp):
self.prepare_node_called = set()
self.init_fgraph()
self.init_py_impls()
assert self._c_code
class Compositef32(object):
......
......@@ -997,7 +997,7 @@ def local_1msigmoid(node):
if sub_r.owner and sub_r.owner.op == sigmoid:
try:
val_l = opt.get_scalar_constant_value(sub_l)
except Exception:
except tensor.NotScalarConstantError:
return
if np.allclose(np.sum(val_l), 1):
out = sigmoid(-sub_r.owner.inputs[0])
......
Markdown 格式
0%
您将添加 0 人到此讨论,请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论