提交 9b7d22e7 authored 作者: vdumoulin

Merge pull request #2 from nouiz/vdumoulin-new_backend

Fix the final stuff.
......@@ -122,6 +122,12 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
itemsize_am = numpy.dtype(node.outputs[2].dtype).itemsize
x, b, y_idx = inp
nll, sm, am = out
dtype_x = node.inputs[0].dtype
dtype_b = node.inputs[1].dtype
dtype_y_idx = node.inputs[2].dtype
dtype_nll = node.outputs[0].dtype
dtype_sm = node.outputs[1].dtype
dtype_am = node.outputs[2].dtype
classname = self.__class__.__name__
fail = sub['fail']
sio = StringIO()
......@@ -214,24 +220,24 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
k_xent_sm_1hot_bias_%(nodename)s<<<n_blocks, n_threads, n_shared_bytes>>>(
PyGpuArray_DIMS(%(x)s)[0],
PyGpuArray_DIMS(%(x)s)[1],
(dtype_%(x)s*)(((char *)cuda_get_ptr(%(x)s->ga.data)) +
(npy_%(dtype_x)s*)(((char *)cuda_get_ptr(%(x)s->ga.data)) +
%(x)s->ga.offset),
PyGpuArray_STRIDES(%(x)s)[0] / %(itemsize_x)s,
PyGpuArray_STRIDES(%(x)s)[1] / %(itemsize_x)s,
(dtype_%(b)s*)(((char *)cuda_get_ptr(%(b)s->ga.data)) +
(npy_%(dtype_b)s*)(((char *)cuda_get_ptr(%(b)s->ga.data)) +
%(b)s->ga.offset),
PyGpuArray_STRIDES(%(b)s)[0] / %(itemsize_b)s,
(dtype_%(y_idx)s*)(((char *)cuda_get_ptr(%(y_idx)s->ga.data)) +
(npy_%(dtype_y_idx)s*)(((char *)cuda_get_ptr(%(y_idx)s->ga.data)) +
%(y_idx)s->ga.offset),
PyGpuArray_STRIDES(%(y_idx)s)[0] / %(itemsize_y_idx)s,
(dtype_%(nll)s*)(((char *)cuda_get_ptr(%(nll)s->ga.data)) +
(npy_%(dtype_nll)s*)(((char *)cuda_get_ptr(%(nll)s->ga.data)) +
%(nll)s->ga.offset),
PyGpuArray_STRIDES(%(nll)s)[0] / %(itemsize_nll)s,
(dtype_%(sm)s*)(((char *)cuda_get_ptr(%(sm)s->ga.data)) +
(npy_%(dtype_sm)s*)(((char *)cuda_get_ptr(%(sm)s->ga.data)) +
%(sm)s->ga.offset),
PyGpuArray_STRIDES(%(sm)s)[0] / %(itemsize_sm)s,
PyGpuArray_STRIDES(%(sm)s)[1] / %(itemsize_sm)s,
(dtype_%(am)s*)(((char *)cuda_get_ptr(%(am)s->ga.data)) +
(npy_%(dtype_am)s*)(((char *)cuda_get_ptr(%(am)s->ga.data)) +
%(am)s->ga.offset),
PyGpuArray_STRIDES(%(am)s)[0] / %(itemsize_am)s);
cudaError_t err = cudaGetLastError();
......@@ -302,6 +308,10 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
itemsize_sm = numpy.dtype(node.inputs[1].dtype).itemsize
itemsize_y_idx = numpy.dtype(node.inputs[2].dtype).itemsize
itemsize_dx = numpy.dtype(node.outputs[0].dtype).itemsize
dtype_dnll = node.inputs[0].dtype
dtype_sm = node.inputs[1].dtype
dtype_y_idx = node.inputs[2].dtype
dtype_dx = node.outputs[0].dtype
dnll, sm, y_idx = inp
dx, = out
fail = sub['fail']
......@@ -353,20 +363,20 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
PyGpuArray_DIMS(%(dx)s)[0],
PyGpuArray_DIMS(%(dx)s)[1],
(dtype_%(dnll)s*)(((char *)cuda_get_ptr(%(dnll)s->ga.data)) +
(npy_%(dtype_dnll)s*)(((char *)cuda_get_ptr(%(dnll)s->ga.data)) +
%(dnll)s->ga.offset),
PyGpuArray_STRIDES(%(dnll)s)[0] / %(itemsize_dnll)s,
(dtype_%(sm)s*)(((char *)cuda_get_ptr(%(sm)s->ga.data)) +
(npy_%(dtype_sm)s*)(((char *)cuda_get_ptr(%(sm)s->ga.data)) +
%(sm)s->ga.offset),
PyGpuArray_STRIDES(%(sm)s)[0] / %(itemsize_sm)s,
PyGpuArray_STRIDES(%(sm)s)[1] / %(itemsize_sm)s,
(dtype_%(y_idx)s*)(((char *)cuda_get_ptr(%(y_idx)s->ga.data)) +
(npy_%(dtype_y_idx)s*)(((char *)cuda_get_ptr(%(y_idx)s->ga.data)) +
%(y_idx)s->ga.offset),
PyGpuArray_STRIDES(%(y_idx)s)[0] / %(itemsize_y_idx)s,
(dtype_%(dx)s*)(((char *)cuda_get_ptr(%(dx)s->ga.data)) +
(npy_%(dtype_dx)s*)(((char *)cuda_get_ptr(%(dx)s->ga.data)) +
%(dx)s->ga.offset),
PyGpuArray_STRIDES(%(dx)s)[0] / %(itemsize_dx)s,
PyGpuArray_STRIDES(%(dx)s)[1] / %(itemsize_dx)s
......
......@@ -11,16 +11,9 @@ from theano.sandbox import gpuarray
if theano.sandbox.gpuarray.pygpu is None:
raise SkipTest("pygpu not installed")
import theano.sandbox.cuda as cuda_ndarray
if cuda_ndarray.cuda_available and not theano.sandbox.gpuarray.pygpu_activated:
if not cuda_ndarray.use.device_number:
#We should not enable all the use like the flag device=gpu,
#as many tests don't work in that setup.
cuda_ndarray.use('gpu',
default_to_move_computation_to_gpu=False,
move_shared_float32_to_gpu=False,
enable_cuda=False)
gpuarray.init_dev('cuda')
# We let that import do the init of the back-end if needed.
from theano.sandbox.gpuarray.tests.test_basic_ops import (mode_with_gpu,
mode_without_gpu)
if not gpuarray.pygpu_activated:
raise SkipTest("pygpu disabled")
......@@ -29,13 +22,6 @@ from theano.sandbox.gpuarray.nnet import (
GpuCrossentropySoftmaxArgmax1HotWithBias,
GpuCrossentropySoftmax1HotWithBiasDx)
if theano.config.mode == 'FAST_COMPILE':
mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpuarray').excluding('gpu')
mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpuarray')
else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpuarray').excluding('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpuarray')
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
"""
......
......@@ -138,12 +138,8 @@ class GpuArrayType(Type):
return numpy.dtype(self.dtype).itemsize
def c_declare(self, name, sub):
dtype = theano.tensor.TensorType(
dtype=self.dtype,
broadcastable=self.broadcastable).dtype_specs()[1]
return """
PyGpuArrayObject *%(name)s;
typedef %(dtype)s dtype_%(name)s;
""" % locals()
def c_init(self, name, sub):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论