improved indentation, added exception when not all inputs have the same ndims,…

improved indentation, added exception when not all inputs have the same ndims, tried to improve some memory / reference counting issues, removed initialisation of variable-sized arrays which cause problems under Windows

improved indentation, added exception when not all inputs have the same ndims,…
30e1a261 · Ludwig Schmidt-Hackenberg · 043bc3e8 · 30e1a261
--- a/theano/sandbox/cuda/basic_ops.py
+++ b/theano/sandbox/cuda/basic_ops.py
@@ -2942,6 +2942,10 @@ class GpuJoin(tensor.Join, GpuOp):
        out[0] = rval

    def c_code(self, node, name, inputs, out_, sub):
+        nd = node.inputs[1].ndim
+        if not all(i.ndim == nd for i in node.inputs[2:]):
+            # all inputs ndarray need to have the same number of dimensions
+            raise NotImplementedError()
        axis = inputs[0]
        n_cndas = len(inputs[1:])
        input_1 = inputs[1]
@@ -2952,9 +2956,10 @@ class GpuJoin(tensor.Join, GpuOp):
        # getting the shapes of all the involved tensors (input[0]+out)
        str = """
        int axis = PyInt_AsLong((PyObject*)%(axis)s);
-        int nd = CudaNdarray_NDIM(%(input_1)s);
-        int shape_%(input_1)s[nd];
-        int shape_out[nd];
+        int nd = %(nd)s;
+        int shape_%(input_1)s[%(nd)s];
+        int shape_out[%(nd)s];
+        int width_sum = 0;

        for(int i = 0; i<nd; i+=1)
        {
@@ -2968,8 +2973,7 @@ class GpuJoin(tensor.Join, GpuOp):
        # execept for "axis" dimension
        for i, cdna in enumerate(inputs[2:]):
            str += """
-        nd = CudaNdarray_NDIM(%(cdna)s);
-        int shape_%(cdna)s[nd];
+            int shape_%(cdna)s[%(nd)s];
            for(int i = 0; i<nd; i+=1)
            {
                shape_%(cdna)s[i] = CudaNdarray_HOST_DIMS(%(cdna)s)[i];
@@ -2981,8 +2985,6 @@ class GpuJoin(tensor.Join, GpuOp):
            """ % locals()

        # computing the new shape for the out tensors
-        str += """
-        int width_sum = 0;\n""" % locals()


        for i, cdna in enumerate(inputs[1:]):
@@ -3000,11 +3002,13 @@ class GpuJoin(tensor.Join, GpuOp):
        step = NULL;
        int errorcode;
        int sum;
-        sum =0;
+        sum = 0;
+        start = NULL;

        PyObject *slice_tuple;
-        PyObject *full_slice;
        PyObject *section_slice;
+        PyObject *full_slice;
+        full_slice = PySlice_New(NULL, NULL, NULL);

        """ % locals()

@@ -3012,9 +3016,9 @@ class GpuJoin(tensor.Join, GpuOp):
        for i, cdna in enumerate(inputs[1:]):
            str += """
            sum += shape_%(cdna)s[axis];
+            Py_XDECREF(stop);
            stop = PyInt_FromLong(sum);
            slice_tuple = PyTuple_New(nd);
-        full_slice = PySlice_New(NULL, NULL, NULL);
            section_slice = PySlice_New(start, stop, step);
            for(int i=0; i<nd; i++)
            {
@@ -3023,28 +3027,27 @@ class GpuJoin(tensor.Join, GpuOp):
                    Py_INCREF(full_slice);
                    PyTuple_SetItem(slice_tuple, i, full_slice);
                }
-            else if(i==axis)
+                else
                {
                    Py_INCREF(section_slice);
                    PyTuple_SetItem(slice_tuple, i, section_slice);
                }
            }
-
            out_sub = CudaNdarray_Subscript((PyObject*)%(out)s, slice_tuple);
            errorcode = CudaNdarray_CopyFromCudaNdarray((CudaNdarray*)out_sub, %(cdna)s);
            if((full_slice == NULL) || (section_slice == NULL) || (out_sub == NULL) || (errorcode != 0))
            {
-            Py_XDECREF(full_slice);
-            Py_XDECREF(section_slice);
+                Py_XDECREF(start);
+                Py_XDECREF(stop);
+                Py_XDECREF(step);
                Py_XDECREF(slice_tuple);
                Py_XDECREF(out_sub);
                Py_XDECREF(%(out)s);
                %(fail)s;
            }
-        Py_XDECREF(full_slice);
-        Py_XDECREF(section_slice);
            Py_XDECREF(out_sub);
            Py_XDECREF(slice_tuple);
+            Py_XDECREF(start);
            start = stop;
            """ % locals()