improved indentation, added exception when not all inputs have the same ndims,…

improved indentation, added an exception when not all inputs have the same ndims, tried to improve some memory / reference counting issues, removed initialisation of variable-sized arrays, which causes problems under Windows
上级 043bc3e8
...@@ -2942,6 +2942,10 @@ class GpuJoin(tensor.Join, GpuOp): ...@@ -2942,6 +2942,10 @@ class GpuJoin(tensor.Join, GpuOp):
out[0] = rval out[0] = rval
def c_code(self, node, name, inputs, out_, sub): def c_code(self, node, name, inputs, out_, sub):
nd = node.inputs[1].ndim
if not all(i.ndim == nd for i in node.inputs[2:]):
# all inputs ndarray need to have the same number of dimensions
raise NotImplementedError()
axis = inputs[0] axis = inputs[0]
n_cndas = len(inputs[1:]) n_cndas = len(inputs[1:])
input_1 = inputs[1] input_1 = inputs[1]
...@@ -2952,9 +2956,10 @@ class GpuJoin(tensor.Join, GpuOp): ...@@ -2952,9 +2956,10 @@ class GpuJoin(tensor.Join, GpuOp):
# getting the shapes of all the involved tensors (input[0]+out) # getting the shapes of all the involved tensors (input[0]+out)
str = """ str = """
int axis = PyInt_AsLong((PyObject*)%(axis)s); int axis = PyInt_AsLong((PyObject*)%(axis)s);
int nd = CudaNdarray_NDIM(%(input_1)s); int nd = %(nd)s;
int shape_%(input_1)s[nd]; int shape_%(input_1)s[%(nd)s];
int shape_out[nd]; int shape_out[%(nd)s];
int width_sum = 0;
for(int i = 0; i<nd; i+=1) for(int i = 0; i<nd; i+=1)
{ {
...@@ -2968,8 +2973,7 @@ class GpuJoin(tensor.Join, GpuOp): ...@@ -2968,8 +2973,7 @@ class GpuJoin(tensor.Join, GpuOp):
                # except for "axis" dimension # except for "axis" dimension
for i, cdna in enumerate(inputs[2:]): for i, cdna in enumerate(inputs[2:]):
str += """ str += """
nd = CudaNdarray_NDIM(%(cdna)s); int shape_%(cdna)s[%(nd)s];
int shape_%(cdna)s[nd];
for(int i = 0; i<nd; i+=1) for(int i = 0; i<nd; i+=1)
{ {
shape_%(cdna)s[i] = CudaNdarray_HOST_DIMS(%(cdna)s)[i]; shape_%(cdna)s[i] = CudaNdarray_HOST_DIMS(%(cdna)s)[i];
...@@ -2981,8 +2985,6 @@ class GpuJoin(tensor.Join, GpuOp): ...@@ -2981,8 +2985,6 @@ class GpuJoin(tensor.Join, GpuOp):
""" % locals() """ % locals()
# computing the new shape for the out tensors # computing the new shape for the out tensors
str += """
int width_sum = 0;\n""" % locals()
for i, cdna in enumerate(inputs[1:]): for i, cdna in enumerate(inputs[1:]):
...@@ -3000,11 +3002,13 @@ class GpuJoin(tensor.Join, GpuOp): ...@@ -3000,11 +3002,13 @@ class GpuJoin(tensor.Join, GpuOp):
step = NULL; step = NULL;
int errorcode; int errorcode;
int sum; int sum;
sum =0; sum = 0;
start = NULL;
PyObject *slice_tuple; PyObject *slice_tuple;
PyObject *full_slice;
PyObject *section_slice; PyObject *section_slice;
PyObject *full_slice;
full_slice = PySlice_New(NULL, NULL, NULL);
""" % locals() """ % locals()
...@@ -3012,9 +3016,9 @@ class GpuJoin(tensor.Join, GpuOp): ...@@ -3012,9 +3016,9 @@ class GpuJoin(tensor.Join, GpuOp):
for i, cdna in enumerate(inputs[1:]): for i, cdna in enumerate(inputs[1:]):
str += """ str += """
sum += shape_%(cdna)s[axis]; sum += shape_%(cdna)s[axis];
Py_XDECREF(stop);
stop = PyInt_FromLong(sum); stop = PyInt_FromLong(sum);
slice_tuple = PyTuple_New(nd); slice_tuple = PyTuple_New(nd);
full_slice = PySlice_New(NULL, NULL, NULL);
section_slice = PySlice_New(start, stop, step); section_slice = PySlice_New(start, stop, step);
for(int i=0; i<nd; i++) for(int i=0; i<nd; i++)
{ {
...@@ -3023,28 +3027,27 @@ class GpuJoin(tensor.Join, GpuOp): ...@@ -3023,28 +3027,27 @@ class GpuJoin(tensor.Join, GpuOp):
Py_INCREF(full_slice); Py_INCREF(full_slice);
PyTuple_SetItem(slice_tuple, i, full_slice); PyTuple_SetItem(slice_tuple, i, full_slice);
} }
else if(i==axis) else
{ {
Py_INCREF(section_slice); Py_INCREF(section_slice);
PyTuple_SetItem(slice_tuple, i, section_slice); PyTuple_SetItem(slice_tuple, i, section_slice);
} }
} }
out_sub = CudaNdarray_Subscript((PyObject*)%(out)s, slice_tuple); out_sub = CudaNdarray_Subscript((PyObject*)%(out)s, slice_tuple);
errorcode = CudaNdarray_CopyFromCudaNdarray((CudaNdarray*)out_sub, %(cdna)s); errorcode = CudaNdarray_CopyFromCudaNdarray((CudaNdarray*)out_sub, %(cdna)s);
if((full_slice == NULL) || (section_slice == NULL) || (out_sub == NULL) || (errorcode != 0)) if((full_slice == NULL) || (section_slice == NULL) || (out_sub == NULL) || (errorcode != 0))
{ {
Py_XDECREF(full_slice); Py_XDECREF(start);
Py_XDECREF(section_slice); Py_XDECREF(stop);
Py_XDECREF(step);
Py_XDECREF(slice_tuple); Py_XDECREF(slice_tuple);
Py_XDECREF(out_sub); Py_XDECREF(out_sub);
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
%(fail)s; %(fail)s;
} }
Py_XDECREF(full_slice);
Py_XDECREF(section_slice);
Py_XDECREF(out_sub); Py_XDECREF(out_sub);
Py_XDECREF(slice_tuple); Py_XDECREF(slice_tuple);
Py_XDECREF(start);
start = stop; start = stop;
""" % locals() """ % locals()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论