提交 5fe90b3d 作者: Ian Goodfellow

renamed xview to zview since it is a view of z, not x

上级 cf8fd44e
...@@ -2457,7 +2457,7 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp): ...@@ -2457,7 +2457,7 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
This doesn't need to actually set up the view with the This doesn't need to actually set up the view with the
right indexing; we'll do that manually later. right indexing; we'll do that manually later.
""" """
return """CudaNdarray* xview = (CudaNdarray*) return """CudaNdarray* zview = (CudaNdarray*)
CudaNdarray_New(%(view_ndim)s)""" % locals() CudaNdarray_New(%(view_ndim)s)""" % locals()
def get_helper_c_code_args(self): def get_helper_c_code_args(self):
...@@ -2487,41 +2487,41 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp): ...@@ -2487,41 +2487,41 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
def link_view_array(self, x, fail): def link_view_array(self, x, fail):
return """ return """
if (CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(%(x)s), if (CudaNdarray_set_device_data(zview, CudaNdarray_DEV_DATA(%(x)s),
(PyObject*) NULL)) (PyObject*) NULL))
{ {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the" "GpuSubtensor is not able to set the"
" devdata field of the view"); " devdata field of the view");
Py_XDECREF(xview); Py_XDECREF(zview);
%(fail)s; %(fail)s;
} }
cnda_mark_dev_structure_dirty(xview); cnda_mark_dev_structure_dirty(zview);
""" % locals() """ % locals()
def set_view_base(self, x, fail): def set_view_base(self, x, fail):
return """ return """
//Set the base only now //Set the base only now
if(CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(xview), if(CudaNdarray_set_device_data(zview, CudaNdarray_DEV_DATA(zview),
%(x)s)){ %(x)s)){
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set" "GpuSubtensor is not able to set"
" the base of the view array"); " the base of the view array");
Py_XDECREF(xview); Py_XDECREF(zview);
%(fail)s; %(fail)s;
}""" % locals() }""" % locals()
def add_to_xview(self, x, fail): def add_to_zview(self, x, fail):
return """ return """
PyObject * add_result = CudaNdarray_inplace_add((PyObject *) xview, PyObject * add_result = CudaNdarray_inplace_add((PyObject *) zview,
(PyObject *) py_%(x)s); (PyObject *) py_%(x)s);
if (! add_result ) if (! add_result )
{ {
Py_DECREF(xview); Py_DECREF(zview);
%(fail)s; %(fail)s;
} }
else else
......
...@@ -101,33 +101,42 @@ int device_free(void *ptr) ...@@ -101,33 +101,42 @@ int device_free(void *ptr)
// it returns something else I still don't see why we should ignore // it returns something else I still don't see why we should ignore
// it. All we want to do here is reset the flag. // it. All we want to do here is reset the flag.
cudaGetLastError(); cudaGetLastError();
#if COMPUTE_GPU_MEM_USED #if COMPUTE_GPU_MEM_USED
fprintf(stderr, "Error freeing device pointer %p (%s).%d byte already allocated\n", ptr, cudaGetErrorString(err), _allocated_size); fprintf(stderr,
#else "Error freeing device pointer %p (%s).%d byte already allocated\n",
fprintf(stderr, "Error freeing device pointer %p (%s).\n", ptr, cudaGetErrorString(err)); ptr, cudaGetErrorString(err), _allocated_size);
#endif #else
PyErr_Format(PyExc_MemoryError, "error freeing device pointer %p (%s)", ptr, cudaGetErrorString(err)); fprintf(stderr,
"Error freeing device pointer %p (%s).\n",
ptr,
cudaGetErrorString(err));
#endif
PyErr_Format(PyExc_MemoryError,
"error freeing device pointer %p (%s)",
ptr,
cudaGetErrorString(err));
return -1; return -1;
} }
_outstanding_mallocs[0] -= (ptr != NULL); _outstanding_mallocs[0] -= (ptr != NULL);
#if COMPUTE_GPU_MEM_USED #if COMPUTE_GPU_MEM_USED
int i=0; int i=0;
size_t total_freed = 0; size_t total_freed = 0;
for(;i<TABLE_SIZE;i++) for(;i<TABLE_SIZE;i++)
if(_alloc_size_table[i].ptr==ptr){ if(_alloc_size_table[i].ptr==ptr){
_allocated_size -= _alloc_size_table[i].size; _allocated_size -= _alloc_size_table[i].size;
total_freed += _alloc_size_table[i].size; total_freed += _alloc_size_table[i].size;
_alloc_size_table[i].ptr=0; _alloc_size_table[i].ptr=0;
_alloc_size_table[i].size=0; _alloc_size_table[i].size=0;
break; break;
} }
//if(i==TABLE_SIZE) //if(i==TABLE_SIZE)
// printf("Unallocated unknow size!\n"); // printf("Unallocated unknow size!\n");
//fprintf(stderr, "freed %li bytes of device memory (%s). %d already allocated, ptr=%p\n", (long)total_freed, cudaGetErrorString(err),_allocated_size,ptr); //fprintf(stderr, "freed %li bytes of device memory (%s). %d already allocated, ptr=%p\n", (long)total_freed, cudaGetErrorString(err),_allocated_size,ptr);
#endif #endif
return 0; return 0;
} }
static PyObject * static PyObject *
outstanding_mallocs(PyObject* self, PyObject * args) outstanding_mallocs(PyObject* self, PyObject * args)
{ {
......
...@@ -3972,13 +3972,14 @@ class Subtensor(Op): ...@@ -3972,13 +3972,14 @@ class Subtensor(Op):
return { return {
"c_prefix" : "PyArray", "c_prefix" : "PyArray",
"update_flags": ("PyArray_UpdateFlags(xview," "update_flags": ("PyArray_UpdateFlags(%(view_name)s,"
" NPY_ARRAY_C_CONTIGUOUS|" " NPY_ARRAY_C_CONTIGUOUS|"
"NPY_ARRAY_F_CONTIGUOUS);"), "NPY_ARRAY_F_CONTIGUOUS);"),
"set_data" : "PyArray_set_data", "set_data" : "PyArray_set_data",
"set_dim" : "PyArray_set_dim", "set_dim" : "PyArray_set_dim",
"set_stride" : "PyArray_set_stride", "set_stride" : "PyArray_set_stride",
"strides_mul" : 1 } "strides_mul" : 1,
"view_name" : "xview" }
@staticmethod @staticmethod
...@@ -3989,6 +3990,7 @@ class Subtensor(Op): ...@@ -3989,6 +3990,7 @@ class Subtensor(Op):
set_dim=None, set_dim=None,
set_stride=None, set_stride=None,
strides_mul=None, strides_mul=None,
view_name=None
): ):
""" """
The parameters c_prefix, update_flags, set_data, set_dim, The parameters c_prefix, update_flags, set_data, set_dim,
...@@ -4016,6 +4018,11 @@ class Subtensor(Op): ...@@ -4016,6 +4018,11 @@ class Subtensor(Op):
if c_prefix is None: if c_prefix is None:
c_prefix = default_args['c_prefix'] c_prefix = default_args['c_prefix']
if view_name is None:
view_name = default_args['view_name']
#update_flags may depend on view_name
update_flags = update_flags % locals()
# #
# two arrays are created in C code: # two arrays are created in C code:
...@@ -4090,6 +4097,8 @@ class Subtensor(Op): ...@@ -4090,6 +4097,8 @@ class Subtensor(Op):
x, = inputs[:1] x, = inputs[:1]
z, = outputs z, = outputs
xview = view_name
rval = """ rval = """
#define PyArray_set_dim(obj, idx, d) PyArray_DIMS(obj)[idx]=d #define PyArray_set_dim(obj, idx, d) PyArray_DIMS(obj)[idx]=d
#define PyArray_set_stride(obj, idx, d) PyArray_STRIDES(obj)[idx]=d #define PyArray_set_stride(obj, idx, d) PyArray_STRIDES(obj)[idx]=d
...@@ -4105,30 +4114,30 @@ class Subtensor(Op): ...@@ -4105,30 +4114,30 @@ class Subtensor(Op):
int inner_ii = 0; // the current dimension of zview int inner_ii = 0; // the current dimension of zview
int outer_ii = 0; // current dimension of z int outer_ii = 0; // current dimension of z
char* ptr = (char*) %(c_prefix)s_BYTES(xview); char* ptr = (char*) %(c_prefix)s_BYTES(%(xview)s);
if ((%(c_prefix)s_DIMS(xview) == %(c_prefix)s_DIMS(%(x)s)) if ((%(c_prefix)s_DIMS(%(xview)s) == %(c_prefix)s_DIMS(%(x)s))
&& (%(c_prefix)s_DIMS(%(x)s) != NULL)) && (%(c_prefix)s_DIMS(%(x)s) != NULL))
{ {
PyErr_Format(PyExc_ValueError, "x and xview" PyErr_Format(PyExc_ValueError, "x and %(xview)s"
"(with %%d dims) have the same dimensions" "(with %%d dims) have the same dimensions"
" pointers: %%p and %%p", " pointers: %%p and %%p",
%(c_prefix)s_NDIM(%(x)s), %(c_prefix)s_NDIM(%(x)s),
%(c_prefix)s_DIMS(xview), %(c_prefix)s_DIMS(%(xview)s),
%(c_prefix)s_DIMS(%(x)s)); %(c_prefix)s_DIMS(%(x)s));
Py_XDECREF(xview); Py_XDECREF(%(xview)s);
%(fail)s; %(fail)s;
} }
if (%(c_prefix)s_STRIDES(xview) == %(c_prefix)s_STRIDES(%(x)s) if (%(c_prefix)s_STRIDES(%(xview)s) == %(c_prefix)s_STRIDES(%(x)s)
&& (%(c_prefix)s_DIMS(%(x)s) != NULL)) && (%(c_prefix)s_DIMS(%(x)s) != NULL))
{ {
PyErr_Format(PyExc_ValueError, "x and xview" PyErr_Format(PyExc_ValueError, "x and %(xview)s"
"(with %%d dims) have the same strides" "(with %%d dims) have the same strides"
" pointers: %%p and %%p", " pointers: %%p and %%p",
%(c_prefix)s_NDIM(%(x)s), %(c_prefix)s_NDIM(%(x)s),
%(c_prefix)s_STRIDES(xview), %(c_prefix)s_STRIDES(%(xview)s),
%(c_prefix)s_STRIDES(%(x)s)); %(c_prefix)s_STRIDES(%(x)s));
Py_XDECREF(xview); Py_XDECREF(%(xview)s);
%(fail)s; %(fail)s;
} }
...@@ -4150,10 +4159,10 @@ class Subtensor(Op): ...@@ -4150,10 +4159,10 @@ class Subtensor(Op):
// PySlice_GetIndicesEx in python source // PySlice_GetIndicesEx in python source
if (!step) if (!step)
{ {
Py_DECREF(xview); Py_DECREF(%(xview)s);
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"slice step cannot be zero"); "slice step cannot be zero");
Py_XDECREF(xview); Py_XDECREF(%(xview)s);
%(fail)s; %(fail)s;
} }
...@@ -4204,8 +4213,8 @@ class Subtensor(Op): ...@@ -4204,8 +4213,8 @@ class Subtensor(Op):
ptr += %(c_prefix)s_STRIDES(%(x)s)[outer_ii] * start * ptr += %(c_prefix)s_STRIDES(%(x)s)[outer_ii] * start *
%(strides_mul)s; %(strides_mul)s;
%(set_dim)s(xview, inner_ii, slicelength); %(set_dim)s(%(xview)s, inner_ii, slicelength);
%(set_stride)s(xview, inner_ii, %(set_stride)s(%(xview)s, inner_ii,
%(c_prefix)s_STRIDES(%(x)s)[outer_ii] * step); %(c_prefix)s_STRIDES(%(x)s)[outer_ii] * step);
inner_ii += 1; inner_ii += 1;
...@@ -4225,27 +4234,27 @@ class Subtensor(Op): ...@@ -4225,27 +4234,27 @@ class Subtensor(Op):
else else
{ {
PyErr_Format(PyExc_IndexError,"index out of bounds"); PyErr_Format(PyExc_IndexError,"index out of bounds");
Py_XDECREF(xview); Py_XDECREF(%(xview)s);
%(fail)s; %(fail)s;
} }
} }
else else
{ {
PyErr_Format(PyExc_IndexError,"index out of bounds"); PyErr_Format(PyExc_IndexError,"index out of bounds");
Py_XDECREF(xview); Py_XDECREF(%(xview)s);
%(fail)s; %(fail)s;
} }
spec_pos += 1; spec_pos += 1;
} }
} }
%(set_data)s(xview, ptr, (PyObject*)NULL); %(set_data)s(%(xview)s, ptr, (PyObject*)NULL);
assert (inner_ii <= %(c_prefix)s_NDIM(xview)); assert (inner_ii <= %(c_prefix)s_NDIM(%(xview)s));
while (inner_ii < %(c_prefix)s_NDIM(xview)) while (inner_ii < %(c_prefix)s_NDIM(%(xview)s))
{ {
assert (outer_ii < %(c_prefix)s_NDIM(%(x)s)); assert (outer_ii < %(c_prefix)s_NDIM(%(x)s));
%(set_dim)s(xview, inner_ii, %(c_prefix)s_DIMS(%(x)s)[outer_ii]); %(set_dim)s(%(xview)s, inner_ii, %(c_prefix)s_DIMS(%(x)s)[outer_ii]);
%(set_stride)s(xview, inner_ii, %(c_prefix)s_STRIDES(%(x)s)[outer_ii]); %(set_stride)s(%(xview)s, inner_ii, %(c_prefix)s_STRIDES(%(x)s)[outer_ii]);
inner_ii += 1; inner_ii += 1;
outer_ii += 1; outer_ii += 1;
} }
...@@ -4601,7 +4610,7 @@ class IncSubtensor(Op): ...@@ -4601,7 +4610,7 @@ class IncSubtensor(Op):
{ {
if (%(x)s != %(z)s) if (%(x)s != %(z)s)
{ {
Py_xDECREF(%(z)s); Py_XDECREF(%(z)s);
Py_INCREF(%(x)s); Py_INCREF(%(x)s);
%(z)s = %(x)s; %(z)s = %(x)s;
} }
...@@ -4613,28 +4622,25 @@ class IncSubtensor(Op): ...@@ -4613,28 +4622,25 @@ class IncSubtensor(Op):
} }
""" % locals() """ % locals()
# IG: Note: this makes a variable called "xview" alloc_zview = self.make_view_array(z, view_ndim)
# even though it is a view of z.
# I assume this is because IncSubtensor was written
# by copy-pasting Subtensor and in Subtensor you make
# a view of x.
alloc_view_of_z = self.make_view_array(z, view_ndim)
# On GPU, it takes two steps to make a view # On GPU, it takes two steps to make a view
link_view_of_z = self.link_view_array(z, fail); link_zview = self.link_view_array(z, fail);
#Make a first view on the output, as we will write into it. #Make a first view on the output, as we will write into it.
build_view = """ build_view = """
//TODO: give this Op a second output so that this view can be cached //TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure //TODO: alternatively, fix the memory leak on failure
%(alloc_view_of_z)s; %(alloc_zview)s;
if (!xview) if (!zview)
{ {
%(fail)s; %(fail)s;
} }
%(link_view_of_z)s; %(link_zview)s;
""" % locals() """ % locals()
# make xview actually a view of %(z)s # make zview actually a view of %(z)s
get_xview = self.define_set_data() + \ helper_args = self.get_helper_c_code_args()
helper_args['view_name'] = 'zview'
get_zview = self.define_set_data() + \
Subtensor.helper_c_code( Subtensor.helper_c_code(
node=node, node=node,
name=name, name=name,
...@@ -4642,33 +4648,33 @@ class IncSubtensor(Op): ...@@ -4642,33 +4648,33 @@ class IncSubtensor(Op):
outputs=outputs, outputs=outputs,
sub=sub, sub=sub,
idx_list=self.idx_list, idx_list=self.idx_list,
**self.get_helper_c_code_args() ** helper_args
) )
copy_into = self.copy_into("xview", y) copy_into = self.copy_into("zview", y)
add_to_xview = self.add_to_xview(y, fail) add_to_zview = self.add_to_zview(y, fail)
make_modification = """ make_modification = """
if (%(op_is_set)s) if (%(op_is_set)s)
{ {
if (%(copy_into)s) // does broadcasting if (%(copy_into)s) // does broadcasting
{ {
Py_DECREF(xview); Py_DECREF(zview);
%(fail)s; %(fail)s;
} }
} }
else else
{ {
%(add_to_xview)s %(add_to_zview)s
} }
""" % locals() """ % locals()
return (copy_input_if_necessary return (copy_input_if_necessary
+ build_view + build_view
+ "{" + get_xview + "}" + "{" + get_zview + "}"
+ make_modification + make_modification
+ "Py_DECREF(xview);" + "Py_DECREF(zview);"
) )
def do_type_checking(self, node): def do_type_checking(self, node):
...@@ -4719,7 +4725,7 @@ class IncSubtensor(Op): ...@@ -4719,7 +4725,7 @@ class IncSubtensor(Op):
""" """
return """Py_INCREF(PyArray_DESCR(%(x)s)); return """Py_INCREF(PyArray_DESCR(%(x)s));
PyArrayObject * xview = PyArrayObject * zview =
(PyArrayObject*)PyArray_NewFromDescr( (PyArrayObject*)PyArray_NewFromDescr(
&PyArray_Type, &PyArray_Type,
PyArray_DESCR(%(x)s), PyArray_DESCR(%(x)s),
...@@ -4750,34 +4756,34 @@ class IncSubtensor(Op): ...@@ -4750,34 +4756,34 @@ class IncSubtensor(Op):
return "" return ""
def link_view_array(self, x, fail): def link_view_array(self, x, fail):
""" Returns code to complete making xview a view of x""" """ Returns code to complete making zview a view of x"""
# On CPU there is nothing to do, make_view_array already did this # On CPU there is nothing to do, make_view_array already did this
return "" return ""
def set_view_base(self, x, fail): def set_view_base(self, x, fail):
""" Returns code to make xview be a correct view of x, """ Returns code to make zview be a correct view of x,
after helper_c_code is done messing with x""" after helper_c_code is done messing with x"""
# On CPU there is nothing to do # On CPU there is nothing to do
return "" return ""
def add_to_xview(self, x, fail): def add_to_zview(self, x, fail):
""" Return C code to add x to xview. Should DECREF xview if the """ Return C code to add x to zview. Should DECREF zview if the
add fails.""" add fails."""
return """ return """
PyArrayObject * add_rval = (PyArrayObject*)PyNumber_InPlaceAdd( PyArrayObject * add_rval = (PyArrayObject*)PyNumber_InPlaceAdd(
(PyObject*)xview, py_%(x)s); (PyObject*)zview, py_%(x)s);
if (add_rval) if (add_rval)
{ {
assert (PyArray_Check((PyObject*)add_rval)); assert (PyArray_Check((PyObject*)add_rval));
assert (PyArray_DATA(add_rval) == PyArray_DATA(xview)); assert (PyArray_DATA(add_rval) == PyArray_DATA(zview));
Py_DECREF(add_rval); Py_DECREF(add_rval);
} }
else else
{ {
Py_DECREF(xview); Py_DECREF(zview);
%(fail)s; %(fail)s;
}""" % locals() }""" % locals()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论