提交 56eb0c95 authored 作者: Frederic's avatar Frederic

Enable memory profiling for sparse variable.

上级 0cbdf291
......@@ -106,6 +106,38 @@ default values.
*Default:* ``id(self)``
.. method:: get_shape_info(obj)
Optional. Only needed to profile the memory of this Type of object
Return the information needed to compute the memory size of obj.
The memory size covers only the data, so this excludes the container.
For an ndarray, this is the data buffer, but not the ndarray object and
other data structures such as the shape and strides.
get_shape_info() and get_size() work in tandem for the memory profiler.
get_shape_info() is called during the execution of the function.
So it is better that it is not too slow.
get_size() will be called with the output of this function
when printing the memory profile.
:param obj: The object that this Type represents during execution
:return: a Python object that self.get_size() understands
.. method:: get_size(shape_info)
Number of bytes taken by the object represented by shape_info
Optional. Only needed to profile the memory of this Type of object
:param shape_info: the output of the call to get_shape_info()
:return: the number of bytes taken by the object described in
shape_info.
"""
For each method, the *default* is what ``Type`` defines
for you. So, if you create an instance of ``Type`` or an
instance of a subclass of ``Type``, you
......
......@@ -601,9 +601,7 @@ class ProfileStats(object):
sum_dense = 0
for out in node.outputs:
sh = self.variable_shape[out]
if isinstance(out.type, theano.sparse.SparseType):
v = "Sparse"
elif hasattr(out.type, 'get_size'):
if hasattr(out.type, 'get_size'):
v = out.type.get_size(sh)
sum_dense += v
else:
......@@ -739,10 +737,7 @@ class ProfileStats(object):
code[out] = "v"
shapes = str(fct_shapes[node.fgraph][node])
if any([isinstance(out.type, theano.sparse.SparseType)
for out in node.outputs]):
size = "%10s" % "Sparse"
elif all([hasattr(out.type, 'get_size')
if all([hasattr(out.type, 'get_size')
for out in node.outputs]):
size = "%9dB" % node_outputs_size
else:
......
......@@ -327,7 +327,10 @@ class Stack(VM):
for var, data in self.storage_map.iteritems():
if data[0] is None:
continue
sh = getattr(data[0], 'shape', 'input no shape')
if hasattr(var.type, 'get_shape_info'):
sh = var.type.get_shape_info(data[0])
else:
sh = 'input no shape'
self.variable_shape[var] = sh
st = getattr(data[0], 'strides', 'input no strides')
if getattr(data[0], 'flags', False) and data[0].flags.c_contiguous:
......@@ -383,7 +386,10 @@ class Stack(VM):
thunks[self.node_idx[
current_apply]].outputs):
var = self.nodes[current_idx].outputs[idx]
sh = getattr(o[0], 'shape', 'input no shape')
if hasattr(var.type, 'get_shape_info'):
sh = var.type.get_shape_info(o[0])
else:
sh = 'input no shape'
self.variable_shape[var] = sh
st = getattr(o[0], 'strides',
'input no strides')
......@@ -466,7 +472,11 @@ class Stack(VM):
self.node_idx[current_apply]].outputs):
var = self.nodes[
self.node_idx[current_apply]].outputs[idx]
sh = getattr(o[0], 'shape', 'input no shape')
if hasattr(var.type, 'get_shape_info'):
sh = var.type.get_shape_info(o[0])
else:
sh = 'input no shape'
self.variable_shape[var] = sh
st = getattr(o[0], 'strides', 'input no strides')
if (getattr(o[0], 'flags', False) and
......
......@@ -417,6 +417,9 @@ class CudaNdarrayType(Type):
def c_compile_args(self):
return []
def get_shape_info(self, obj):
    """Return the information needed to compute the memory size of obj.

    For a CudaNdarray only the shape is needed; get_size() derives the
    byte count from it.

    :param obj: the object this Type represents during execution
    :return: a Python object that self.get_size() understands
    """
    return obj.shape
def get_size(self, shape_info):
    """Number of bytes of data for an array whose shape is ``shape_info``.

    :param shape_info: the output of get_shape_info() (an array shape)
    :return: the number of bytes of the underlying data buffer
    """
    itemsize = numpy.dtype(self.dtype).itemsize
    return itemsize * numpy.prod(shape_info, dtype=int)
......
......@@ -147,6 +147,15 @@ class SparseType(gof.Type):
def is_valid_value(self, a):
return scipy.sparse.issparse(a) and (a.format == self.format)
def get_shape_info(self, obj):
    """Return the information the memory profiler needs to size obj.

    :param obj: a sparse matrix this Type represents during execution
    :return: a tuple (shape, data.size, indices.size, indptr.size, nnz)
        that self.get_size() understands
    """
    sp = self.filter(obj)
    return (sp.shape, sp.data.size, sp.indices.size, sp.indptr.size, sp.nnz)
def get_size(self, shape_info):
    """Number of bytes taken by the sparse matrix described by shape_info.

    Counts the data array plus the two int32 index arrays (indices and
    indptr); the container objects themselves are excluded.

    :param shape_info: the output of the call to get_shape_info()
    :return: the number of bytes taken by the object described in shape_info
    """
    data_bytes = shape_info[1] * numpy.dtype(self.dtype).itemsize
    index_bytes = (shape_info[2] + shape_info[3]) * numpy.dtype('int32').itemsize
    return data_bytes + index_bytes
# Register SparseType's C code for ViewOp.
theano.compile.register_view_op_c_code(
SparseType,
......
......@@ -1181,9 +1181,36 @@ class TensorType(Type):
"""
return numpy.zeros(shape, dtype=self.dtype)
def get_shape_info(self, obj):
    """Return the information needed to compute the memory size of obj.

    The memory size covers only the data, so this excludes the
    container.  For an ndarray, this is the data buffer, but not the
    ndarray object and other data structures such as the shape and
    strides.

    get_shape_info() and get_size() work in tandem for the memory
    profiler.  get_shape_info() is called during the execution of the
    function, so it should not be too slow.

    get_size() will be called with the output of this function when
    printing the memory profile.

    :param obj: The object that this Type represents during execution
    :return: a Python object that self.get_size() understands
    """
    return obj.shape
def get_size(self, shape_info):
    """Number of bytes taken by the object represented by shape_info.

    :param shape_info: the output of the call to get_shape_info()
        (an ndarray shape tuple)
    :return: the number of bytes taken by the object described in
        shape_info.
    """
    # Bug fix: the original placed the docstring and a duplicate return
    # AFTER the first return statement, leaving both unreachable and the
    # function undocumented at runtime.  Docstring now comes first and
    # the single return remains.
    return numpy.prod(shape_info,
                      dtype=int) * numpy.dtype(self.dtype).itemsize
theano.compile.ops.expandable_types += (TensorType,)
# Register TensorType C code for ViewOp.
......
......@@ -56,6 +56,9 @@ class RandomStateType(gof.Type):
return False
return True
def get_shape_info(self, obj):
    # A RandomState object has a fixed-size internal state, so there is
    # no per-object shape information to record for the memory profiler.
    return None
def get_size(self, shape_info):
# The size is the data, that have constant size.
state = numpy.random.RandomState().get_state()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论