提交 56eb0c95 authored 作者: Frederic's avatar Frederic

Enable memory profiling for sparse variable.

上级 0cbdf291
...@@ -106,6 +106,38 @@ default values. ...@@ -106,6 +106,38 @@ default values.
*Default:* ``id(self)`` *Default:* ``id(self)``
.. method:: get_shape_info(obj)
Optional. Only needed to profile the memory of this Type of object
Return the information needed to compute the memory size of obj.
The memory size covers only the data, so this excludes the container.
For an ndarray, this is the data, but not the ndarray object and
other data structures such as shape and strides.
get_shape_info() and get_size() work in tandem for the memory profiler.
get_shape_info() is called during the execution of the function.
So it is better that it is not too slow.
get_size() will be called with the output of this function
when printing the memory profile.
:param obj: The object that this Type represents during execution
:return: Python object that self.get_size() understands
.. method:: get_size(shape_info)
Number of bytes taken by the object represented by shape_info
Optional. Only needed to profile the memory of this Type of object
:param shape_info: the output of the call to get_shape_info()
:return: the number of bytes taken by the object described in
shape_info.
"""
For each method, the *default* is what ``Type`` defines For each method, the *default* is what ``Type`` defines
for you. So, if you create an instance of ``Type`` or an for you. So, if you create an instance of ``Type`` or an
instance of a subclass of ``Type``, you instance of a subclass of ``Type``, you
......
...@@ -601,9 +601,7 @@ class ProfileStats(object): ...@@ -601,9 +601,7 @@ class ProfileStats(object):
sum_dense = 0 sum_dense = 0
for out in node.outputs: for out in node.outputs:
sh = self.variable_shape[out] sh = self.variable_shape[out]
if isinstance(out.type, theano.sparse.SparseType): if hasattr(out.type, 'get_size'):
v = "Sparse"
elif hasattr(out.type, 'get_size'):
v = out.type.get_size(sh) v = out.type.get_size(sh)
sum_dense += v sum_dense += v
else: else:
...@@ -739,10 +737,7 @@ class ProfileStats(object): ...@@ -739,10 +737,7 @@ class ProfileStats(object):
code[out] = "v" code[out] = "v"
shapes = str(fct_shapes[node.fgraph][node]) shapes = str(fct_shapes[node.fgraph][node])
if any([isinstance(out.type, theano.sparse.SparseType) if all([hasattr(out.type, 'get_size')
for out in node.outputs]):
size = "%10s" % "Sparse"
elif all([hasattr(out.type, 'get_size')
for out in node.outputs]): for out in node.outputs]):
size = "%9dB" % node_outputs_size size = "%9dB" % node_outputs_size
else: else:
......
...@@ -327,7 +327,10 @@ class Stack(VM): ...@@ -327,7 +327,10 @@ class Stack(VM):
for var, data in self.storage_map.iteritems(): for var, data in self.storage_map.iteritems():
if data[0] is None: if data[0] is None:
continue continue
sh = getattr(data[0], 'shape', 'input no shape') if hasattr(var.type, 'get_shape_info'):
sh = var.type.get_shape_info(data[0])
else:
sh = 'input no shape'
self.variable_shape[var] = sh self.variable_shape[var] = sh
st = getattr(data[0], 'strides', 'input no strides') st = getattr(data[0], 'strides', 'input no strides')
if getattr(data[0], 'flags', False) and data[0].flags.c_contiguous: if getattr(data[0], 'flags', False) and data[0].flags.c_contiguous:
...@@ -383,7 +386,10 @@ class Stack(VM): ...@@ -383,7 +386,10 @@ class Stack(VM):
thunks[self.node_idx[ thunks[self.node_idx[
current_apply]].outputs): current_apply]].outputs):
var = self.nodes[current_idx].outputs[idx] var = self.nodes[current_idx].outputs[idx]
sh = getattr(o[0], 'shape', 'input no shape') if hasattr(var.type, 'get_shape_info'):
sh = var.type.get_shape_info(o[0])
else:
sh = 'input no shape'
self.variable_shape[var] = sh self.variable_shape[var] = sh
st = getattr(o[0], 'strides', st = getattr(o[0], 'strides',
'input no strides') 'input no strides')
...@@ -466,7 +472,11 @@ class Stack(VM): ...@@ -466,7 +472,11 @@ class Stack(VM):
self.node_idx[current_apply]].outputs): self.node_idx[current_apply]].outputs):
var = self.nodes[ var = self.nodes[
self.node_idx[current_apply]].outputs[idx] self.node_idx[current_apply]].outputs[idx]
sh = getattr(o[0], 'shape', 'input no shape')
if hasattr(var.type, 'get_shape_info'):
sh = var.type.get_shape_info(o[0])
else:
sh = 'input no shape'
self.variable_shape[var] = sh self.variable_shape[var] = sh
st = getattr(o[0], 'strides', 'input no strides') st = getattr(o[0], 'strides', 'input no strides')
if (getattr(o[0], 'flags', False) and if (getattr(o[0], 'flags', False) and
......
...@@ -417,6 +417,9 @@ class CudaNdarrayType(Type): ...@@ -417,6 +417,9 @@ class CudaNdarrayType(Type):
def c_compile_args(self): def c_compile_args(self):
return [] return []
def get_shape_info(self, obj):
    """Return the data ``get_size`` needs to compute obj's memory footprint.

    For a CudaNdarray the shape tuple is sufficient. This runs during
    function execution, so it is kept as cheap as possible.

    :param obj: the runtime value this Type represents.
    :return: a Python object that ``self.get_size`` understands.
    """
    shape_info = obj.shape
    return shape_info
def get_size(self, shape_info): def get_size(self, shape_info):
return numpy.prod(shape_info, dtype=int) * numpy.dtype(self.dtype).itemsize return numpy.prod(shape_info, dtype=int) * numpy.dtype(self.dtype).itemsize
......
...@@ -147,6 +147,15 @@ class SparseType(gof.Type): ...@@ -147,6 +147,15 @@ class SparseType(gof.Type):
def is_valid_value(self, a): def is_valid_value(self, a):
return scipy.sparse.issparse(a) and (a.format == self.format) return scipy.sparse.issparse(a) and (a.format == self.format)
def get_shape_info(self, obj):
    """Return the sizing information the memory profiler needs for obj.

    The object is first run through ``self.filter`` so the buffers
    reflect this Type's canonical sparse format.

    :param obj: the runtime sparse matrix this Type represents.
    :return: a tuple ``(shape, data.size, indices.size, indptr.size, nnz)``
        that ``self.get_size`` understands.
    """
    sp = self.filter(obj)
    return (sp.shape, sp.data.size, sp.indices.size, sp.indptr.size, sp.nnz)
def get_size(self, shape_info):
    """Number of bytes taken by the sparse matrix described by shape_info.

    Only the data, indices and indptr buffers are counted; the Python
    containers themselves are excluded.

    :param shape_info: the output of ``get_shape_info``, i.e.
        ``(shape, data.size, indices.size, indptr.size, nnz)``.
    :return: the number of bytes taken by the object described in
        shape_info.
    """
    data_bytes = shape_info[1] * numpy.dtype(self.dtype).itemsize
    # NOTE(review): index arrays are assumed to be int32 here; scipy may
    # use int64 indices for very large matrices -- confirm.
    index_bytes = (shape_info[2] + shape_info[3]) * numpy.dtype('int32').itemsize
    return data_bytes + index_bytes
# Register SparseType's C code for ViewOp. # Register SparseType's C code for ViewOp.
theano.compile.register_view_op_c_code( theano.compile.register_view_op_c_code(
SparseType, SparseType,
......
...@@ -1181,9 +1181,36 @@ class TensorType(Type): ...@@ -1181,9 +1181,36 @@ class TensorType(Type):
""" """
return numpy.zeros(shape, dtype=self.dtype) return numpy.zeros(shape, dtype=self.dtype)
def get_shape_info(self, obj):
    """Return the information needed to compute the memory size of obj.

    The memory size covers only the data, so this excludes the
    container: for an ndarray this means the data buffer, but not the
    ndarray object and other data structures such as shape and strides.

    ``get_shape_info`` and ``get_size`` work in tandem for the memory
    profiler: this method runs during function execution, so it should
    stay fast; ``get_size`` is called with this method's output only
    when the memory profile is printed.

    :param obj: the object this Type represents during execution.
    :return: a Python object that ``self.get_size`` understands.
    """
    shape_info = obj.shape
    return shape_info
def get_size(self, shape_info):
    """Number of bytes taken by the object represented by shape_info.

    Counts only the array data (element count times element size), not
    the ndarray container.

    :param shape_info: the output of the call to ``get_shape_info``
        (a shape tuple).
    :return: the number of bytes taken by the object described in
        shape_info.
    """
    n_elem = numpy.prod(shape_info, dtype=int)
    return n_elem * numpy.dtype(self.dtype).itemsize
theano.compile.ops.expandable_types += (TensorType,) theano.compile.ops.expandable_types += (TensorType,)
# Register TensorType C code for ViewOp. # Register TensorType C code for ViewOp.
......
...@@ -56,6 +56,9 @@ class RandomStateType(gof.Type): ...@@ -56,6 +56,9 @@ class RandomStateType(gof.Type):
return False return False
return True return True
def get_shape_info(self, obj):
    """Return sizing info for the memory profiler.

    A RandomState object has a constant size, so there is nothing to
    record here; ``get_size`` presumably derives the size from a fresh
    RandomState's state instead of this value.

    :param obj: the RandomState this Type represents (unused).
    :return: None.
    """
    return None
def get_size(self, shape_info): def get_size(self, shape_info):
# The size is the data, that have constant size. # The size is the data, that have constant size.
state = numpy.random.RandomState().get_state() state = numpy.random.RandomState().get_state()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论