提交 16025b5f authored 作者: lamblin's avatar lamblin

Merge pull request #423 from nouiz/cache

Cache
.. _NEWS:
Since 0.5rc2
* Fix a memory leak with shared variables (we kept a pointer to the original value)
* The key in our cache now stores the hash of constants instead of the constant values themselves. This is significant for big constants.
* theano-cache list lists key files bigger than 1M
* theano-cache list prints a histogram of the number of keys per compiled module
* theano-cache list prints the number of compiled modules per op class
=============
Release Notes
=============
......
......@@ -28,6 +28,8 @@ elif sys.argv[1] in ('clear'):
(len(items), ', '.join(items)))
elif sys.argv[1] in ('list'):
theano.gof.compiledir.print_compiledir_content()
elif sys.argv[1] in ('cleanup'):
theano.gof.compiledir.cleanup()
elif sys.argv[1] == 'unlock':
theano.gof.compilelock.force_unlock()
print 'Lock successfully removed!'
......@@ -37,5 +39,6 @@ else:
print 'Type "theano-cache clear" to erase the cache'
print 'Type "theano-cache list" to print the cache content'
print 'Type "theano-cache unlock" to unlock the cache directory'
print 'Type "theano-cache cleanup" to delete old key format'
sys.exit(1)
......@@ -81,6 +81,12 @@ Alternative installation on Ubuntu with PPA
Benjamin J. McCann provides `installation documentation <http://www.benmccann.com/dev-blog/installing-cuda-and-theano/>`_ for Ubuntu 11.04 with CUDA 4.0 PPA.
Gentoo
~~~~~~
Brian Vandenberg emailed `installation instructions on Gentoo <https://groups.google.com/d/msg/theano-dev/-8WCMn2FMR0/bJPasoZXaqoJ>`_.
.. _linux_basic:
Basic user install instructions
......
......@@ -817,9 +817,9 @@ class CLinker(link.Linker):
Input Signature
---------------
Each input signature is a tuple with an element for each input to the corresponding
Apply node.
Each element identifies the type of the node input, and the nature of that input in the
Each input signature is a tuple with an element for each input
to the corresponding Apply node. Each element identifies the
type of the node input, and the nature of that input in the
graph.
The nature of a typical variable is encoded by integer pairs ``((a,b),c)``:
......@@ -827,25 +827,31 @@ class CLinker(link.Linker):
``b`` is the index of the variable in the owner's output list.
``c`` is a flag indicating whether the variable is in the no_recycling set.
If a variable is also a graph output, then its position in the outputs list is also
bundled with this tuple (after the b).
If a variable is also a graph output, then its position in the
outputs list is also bundled with this tuple (after the b).
The nature of a Constant instance is defined as its signature, together with
two integers: the topological position of the first Apply using that Constant instance,
and the lowest index into that Apply's inputs that refers to that Constant. (These two
integers are a surrogate for the id() of the Constant. The integers are important
because merge-able constants have the same signature, but require separate containers
in C code.) The membership in no_recycling is also included in the signature.
The nature of a Constant instance is defined as its signature,
together with two integers: the topological position of the
first Apply using that Constant instance, and the lowest index
into that Apply's inputs that refers to that Constant. (These
two integers are a surrogate for the id() of the Constant.
The integers are important because merge-able constants have
the same signature, but require separate containers in C
code.) The membership in no_recycling is also included in the
signature.
Output Signature
----------------
The outputs of a node are entirely determined by the node's Op and the nature of the
inputs, but the set of outputs that may be re-used by the computation (the elements of
The outputs of a node are entirely determined by the node's Op
and the nature of the inputs, but the set of outputs that may
be re-used by the computation (the elements of
self.no_recycling) can affect the code that is generated.
The format of each Op's output signature is simply a list of booleans, indicating
whether each output is in the no_recycling set.
The format of each Op's output signature is simply a list of
booleans, indicating whether each output is in the
no_recycling set.
"""
return self.cmodule_key_(self.env, self.no_recycling,
......@@ -900,6 +906,13 @@ class CLinker(link.Linker):
if isinstance(i, graph.Constant): #orphans
if id(i) not in constant_ids:
isig = (i.signature(), topological_pos, i_idx)
# If the Theano constant provides a strong hash
# (no collisions for transpose, 2, 1, 0, -1, -2,
# two swapped elements...), we put this hash in the signature
# instead of the value. This makes the key file much smaller
# for big constants. Before this, we saw key files of up to 80M.
if hasattr(isig[0], "theano_hash"):
isig = (isig[0].theano_hash(), topological_pos, i_idx)
try:
hash(isig)
except Exception: #generic constants don't have a hashable signature
......
......@@ -3,9 +3,12 @@ import errno
import os
import platform
import re
import shutil
import sys
import textwrap
import numpy
import theano
from theano.configparser import config, AddConfigVar, ConfigParam, StrParam
......@@ -106,9 +109,7 @@ AddConfigVar('compiledir',
allow_override=False))
def print_compiledir_content():
def flatten(a):
def flatten(a):
if isinstance(a, (tuple, list, set)):
l = []
for item in a:
......@@ -117,15 +118,60 @@ def print_compiledir_content():
else:
return [a]
def cleanup():
    """Delete old-format keys from the compiledir.

    An "old" key is one that still embeds a raw ndarray; newer keys
    store a hash of the constant data instead.  If a compiled module is
    left with no keys at all, its whole directory is removed.
    """
    compiledir = theano.config.compiledir
    for directory in os.listdir(compiledir):
        file = None
        try:
            try:
                filename = os.path.join(compiledir, directory, "key.pkl")
                file = open(filename, 'rb')
                #print file
                try:
                    keydata = cPickle.load(file)
                    # Drop every key that still contains an ndarray
                    # constant (the old key format).
                    for key in list(keydata.keys):
                        for obj in flatten(key):
                            if isinstance(obj, numpy.ndarray):
                                keydata.remove_key(key)
                                break
                    # No key refers to this module anymore: delete the
                    # whole module directory.
                    if len(keydata.keys) == 0:
                        shutil.rmtree(os.path.join(compiledir, directory))
                    pass
                except EOFError:
                    # Truncated/corrupt key file: report it but leave
                    # the directory in place for manual inspection.
                    print ("ERROR while reading this key file '%s'."
                           " Delete its directory" % filename)
            except IOError:
                # Entry without a readable key.pkl: skip it.
                pass
        finally:
            if file is not None:
                file.close()
def print_compiledir_content():
max_key_file_size = 1 * 1024 * 1024 # 1M
compiledir = theano.config.compiledir
table = []
more_than_one_ops = 0
zeros_op = 0
big_key_files = []
total_key_sizes = 0
nb_keys = {}
for dir in os.listdir(compiledir):
file = None
try:
try:
file = open(os.path.join(compiledir, dir, "key.pkl"), 'rb')
filename = os.path.join(compiledir, dir, "key.pkl")
file = open(filename, 'rb')
keydata = cPickle.load(file)
ops = list(set([x for x in flatten(keydata.keys)
if isinstance(x, theano.gof.Op)]))
......@@ -137,6 +183,14 @@ def print_compiledir_content():
types = list(set([x for x in flatten(keydata.keys)
if isinstance(x, theano.gof.Type)]))
table.append((dir, ops[0], types))
size = os.path.getsize(filename)
total_key_sizes += size
if size > max_key_file_size:
big_key_files.append((dir, size, ops))
nb_keys.setdefault(len(keydata.keys), 0)
nb_keys[len(keydata.keys)] += 1
except IOError:
pass
finally:
......@@ -159,6 +213,31 @@ def print_compiledir_content():
table_op_class = sorted(table_op_class.iteritems(), key=lambda t: t[1])
for op_class, nb in table_op_class:
print op_class, nb
if big_key_files:
big_key_files = sorted(big_key_files, key=lambda t: str(t[1]))
big_total_size = sum([size for dir, size, ops in big_key_files])
print ("There are directories with key files bigger than %d bytes "
"(they probably contain big tensor constants)" %
max_key_file_size)
print ("They use %d bytes out of %d (total size used by all key files)"
"" % (big_total_size, total_key_sizes))
print
print "Directory with a key file bigger then %d bytes" % max_key_file_size,
print "(probably they there is a big constant inside)"
print "There total are %d bytes on a total size of %d for key files" % (
big_total_size, total_key_sizes)
for dir, size, ops in big_key_files:
print dir, size, ops
nb_keys = sorted(nb_keys.iteritems())
print
print "Number of keys for a compiled module"
print "number of keys/number of modules with that number of keys"
for n_k, n_m in nb_keys:
print n_k, n_m
print ("Skipped %d files that contained more than"
" 1 op (was compiled with the C linker)" % more_than_one_ops)
print ("Skipped %d files that contained 0 op "
......
......@@ -39,26 +39,14 @@ def test_neibs():
def test_neibs_bad_shape():
shape = (2, 3, 10, 10)
images = shared(numpy.arange(numpy.prod(shape)).reshape(shape))
neib_shape = T.as_tensor_variable((3, 2))
try:
f = function([], images2neibs(images, neib_shape),
mode=mode_without_gpu)
neibs = f()
#print neibs
assert False, "An error was expected"
except TypeError:
pass
shape = (2, 3, 10, 10)
images = shared(numpy.arange(numpy.prod(shape)).reshape(shape))
neib_shape = T.as_tensor_variable((2, 3))
for neib_shape in [(3, 2), (2, 3)]:
neib_shape = T.as_tensor_variable(neib_shape)
try:
f = function([], images2neibs(images, neib_shape),
mode=mode_without_gpu)
neibs = f()
#print neibs
f()
assert False, "An error was expected"
except TypeError:
pass
......@@ -67,28 +55,15 @@ def test_neibs_bad_shape():
def test_neibs_bad_shape_warp_centered():
shape = (2, 3, 10, 10)
images = shared(numpy.arange(numpy.prod(shape)).reshape(shape))
neib_shape = T.as_tensor_variable((3, 2))
try:
f = function([], images2neibs(images, neib_shape,
mode="wrap_centered"),
mode=mode_without_gpu)
neibs = f()
#print neibs
assert False, "An error was expected"
except TypeError:
pass
shape = (2, 3, 10, 10)
images = shared(numpy.arange(numpy.prod(shape)).reshape(shape))
neib_shape = T.as_tensor_variable((2, 3))
for neib_shape in [(3, 2), (2, 3)]:
neib_shape = T.as_tensor_variable(neib_shape)
try:
f = function([], images2neibs(images, neib_shape,
mode="wrap_centered"),
mode=mode_without_gpu)
neibs = f()
#print neibs
f()
assert False, "An error was expected"
except TypeError:
pass
......@@ -97,38 +72,24 @@ def test_neibs_bad_shape_warp_centered():
images = shared(numpy.arange(numpy.prod(shape)).reshape(shape))
neib_shape = T.as_tensor_variable((3, 3))
for shape in [(2, 3, 2, 3), (2, 3, 3, 2)]:
try:
f = function([], images2neibs(images, neib_shape,
mode="wrap_centered"),
mode=mode_without_gpu)
neibs = f()
#print neibs
f()
assert False, "An error was expected"
except TypeError:
pass
shape = (2, 3, 3, 2)
images = shared(numpy.arange(numpy.prod(shape)).reshape(shape))
neib_shape = T.as_tensor_variable((3, 3))
try:
f = function([], images2neibs(images, neib_shape,
mode="wrap_centered"),
mode=mode_without_gpu)
neibs = f()
#print neibs
assert False, "An error was expected"
except TypeError, e:
pass
# Test a valid shapes
shape = (2, 3, 3, 3)
images = shared(numpy.arange(numpy.prod(shape)).reshape(shape))
neib_shape = T.as_tensor_variable((3, 3))
f = function([], images2neibs(images, neib_shape, mode="wrap_centered"),
mode=mode_without_gpu)
neibs = f()
#print neibs
f()
def test_neibs_manual():
......
......@@ -16,6 +16,7 @@ import scipy.sparse
from theano import gof, tensor, compile, scalar, config
from theano.gof.python25 import all
from theano.tensor import blas
from theano.sparse.utils import hash_from_sparse
sparse_formats = ['csc', 'csr']
......@@ -278,6 +279,10 @@ class SparseConstantSignature(tuple):
(a, b) = self
return hash(type(self)) ^ hash(a) ^ hash(type(b))
def theano_hash(self):
    """Strong content hash of the constant's sparse data.

    Used in compilation-cache keys instead of the raw value, to keep
    key files small.
    """
    (_, d) = self
    return hash_from_sparse(d)
class SparseConstant(gof.Constant, _sparse_py_operators):
dtype = property(lambda self: self.type.dtype)
......
import numpy
from theano.sparse.utils import hash_from_sparse
from theano.sparse.tests.test_basic import as_sparse_format
def test_hash_from_sparse():
    """hash_from_sparse distinguishes all cases; views hash like copies."""
    observed = []
    mat = numpy.random.rand(5, 5)
    for fmt in ['csc', 'csr']:
        mat = as_sparse_format(mat, fmt)
        cases = [[[-2]], [[-1]], [[0]], [[1]], [[2]],
                 numpy.zeros((1, 5)), numpy.zeros((1, 6)),
                 # Data buffer empty but different shapes
                 # numpy.zeros((1, 0)), numpy.zeros((2, 0)),
                 # Same data buffer and shapes but different strides
                 numpy.arange(25).reshape(5, 5),
                 numpy.arange(25).reshape(5, 5).T,
                 # Same data buffer, shapes and strides
                 # but different dtypes
                 numpy.zeros((5, 5), dtype="uint32"),
                 numpy.zeros((5, 5), dtype="int32"),
                 # Test slice
                 mat, mat[1:], mat[:4], mat[1:3],
                 # Don't test step as they are not supported by sparse
                 #mat[::2], mat[::-1]
                 ]
        for raw in cases:
            observed.append(hash_from_sparse(as_sparse_format(raw, fmt)))

    # Different kinds of views and their copies must hash identically.
    for view in (mat[1:], mat[1:3], mat[:4]):
        assert hash_from_sparse(view) == hash_from_sparse(view.copy())

    # Every case above must have produced a distinct hash.
    assert len(set(observed)) == len(observed)
from theano.gof.cc import hash_from_code
def hash_from_sparse(data):
    """Return a strong (md5-based) hash of a scipy sparse matrix.

    ``hash_from_code`` only hashes a raw data buffer, so the shape,
    dtype and format are mixed in explicitly: otherwise shapes such as
    (1, 0) and (2, 0), or uint32 vs int32 zeros, would collide.
    Python's built-in ``hash`` is weak, so md5 is used throughout; the
    per-part digests are concatenated and hashed once more so the final
    hash stays short.
    """
    parts = [hash_from_code(data.data),
             hash_from_code(data.indices),
             hash_from_code(data.indptr),
             hash_from_code(str(data.shape)),
             hash_from_code(str(data.dtype)),
             hash_from_code(data.format)]
    return hash_from_code("".join(parts))
......@@ -20,6 +20,7 @@ from theano import scalar as scal
from theano.gof.python25 import partial, any, all
from theano import compile, printing
from theano.printing import pprint, min_informative_str
from theano.tensor.utils import hash_from_ndarray
# We use these exceptions as well.
from theano.scalar import ComplexError, IntegerDivisionError
......@@ -1505,6 +1506,10 @@ class TensorConstantSignature(tuple):
t, d = self
return hashtype(self) ^ hash(t) ^ hash(d.shape) ^ hash(self.sum)
def theano_hash(self):
    """Strong content hash of the constant's ndarray data.

    Used in compilation-cache keys instead of the raw value, to keep
    key files small.
    """
    _, d = self
    return hash_from_ndarray(d)
def _get_sum(self):
"""Compute sum of non NaN / Inf values in the array."""
try:
......
......@@ -254,11 +254,6 @@ class DimShuffle(Op):
shape_statements += [('dimensions['+str(i)+'] = %(basename)s->dimensions['+str(o)+']')]
else:
shape_statements += [('dimensions['+str(i)+'] = 1')]
#backport
#shape_statements += [('dimensions['+str(i)+'] = %(basename)s->dimensions['+str(o)+']')
# if o != 'x' else
# ('dimensions['+str(i)+'] = 1')
# for i, o in enumerate(self.new_order)]
strides_statements = ['npy_intp strides[%i]'%nd_out]
......@@ -269,11 +264,6 @@ class DimShuffle(Op):
strides_statements += [('strides['+str(i)+'] = %(basename)s->strides['+str(o)+']')]
else:
strides_statements += [('strides['+str(i)+'] = 0')]
#backport
#strides_statements += [('strides['+str(i)+'] = %(basename)s->strides['+str(o)+']')
# if o != 'x' else
# ('strides['+str(i)+'] = 0')
# for i, o in enumerate(self.new_order)]
# set the strides of the broadcasted dimensions
# this algorithm is from numpy: PyArray_Newshape() in cvs/numpy/numpy/core/src/multiarraymodule.c
......
import numpy
from theano.tensor.utils import hash_from_ndarray
def test_hash_from_ndarray():
    """hash_from_ndarray distinguishes all cases; views hash like copies."""
    observed = []
    base = numpy.random.rand(5, 5)
    cases = [-2, -1, 0, 1, 2, numpy.zeros((1, 5)), numpy.zeros((1, 6)),
             # Empty data buffers but different shapes.
             numpy.zeros((1, 0)), numpy.zeros((2, 0)),
             # Same data buffer and shapes but different strides.
             numpy.arange(25).reshape(5, 5),
             numpy.arange(25).reshape(5, 5).T,
             # Same data buffer, shapes and strides but different dtypes.
             numpy.zeros((5, 5), dtype="uint32"),
             numpy.zeros((5, 5), dtype="int32"),
             # Slices/views.
             base, base[1:], base[:4], base[1:3], base[::2], base[::-1]]
    for raw in cases:
        observed.append(hash_from_ndarray(numpy.asarray(raw)))

    # Every case above must produce a distinct hash.
    assert len(set(observed)) == len(observed)

    # Different kinds of views and their copies must hash identically.
    for view in (base[1:], base[1:3], base[:4], base[::2], base[::-1]):
        assert hash_from_ndarray(view) == hash_from_ndarray(view.copy())
import numpy
from theano.gof.cc import hash_from_code
def hash_from_ndarray(data):
    """Return a strong (md5-based) hash of an ndarray's content.

    ``hash_from_code`` only hashes the raw data buffer, so the shape,
    strides and dtype are mixed in explicitly: otherwise shapes such as
    (1, 0) and (2, 0) would collide, as would an array and its in-place
    transpose, or uint32 vs int32 zeros.  Python's built-in ``hash`` is
    weak, so md5 is used; the per-part digests are concatenated and
    hashed once more so the final hash stays short.
    """
    # hash_from_code needs a contiguous buffer to read.
    if not data.flags["C_CONTIGUOUS"] and not data.flags["F_CONTIGUOUS"]:
        data = numpy.ascontiguousarray(data)
    parts = [hash_from_code(data),
             hash_from_code(str(data.shape)),
             hash_from_code(str(data.strides)),
             hash_from_code(str(data.dtype))]
    return hash_from_code("".join(parts))
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论