Commit 16025b5f authored by lamblin

Merge pull request #423 from nouiz/cache

Cache
.. _NEWS:
Since 0.5rc2
* Fixed a memory leak with shared variables (we kept a pointer to the original value).
* The keys in our cache now store the hash of a constant instead of the constant value itself. This makes a significant difference for big constants.
* theano-cache list lists key files bigger than 1M.
* theano-cache list prints a histogram of the number of keys per compiled module.
* theano-cache list prints the number of compiled modules per op class.
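The second item above, replacing an embedded constant value by its hash in the cache key, is the core of this change. A minimal stdlib-only sketch of the idea (not Theano's actual code; the tuple layout here is invented) shows why it shrinks key files:

```python
# Sketch: keying a compilation cache on the md5 of a big constant's
# bytes instead of on the pickled value keeps the key file small.
import hashlib
import pickle

big_constant = bytes(1000000)  # stand-in for a large tensor constant

# Old scheme: the constant's value is embedded in the pickled key.
key_with_value = pickle.dumps(("op", "dtype", big_constant))

# New scheme: only a 32-character md5 digest goes into the key.
key_with_hash = pickle.dumps(("op", "dtype",
                              hashlib.md5(big_constant).hexdigest()))

print(len(key_with_value), len(key_with_hash))
```

The trade-off is that two constants with colliding hashes would share a key, which is why the commit only uses this path for types that provide a strong hash.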
=============
Release Notes
=============
......
...@@ -28,6 +28,8 @@ elif sys.argv[1] in ('clear'):
          (len(items), ', '.join(items)))
elif sys.argv[1] in ('list'):
    theano.gof.compiledir.print_compiledir_content()
elif sys.argv[1] in ('cleanup'):
    theano.gof.compiledir.cleanup()
elif sys.argv[1] == 'unlock':
    theano.gof.compilelock.force_unlock()
    print 'Lock successfully removed!'
...@@ -37,5 +39,6 @@ else:
    print 'Type "theano-cache clear" to erase the cache'
    print 'Type "theano-cache list" to print the cache content'
    print 'Type "theano-cache unlock" to unlock the cache directory'
    print 'Type "theano-cache cleanup" to delete keys in the old format'
    sys.exit(1)
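The script above routes each subcommand string to the matching compiledir helper. A small sketch of the same dispatch pattern (the handlers here are stubs, not Theano's real functions) makes the structure explicit:

```python
# Sketch of a theano-cache style subcommand dispatcher; the command
# names mirror the script above, the handler bodies are invented stubs.
def dispatch(argv):
    commands = {
        'clear': lambda: 'cache cleared',
        'list': lambda: 'cache content',
        'cleanup': lambda: 'old keys deleted',
        'unlock': lambda: 'lock removed',
    }
    if len(argv) < 2 or argv[1] not in commands:
        return 'usage: theano-cache [%s]' % '|'.join(sorted(commands))
    return commands[argv[1]]()

print(dispatch(['theano-cache', 'cleanup']))
```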
...@@ -81,6 +81,12 @@ Alternative installation on Ubuntu with PPA
Benjamin J. McCann provides `installation documentation <http://www.benmccann.com/dev-blog/installing-cuda-and-theano/>`_ for Ubuntu 11.04 with CUDA 4.0 PPA.

Gentoo
~~~~~~

Brian Vandenberg emailed `installation instructions for Gentoo <https://groups.google.com/d/msg/theano-dev/-8WCMn2FMR0/bJPasoZXaqoJ>`_.

.. _linux_basic:

Basic user install instructions
......
...@@ -817,9 +817,9 @@ class CLinker(link.Linker):
        Input Signature
        ---------------

        Each input signature is a tuple with an element for each input
        to the corresponding Apply node. Each element identifies the
        type of the node input, and the nature of that input in the
        graph.

        The nature of a typical variable is encoded by integer pairs ``((a, b), c)``:
...@@ -827,25 +827,31 @@ class CLinker(link.Linker):
        ``b`` is the index of the variable in the owner's output list.
        ``c`` is a flag indicating whether the variable is in the no_recycling set.

        If a variable is also a graph output, then its position in the
        outputs list is also bundled with this tuple (after the ``b``).

        The nature of a Constant instance is defined as its signature,
        together with two integers: the topological position of the
        first Apply using that Constant instance, and the lowest index
        into that Apply's inputs that refers to that Constant. (These
        two integers are a surrogate for the id() of the Constant.
        The integers are important because merge-able constants have
        the same signature, but require separate containers in C
        code.) Membership in no_recycling is also included in the
        signature.

        Output Signature
        ----------------

        The outputs of a node are entirely determined by the node's Op
        and the nature of the inputs, but the set of outputs that may
        be re-used by the computation (the elements of
        self.no_recycling) can affect the code that is generated.

        The format of each Op's output signature is simply a list of
        booleans, indicating whether each output is in the
        no_recycling set.
        """
        return self.cmodule_key_(self.env, self.no_recycling,
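The ``((a, b), c)`` encoding described in the docstring can be illustrated with a toy helper. This is a hypothetical sketch for clarity only; the function name and calling convention are invented, not Theano's:

```python
# Toy illustration of the ((a, b), c) input-signature encoding above.
# `a` = topological position of the input's owner, `b` = index in the
# owner's output list, `c` = membership in no_recycling; when the
# variable is also a graph output, its output position is bundled
# after `b`, as the docstring describes.
def input_nature(owner_pos, output_index, in_no_recycling,
                 output_pos=None):
    if output_pos is not None:
        return ((owner_pos, output_index, output_pos), in_no_recycling)
    return ((owner_pos, output_index), in_no_recycling)

# An ordinary intermediate variable: owned by node 3, its 0th output.
print(input_nature(3, 0, False))          # → ((3, 0), False)
# The same variable when it is also graph output number 2.
print(input_nature(3, 0, True, output_pos=2))  # → ((3, 0, 2), True)
```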
...@@ -900,6 +906,13 @@ class CLinker(link.Linker):
                if isinstance(i, graph.Constant):  # orphans
                    if id(i) not in constant_ids:
                        isig = (i.signature(), topological_pos, i_idx)
                        # If the Theano constant provides a strong hash
                        # (no collision for transpose, 2, 1, 0, -1, -2,
                        # two elements swapped...), we put this hash in
                        # the signature instead of the value. This makes
                        # the key file much smaller for big constants.
                        # Before this, we saw key files up to 80M.
                        if hasattr(isig[0], "theano_hash"):
                            isig = (isig[0].theano_hash(), topological_pos, i_idx)
                        try:
                            hash(isig)
                        except Exception:
                            # Generic constants don't have a hashable signature.
......
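The ``hasattr(isig[0], "theano_hash")`` check above is plain duck typing: any signature object that offers a strong hash method gets keyed on its digest, everything else is keyed on the object itself. A self-contained sketch of that fallback (``StrongSig`` and ``key_part`` are invented names; ``theano_hash`` is the method name from this commit):

```python
# Duck-typed fallback: key on theano_hash() when available, otherwise
# on the signature object itself.
import hashlib

class StrongSig:
    def __init__(self, data):
        self.data = data

    def theano_hash(self):
        # A strong content hash of the underlying bytes.
        return hashlib.md5(self.data).hexdigest()

def key_part(sig):
    if hasattr(sig, "theano_hash"):
        return sig.theano_hash()
    return sig

assert key_part(("plain", "tuple")) == ("plain", "tuple")
assert len(key_part(StrongSig(b"x" * 10000))) == 32  # md5 hex digest
```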
...@@ -3,9 +3,12 @@ import errno
import os
import platform
import re
import shutil
import sys
import textwrap

import numpy

import theano
from theano.configparser import config, AddConfigVar, ConfigParam, StrParam
...@@ -106,26 +109,69 @@ AddConfigVar('compiledir',
                         allow_override=False))
def flatten(a):
    if isinstance(a, (tuple, list, set)):
        l = []
        for item in a:
            l.extend(flatten(item))
        return l
    else:
        return [a]
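``flatten`` recursively expands nested tuples, lists, and sets into one flat list, which is what lets the cleanup and listing code below scan every object inside an arbitrarily nested cache key. A quick check (the function is repeated verbatim so the example is self-contained):

```python
# flatten, as defined above, repeated here so the example runs alone.
def flatten(a):
    if isinstance(a, (tuple, list, set)):
        l = []
        for item in a:
            l.extend(flatten(item))
        return l
    else:
        return [a]

print(flatten((1, [2, (3, 4)], 5)))  # → [1, 2, 3, 4, 5]
print(flatten(7))                    # → [7]
```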
def cleanup():
    """Delete old keys from the compiledir.

    We define an old key as a key that has an ndarray in it.
    We now store a hash of the constant data in the key instead.
    If no key is left for a compiled module, we delete the module.
    """
    compiledir = theano.config.compiledir
    for directory in os.listdir(compiledir):
        file = None
        try:
            try:
                filename = os.path.join(compiledir, directory, "key.pkl")
                file = open(filename, 'rb')
                try:
                    keydata = cPickle.load(file)
                    for key in list(keydata.keys):
                        for obj in flatten(key):
                            if isinstance(obj, numpy.ndarray):
                                keydata.remove_key(key)
                                break
                    if len(keydata.keys) == 0:
                        shutil.rmtree(os.path.join(compiledir, directory))
                except EOFError:
                    print ("ERROR while reading this key file '%s'."
                           " Delete its directory" % filename)
            except IOError:
                pass
        finally:
            if file is not None:
                file.close()
def print_compiledir_content():
    max_key_file_size = 1 * 1024 * 1024  # 1M
    compiledir = theano.config.compiledir
    table = []
    more_than_one_ops = 0
    zeros_op = 0
    big_key_files = []
    total_key_sizes = 0
    nb_keys = {}
    for dir in os.listdir(compiledir):
        file = None
        try:
            try:
                filename = os.path.join(compiledir, dir, "key.pkl")
                file = open(filename, 'rb')
                keydata = cPickle.load(file)
                ops = list(set([x for x in flatten(keydata.keys)
                                if isinstance(x, theano.gof.Op)]))
...@@ -137,6 +183,14 @@ def print_compiledir_content():
                types = list(set([x for x in flatten(keydata.keys)
                                  if isinstance(x, theano.gof.Type)]))
                table.append((dir, ops[0], types))

                size = os.path.getsize(filename)
                total_key_sizes += size
                if size > max_key_file_size:
                    big_key_files.append((dir, size, ops))

                nb_keys.setdefault(len(keydata.keys), 0)
                nb_keys[len(keydata.keys)] += 1
            except IOError:
                pass
        finally:
...@@ -159,6 +213,31 @@ def print_compiledir_content():
    table_op_class = sorted(table_op_class.iteritems(), key=lambda t: t[1])
    for op_class, nb in table_op_class:
        print op_class, nb

    if big_key_files:
        big_key_files = sorted(big_key_files, key=lambda t: str(t[1]))
        big_total_size = sum([size for dir, size, ops in big_key_files])
        print ("There are directories with key files bigger than %d bytes "
               "(they probably contain big tensor constants)" %
               max_key_file_size)
        print ("They use %d bytes out of %d (total size used by all key files)"
               "" % (big_total_size, total_key_sizes))
        for dir, size, ops in big_key_files:
            print dir, size, ops

    nb_keys = sorted(nb_keys.iteritems())
    print
    print "Number of keys for a compiled module"
    print "number of keys / number of modules with that number of keys"
    for n_k, n_m in nb_keys:
        print n_k, n_m
    print ("Skipped %d files that contained more than"
           " 1 op (was compiled with the C linker)" % more_than_one_ops)
    print ("Skipped %d files that contained 0 op "
......
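The ``nb_keys`` bookkeeping above is the standard ``dict.setdefault`` counting idiom: one pass that builds a histogram of "keys per module" counts. The same idiom in isolation (the module key counts are invented sample data):

```python
# Histogram via dict.setdefault, as used for nb_keys above.
key_counts_per_module = [1, 1, 2, 1, 3, 2]  # invented sample data

nb_keys = {}
for n in key_counts_per_module:
    nb_keys.setdefault(n, 0)
    nb_keys[n] += 1

print(sorted(nb_keys.items()))  # → [(1, 3), (2, 2), (3, 1)]
```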
...@@ -39,96 +39,57 @@ def test_neibs():
def test_neibs_bad_shape():
    shape = (2, 3, 10, 10)
    images = shared(numpy.arange(numpy.prod(shape)).reshape(shape))

    for neib_shape in [(3, 2), (2, 3)]:
        neib_shape = T.as_tensor_variable(neib_shape)
        try:
            f = function([], images2neibs(images, neib_shape),
                         mode=mode_without_gpu)
            f()
            assert False, "An error was expected"
        except TypeError:
            pass
def test_neibs_bad_shape_warp_centered():
    shape = (2, 3, 10, 10)
    images = shared(numpy.arange(numpy.prod(shape)).reshape(shape))

    for neib_shape in [(3, 2), (2, 3)]:
        neib_shape = T.as_tensor_variable(neib_shape)
        try:
            f = function([], images2neibs(images, neib_shape,
                                          mode="wrap_centered"),
                         mode=mode_without_gpu)
            f()
            assert False, "An error was expected"
        except TypeError:
            pass

    for shape in [(2, 3, 2, 3), (2, 3, 3, 2)]:
        images = shared(numpy.arange(numpy.prod(shape)).reshape(shape))
        neib_shape = T.as_tensor_variable((3, 3))
        try:
            f = function([], images2neibs(images, neib_shape,
                                          mode="wrap_centered"),
                         mode=mode_without_gpu)
            f()
            assert False, "An error was expected"
        except TypeError:
            pass

    # Test a valid shape
    shape = (2, 3, 3, 3)
    images = shared(numpy.arange(numpy.prod(shape)).reshape(shape))
    neib_shape = T.as_tensor_variable((3, 3))
    f = function([], images2neibs(images, neib_shape, mode="wrap_centered"),
                 mode=mode_without_gpu)
    f()
def test_neibs_manual():
......
...@@ -16,6 +16,7 @@ import scipy.sparse
from theano import gof, tensor, compile, scalar, config
from theano.gof.python25 import all
from theano.tensor import blas
from theano.sparse.utils import hash_from_sparse

sparse_formats = ['csc', 'csr']
...@@ -278,6 +279,10 @@ class SparseConstantSignature(tuple):
        (a, b) = self
        return hash(type(self)) ^ hash(a) ^ hash(type(b))

    def theano_hash(self):
        (_, d) = self
        return hash_from_sparse(d)


class SparseConstant(gof.Constant, _sparse_py_operators):
    dtype = property(lambda self: self.type.dtype)
......
import numpy
from theano.sparse.utils import hash_from_sparse
from theano.sparse.tests.test_basic import as_sparse_format
def test_hash_from_sparse():
hashs = []
rng = numpy.random.rand(5, 5)
for format in ['csc', 'csr']:
rng = as_sparse_format(rng, format)
for data in [[[-2]], [[-1]], [[0]], [[1]], [[2]],
numpy.zeros((1, 5)), numpy.zeros((1, 6)),
# Data buffer empty but different shapes
# numpy.zeros((1, 0)), numpy.zeros((2, 0)),
# Same data buffer and shapes but different strides
numpy.arange(25).reshape(5, 5),
numpy.arange(25).reshape(5, 5).T,
# Same data buffer, shapes and strides
# but different dtypes
numpy.zeros((5, 5), dtype="uint32"),
numpy.zeros((5, 5), dtype="int32"),
# Test slice
rng, rng[1:], rng[:4], rng[1:3],
# Don't test step as they are not supported by sparse
#rng[::2], rng[::-1]
]:
data = as_sparse_format(data, format)
hashs.append(hash_from_sparse(data))
    # Test that different types of views and their copies give the same hash
assert hash_from_sparse(rng[1:]) == hash_from_sparse(rng[1:].copy())
assert hash_from_sparse(rng[1:3]) == hash_from_sparse(rng[1:3].copy())
assert hash_from_sparse(rng[:4]) == hash_from_sparse(rng[:4].copy())
assert len(set(hashs)) == len(hashs)
from theano.gof.cc import hash_from_code


def hash_from_sparse(data):
    # We need to hash the shape as hash_from_code only hashes the data
    # buffer. Otherwise, this would cause problems with shapes like
    # (1, 0) and (2, 0).
    # We also need to add the dtype to distinguish between, e.g.,
    # uint32 and int32 arrays of zeros with the same shape.
    # Python hashes are not strong, so we always use md5. To avoid an
    # overly long hash, we call it again on the concatenation of all parts.
    return (hash_from_code(hash_from_code(data.data) +
                           hash_from_code(data.indices) +
                           hash_from_code(data.indptr) +
                           hash_from_code(str(data.shape)) +
                           hash_from_code(str(data.dtype)) +
                           hash_from_code(data.format)))
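The scheme above hashes each component of the sparse matrix (data, indices, indptr, shape, dtype, format) and then hashes the concatenation so the final key stays one digest long. A stdlib-only sketch of the same composition, with invented field values and with md5 standing in for ``hash_from_code``:

```python
# Compose per-field md5 digests, then md5 the concatenation, mirroring
# hash_from_sparse above. All field values here are invented examples.
import hashlib

def md5_hex(b):
    if isinstance(b, str):
        b = b.encode('utf8')
    return hashlib.md5(b).hexdigest()

def hash_sparse_like(data, indices, indptr, shape, dtype, fmt):
    parts = (md5_hex(data) + md5_hex(indices) + md5_hex(indptr) +
             md5_hex(str(shape)) + md5_hex(str(dtype)) + md5_hex(fmt))
    return md5_hex(parts)

# Same (empty-ish) buffers, different shapes: the hashes must differ.
h1 = hash_sparse_like(b'\x00' * 8, b'', b'\x00', (1, 5), 'float64', 'csc')
h2 = hash_sparse_like(b'\x00' * 8, b'', b'\x00', (1, 6), 'float64', 'csc')
assert h1 != h2
```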
...@@ -20,6 +20,7 @@ from theano import scalar as scal
from theano.gof.python25 import partial, any, all
from theano import compile, printing
from theano.printing import pprint, min_informative_str
from theano.tensor.utils import hash_from_ndarray

# We use these exceptions as well.
from theano.scalar import ComplexError, IntegerDivisionError
...@@ -1505,6 +1506,10 @@ class TensorConstantSignature(tuple):
        t, d = self
        return hashtype(self) ^ hash(t) ^ hash(d.shape) ^ hash(self.sum)

    def theano_hash(self):
        _, d = self
        return hash_from_ndarray(d)

    def _get_sum(self):
        """Compute sum of non NaN / Inf values in the array."""
        try:
......
...@@ -254,11 +254,6 @@ class DimShuffle(Op):
                shape_statements += [('dimensions['+str(i)+'] = %(basename)s->dimensions['+str(o)+']')]
            else:
                shape_statements += [('dimensions['+str(i)+'] = 1')]

        strides_statements = ['npy_intp strides[%i]' % nd_out]
...@@ -269,11 +264,6 @@ class DimShuffle(Op):
                strides_statements += [('strides['+str(i)+'] = %(basename)s->strides['+str(o)+']')]
            else:
                strides_statements += [('strides['+str(i)+'] = 0')]

        # set the strides of the broadcasted dimensions
        # this algorithm is from numpy: PyArray_Newshape() in cvs/numpy/numpy/core/src/multiarraymodule.c
import numpy
from theano.tensor.utils import hash_from_ndarray
def test_hash_from_ndarray():
hashs = []
rng = numpy.random.rand(5, 5)
for data in [-2, -1, 0, 1, 2, numpy.zeros((1, 5)), numpy.zeros((1, 6)),
# Data buffer empty but different shapes
numpy.zeros((1, 0)), numpy.zeros((2, 0)),
# Same data buffer and shapes but different strides
numpy.arange(25).reshape(5, 5),
numpy.arange(25).reshape(5, 5).T,
# Same data buffer, shapes and strides but different dtypes
numpy.zeros((5, 5), dtype="uint32"),
numpy.zeros((5, 5), dtype="int32"),
# Test slice
rng, rng[1:], rng[:4], rng[1:3], rng[::2], rng[::-1]
]:
data = numpy.asarray(data)
hashs.append(hash_from_ndarray(data))
assert len(set(hashs)) == len(hashs)
    # Test that different types of views and their copies give the same hash
assert hash_from_ndarray(rng[1:]) == hash_from_ndarray(rng[1:].copy())
assert hash_from_ndarray(rng[1:3]) == hash_from_ndarray(rng[1:3].copy())
assert hash_from_ndarray(rng[:4]) == hash_from_ndarray(rng[:4].copy())
assert hash_from_ndarray(rng[::2]) == hash_from_ndarray(rng[::2].copy())
assert hash_from_ndarray(rng[::-1]) == hash_from_ndarray(rng[::-1].copy())
import numpy

from theano.gof.cc import hash_from_code


def hash_from_ndarray(data):
    # We need to hash the shape and strides as hash_from_code only hashes
    # the data buffer. Otherwise, this would cause problems with shapes
    # like (1, 0) and (2, 0), and with in-place transposes.
    # We also need to add the dtype to distinguish between, e.g.,
    # uint32 and int32 arrays of zeros with the same shape and strides.
    # Python hashes are not strong, so we always use md5. To avoid an
    # overly long hash, we call it again on the concatenation of all parts.
    if not data.flags["C_CONTIGUOUS"] and not data.flags["F_CONTIGUOUS"]:
        data = numpy.ascontiguousarray(data)
    return (hash_from_code(hash_from_code(data) +
                           hash_from_code(str(data.shape)) +
                           hash_from_code(str(data.strides)) +
                           hash_from_code(str(data.dtype))))