提交 b01c7960 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #1552 from nouiz/mixed2

Mixed2
......@@ -19,7 +19,7 @@ def print_help(exit_status):
print 'Type "theano-cache clear" to erase the cache'
print 'Type "theano-cache list" to print the cache content'
print 'Type "theano-cache unlock" to unlock the cache directory'
print 'Type "theano-cache cleanup" to delete keys in the old format'
print 'Type "theano-cache cleanup" to delete keys in the old format/code version'
print 'Type "theano-cache purge" to force deletion of the cache directory'
print ('Type "theano-cache basecompiledir" '
'to print the parent of the cache directory')
......@@ -60,6 +60,8 @@ elif len(sys.argv) == 2:
theano.gof.compiledir.print_compiledir_content()
elif sys.argv[1] == 'cleanup':
theano.gof.compiledir.cleanup()
cache = get_module_cache(init_args=dict(do_refresh=False))
cache.clear_old()
elif sys.argv[1] == 'unlock':
theano.gof.compilelock.force_unlock()
print 'Lock successfully removed!'
......
......@@ -1067,6 +1067,18 @@ The six usual equality and inequality operators share the same interface.
Returns a variable representing the result of logical inequality (a!=b).
.. function:: isnan(a)
Returns a variable representing the comparison of ``a`` elements with nan.
This is equivalent to ``numpy.isnan``.
.. function:: isinf(a)
Returns a variable representing the comparison of ``a`` elements
with inf or -inf.
This is equivalent to ``numpy.isinf``.
Condition
---------
......
......@@ -566,13 +566,13 @@ class ProfileStats(object):
sh = self.variable_shape.get(var, 'no shape')
st = self.variable_strides.get(var, 'no strides')
dtype = getattr(var, 'dtype', 'no dtype')
print " input %d: dtype=%s, shape=%s, strides=%s " % (
print >> file, " input %d: dtype=%s, shape=%s, strides=%s " % (
idx, dtype, sh, st)
for idx, var in enumerate(a.outputs):
sh = self.variable_shape.get(var, 'no shape')
st = self.variable_strides.get(var, 'no strides')
dtype = getattr(var, 'dtype', 'no dtype')
print " output %d: dtype=%s, shape=%s, strides=%s " % (
print >> file, " output %d: dtype=%s, shape=%s, strides=%s " % (
idx, dtype, sh, st)
# Same as before: I've sacrificed some information to make
# the output more readable
......
import cPickle
import errno
import logging
import os
import platform
import re
......@@ -17,6 +18,9 @@ from theano.configparser import config, AddConfigVar, ConfigParam, StrParam
from theano.gof.utils import flatten
from theano.misc.windows import call_subprocess_Popen
_logger = logging.getLogger("theano.gof.compiledir")
# Using the dummy file descriptors below is a workaround for a crash
# experienced in an unusual Python 2.4.4 Windows environment with the default
# None values.
......@@ -181,7 +185,7 @@ def cleanup():
"""
Delete keys in old format from the compiledir.
Old clean up include key in old format:
The clean-up removes keys in the old format, or keys generated with an old version of the c_code:
1) keys that have an ndarray in them.
Now we use a hash in the keys of the constant data.
2) key that don't have the numpy ABI version in them
......@@ -204,24 +208,46 @@ def cleanup():
have_c_compiler = False
for obj in flatten(key):
if isinstance(obj, numpy.ndarray):
keydata.remove_key(key)
have_npy_abi_version = False
break
elif isinstance(obj, basestring):
if obj.startswith('NPY_ABI_VERSION=0x'):
have_npy_abi_version = True
elif obj.startswith('c_compiler_str='):
have_c_compiler = True
elif (isinstance(obj, (theano.gof.Op, theano.gof.Type)) and
hasattr(obj, 'c_code_cache_version')):
v = obj.c_code_cache_version()
if v not in [(), None] and v not in key[0]:
have_npy_abi_version = False
break
if not have_npy_abi_version or not have_c_compiler:
keydata.remove_key(key)
try:
# This can happen when the compiledir has been moved.
if keydata.key_pkl != filename:
keydata.key_pkl = filename
keydata.remove_key(key)
except IOError, e:
_logger.error(
"Could not remove file '%s'. To complete "
"the clean-up, please remove manually "
"the directory containing it.",
filename)
if len(keydata.keys) == 0:
shutil.rmtree(os.path.join(compiledir, directory))
except EOFError:
print ("ERROR while reading this key file '%s'."
" Delete its directory" % filename)
_logger.error(
"Could not read key file '%s'. To complete "
"the clean-up, please remove manually "
"the directory containing it.",
filename)
except IOError:
pass
_logger.error(
"Could not clean up this directory: '%s'. To complete "
"the clean-up, please remove it manually.",
directory)
finally:
if file is not None:
file.close()
......
......@@ -1424,15 +1424,12 @@ class EquilibriumOptimizer(NavigatorOptimizer):
loop_timing = []
global_opt_timing = []
time_lopts = {}
time_opts = {}
io_toposort_timing = []
nb_nodes = []
for gopt in self.global_optimizers:
process_count.setdefault(gopt, 0)
for lopt in self.local_optimizers:
process_count.setdefault(lopt, 0)
time_lopts.setdefault(lopt, 0)
for opt in self.global_optimizers + self.local_optimizers:
process_count.setdefault(opt, 0)
time_opts.setdefault(opt, 0)
while changed and not max_use_abort:
t0 = time.time()
......@@ -1441,7 +1438,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
#apply global optimizers
for gopt in self.global_optimizers:
fgraph.change_tracker.reset()
t_opt = time.time()
gopt.apply(fgraph)
time_opts[gopt] += time.time() - t_opt
if fgraph.change_tracker.changed:
process_count[gopt] += 1
changed = True
......@@ -1482,9 +1481,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
current_node = node
for lopt in self.local_optimizers:
t_lopt = time.time()
t_opt = time.time()
lopt_change = self.process_node(fgraph, node, lopt)
time_lopts[lopt] += time.time() - t_lopt
time_opts[lopt] += time.time() - t_opt
if lopt_change:
process_count[lopt] += 1
changed = True
......@@ -1507,7 +1506,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
config.optdb.max_use_ratio)
return (self, loop_timing, process_count, max_nb_nodes,
global_opt_timing, nb_nodes, time_lopts, io_toposort_timing)
global_opt_timing, nb_nodes, time_opts, io_toposort_timing)
def print_summary(self, stream=sys.stdout, level=0, depth=-1):
name = getattr(self, 'name', None)
......@@ -1521,7 +1520,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
@staticmethod
def print_profile(stream, prof, level=0):
(opt, loop_timing, process_count, max_nb_nodes,
global_opt_timing, nb_nodes, time_lopts, io_toposort_timing) = prof
global_opt_timing, nb_nodes, time_opts, io_toposort_timing) = prof
blanc = (' ' * level)
print >> stream, blanc, "EquilibriumOptimizer",
print >> stream, blanc, getattr(opt, "name",
......@@ -1540,7 +1539,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
count_opt = []
for opt, count in process_count.iteritems():
if count > 0:
count_opt.append((time_lopts[opt], count, opt))
count_opt.append((time_opts[opt], count, opt))
if count_opt:
print >> stream, blanc, \
......@@ -1554,7 +1553,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
@staticmethod
def merge_profile(prof1, prof2):
#(opt, loop_timing, process_count, max_nb_nodes,
# global_opt_timing, nb_nodes, time_lopts, io_toposort_timing) = prof1
# global_opt_timing, nb_nodes, time_opts, io_toposort_timing) = prof1
local_optimizers = set(prof1[0].local_optimizers).union(
prof2[0].local_optimizers)
......@@ -1588,12 +1587,12 @@ class EquilibriumOptimizer(NavigatorOptimizer):
nb_nodes = merge_list(prof1[5], prof2[5])
time_lopts = prof1[6].copy()
time_opts = prof1[6].copy()
for opt, t in prof2[6].iteritems():
if opt in time_lopts:
time_lopts[opt] += t
if opt in time_opts:
time_opts[opt] += t
else:
time_lopts[opt] = t
time_opts[opt] = t
io_toposort_timing = merge_list(prof1[7], prof2[7])
......@@ -1606,7 +1605,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
max_nb_nodes,
global_opt_timing,
nb_nodes,
time_lopts,
time_opts,
io_toposort_timing)
#################
......
......@@ -29,6 +29,7 @@ for (int iter_m=0; iter_m < Os[0]; iter_m++) {
*/
#ifndef CONV_KERNEL_CU
#define CONV_KERNEL_CU
#include <stdint.h>
/*
#define CHECK_BANK_CONFLICTS 0
......@@ -44,7 +45,9 @@ for (int iter_m=0; iter_m < Os[0]; iter_m++) {
#define MIN(a, b) ((a) < (b) ? (a) : (b) )
#define MAX(a, b) ((a) < (b) ? (b) : (a) )
const unsigned long int COALESCED_ALIGN = 0xFFFFFFFFFFFFFF00; // zero-out the trailing bits of pointers
//Must be the same size as a pointer. We can't use unsigned long because
//on 64-bit Windows it is only 32 bits wide.
const uintptr_t COALESCED_ALIGN = 0xFFFFFFFFFFFFFF00; // zero-out the trailing bits of pointers
__device__ void load_to_shared(float * dst, const float * src, const int thread_id, int nb_thread, const int N, const bool flipped=false){
if (nb_thread < 64)
......@@ -73,7 +76,7 @@ __device__ void load_to_shared(float * dst, const float * src, const int thread_
if (thread_id < nb_thread)
{
const float * my_src_ptr = (const float *)(
((unsigned long int)src) & COALESCED_ALIGN);
((uintptr_t)src) & COALESCED_ALIGN);
my_src_ptr += thread_id;
while (my_src_ptr < src + N)
{
......
......@@ -47,13 +47,13 @@ static PyObject *CudaNdarray_get_shape(CudaNdarray *self, void *closure);
int _outstanding_mallocs[] = {0,0};
#if COMPUTE_GPU_MEM_USED
int _allocated_size = 0;
int _max_allocated_size = 0;
size_t _allocated_size = 0;
size_t _max_allocated_size = 0;
const int TABLE_SIZE = 10000;
struct table_struct{
void* ptr;
int size;
size_t size;
};
table_struct _alloc_size_table[TABLE_SIZE];
#endif
......@@ -92,26 +92,26 @@ void * device_malloc(size_t size, int verbose)
if (err2 != cudaSuccess){
cudaGetLastError();
fprintf(stderr,
"Error when tring to find the memory information"
"Error when trying to find the memory information"
" on the GPU: %s\n", cudaGetErrorString(err2));
}
#if COMPUTE_GPU_MEM_USED
fprintf(stderr,
"Error allocating %li bytes of device memory (%s)."
"Error allocating %zd bytes of device memory (%s)."
" new total bytes allocated: %d."
" Driver report %d bytes free and %d bytes total \n",
(long)size, cudaGetErrorString(err), _allocated_size,
" Driver report %zd bytes free and %zd bytes total \n",
size, cudaGetErrorString(err), _allocated_size,
free, total);
#else
fprintf(stderr,
"Error allocating %li bytes of device memory (%s)."
" Driver report %d bytes free and %d bytes total \n",
(long)size, cudaGetErrorString(err), free, total);
"Error allocating %zd bytes of device memory (%s)."
" Driver report %zd bytes free and %zd bytes total \n",
size, cudaGetErrorString(err), free, total);
#endif
}
PyErr_Format(PyExc_MemoryError,
"Error allocating %li bytes of device memory (%s).",
(long)size, cudaGetErrorString(err));
"Error allocating %zd bytes of device memory (%s).",
size, cudaGetErrorString(err));
return NULL;
}
if (rval != NULL){
......@@ -227,15 +227,15 @@ int device_free(void *ptr)
}
assert(i<TABLE_SIZE);
fprintf(stderr,
"Error freeing device pointer %p (%s) of size %d. %d byte already allocated."
" Driver report %d bytes free and %d bytes total \n",
"Error freeing device pointer %p (%s) of size %d. %zd byte already allocated."
" Driver report %zd bytes free and %zd bytes total \n",
ptr, cudaGetErrorString(err),
_alloc_size_table[i].size, _allocated_size, free, total);
}
#else
fprintf(stderr,
"Error freeing device pointer %p (%s)."
" Driver report %d bytes free and %d bytes total \n",
" Driver report %zd bytes free and %zd bytes total \n",
ptr,
cudaGetErrorString(err), free, total);
#endif
......
......@@ -3564,11 +3564,10 @@ class T_Scan(unittest.TestCase):
assert not opt_obj.belongs_to_set(scan_node1, [scan_node2])
assert not opt_obj.belongs_to_set(scan_node2, [scan_node1])
def test_remove_constants_and_unused_inputs_scan(self):
"""
Test the opt remove_constants_and_unused_inputs_scan
def test_remove_constants_and_unused_inputs_scan_non_seqs(self):
"""Test the opt remove_constants_and_unused_inputs_scan for
non sequences.
TODO: currently we only test non_seqs, should test
"""
W = theano.tensor.matrix(name='W')
v = theano.tensor.ivector(name='v')
......@@ -3594,17 +3593,61 @@ class T_Scan(unittest.TestCase):
f(numpy.zeros((3, 3), dtype=theano.config.floatX), [1, 2])
scan_node = f.maker.fgraph.toposort()[-1]
# TODO: Why this assert always fail?
# assert (len(scan_node.inputs) ==
# len(set(scan_node.inputs)))
# The first input is the number of iterations.
assert (len(scan_node.inputs[1:]) ==
len(set(scan_node.inputs[1:])))
inp = scan_node.op.inner_non_seqs(scan_node.op.inputs)
assert len(inp) == 1
assert (len(inp) == len(set(inp)))
inp = scan_node.op.outer_non_seqs(scan_node)
assert len(inp) == 1
assert (len(inp) == len(set(inp)))
#import pdb;pdb.set_trace()
#utt.assert_allclose(f([1, 2]), [[0, 0, 0], [1, 1, 1], [1, 1, 1]])
def test_remove_constants_and_unused_inputs_scan_seqs(self):
"""
Test the opt remove_constants_and_unused_inputs_scan for sequences.
"""
W = theano.tensor.matrix(name='W')
v = theano.tensor.ivector(name='v')
vv = theano.tensor.matrix(name='vv')
y1, _ = theano.scan(lambda i, W: W[i], sequences=v,
outputs_info=None, non_sequences=[W])
y2, _ = theano.scan(lambda i, _, W: W[i], sequences=[v, v],
outputs_info=None, non_sequences=W)
y3, _ = theano.scan(lambda i, _, W: W[i], sequences=[v, vv[0]],
outputs_info=None, non_sequences=W)
y4, _ = theano.scan(lambda _, i, W: W[i], sequences=[vv[0], v],
outputs_info=None, non_sequences=W)
y5, _ = theano.scan(lambda _, i, _2, W: W[i], sequences=[vv, v, vv[0]],
outputs_info=None, non_sequences=W)
y6, _ = theano.scan(lambda _, _2, i, W: W[i], sequences=[vv[0], vv, v],
outputs_info=None, non_sequences=W)
y7, _ = theano.scan(lambda i, _, _2, W: W[i],
sequences=[v, vv[0], vv[0]],
outputs_info=None, non_sequences=W)
y8, _ = theano.scan(lambda _, i, W, _2, _3: W[i], sequences=[vv[0], v],
outputs_info=None, non_sequences=[W, W[0], W[0]])
for out in [y1, y2, y3, y4, y5, y6, y7, y8]:
#This used to raise an exception
f = theano.function([W, v, vv], out, on_unused_input='ignore',
mode=mode_with_opt)
f(numpy.zeros((3, 3), theano.config.floatX),
[1, 2],
numpy.zeros((3, 3), theano.config.floatX))
scan_node = f.maker.fgraph.toposort()[-1]
# The first input is the number of iterations.
assert (len(scan_node.inputs[1:]) ==
len(set(scan_node.inputs[1:])))
inp = scan_node.op.inner_seqs(scan_node.op.inputs)
assert len(inp) == 1
inp = scan_node.op.outer_seqs(scan_node)
assert len(inp) == 1
inp = scan_node.op.inner_non_seqs(scan_node.op.inputs)
assert len(inp) == 1
inp = scan_node.op.outer_non_seqs(scan_node)
assert len(inp) == 1
def test_speed():
......
......@@ -2,7 +2,7 @@ import theano
from theano.tensor import basic as T
import numpy as N
#from util import strutil
from theano.tensor.blas_headers import blas_header_text
from theano.tensor.blas_headers import blas_header_text, blas_header_version
from theano.tensor.blas import ldflags
from theano.misc import strutil
from theano.gradient import grad_undefined
......@@ -51,8 +51,7 @@ class Conv3D(theano.Op):
return "Conv3D"
def c_code_cache_version(self):
return (3, blas_header_text.version)
return (3, blas_header_version())
def make_node(self, V, W, b, d):
"""
......
......@@ -543,8 +543,8 @@ class _tensor_py_operators:
def get_scalar_constant_value(self):
return theano.tensor.basic.get_scalar_constant_value(self)
def zeros_like(model, dtype=None):
return theano.tensor.basic.zeros_like(model, dtype=dtype)
def zeros_like(self, dtype=None):
return theano.tensor.basic.zeros_like(self, dtype=dtype)
class TensorVariable(_tensor_py_operators, Variable):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论