Commit 560ad497, authored by lamblin

Merge pull request #623 from nouiz/mixed

Mixed
@@ -83,6 +83,7 @@ New Features
 * C code reuses preallocated outputs (only done by Scan) (Pascal L.)
 * Garbage collection of intermediate results during Theano function calls
   for Ops with C code (Pascal L.)
+* The Theano flag compiledir_format now supports the parameter numpy_version.
 Sparse
 * Implement theano.sparse.mul(sparse1, sparse2) when both inputs don't
...
@@ -915,7 +915,8 @@ MKL library included in EPD, so you should not need to compile your own BLAS.
 <https://github.com/xianyi/OpenBLAS>`_ is a new project that
 continues GotoBLAS: it has a better installation process and implements
 additional functions (not currently used by Theano).
-We did not try OpenBLAS on Windows.
+We did not try OpenBLAS on Windows. When it is installed, you probably
+need to set the Theano flag: ``theano.config.blas.ldflags = "-lopenblas"``.
 .. note::
...
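For reference, the flag can be set either from Python or from the environment; a minimal sketch (the library path is an assumption that depends on where OpenBLAS is installed):

    # From Python, before any function is compiled:
    import theano
    theano.config.blas.ldflags = "-L/usr/local/lib -lopenblas"  # path is hypothetical

    # Or from the shell:
    #   THEANO_FLAGS='blas.ldflags=-lopenblas' python my_script.py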
@@ -17,10 +17,10 @@ purpose of it is to hack it to investigate what your own particular program is d
             predefined_optimizers)
 class StepMode(Mode):
-    def __init__(self, linker=None, optimizer=None):
+    def __init__(self, linker=None, optimizer='default'):
         if linker is None:
             linker = config.linker
-        if optimizer is None:
+        if optimizer is 'default':
             optimizer = config.optimizer
     def blah(i, node, th):
         # This function will be run for each node in your compiled program.
...
@@ -286,10 +286,10 @@ class Mode(object):
     predefined_modes.
     """
-    def __init__(self, linker=None, optimizer=None):
+    def __init__(self, linker=None, optimizer='default'):
         if linker is None:
             linker = config.linker
-        if optimizer is None:
+        if optimizer is 'default':
             optimizer = config.optimizer
         self.__setstate__((linker, optimizer))
...
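The 'default' sentinel distinguishes "fall back to config.optimizer" from an explicit optimizer=None, which now means "apply no optimizer at all". A minimal sketch of the pattern (not the committed code; note that comparing a string with ``is`` relies on CPython interning, so ``==`` is the safer spelling):

    from theano import config

    class MyMode(object):
        def __init__(self, linker=None, optimizer='default'):
            if linker is None:
                linker = config.linker
            if optimizer == 'default':  # '==' avoids depending on string interning
                optimizer = config.optimizer
            # optimizer may legitimately be None here: no optimization at all
            self.linker, self.optimizer = linker, optimizer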
@@ -17,6 +17,7 @@ compiledir_format_dict = {"platform": platform.platform(),
                           "processor": platform.processor(),
                           "python_version": platform.python_version(),
                           "theano_version": theano.__version__,
+                          "numpy_version": numpy.__version__,
                           }
 compiledir_format_keys = ", ".join(compiledir_format_dict.keys())
 default_compiledir_format =\
...
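With the new key, the compilation cache directory can be made to vary with the installed NumPy version, so cached modules are not reused across NumPy upgrades. A usage sketch (the exact format string is illustrative):

    # From the shell:
    #   THEANO_FLAGS='compiledir_format=compiledir_%(platform)s-%(python_version)s-%(numpy_version)s'
    # Or in ~/.theanorc:
    #   [global]
    #   compiledir_format = compiledir_%(platform)s-%(python_version)s-%(numpy_version)s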
@@ -28,6 +28,7 @@ from theano.sandbox.cuda import GpuElemwise, CudaNdarrayType, GpuOp
 from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
                                            gpu_contiguous)
 from theano.sandbox.cuda.opt import gpu_seqopt
+from theano.tensor.utils import hash_from_dict
 import pycuda_init
 if not pycuda_init.pycuda_available:
@@ -116,7 +117,7 @@ class PycudaElemwiseKernelOp(GpuOp):
     def __hash__(self):
         return (hash(type(self)) ^ hash(self.scalar_op) ^
-                hash(self.inplace_pattern))
+                hash_from_dict(self.inplace_pattern))
     def make_node(self, *inputs):
         _inputs = [gpu_contiguous(as_cuda_ndarray_variable(i)) for i in inputs]
@@ -202,7 +203,7 @@ class PycudaElemwiseSourceModuleOp(GpuOp):
     def __hash__(self):
         return (hash(type(self)) ^ hash(self.scalar_op) ^
-                hash(self.inplace_pattern))
+                hash_from_dict(self.inplace_pattern))
     def make_node(self, *inputs):
         _inputs = [gpu_contiguous(as_cuda_ndarray_variable(i)) for i in inputs]
...
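The switch to hash_from_dict matters because a plain dict is not hashable: hash(self.inplace_pattern) raises TypeError whenever inplace_pattern is an actual dict. A small illustration using the helper added in this commit:

    from theano.tensor.utils import hash_from_dict

    pattern = {0: [0]}            # inplace pattern: output 0 overwrites input 0
    # hash(pattern)               # would raise TypeError: unhashable type: 'dict'
    h = hash_from_dict(pattern)   # deterministic hash built from the sorted items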
@@ -92,6 +92,10 @@ class Images2Neibs(Op):
         fail = sub['fail']
         mode = self.mode
         return """
+        #ifndef CEIL_INTDIV
+        #define CEIL_INTDIV(a, b) ((a/b) + ((a %% b) ? 1: 0))
+        #endif
         int grid_c = -1; //number of patch in height
         int grid_d = -1; //number of patch in width
         {
@@ -141,10 +145,9 @@ class Images2Neibs(Op):
                 (long int)c, (long int)d, (long int)(%(ten4)s->dimensions[2]), (long int)(%(ten4)s->dimensions[3]));
             %(fail)s;
         }
-        //grid_c = CEIL_INTDIV(((%(ten4)s->dimensions)[2]),step_x)
-        //grid_d = CEIL_INTDIV(((%(ten4)s->dimensions)[3]),step_y)
-        grid_c = ((%(ten4)s->dimensions)[2])/step_x + ((((%(ten4)s->dimensions)[2])%%step_x)? 1:0);
-        grid_d = ((%(ten4)s->dimensions)[3])/step_y + ((((%(ten4)s->dimensions)[3])%%step_y)? 1:0);
+        grid_c = CEIL_INTDIV(((%(ten4)s->dimensions)[2]), step_x);
+        grid_d = CEIL_INTDIV(((%(ten4)s->dimensions)[3]), step_y);
     }else if ( "%(mode)s" == "valid") {
         if ( ((%(ten4)s->dimensions)[2] < c) ||( (((%(ten4)s->dimensions)[2]-c) %% step_x)!=0))
         {
@@ -454,6 +457,10 @@ class GpuImages2Neibs(Images2Neibs, GpuOp):
         fail = sub['fail']
         mode = self.mode
         return """
+        #ifndef CEIL_INTDIV
+        #define CEIL_INTDIV(a, b) ((a/b) + ((a %% b) ? 1: 0))
+        #endif
         int grid_c = -1;
         int grid_d = -1;
@@ -491,10 +498,12 @@ class GpuImages2Neibs(Images2Neibs, GpuOp):
                 c, d, CudaNdarray_HOST_DIMS(%(ten4)s)[2], CudaNdarray_HOST_DIMS(%(ten4)s)[3]);
             %(fail)s;
         }
-        //grid_c = CEIL_INTDIV(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]),step_x)
-        //grid_d = CEIL_INTDIV(((CudaNdarray_HOST_DIMS(%(ten4)s))[3]),step_y)
-        grid_c = ((CudaNdarray_HOST_DIMS(%(ten4)s))[2])/step_x + ((((CudaNdarray_HOST_DIMS(%(ten4)s))[2])%%step_x)? 1:0);
-        grid_d = ((CudaNdarray_HOST_DIMS(%(ten4)s))[3])/step_y + ((((CudaNdarray_HOST_DIMS(%(ten4)s))[3])%%step_y)? 1:0);
+        grid_c = CEIL_INTDIV(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]),
+                             step_x);
+        grid_d = CEIL_INTDIV(((CudaNdarray_HOST_DIMS(%(ten4)s))[3]),
+                             step_y);
     }else if ( "%(mode)s" == "valid") {
         if ( ((CudaNdarray_HOST_DIMS(%(ten4)s))[2] < c) ||( (((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c) %% step_x)!=0))
         {
...
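CEIL_INTDIV(a, b) is plain ceiling integer division (the %% in the macro is just % escaped for Python string substitution). For intuition, the same computation in Python (a sketch, not part of the commit; dimensions and steps are nonnegative, so C truncating division and Python floor division agree here):

    def ceil_intdiv(a, b):
        # a // b, plus one more patch when b does not divide a exactly
        return a // b + (1 if a % b else 0)

    assert ceil_intdiv(10, 5) == 2  # exact fit
    assert ceil_intdiv(11, 5) == 3  # remainder rounds the patch grid up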
@@ -515,7 +515,21 @@ csr_fmatrix = SparseType(format='csr', dtype='float32')
 # CONSTRUCTION
 class CSMProperties(gof.Op):
-    """Extract all of .data .indices and .indptr"""
+    """Extract all of .data, .indices and .indptr
+
+    :note: We do not implement infer_shape for this op for now. It
+        would require a GetNNZ op, and that op would keep a dependence
+        on this op's input, so it would not help remove computations
+        from the graph. Removing computations would instead need an
+        infer_sparse_pattern feature, which is trickier than
+        infer_shape. For example, how do we handle an op that creates
+        some zero values? There, the result depends on the values
+        themselves. We could write an infer_shape for the last output
+        (the shape), but I doubt it would get used.
+    """
     # we don't return a view of the shape, we create a new ndarray from the
     # shape tuple.
...
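For orientation, this op backs theano.sparse.csm_properties, which symbolically decomposes a compressed sparse matrix into its four underlying arrays; a minimal usage sketch:

    import theano.sparse as sparse

    x = sparse.csc_matrix(name='x', dtype='float64')
    # Extract the CSC internals: values, row indices,
    # column pointers, and the matrix shape.
    data, indices, indptr, shape = sparse.csm_properties(x)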
@@ -13,6 +13,8 @@ from theano import scalar
 from theano.scalar import Scalar
 from theano.printing import min_informative_str, pprint
 from theano.gof.python25 import all, any
+from theano.tensor.utils import hash_from_dict
 config = theano.config
@@ -563,17 +565,8 @@ class Elemwise(Op):
         return False
     def _rehash(self):
-        items = self.inplace_pattern.items()
-        items.sort()
-        first_part = [k for k, v in items]
-        second_part = []
-        for k, v in items:
-            if isinstance(v, (tuple, list)):
-                second_part += [tuple(v)]
-            else:
-                second_part += [v]
-        tuple_items = tuple(first_part + second_part)
-        h = hash('Elemwise') ^ hash(self.scalar_op) ^ hash(tuple_items)
+        inplace_pattern_hash = hash_from_dict(self.inplace_pattern)
+        h = hash('Elemwise') ^ hash(self.scalar_op) ^ inplace_pattern_hash
         assert h == getattr(self, '_hashval', h)
         self._hashval = h
...
 import numpy
-from theano.tensor.utils import hash_from_ndarray
+from theano.tensor.utils import hash_from_ndarray, hash_from_dict
 def test_hash_from_ndarray():
@@ -31,3 +31,18 @@ def test_hash_from_ndarray():
     assert hash_from_ndarray(rng[:4]) == hash_from_ndarray(rng[:4].copy())
     assert hash_from_ndarray(rng[::2]) == hash_from_ndarray(rng[::2].copy())
     assert hash_from_ndarray(rng[::-1]) == hash_from_ndarray(rng[::-1].copy())
+
+def test_hash_from_dict():
+    dicts = [{}, {0: 0}, {0: 1}, {1: 0}, {1: 1},
+             {0: (0,)}, {0: [1]},
+             {0: (0, 1)}, {0: [1, 0]},
+             ]
+    hashs = []
+    for idx, d in enumerate(dicts):
+        h = hash_from_dict(d)
+        assert h not in hashs
+        hashs.append(h)
+
+    # Lists are not hashable, so they are converted to tuples.
+    assert hash_from_dict({0: (0,)}) == hash_from_dict({0: [0]})
@@ -18,3 +18,28 @@ def hash_from_ndarray(data):
             hash_from_code(str(data.shape)) +
             hash_from_code(str(data.strides)) +
             hash_from_code(str(data.dtype)))
+
+def hash_from_dict(d):
+    """Work around the fact that dicts are not hashable in Python.
+
+    This requires that every object in the dict has a sort order that
+    depends only on its value (true for integers, floats and strings);
+    we do not verify that the objects in the dict have this property.
+
+    Values that are lists are converted to tuples, as lists are not
+    hashable.
+    """
+    items = d.items()
+    items.sort()
+    first_part = [k for k, v in items]
+    second_part = []
+    for k, v in items:
+        if isinstance(v, (tuple, list)):
+            second_part += [tuple(v)]
+        else:
+            second_part += [v]
+    tuple_items = tuple(first_part + second_part)
+    return hash(tuple_items)
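Note that items = d.items() followed by items.sort() is Python 2 idiom (dict.items returned a list there). A hypothetical modern-Python spelling of the same scheme, assuming mutually sortable keys:

    def hash_from_dict_py3(d):
        # Sort by key, hash all keys first and then all values,
        # converting list values to tuples so they are hashable.
        items = sorted(d.items())
        keys = tuple(k for k, v in items)
        values = tuple(tuple(v) if isinstance(v, (tuple, list)) else v
                       for k, v in items)
        return hash(keys + values)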