Merge pull request #2812 from nouiz/mixed

Mixed: more error detection, clean up, optimization

Merge pull request #2812 from nouiz/mixed
c3f14dd4 · abergeron · 7f5f1f8e · ae781710 · c3f14dd4 · c3f14dd4
--- a/theano/compile/pfunc.py
+++ b/theano/compile/pfunc.py
@@ -421,7 +421,7 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
    if profile is None:
        profile = config.profile
        # profile -> True or False
-    if profile == True:
+    if profile is True:
        profile = ProfileStats(message=name)
        # profile -> object
    if type(profile) == str:

--- a/theano/gof/cmodule.py
+++ b/theano/gof/cmodule.py
@@ -635,7 +635,8 @@ class ModuleCache(object):
            self.stats[0] += 1
        return self.module_from_name[name]
-    def refresh(self, age_thresh_use=None, delete_if_problem=False, cleanup=True):
+    def refresh(self, age_thresh_use=None, delete_if_problem=False,
+                cleanup=True):
        """Update cache data by walking the cache directory structure.
        Load key.pkl files that have not been loaded yet.
@@ -691,7 +692,7 @@ class ModuleCache(object):
            files = os.listdir(root)
            if not files:
                rmtree_empty(root, ignore_nocleanup=True,
-                       msg="empty dir")
+                             msg="empty dir")
                continue
            if 'delete.me' in files:
                rmtree(root, ignore_nocleanup=True,

--- a/theano/sandbox/cuda/dnn.py
+++ b/theano/sandbox/cuda/dnn.py
@@ -203,7 +203,8 @@ def version():
    if version.v is None:
        f = theano.function([], DnnVersion()(),
-                            theano.Mode(optimizer=None))
+                            theano.Mode(optimizer=None),
+                            profile=False)
        version.v = f()
    return version.v
 version.v = None

--- a/theano/sandbox/cuda/nnet.py
+++ b/theano/sandbox/cuda/nnet.py
@@ -247,7 +247,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuOp):
    def c_code_cache_version(self):
        # return ()
-        return (7,)
+        return (8,)
    def c_code(self, node, nodename, inp, out, sub):
        dnll, sm, y_idx = inp
@@ -288,6 +288,13 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuOp):
                            "dnll.shape[0] != y_idx.shape[0]");
            %(fail)s;
        }
+        if (CudaNdarray_HOST_DIMS(%(sm)s)[0] !=
+            CudaNdarray_HOST_DIMS(%(y_idx)s)[0])
+        {
+            PyErr_SetString(PyExc_ValueError,
+                            "sm.shape[0] != y_idx.shape[0]");
+            %(fail)s;
+        }
        if ((NULL == %(dx)s)
            || (CudaNdarray_HOST_DIMS(%(dx)s)[0] !=
                CudaNdarray_HOST_DIMS(%(sm)s)[0])

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -20,6 +20,7 @@ from theano.gof.opt import LocalMetaOptimizer
 from theano.sandbox.cuda.basic_ops import (
    gpu_eye, gpu_contiguous,
    gpu_from_host, host_from_gpu, GpuFromHost, HostFromGpu,
+    GpuContiguous,
    GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce, GpuFlatten,
    GpuSubtensor, GpuAdvancedSubtensor1,
    GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20,
@@ -1997,6 +1998,19 @@ def local_gpualloc_memset_0(node):
            return [new_out]
+@register_opt()
+@local_optimizer([GpuContiguous])
+def local_gpu_contiguous_gpu_contiguous(node):
+    """
+    gpu_contiguous(gpu_contiguous(x)) -> gpu_contiguous(x)
+    """
+    if isinstance(node.op, GpuContiguous):
+        inp = node.inputs[0]
+        if inp.owner and isinstance(inp.owner.op, GpuContiguous):
+            return [inp]
 @register_opt()
 @local_optimizer([gpu_from_host, tensor.Eye])
 def local_gpu_eye(node):

--- a/theano/sandbox/cuda/tests/test_opt.py
+++ b/theano/sandbox/cuda/tests/test_opt.py
@@ -79,6 +79,18 @@ def test_local_remove_all_assert():
    assert len(a_op) == 1
+def test_local_gpu_contiguous_gpu_contiguous():
+    a = tensor.fmatrix()
+    o1 = basic_ops.gpu_contiguous(a)
+    o2 = basic_ops.gpu_contiguous(o1)
+    f1 = theano.function([a], o1, mode=mode_with_gpu)
+    f2 = theano.function([a], o2, mode=mode_with_gpu)
+    assert 1 == len([node for node in f1.maker.fgraph.toposort()
+                     if isinstance(node.op, basic_ops.GpuContiguous)])
+    assert 1 == len([node for node in f2.maker.fgraph.toposort()
+                     if isinstance(node.op, basic_ops.GpuContiguous)])
 def test_int_pow():
    a = CudaNdarrayType([False])()

--- a/theano/sandbox/gpuarray/nnet.py
+++ b/theano/sandbox/gpuarray/nnet.py
@@ -295,7 +295,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
    def c_code_cache_version(self):
        # return ()
-        return (7,)
+        return (8,)
    def c_headers(self):
        return ['cuda.h', '<gpuarray/extension.h>', '<numpy_compat.h>']
@@ -351,6 +351,13 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
                            "dnll.shape[0] != y_idx.shape[0]");
            %(fail)s;
        }
+        if (PyGpuArray_DIMS(%(sm)s)[0] !=
+            PyGpuArray_DIMS(%(y_idx)s)[0])
+        {
+            PyErr_SetString(PyExc_ValueError,
+                            "sm.shape[0] != y_idx.shape[0]");
+            %(fail)s;
+        }
        if ((NULL == %(dx)s)
            || (PyGpuArray_DIMS(%(dx)s)[0] !=
                PyGpuArray_DIMS(%(sm)s)[0])

--- a/theano/sandbox/gpuarray/opt.py
+++ b/theano/sandbox/gpuarray/opt.py
@@ -20,7 +20,7 @@ from theano.tensor.nnet.conv import ConvOp
 from .type import GpuArrayType, GpuArrayConstant
 from .basic_ops import (host_from_gpu, gpu_from_host,
                        HostFromGpu, GpuFromHost,
-                        GpuSplit,
+                        GpuSplit, GpuContiguous,
                        gpu_alloc, GpuAlloc, GpuReshape,
                        GpuEye, gpu_join, GpuJoin)
 from .blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer
@@ -205,6 +205,19 @@ def local_gpualloc_memset_0(node):
            return [new_out]
+@register_opt()
+@local_optimizer([GpuContiguous])
+def local_gpu_contiguous_gpu_contiguous(node):
+    """
+    gpu_contiguous(gpu_contiguous(x)) -> gpu_contiguous(x)
+    """
+    if isinstance(node.op, GpuContiguous):
+        inp = node.inputs[0]
+        if inp.owner and isinstance(inp.owner.op, GpuContiguous):
+            return [inp]
 @register_opt('fast_compile')
 @op_lifter([tensor.Reshape])
 def local_gpureshape(node):

--- a/theano/sandbox/gpuarray/tests/test_opt.py
+++ b/theano/sandbox/gpuarray/tests/test_opt.py
@@ -7,6 +7,7 @@ from theano.tests.unittest_tools import SkipTest
 from theano.tensor.tests import test_basic
 import theano.sandbox.gpuarray
+from .. import basic_ops
 from ..type import GpuArrayType, gpuarray_shared_constructor
 from ..basic_ops import (GpuAlloc, GpuReshape, gpu_alloc,
                         gpu_from_host, host_from_gpu)
@@ -48,6 +49,18 @@ def test_local_remove_all_assert():
    assert len(a_op) == 1
+def test_local_gpu_contiguous_gpu_contiguous():
+    a = tensor.fmatrix()
+    o1 = basic_ops.gpu_contiguous(a)
+    o2 = basic_ops.gpu_contiguous(o1)
+    f1 = theano.function([a], o1, mode=mode_with_gpu)
+    f2 = theano.function([a], o2, mode=mode_with_gpu)
+    assert 1 == len([node for node in f1.maker.fgraph.toposort()
+                     if isinstance(node.op, basic_ops.GpuContiguous)])
+    assert 1 == len([node for node in f2.maker.fgraph.toposort()
+                     if isinstance(node.op, basic_ops.GpuContiguous)])
 def test_flatten():
    m = theano.tensor.fmatrix()
    f = theano.function([m], m.flatten(), mode=mode_with_gpu)

--- a/theano/tensor/blas.py
+++ b/theano/tensor/blas.py
@@ -177,7 +177,8 @@ def default_blas_ldflags():
    # Now test it!
    x = theano.tensor.fmatrix()
    try:
-        theano.function([x], theano.tensor.blas._dot22(x,x))
+        theano.function([x], theano.tensor.blas._dot22(x,x),
+                        profile=False)
    except Exception as e:
        print e
        yield ""

--- a/theano/tensor/nnet/nnet.py
+++ b/theano/tensor/nnet/nnet.py
@@ -1108,7 +1108,7 @@ class CrossentropySoftmax1HotWithBiasDx(gof.Op):
        return [g_dy, g_sm, g_y_idx]
    def c_code_cache_version(self):
-        return (4,)
+        return (5,)
    def c_code(self, node, name, inp, out, sub):
        dnll, sm, y_idx = inp
@@ -1167,6 +1167,13 @@ class CrossentropySoftmax1HotWithBiasDx(gof.Op):
                         (long int)PyArray_DIMS(%(y_idx)s)[0]);
            %(fail)s;
        }
+        if (PyArray_DIMS(%(sm)s)[0] !=
+            PyArray_DIMS(%(y_idx)s)[0])
+        {
+            PyErr_SetString(PyExc_ValueError,
+                            "sm.shape[0] != y_idx.shape[0]");
+            %(fail)s;
+        }
        if ((NULL == %(dx)s)
            || (PyArray_DIMS(%(dx)s)[0] != PyArray_DIMS(%(sm)s)[0])
            || (PyArray_DIMS(%(dx)s)[1] != PyArray_DIMS(%(sm)s)[1]))