Merge pull request #3011 from nouiz/mixed

Mixed stuff.

Merge pull request #3011 from nouiz/mixed
2425cd11 · Pascal Lamblin · c75f7892 · e067dd67 · 2425cd11 · 2425cd11
--- a/theano/gof/link.py
+++ b/theano/gof/link.py
@@ -144,8 +144,11 @@ def raise_with_op(node, thunk=None, exc_info=None, storage_map=None):
        clients = [[c[0] for c in var.clients] for var in node.outputs]
        detailed_err_msg += ("Inputs shapes: %s" % shapes +
                             "\nInputs strides: %s" % strides +
-                             "\nInputs values: %s" % scalar_values +
+                             "\nInputs values: %s" % scalar_values)
-                             "\nOutputs clients: %s\n" % clients)
+        if hasattr(node.op, '__input_name__'):
+            detailed_err_msg += "\nInputs name: %s\n" % str(node.op.__input_name__)
+        detailed_err_msg += "\nOutputs clients: %s\n" % clients
    else:
        hints.append(
            "HINT: Use another linker then the c linker to"

--- a/theano/misc/pkl_utils.py
+++ b/theano/misc/pkl_utils.py
@@ -96,9 +96,9 @@ def load_reduce(self):
            except Exception:
                pass
-        if self.is_verbose:
+#        if self.is_verbose:
-            print(sys.exc_info())
+#            print(sys.exc_info())
-            print(func, args)
+#            print(func, args)
        raise

--- a/theano/sandbox/cuda/dnn.py
+++ b/theano/sandbox/cuda/dnn.py
@@ -377,6 +377,8 @@ class GpuDnnConv(DnnBase, COp):
    :param descr: the convolution descriptor
    """
    __props__ = ('workmem', 'inplace')
+    __input_name__ = ('image', 'kernel', 'output',
+                      'descriptor', 'alpha', 'beta')
    def __init__(self, workmem=None, inplace=False):
        """
@@ -501,6 +503,7 @@ class GpuDnnConvGradW(DnnBase, COp):
    """
    __props__ = ('inplace',)
+    __input_name__ = ('image', 'grad', 'output', 'descriptor', 'alpha', 'beta')
    def __init__(self, inplace=False):
        COp.__init__(self, ["dnn_base.c", "dnn_conv_base.c", "dnn_gw.c"],
@@ -573,6 +576,8 @@ class GpuDnnConvGradI(DnnBase, COp):
    """
    __props__ = ('inplace',)
+    __input_name__ = ('kernel', 'grad', 'output',
+                      'descriptor', 'alpha', 'beta')
    def __init__(self, inplace=False):
        COp.__init__(self, ["dnn_base.c", "dnn_conv_base.c", "dnn_gi.c"],

--- a/theano/sandbox/cuda/dnn_fwd.c
+++ b/theano/sandbox/cuda/dnn_fwd.c
@@ -5,6 +5,11 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
                         CudaNdarray *om, cudnnConvolutionDescriptor_t desc,
                         float alpha, float beta, CudaNdarray **output) {
  cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
+  if (CudaNdarray_HOST_DIMS(input)[1] != CudaNdarray_HOST_DIMS(kerns)[1]) {
+    PyErr_SetString(PyExc_ValueError,
+		    "GpuDnnConv images and kernel must have the same stack size\n");
+    return 1;
+  }
  if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1)
    return 1;

--- a/theano/sandbox/cuda/dnn_gi.c
+++ b/theano/sandbox/cuda/dnn_gi.c
@@ -6,6 +6,12 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
                        float alpha, float beta, CudaNdarray **input) {
  cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
+  if (CudaNdarray_HOST_DIMS(im)[1] != CudaNdarray_HOST_DIMS(kerns)[1]) {
+    PyErr_SetString(PyExc_ValueError,
+		    "GpuDnnConv images and kernel must have the same stack size\n");
+    return 1;
+  }
  if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1)
    return 1;
  if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)

--- a/theano/sandbox/cuda/dnn_gw.c
+++ b/theano/sandbox/cuda/dnn_gw.c
@@ -6,6 +6,12 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
                        float alpha, float beta, CudaNdarray **kerns) {
  cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
+  if (CudaNdarray_HOST_DIMS(input)[1] != CudaNdarray_HOST_DIMS(km)[1]) {
+    PyErr_SetString(PyExc_ValueError,
+		    "GpuDnnConv images and kernel must have the same stack size\n");
+    return 1;
+  }
  if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1)
    return 1;
  if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1)

--- a/theano/sandbox/cuda/tests/CudaNdarray_py3.pkl
+++ b/theano/sandbox/cuda/tests/CudaNdarray_py3.pkl
--- a/theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
+++ b/theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
@@ -184,11 +184,6 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
        kern = kern[:, :, ::kern_stride[0], ::kern_stride[1]]
        npy_kern = npy_kern[:, :, ::kern_stride[0], ::kern_stride[1]]
-    t2 = None
-    t0 = time.time()
-    cpuval = py_conv(npy_img, npy_kern, mode, subsample)
-    t1 = time.time()
    i = cuda.CudaNdarrayType(
        broadcastable=[sh == 1 for sh in npy_img.shape])()
    k = cuda.CudaNdarrayType(
@@ -198,21 +193,28 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
                                          version=version,
                                          verbose=verbose,
                                          kshp=compile_kshp)(i, k)
-    assert [(sh == 1) is br for
-            sh, br in zip(cpuval.shape[:2], op.type.broadcastable[:2])]
    f = theano.function([i, k], op, mode=theano_mode)
    if cls is not None:
        assert any([isinstance(node.op, cls)
                    for node in f.maker.fgraph.toposort()]), "Cannot find class %r in %r" % (cls, f.maker.fgraph.toposort())
-    gpuval = f(img, kern)
    t2 = time.time()
+    gpuval = f(img, kern)
+    t3 = time.time()
    for i in range(nb_iter):
        gpuval2 = f(img, kern)
        assert (numpy.asarray(gpuval) == numpy.asarray(gpuval2)).all()
    gpuval = numpy.asarray(gpuval)
+    # Compute the CPU value after the GPU value so any GPU error is hit first.
+    t0 = time.time()
+    cpuval = py_conv(npy_img, npy_kern, mode, subsample)
+    t1 = time.time()
    assert gpuval.shape == cpuval.shape, ("shape mismatch", gpuval.shape, cpuval.shape)
    assert_allclose(cpuval, gpuval, rtol=rtol, atol=atol)
    assert numpy.all(numpy.isfinite(gpuval)), gpuval
+    assert [(sh == 1) is br for
+            sh, br in zip(cpuval.shape[:2], op.type.broadcastable[:2])]
    if (t2 is not None):
        if mode == 'valid':
@@ -222,7 +224,7 @@ def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
                         kshape[3] * ishape[2] * ishape[3] * 2)
        approx_fp /= 1e6
        cpu_mflops = approx_fp / (t1 - t0)
-        gpu_mflops = approx_fp / (t2 - t1)
+        gpu_mflops = approx_fp / (t3 - t2)
        if verbose > 0:
            print('%15s' % str(ishape), '%15s' % str(kshape), end=' ', file=sys.stdout)
            print('%12.5f  %7.2f %7.2f %7.1f' % (approx_fp,
@@ -410,6 +412,12 @@ def test_dnn_valid():
        yield t
+def test_dnn_valid_err():
+    assert_raises(ValueError, _params_allgood, (1, 2, 4, 4), (1, 1, 2, 2),
+                  'valid', theano_mode=theano_mode.including("cudnn"),
+                  cls=DnnBase)
 def test_default_conv():
    """Just test that we introduce the right GPU convolution
    version.

--- a/theano/sandbox/cuda/tests/test_dnn.py
+++ b/theano/sandbox/cuda/tests/test_dnn.py
@@ -475,7 +475,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
            dtype='float32'
        )
        kern_vals = numpy.asarray(
-            numpy.random.rand(3, 4, 5, 6),
+            numpy.random.rand(13, 14, 15, 16),
            dtype='float32'
        )

--- a/theano/sandbox/cuda/tests/test_type.py
+++ b/theano/sandbox/cuda/tests/test_type.py
@@ -6,6 +6,8 @@ from nose.tools import assert_raises
 import numpy
 from theano import config
+from theano.compat import PY3
+from theano.misc.pkl_utils import CompatUnpickler
 from theano.sandbox.cuda import cuda_available
 if cuda_available:
@@ -30,16 +32,18 @@ def test_unpickle_cudandarray_as_numpy_ndarray_flag0():
    try:
        testfile_dir = os.path.dirname(os.path.realpath(__file__))
        fname = 'CudaNdarray.pkl'
-        if sys.version_info[0] == 3:
-            fname = 'CudaNdarray_py3.pkl'
        with open(os.path.join(testfile_dir, fname), 'rb') as fp:
+            if PY3:
+                u = CompatUnpickler(fp, encoding="latin1")
+            else:
+                u = CompatUnpickler(fp)
            if cuda_available:
-                mat = cPickle.load(fp)
+                mat = u.load()
                assert isinstance(mat, CudaNdarray)
                assert numpy.asarray(mat)[0] == -42.0
            else:
-                assert_raises(ImportError, cPickle.load, fp)
+                assert_raises(ImportError, u.load)
    finally:
        config.experimental.unpickle_gpu_on_cpu = oldflag
@@ -52,11 +56,13 @@ def test_unpickle_cudandarray_as_numpy_ndarray_flag1():
    try:
        testfile_dir = os.path.dirname(os.path.realpath(__file__))
        fname = 'CudaNdarray.pkl'
-        if sys.version_info[0] == 3:
-            fname = 'CudaNdarray_py3.pkl'
        with open(os.path.join(testfile_dir, fname), 'rb') as fp:
-            mat = cPickle.load(fp)
+            if PY3:
+                u = CompatUnpickler(fp, encoding="latin1")
+            else:
+                u = CompatUnpickler(fp)
+            mat = u.load()
        assert isinstance(mat, numpy.ndarray)
        assert mat[0] == -42.0

--- a/theano/tensor/blas.py
+++ b/theano/tensor/blas.py
@@ -176,13 +176,18 @@ def default_blas_ldflags():
    yield flags
    # Now test it!
-    x = theano.tensor.fmatrix()
    try:
-        theano.function([x], theano.tensor.blas._dot22(x,x),
+        old = config.compute_test_value
-                        profile=False)
+        config.compute_test_value = 'off'
-    except Exception as e:
+        x = theano.tensor.fmatrix()
-        print(e)
+        try:
-        yield ""
+            theano.function([x], theano.tensor.blas._dot22(x, x),
+                            profile=False)
+        except Exception as e:
+            print(e)
+            yield ""
+    finally:
+        config.compute_test_value = old
 def static_default_blas_flags():

--- a/theano/tensor/blas_c.py
+++ b/theano/tensor/blas_c.py
@@ -345,7 +345,7 @@ def use_c_ger(node):
 @local_optimizer([CGer(False)])
 def make_c_ger_destructive(node):
-    if node.op == cger_no_inplace:
+    if isinstance(node.op, CGer) and not node.op.destructive:
        return [cger_inplace(*node.inputs)]
@@ -800,7 +800,7 @@ def use_c_gemv(node):
 @local_optimizer([CGemv(inplace=False)])
 def make_c_gemv_destructive(node):
-    if node.op == cgemv_no_inplace:
+    if isinstance(node.op, CGemv) and not node.op.inplace:
        return [cgemv_inplace(*node.inputs)]

--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -233,7 +233,8 @@ def inplace_elemwise_optimizer_op(OP):
        for node in list(graph.io_toposort(fgraph.inputs, fgraph.outputs)):
            op = node.op
-            if not isinstance(op, OP):
+            # gpuarray's GpuElemwise inherits from Elemwise, so match the exact type.
+            if not type(op) == OP:
                continue
            baseline = op.inplace_pattern
            protected_inputs = [