提交 350f5d55 · 作者: abergeron

Merge pull request #4222 from nouiz/opt_fix

Opt fix
@@ -21,6 +21,7 @@ from theano.sandbox.cuda import dnn_version as version
 from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
                                            host_from_gpu,
                                            gpu_contiguous, HostFromGpu,
+                                           gpu_alloc, GpuAlloc,
                                            gpu_alloc_empty, GpuAllocEmpty,
                                            GpuElemwise)
 from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax,
@@ -2246,9 +2247,13 @@ if True:
         inputs = list(node.inputs)
         dest = inputs[2]
         if (dest.owner and
-                isinstance(dest.owner.op, GpuAllocEmpty) and
+                type(dest.owner.op) is GpuAllocEmpty and
                 len(dest.clients) > 1):
             inputs[2] = gpu_alloc_empty(*dest.owner.inputs)
+        elif (dest.owner and
+                type(dest.owner.op) is GpuAlloc and
+                len(dest.clients) > 1):
+            inputs[2] = gpu_alloc(*dest.owner.inputs)
         return [GpuDnnConv(algo=node.op.algo, inplace=True)(*inputs)]

     @local_optimizer([GpuDnnConvGradW], inplace=True)
@@ -2258,9 +2263,13 @@ if True:
         inputs = list(node.inputs)
         dest = inputs[2]
         if (dest.owner and
-                isinstance(dest.owner.op, GpuAllocEmpty) and
+                type(dest.owner.op) is GpuAllocEmpty and
                 len(dest.clients) > 1):
             inputs[2] = gpu_alloc_empty(*dest.owner.inputs)
+        elif (dest.owner and
+                type(dest.owner.op) is GpuAlloc and
+                len(dest.clients) > 1):
+            inputs[2] = gpu_alloc(*dest.owner.inputs)
         return [GpuDnnConvGradW(inplace=True)(*inputs)]

     @local_optimizer([GpuDnnConvGradI], inplace=True)
@@ -2270,9 +2279,13 @@ if True:
         inputs = list(node.inputs)
         dest = inputs[2]
         if (dest.owner and
-                isinstance(dest.owner.op, GpuAllocEmpty) and
+                type(dest.owner.op) is GpuAllocEmpty and
                 len(dest.clients) > 1):
             inputs[2] = gpu_alloc_empty(*dest.owner.inputs)
+        elif (dest.owner and
+                type(dest.owner.op) is GpuAlloc and
+                len(dest.clients) > 1):
+            inputs[2] = gpu_alloc(*dest.owner.inputs)
         return [GpuDnnConvGradI(inplace=True)(*inputs)]

     optdb.register('local_dnn_conv_inplace',
......
@@ -1942,7 +1942,9 @@ def local_gpu_downsample_factor_max_grad_grad(node):
     if isinstance(node.op, pool.DownsampleFactorMaxGradGrad):
         assert node.op.__props__ == ('ds', 'ignore_border', 'st',
                                      'padding', 'mode')
-        if node.op.padding != (0, 0) or node.op.mode != 'max':
+        if (node.op.padding != (0, 0) or
+                node.op.mode != 'max' or
+                node.op.st != (1, 1)):
             return
         x, z, gx = node.inputs
         if (x.owner and isinstance(x.owner.op, HostFromGpu)):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论