Merge pull request #3993 from abergeron/fix_buildbot

Fix the last DLT (Deep Learning Tutorials) problem(s) with the new gpuarray backend.

Merge pull request #3993 from abergeron/fix_buildbot
6a3b192d · Frédéric Bastien · 709c9440 · 53151276 · 6a3b192d · 6a3b192d
--- a/theano/sandbox/gpuarray/dnn.py
+++ b/theano/sandbox/gpuarray/dnn.py
@@ -22,8 +22,7 @@ from theano.tensor.signal.pool import (
 from . import pygpu
 from .type import get_context, gpu_context_type, list_contexts, GpuArrayType
 from .basic_ops import (as_gpuarray_variable, infer_context_name,
-                        gpu_contiguous, HostFromGpu,
-                        GpuAllocEmpty, empty_like)
+                        gpu_contiguous, GpuAllocEmpty, empty_like)
 from .elemwise import GpuElemwise

 # These don't exist in gpuarray
@@ -892,6 +891,8 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
 def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
                   subsample=(1, 1), conv_mode='conv'):
    ctx_name = infer_context_name(img, topgrad)
+    img = as_gpuarray_variable(img, ctx_name)
+    topgrad = as_gpuarray_variable(topgrad, ctx_name)
    img = gpu_contiguous(img)
    topgrad = gpu_contiguous(topgrad)
    kerns_shp = as_tensor_variable(kerns_shp)
@@ -904,6 +905,8 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
 def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
                  subsample=(1, 1), conv_mode='conv'):
    ctx_name = infer_context_name(kerns, topgrad)
+    kerns = as_gpuarray_variable(kerns, ctx_name)
+    topgrad = as_gpuarray_variable(topgrad, ctx_name)
    kerns = gpu_contiguous(kerns)
    topgrad = gpu_contiguous(topgrad)
    img_shp = as_tensor_variable(img_shp)
@@ -1291,17 +1294,16 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
 @local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights,
                  AbstractConv2d_gradInputs])
 def local_abstractconv_cudnn(node):
-    if (not isinstance(node.op, (AbstractConv2d, AbstractConv2d_gradWeights,
+    if (not isinstance(node.op, (AbstractConv2d,
+                                 AbstractConv2d_gradWeights,
                                 AbstractConv2d_gradInputs))):
        return None
+
    inp1 = node.inputs[0]
    inp2 = node.inputs[1]

    if (not isinstance(inp1.type, GpuArrayType) or
-            not isinstance(inp2.type, GpuArrayType)):
-        return None
-
-    if not dnn_available(inp1.type.context_name):
+            not dnn_available(inp1.type.context_name)):
        return None

    if node.op.filter_flip:
@@ -1406,12 +1408,12 @@ def local_pool_dnn_alternative(node, ctx_name):
    if not node.op.ignore_border:
        return
    img, = node.inputs
+    img = as_gpuarray_variable(img, ctx_name)
    ds = node.op.ds
    stride = node.op.st
    pad = node.op.padding
    mode = node.op.mode
-    return dnn_pool(gpu_contiguous(img.owner.inputs[0]),
-                    ds, stride=stride, pad=pad, mode=mode)
+    return dnn_pool(gpu_contiguous(img), ds, stride=stride, pad=pad, mode=mode)


 @register_opt('cudnn')
@@ -1422,6 +1424,9 @@ def local_pool_dnn_grad_stride(node, ctx_name):
    if not node.op.ignore_border:
        return
    inp, out, out_grad = node.inputs
+    inp = as_gpuarray_variable(inp, ctx_name)
+    out = as_gpuarray_variable(out, ctx_name)
+    out_grad = as_gpuarray_variable(out_grad, ctx_name)
    ds = node.op.ds
    st = node.op.st
    pad = node.op.padding
@@ -1442,6 +1447,8 @@ def local_avg_pool_dnn_grad_stride(node, ctx_name):
    if not node.op.ignore_border:
        return
    inp, out_grad = node.inputs
+    inp = as_gpuarray_variable(inp, ctx_name)
+    out_grad = as_gpuarray_variable(out_grad, ctx_name)
    ds = node.op.ds
    st = node.op.st
    pad = node.op.padding
@@ -1530,8 +1537,7 @@ def local_softmax_dnn_grad(node, ctx_name):
        return
    ins = []
    for n in node.inputs:
-        if isinstance(n.owner.op, HostFromGpu):
-            n = n.owner.inputs[0]
+        n = as_gpuarray_variable(n, ctx_name)
        if n.ndim != 2:
            return
        ins.append(n.dimshuffle(0, 1, 'x', 'x'))

--- a/theano/sandbox/gpuarray/opt.py
+++ b/theano/sandbox/gpuarray/opt.py
@@ -14,10 +14,8 @@ from theano.gof.optdb import LocalGroupDB
 from theano.scalar.basic import Scalar, Pow, Cast
 from theano.scan_module import scan_utils, scan_op, scan_opt

-from theano.tensor import as_tensor_variable
 from theano.tensor.nnet.conv import ConvOp
-from theano.tensor.nnet.abstract_conv import (BaseAbstractConv2d,
-                                              AbstractConv2d,
+from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
                                              AbstractConv2d_gradWeights,
                                              AbstractConv2d_gradInputs)

@@ -329,8 +327,7 @@ def local_gpureshape(node, context_name):
 @register_opt('fast_compile')
 @op_lifter([tensor.Rebroadcast])
 def local_gpu_rebroadcast(node, context_name):
-    if isinstance(node.inputs[0].owner.op, HostFromGpu):
-        return node.op(node.inputs[0].owner.inputs[0])
+    return node.op(as_gpuarray_variable(node.inputs[0], context_name))


 @register_opt('fast_compile')
@@ -453,7 +450,7 @@ def gpu_print_wrapper(op, cnda):
 @op_lifter([tensor.printing.Print])
 def local_gpu_print_op(node, context_name):
    x, = node.inputs
-    gpu_x, = x.owner.inputs
+    gpu_x = as_gpuarray_variable(x, context_name=context_name)
    new_op = node.op.__class__(global_fn=gpu_print_wrapper)
    new_op.old_op = node.op
    return new_op(gpu_x)
@@ -786,10 +783,9 @@ def local_gpua_softmaxwithbias(node, context_name):
 @register_opt('fast_compile')
 @op_lifter([theano.tensor.opt.Assert])
 def local_assert(node, context_name):
-    if (node.inputs[0].owner and
-            isinstance(node.inputs[0].owner.op, HostFromGpu)):
-        return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0],
-                                      *node.inputs[1:]))]
+    return [host_from_gpu(node.op(as_gpuarray_variable(node.inputs[0],
+                                                       context_name),
+                                  *node.inputs[1:]))]


 @register_opt('fast_compile')
@@ -819,26 +815,6 @@ def local_lift_abstractconv2d(node, context_name):
                                   context_name=context_name)
    return [node.op(*inps)]

-
-# This will deal with ops that don't have an explicit transfer but
-# have one of their inputs on the GPU already and the other not on the
-# GPU (to avoid endlessly replacing things).
-@register_opt('fast_compile')
-@local_optimizer([AbstractConv2d,
-                  AbstractConv2d_gradWeights,
-                  AbstractConv2d_gradInputs])
-def local_gpu_abstractconv2d(node):
-    if isinstance(node.op, BaseAbstractConv2d):
-        if ((isinstance(node.inputs[0].type, GpuArrayType) or
-             isinstance(node.inputs[1].type, GpuArrayType)) and
-            not (isinstance(node.inputs[0].type, GpuArrayType) or
-                 isinstance(node.inputs[1].type, GpuArrayType))):
-            inps = list(node.inputs)
-            ctx_name = infer_context_name(inps[0], inps[1])
-            inps[0] = as_gpuarray_variable(inps[0], context_name=ctx_name)
-            inps[1] = as_gpuarray_variable(inps[1], context_name=ctx_name)
-            return as_tensor_variable(node.op(*inps))
-
 # Register this here so that it goes after the abstract lifting
 register_opt()(conv_groupopt)


--- a/theano/tensor/nnet/abstract_conv.py
+++ b/theano/tensor/nnet/abstract_conv.py
@@ -448,6 +448,11 @@ class AbstractConv2d(BaseAbstractConv2d):
                                             filter_flip)

    def make_node(self, img, kern):
+        # Give kern img's Type, keeping its own dtype/broadcastable
+        ktype = img.type.clone(dtype=kern.dtype,
+                               broadcastable=kern.broadcastable)
+        kern = ktype.filter_variable(kern)
+
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if kern.type.ndim != 4:
@@ -541,6 +546,11 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):

    # Update shape/height_width
    def make_node(self, img, topgrad, shape):
+        # Give topgrad img's Type, keeping its own dtype/broadcastable
+        gtype = img.type.clone(dtype=topgrad.dtype,
+                               broadcastable=topgrad.broadcastable)
+        topgrad = gtype.filter_variable(topgrad)
+
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if topgrad.type.ndim != 4:
@@ -628,6 +638,11 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):

    # Update shape/height_width
    def make_node(self, kern, topgrad, shape):
+        # Give topgrad kern's Type, keeping its own dtype/broadcastable
+        gtype = kern.type.clone(dtype=topgrad.dtype,
+                                broadcastable=topgrad.broadcastable)
+        topgrad = gtype.filter_variable(topgrad)
+
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')
        if topgrad.type.ndim != 4: