Commit 8020a38f authored by Frédéric Bastien

Merge pull request #3912 from lamblin/fix_abstractconv_grad_type

Make sure grads of abstract conv ops have the right type
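
The invariant being enforced, distilled from the tests in this diff into a minimal CPU-only sketch (variable names are illustrative):

import theano
from theano import tensor
import theano.tensor.nnet.abstract_conv as conv

# The gradient of a conv2d output w.r.t. each of its inputs must have
# exactly that input's type: same dtype, same broadcastable pattern
# and, for GPU variables, the same device.
x = tensor.ftensor4()
w = tensor.ftensor4()
out = conv.conv2d(x, w)
gx, gw = theano.grad(out.sum(), wrt=(x, w))
assert gx.type == x.type
assert gw.type == w.type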
@@ -3,6 +3,7 @@ import numpy
import itertools
import theano
from theano import tensor
from theano.tests import unittest_tools as utt
import theano.tensor.nnet.abstract_conv as conv
from theano.sandbox.cuda import float32_shared_constructor as gpu_shared
@@ -265,3 +266,55 @@ class TestConv2d(unittest.TestCase):
                provide_shape=provide_shape, border_mode=b,
                filter_flip=flip,
                target_op=GpuCorrMM_gradInputs)

    def test_grad_types(self):
        # This function simply tests the behaviour of the AbstractConv
        # Ops, not their optimizations
        cpu_input = tensor.ftensor4()
        cpu_filters = tensor.ftensor4()
        cpu_topgrad = tensor.ftensor4()
        gpu_input = cuda.ftensor4()
        gpu_filters = cuda.ftensor4()
        gpu_topgrad = cuda.ftensor4()
        out_shape = tensor.lvector()

        # Check the gradient of the forward conv2d
        for input, filters in itertools.product(
                (cpu_input, gpu_input),
                (cpu_filters, gpu_filters)):
            output = conv.conv2d(input, filters)
            grad_input, grad_filters = theano.grad(output.sum(),
                                                   wrt=(input, filters))
            assert grad_input.type == input.type, (
                grad_input, grad_input.type, input, input.type)
            assert grad_filters.type == filters.type, (
                grad_filters, grad_filters.type, filters, filters.type)

        # Check the gradient of gradweight
        for input, topgrad in itertools.product(
                (cpu_input, gpu_input),
                (cpu_topgrad, gpu_topgrad)):
            grad_filters = conv.AbstractConv2d_gradWeights()(
                input, topgrad, out_shape)
            grad_input, grad_topgrad = theano.grad(grad_filters.sum(),
                                                   wrt=(input, topgrad))
            assert grad_input.type == input.type, (
                grad_input, grad_input.type, input, input.type)
            assert grad_topgrad.type == topgrad.type, (
                grad_topgrad, grad_topgrad.type, topgrad, topgrad.type)

        # Check the gradient of gradinputs
        for filters, topgrad in itertools.product(
                (cpu_filters, gpu_filters),
                (cpu_topgrad, gpu_topgrad)):
            grad_input = conv.AbstractConv2d_gradInputs()(
                filters, topgrad, out_shape)
            grad_filters, grad_topgrad = theano.grad(grad_input.sum(),
                                                     wrt=(filters, topgrad))
            assert grad_filters.type == filters.type, (
                grad_filters, grad_filters.type, filters, filters.type)
            assert grad_topgrad.type == topgrad.type, (
                grad_topgrad, grad_topgrad.type, topgrad, topgrad.type)
@@ -5,10 +5,12 @@ import itertools
from nose.plugins.skip import SkipTest
import theano
from theano import tensor
from theano.tests import unittest_tools as utt
import theano.tensor.nnet.abstract_conv as conv
from theano.compile import shared as cpu_shared
from ..type import gpuarray_shared_constructor as gpu_shared
from ..type import GpuArrayType
from ..dnn import (
    dnn_available, dnn_conv, dnn_gradweight, dnn_gradinput,
    GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI)
@@ -20,6 +22,9 @@ from theano.tensor.nnet import ConvGrad3D, ConvTransp3D
from .config import mode_with_gpu, mode_without_gpu, test_ctx_name

gpu_ftensor4 = GpuArrayType(dtype='float32', broadcastable=(False,) * 4)


class TestConv2d(unittest.TestCase):
    def setUp(self):
@@ -348,3 +353,55 @@ class TestConv2d(unittest.TestCase):
                provide_shape=provide_shape,
                border_mode=b,
                filter_flip=flip)

    def test_grad_types(self):
        # This function simply tests the behaviour of the AbstractConv
        # Ops, not their optimizations
        cpu_input = tensor.ftensor4()
        cpu_filters = tensor.ftensor4()
        cpu_topgrad = tensor.ftensor4()
        gpu_input = gpu_ftensor4()
        gpu_filters = gpu_ftensor4()
        gpu_topgrad = gpu_ftensor4()
        out_shape = tensor.lvector()

        # Check the gradient of the forward conv2d
        for input, filters in itertools.product(
                (cpu_input, gpu_input),
                (cpu_filters, gpu_filters)):
            output = conv.conv2d(input, filters)
            grad_input, grad_filters = theano.grad(output.sum(),
                                                   wrt=(input, filters))
            assert grad_input.type == input.type, (
                grad_input, grad_input.type, input, input.type)
            assert grad_filters.type == filters.type, (
                grad_filters, grad_filters.type, filters, filters.type)

        # Check the gradient of gradweight
        for input, topgrad in itertools.product(
                (cpu_input, gpu_input),
                (cpu_topgrad, gpu_topgrad)):
            grad_filters = conv.AbstractConv2d_gradWeights()(
                input, topgrad, out_shape)
            grad_input, grad_topgrad = theano.grad(grad_filters.sum(),
                                                   wrt=(input, topgrad))
            assert grad_input.type == input.type, (
                grad_input, grad_input.type, input, input.type)
            assert grad_topgrad.type == topgrad.type, (
                grad_topgrad, grad_topgrad.type, topgrad, topgrad.type)

        # Check the gradient of gradinputs
        for filters, topgrad in itertools.product(
                (cpu_filters, gpu_filters),
                (cpu_topgrad, gpu_topgrad)):
            grad_input = conv.AbstractConv2d_gradInputs()(
                filters, topgrad, out_shape)
            grad_filters, grad_topgrad = theano.grad(grad_input.sum(),
                                                     wrt=(filters, topgrad))
            assert grad_filters.type == filters.type, (
                grad_filters, grad_filters.type, filters, filters.type)
            assert grad_topgrad.type == topgrad.type, (
                grad_topgrad, grad_topgrad.type, topgrad, topgrad.type)
@@ -337,8 +337,12 @@ class AbstractConv2d(BaseAbstractConv2d):
        # Make sure that the broadcastable pattern of the inputs is used
        # for the gradients, even if the grad opts are not able to infer
        # that the dimensions are broadcastable.
        # Also make sure that the gradient lives on the same device as
        # the corresponding input.
        d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
        d_bottom = bottom.type.filter_variable(d_bottom)
        d_weights = patternbroadcast(d_weights, weights.broadcastable)
        d_weights = weights.type.filter_variable(d_weights)
        return d_bottom, d_weights

    def infer_shape(self, node, input_shapes):
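
For context, filter_variable is the mechanism that moves a gradient back to the expected type: it returns a variable whose type equals the calling type, inserting a conversion (such as a host-to-GPU transfer) when one is available, and raising a TypeError otherwise. A hedged, symbolic-only sketch with the old cuda backend (no GPU execution is needed; host_grad is an illustrative stand-in for a gradient that was built as a host tensor):

import theano.sandbox.cuda as cuda
from theano import tensor

gpu_x = cuda.CudaNdarrayType(broadcastable=(False,) * 4)()
host_grad = tensor.ftensor4()
# filter_variable wraps the host variable in a transfer so that the
# result's type matches gpu_x's type again.
fixed = gpu_x.type.filter_variable(host_grad)
assert fixed.type == gpu_x.type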
@@ -414,8 +418,12 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
        # Make sure that the broadcastable pattern of the inputs is used
        # for the gradients, even if the grad opts are not able to infer
        # that the dimensions are broadcastable.
        # Also make sure that the gradient lives on the same device as
        # the corresponding input.
        d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
        d_bottom = bottom.type.filter_variable(d_bottom)
        d_top = patternbroadcast(d_top, top.broadcastable)
        d_top = top.type.filter_variable(d_top)
        d_height_width = (theano.gradient.DisconnectedType()(),)
        return (d_bottom, d_top) + d_height_width
@@ -491,8 +499,12 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
        # Make sure that the broadcastable pattern of the inputs is used
        # for the gradients, even if the grad opts are not able to infer
        # that the dimensions are broadcastable.
        # Also make sure that the gradient lives on the same device as
        # the corresponding input.
        d_weights = patternbroadcast(d_weights, weights.broadcastable)
        d_weights = weights.type.filter_variable(d_weights)
        d_top = patternbroadcast(d_top, top.broadcastable)
        d_top = top.type.filter_variable(d_top)
        d_height_width = (theano.gradient.DisconnectedType()(),)
        return (d_weights, d_top) + d_height_width
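
A related detail in these hunks: the gradient slot for the shape argument (d_height_width) is a DisconnectedType, since the op's output does not depend differentiably on it. A hedged sketch of the consequence for callers, assuming theano.grad's standard handling of disconnected inputs (it raises by default, and with the default return_disconnected='zero' an ignored input gets a zero gradient):

import theano
from theano import tensor
import theano.tensor.nnet.abstract_conv as conv

filters = tensor.ftensor4()
topgrad = tensor.ftensor4()
out_shape = tensor.lvector()
out = conv.AbstractConv2d_gradInputs()(filters, topgrad, out_shape)
# Requesting the gradient w.r.t. the non-differentiable shape input
# only works once disconnected inputs are explicitly ignored.
g_shape = theano.grad(out.sum(), wrt=out_shape,
                      disconnected_inputs='ignore')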