Commit 8020a38f authored by Frédéric Bastien

Merge pull request #3912 from lamblin/fix_abstractconv_grad_type

Make sure grads of abstract conv ops have the right type
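
The invariant being enforced, distilled from the tests in this diff into a minimal CPU-only sketch (variable names are illustrative):

import theano
from theano import tensor
import theano.tensor.nnet.abstract_conv as conv

# The gradient of a conv2d output w.r.t. each of its inputs must have
# exactly that input's type: same dtype, same broadcastable pattern
# and, for GPU variables, the same device.
x = tensor.ftensor4()
w = tensor.ftensor4()
out = conv.conv2d(x, w)
gx, gw = theano.grad(out.sum(), wrt=(x, w))
assert gx.type == x.type
assert gw.type == w.type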
@@ -3,6 +3,7 @@ import numpy
import itertools
import theano
from theano import tensor
from theano.tests import unittest_tools as utt
import theano.tensor.nnet.abstract_conv as conv
from theano.sandbox.cuda import float32_shared_constructor as gpu_shared
@@ -265,3 +266,55 @@ class TestConv2d(unittest.TestCase):
                provide_shape=provide_shape, border_mode=b,
                filter_flip=flip,
                target_op=GpuCorrMM_gradInputs)

    def test_grad_types(self):
        # This function simply tests the behaviour of the AbstractConv
        # Ops, not their optimizations
        cpu_input = tensor.ftensor4()
        cpu_filters = tensor.ftensor4()
        cpu_topgrad = tensor.ftensor4()
        gpu_input = cuda.ftensor4()
        gpu_filters = cuda.ftensor4()
        gpu_topgrad = cuda.ftensor4()
        out_shape = tensor.lvector()

        # Check the gradient of the forward conv2d
        for input, filters in itertools.product(
                (cpu_input, gpu_input),
                (cpu_filters, gpu_filters)):
            output = conv.conv2d(input, filters)
            grad_input, grad_filters = theano.grad(output.sum(),
                                                   wrt=(input, filters))
            assert grad_input.type == input.type, (
                grad_input, grad_input.type, input, input.type)
            assert grad_filters.type == filters.type, (
                grad_filters, grad_filters.type, filters, filters.type)

        # Check the gradient of gradweight
        for input, topgrad in itertools.product(
                (cpu_input, gpu_input),
                (cpu_topgrad, gpu_topgrad)):
            grad_filters = conv.AbstractConv2d_gradWeights()(
                input, topgrad, out_shape)
            grad_input, grad_topgrad = theano.grad(grad_filters.sum(),
                                                   wrt=(input, topgrad))
            assert grad_input.type == input.type, (
                grad_input, grad_input.type, input, input.type)
            assert grad_topgrad.type == topgrad.type, (
                grad_topgrad, grad_topgrad.type, topgrad, topgrad.type)

        # Check the gradient of gradinputs
        for filters, topgrad in itertools.product(
                (cpu_filters, gpu_filters),
                (cpu_topgrad, gpu_topgrad)):
            grad_input = conv.AbstractConv2d_gradInputs()(
                filters, topgrad, out_shape)
            grad_filters, grad_topgrad = theano.grad(grad_input.sum(),
                                                     wrt=(filters, topgrad))
            assert grad_filters.type == filters.type, (
                grad_filters, grad_filters.type, filters, filters.type)
            assert grad_topgrad.type == topgrad.type, (
                grad_topgrad, grad_topgrad.type, topgrad, topgrad.type)
@@ -5,10 +5,12 @@ import itertools
from nose.plugins.skip import SkipTest
import theano
from theano import tensor
from theano.tests import unittest_tools as utt
import theano.tensor.nnet.abstract_conv as conv
from theano.compile import shared as cpu_shared
from ..type import gpuarray_shared_constructor as gpu_shared
from ..type import GpuArrayType
from ..dnn import (
    dnn_available, dnn_conv, dnn_gradweight, dnn_gradinput,
    GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI)
@@ -20,6 +22,9 @@ from theano.tensor.nnet import ConvGrad3D, ConvTransp3D
from .config import mode_with_gpu, mode_without_gpu, test_ctx_name

gpu_ftensor4 = GpuArrayType(dtype='float32', broadcastable=(False,) * 4)


class TestConv2d(unittest.TestCase):
    def setUp(self):
@@ -348,3 +353,55 @@ class TestConv2d(unittest.TestCase):
                provide_shape=provide_shape,
                border_mode=b,
                filter_flip=flip)

    def test_grad_types(self):
        # This function simply tests the behaviour of the AbstractConv
        # Ops, not their optimizations
        cpu_input = tensor.ftensor4()
        cpu_filters = tensor.ftensor4()
        cpu_topgrad = tensor.ftensor4()
        gpu_input = gpu_ftensor4()
        gpu_filters = gpu_ftensor4()
        gpu_topgrad = gpu_ftensor4()
        out_shape = tensor.lvector()

        # Check the gradient of the forward conv2d
        for input, filters in itertools.product(
                (cpu_input, gpu_input),
                (cpu_filters, gpu_filters)):
            output = conv.conv2d(input, filters)
            grad_input, grad_filters = theano.grad(output.sum(),
                                                   wrt=(input, filters))
            assert grad_input.type == input.type, (
                grad_input, grad_input.type, input, input.type)
            assert grad_filters.type == filters.type, (
                grad_filters, grad_filters.type, filters, filters.type)

        # Check the gradient of gradweight
        for input, topgrad in itertools.product(
                (cpu_input, gpu_input),
                (cpu_topgrad, gpu_topgrad)):
            grad_filters = conv.AbstractConv2d_gradWeights()(
                input, topgrad, out_shape)
            grad_input, grad_topgrad = theano.grad(grad_filters.sum(),
                                                   wrt=(input, topgrad))
            assert grad_input.type == input.type, (
                grad_input, grad_input.type, input, input.type)
            assert grad_topgrad.type == topgrad.type, (
                grad_topgrad, grad_topgrad.type, topgrad, topgrad.type)

        # Check the gradient of gradinputs
        for filters, topgrad in itertools.product(
                (cpu_filters, gpu_filters),
                (cpu_topgrad, gpu_topgrad)):
            grad_input = conv.AbstractConv2d_gradInputs()(
                filters, topgrad, out_shape)
            grad_filters, grad_topgrad = theano.grad(grad_input.sum(),
                                                     wrt=(filters, topgrad))
            assert grad_filters.type == filters.type, (
                grad_filters, grad_filters.type, filters, filters.type)
            assert grad_topgrad.type == topgrad.type, (
                grad_topgrad, grad_topgrad.type, topgrad, topgrad.type)
@@ -337,8 +337,12 @@ class AbstractConv2d(BaseAbstractConv2d):
        # Make sure that the broadcastable pattern of the inputs is used
        # for the gradients, even if the grad opts are not able to infer
        # that the dimensions are broadcastable.
        # Also make sure that the gradient lives on the same device as
        # the corresponding input.
        d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
        d_bottom = bottom.type.filter_variable(d_bottom)
        d_weights = patternbroadcast(d_weights, weights.broadcastable)
        d_weights = weights.type.filter_variable(d_weights)
        return d_bottom, d_weights

    def infer_shape(self, node, input_shapes):
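
For context, filter_variable is the mechanism that moves a gradient back to the expected type: it returns a variable whose type equals the calling type, inserting a conversion (such as a host-to-GPU transfer) when one is available, and raising a TypeError otherwise. A hedged, symbolic-only sketch with the old cuda backend (no GPU execution is needed; host_grad is an illustrative stand-in for a gradient that was built as a host tensor):

import theano.sandbox.cuda as cuda
from theano import tensor

gpu_x = cuda.CudaNdarrayType(broadcastable=(False,) * 4)()
host_grad = tensor.ftensor4()
# filter_variable wraps the host variable in a transfer so that the
# result's type matches gpu_x's type again.
fixed = gpu_x.type.filter_variable(host_grad)
assert fixed.type == gpu_x.type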
@@ -414,8 +418,12 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
        # Make sure that the broadcastable pattern of the inputs is used
        # for the gradients, even if the grad opts are not able to infer
        # that the dimensions are broadcastable.
        # Also make sure that the gradient lives on the same device as
        # the corresponding input.
        d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
        d_bottom = bottom.type.filter_variable(d_bottom)
        d_top = patternbroadcast(d_top, top.broadcastable)
        d_top = top.type.filter_variable(d_top)
        d_height_width = (theano.gradient.DisconnectedType()(),)
        return (d_bottom, d_top) + d_height_width
@@ -491,8 +499,12 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
        # Make sure that the broadcastable pattern of the inputs is used
        # for the gradients, even if the grad opts are not able to infer
        # that the dimensions are broadcastable.
        # Also make sure that the gradient lives on the same device as
        # the corresponding input.
        d_weights = patternbroadcast(d_weights, weights.broadcastable)
        d_weights = weights.type.filter_variable(d_weights)
        d_top = patternbroadcast(d_top, top.broadcastable)
        d_top = top.type.filter_variable(d_top)
        d_height_width = (theano.gradient.DisconnectedType()(),)
        return (d_weights, d_top) + d_height_width
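
A related detail in these hunks: the gradient slot for the shape argument (d_height_width) is a DisconnectedType, since the op's output does not depend differentiably on it. A hedged sketch of the consequence for callers, assuming theano.grad's standard handling of disconnected inputs (it raises by default, and with the default return_disconnected='zero' an ignored input gets a zero gradient):

import theano
from theano import tensor
import theano.tensor.nnet.abstract_conv as conv

filters = tensor.ftensor4()
topgrad = tensor.ftensor4()
out_shape = tensor.lvector()
out = conv.AbstractConv2d_gradInputs()(filters, topgrad, out_shape)
# Requesting the gradient w.r.t. the non-differentiable shape input
# only works once disconnected inputs are explicitly ignored.
g_shape = theano.grad(out.sum(), wrt=out_shape,
                      disconnected_inputs='ignore')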