提交 691be8f5 authored 作者: Gijs van Tulder's avatar Gijs van Tulder

Add GpuCorrMM and GpuCorr3dMM to gpuarray backend.

上级 146ef971
差异被折叠。
差异被折叠。
差异被折叠。
...@@ -1877,8 +1877,6 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var, ...@@ -1877,8 +1877,6 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var,
return result return result
@register_opt2([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs], 'fast_compile', 'conv_dnn', 'cudnn')
def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
if (not isinstance(op, (AbstractConv2d, if (not isinstance(op, (AbstractConv2d,
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
...@@ -1922,8 +1920,6 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): ...@@ -1922,8 +1920,6 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
return [rval] return [rval]
@register_opt2([AbstractConv3d, AbstractConv3d_gradWeights,
AbstractConv3d_gradInputs], 'fast_compile', 'conv_dnn', 'cudnn')
def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs): def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
if (not isinstance(op, (AbstractConv3d, if (not isinstance(op, (AbstractConv3d,
AbstractConv3d_gradWeights, AbstractConv3d_gradWeights,
...@@ -1967,7 +1963,6 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs): ...@@ -1967,7 +1963,6 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
return [rval] return [rval]
@register_opt('fast_compile', 'conv_dnn', 'cudnn')
@local_optimizer([AbstractConv2d, AbstractConv3d]) @local_optimizer([AbstractConv2d, AbstractConv3d])
def local_abstractconv_cudnn(node): def local_abstractconv_cudnn(node):
ctx = infer_context_name(*node.inputs) ctx = infer_context_name(*node.inputs)
...@@ -1979,7 +1974,6 @@ def local_abstractconv_cudnn(node): ...@@ -1979,7 +1974,6 @@ def local_abstractconv_cudnn(node):
return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs, node.outputs) return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
@register_opt('fast_compile', 'conv_dnn', 'cudnn')
@local_optimizer([AbstractConv2d_gradWeights, AbstractConv3d_gradWeights]) @local_optimizer([AbstractConv2d_gradWeights, AbstractConv3d_gradWeights])
def local_abstractconv_gw_cudnn(node): def local_abstractconv_gw_cudnn(node):
ctx = infer_context_name(*node.inputs) ctx = infer_context_name(*node.inputs)
...@@ -1991,7 +1985,6 @@ def local_abstractconv_gw_cudnn(node): ...@@ -1991,7 +1985,6 @@ def local_abstractconv_gw_cudnn(node):
return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs, node.outputs) return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
@register_opt('fast_compile', 'conv_dnn', 'cudnn')
@local_optimizer([AbstractConv2d_gradInputs, AbstractConv3d_gradInputs]) @local_optimizer([AbstractConv2d_gradInputs, AbstractConv3d_gradInputs])
def local_abstractconv_gi_cudnn(node): def local_abstractconv_gi_cudnn(node):
ctx = infer_context_name(*node.inputs) ctx = infer_context_name(*node.inputs)
......
差异被折叠。
...@@ -7,6 +7,9 @@ import numpy ...@@ -7,6 +7,9 @@ import numpy
from theano.tensor.nnet.tests import test_abstract_conv from theano.tensor.nnet.tests import test_abstract_conv
from ..type import GpuArrayType, gpuarray_shared_constructor, get_context from ..type import GpuArrayType, gpuarray_shared_constructor, get_context
from ..dnn import dnn_available, GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI from ..dnn import dnn_available, GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI
from ..blas import (
GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs,
GpuCorr3dMM, GpuCorr3dMM_gradWeights, GpuCorr3dMM_gradInputs)
from .config import mode_with_gpu, test_ctx_name from .config import mode_with_gpu, test_ctx_name
from pygpu import gpuarray from pygpu import gpuarray
...@@ -80,6 +83,72 @@ class TestDnnConv3d(test_abstract_conv.BaseTestConv3d): ...@@ -80,6 +83,72 @@ class TestDnnConv3d(test_abstract_conv.BaseTestConv3d):
filter_flip=flip, target_op=GpuDnnConvGradI) filter_flip=flip, target_op=GpuDnnConvGradI)
class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
    """Run the abstract-conv 2d test battery against the GpuCorrMM ops."""

    @classmethod
    def setup_class(cls):
        test_abstract_conv.BaseTestConv2d.setup_class()
        cls.shared = staticmethod(gpuarray_shared_constructor)
        # Exclude cudnn so the corrMM optimizations are the ones applied.
        cls.mode = mode_with_gpu.excluding('cudnn')

    def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1)):
        """Check forward, grad-weights and grad-inputs for one configuration."""
        out_shape = self.get_output_shape(i, f, s, b, fd)
        # Keyword arguments shared by all three sub-checks.
        common = dict(inputs_shape=i, filters_shape=f,
                      verify_grad=True, mode=self.mode,
                      provide_shape=provide_shape, border_mode=b,
                      filter_flip=flip, filter_dilation=fd)
        self.run_fwd(subsample=s,
                     target_op=(GpuCorrMM,
                                GpuCorrMM_gradWeights,
                                GpuCorrMM_gradInputs),
                     **common)
        self.run_gradweight(output_shape=out_shape, subsample=s,
                            target_op=GpuCorrMM_gradWeights,
                            **common)
        self.run_gradinput(output_shape=out_shape, subsample=s,
                           target_op=GpuCorrMM_gradInputs,
                           **common)
class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d):
    """Run the abstract-conv 3d test battery against the GpuCorr3dMM ops."""

    @classmethod
    def setup_class(cls):
        test_abstract_conv.BaseTestConv3d.setup_class()
        cls.shared = staticmethod(gpuarray_shared_constructor)
        # Exclude cudnn so the corrMM optimizations are the ones applied.
        cls.mode = mode_with_gpu.excluding('cudnn')

    def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1, 1)):
        """Check forward, grad-weights and grad-inputs for one configuration."""
        out_shape = self.get_output_shape(i, f, s, b, fd)
        # Keyword arguments shared by all three sub-checks.
        common = dict(inputs_shape=i, filters_shape=f,
                      verify_grad=True, mode=self.mode,
                      provide_shape=provide_shape, border_mode=b,
                      filter_flip=flip, filter_dilation=fd)
        self.run_fwd(subsample=s,
                     target_op=(GpuCorr3dMM,
                                GpuCorr3dMM_gradWeights,
                                GpuCorr3dMM_gradInputs),
                     **common)
        self.run_gradweight(output_shape=out_shape, subsample=s,
                            target_op=GpuCorr3dMM_gradWeights,
                            **common)
        self.run_gradinput(output_shape=out_shape, subsample=s,
                           target_op=GpuCorr3dMM_gradInputs,
                           **common)
class TestDnnConvTypes(test_abstract_conv.TestConvTypes): class TestDnnConvTypes(test_abstract_conv.TestConvTypes):
def setUp(self): def setUp(self):
self.input = gpu_ftensor4() self.input = gpu_ftensor4()
......
from __future__ import absolute_import, print_function, division
import unittest
import numpy
import theano
from theano.tests import unittest_tools as utt
from theano.tensor.nnet.corr import CorrMM, CorrMM_gradWeights, CorrMM_gradInputs
from ..type import gpuarray_shared_constructor
from ..blas import GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs
from .config import mode_with_gpu, mode_without_gpu
class TestCorrMM(unittest.TestCase):
    """Compare GpuCorrMM and its gradient ops against the CPU CorrMM
    reference implementation for various shapes, border modes, subsampling
    and filter-dilation settings.

    Shape arguments are given in (batch, rows, cols, channels) order and
    transposed internally to the (batch, channels, rows, cols) layout that
    the ops expect.
    """

    def run_conv_valid(self, inputs_shape, filters_shape,
                       border_mode='valid',
                       filter_dilation=(1, 1),
                       subsample=(1, 1),
                       verify_grad=False):
        """Run one forward convolution on GPU and CPU and compare outputs.

        If ``verify_grad`` is True, also numerically check the gradient of
        the GPU op.
        """
        inputs_shape = [inputs_shape[i] for i in (0, 3, 1, 2)]
        filters_shape = [filters_shape[i] for i in (0, 3, 1, 2)]

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = gpuarray_shared_constructor(inputs_val)
        filters = gpuarray_shared_constructor(filters_val)

        # CPU reference implementation.
        conv_ref = CorrMM(border_mode=border_mode,
                          filter_dilation=filter_dilation,
                          subsample=subsample)(inputs, filters)
        f_ref = theano.function([], conv_ref, mode=mode_without_gpu)
        # GPU implementation under test.
        conv = GpuCorrMM(border_mode=border_mode,
                         filter_dilation=filter_dilation,
                         subsample=subsample)(inputs, filters)
        f = theano.function([], conv, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)
        if verify_grad:
            utt.verify_grad(GpuCorrMM(border_mode=border_mode,
                                      filter_dilation=filter_dilation,
                                      subsample=subsample),
                            [inputs_val, filters_val])

    def test_valid(self):
        # One case per distinct subsampling setting; the original list
        # repeated (2, 2) and (3, 3) twice, which only duplicated work.
        for subsample in [(1, 1), (2, 2), (3, 3), (3, 2), (1, 2)]:
            self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
                                filters_shape=(10, 6, 12, 1),
                                subsample=subsample)

    def test_border_mode(self):
        # Cover the symbolic modes and a few explicit padding tuples.
        for border_mode in ['valid', 'half', 'full', (0, 0), (1, 2), (3, 2)]:
            self.run_conv_valid(inputs_shape=(16, 20, 12, 1),
                                filters_shape=(10, 6, 12, 1),
                                border_mode=border_mode)

    def test_filter_dilation(self):
        inputs_shape = [16, 20, 12, 1]
        filters_shape = [10, 6, 5, 1]
        for filter_dilation in [(2, 1), (1, 2)]:
            for border_mode in ['valid', 'half', 'full']:
                self.run_conv_valid(inputs_shape=inputs_shape,
                                    filters_shape=filters_shape,
                                    filter_dilation=filter_dilation,
                                    border_mode=border_mode)

    def test_verify_gradients(self):
        # Use a small example to keep the numeric gradient check fast.
        inputs_shape = [2, 7, 9, 1]
        filters_shape = [1, 3, 3, 1]
        for filter_dilation in [(2, 1), (1, 2)]:
            for border_mode in ['valid', 'half', 'full', (2, 1)]:
                self.run_conv_valid(inputs_shape=inputs_shape,
                                    filters_shape=filters_shape,
                                    filter_dilation=filter_dilation,
                                    border_mode=border_mode,
                                    verify_grad=True)

    def run_gradweight(self, inputs_shape, filters_shape, dCdH_shape,
                       subsample=(1, 1)):
        """Compare the gradient w.r.t. the weights on GPU and CPU."""
        inputs_shape = [inputs_shape[i] for i in (0, 3, 1, 2)]
        filters_shape = [filters_shape[i] for i in (0, 3, 1, 2)]
        dCdH_shape = [dCdH_shape[i] for i in (0, 3, 1, 2)]

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        dCdH_val = numpy.random.random(dCdH_shape).astype('float32')

        inputs = gpuarray_shared_constructor(inputs_val)
        dCdH = gpuarray_shared_constructor(dCdH_val)

        if subsample == (1, 1):
            conv_ref = CorrMM_gradWeights(subsample=subsample)(
                inputs, dCdH)
            conv_gemm = GpuCorrMM_gradWeights(subsample=subsample)(
                inputs, dCdH)
        else:
            # With subsampling the filter shape is ambiguous, so it must be
            # supplied explicitly.  Only allocate the shared variable on
            # this branch; the original built it unconditionally.
            shape = gpuarray_shared_constructor(
                numpy.array(filters_shape[2:]))
            conv_ref = CorrMM_gradWeights(subsample=subsample)(
                inputs, dCdH, shape=shape)
            conv_gemm = GpuCorrMM_gradWeights(subsample=subsample)(
                inputs, dCdH, shape=shape)
        f_ref = theano.function([], conv_ref, mode=mode_without_gpu)
        f = theano.function([], conv_gemm, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)

    def test_gradweight(self):
        self.run_gradweight(inputs_shape=(16, 10, 12, 1),
                            filters_shape=(10, 6, 12, 1),
                            dCdH_shape=(16, 5, 1, 10),
                            subsample=(1, 1))
        self.run_gradweight(inputs_shape=(16, 20, 10, 1),
                            filters_shape=(10, 6, 4, 1),
                            dCdH_shape=(16, 8, 4, 10),
                            subsample=(2, 2))
        self.run_gradweight(inputs_shape=(16, 20, 10, 1),
                            filters_shape=(10, 6, 3, 1),
                            dCdH_shape=(16, 5, 3, 10),
                            subsample=(3, 3))
        self.run_gradweight(inputs_shape=(16, 20, 12, 1),
                            filters_shape=(10, 6, 12, 1),
                            dCdH_shape=(16, 8, 1, 10),
                            subsample=(2, 1))

    def run_gradinput(self, inputs_shape, filters_shape,
                      subsample=(1, 1)):
        """Compare the gradient w.r.t. the inputs on GPU and CPU."""
        inputs_shape = [inputs_shape[i] for i in (0, 3, 1, 2)]
        filters_shape = [filters_shape[i] for i in (0, 3, 1, 2)]

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = gpuarray_shared_constructor(inputs_val)
        filters = gpuarray_shared_constructor(filters_val)

        if subsample == (1, 1):
            conv_ref = CorrMM_gradInputs(subsample=subsample)(
                kern=filters, topgrad=inputs)
            conv_gemm = GpuCorrMM_gradInputs(subsample=subsample)(
                kern=filters, topgrad=inputs)
        else:
            # With subsampling the bottom (input) shape is ambiguous, so
            # reconstruct it and supply it explicitly.  Only allocate the
            # shared variable on this branch; the original built it
            # unconditionally.
            bottom_height = ((inputs_shape[2] - 1) * subsample[0] +
                             filters_shape[2])
            bottom_width = ((inputs_shape[3] - 1) * subsample[1] +
                            filters_shape[3])
            bottom_shape = gpuarray_shared_constructor(
                numpy.array([bottom_height, bottom_width]))
            conv_ref = CorrMM_gradInputs(subsample=subsample)(
                kern=filters, topgrad=inputs, shape=bottom_shape)
            conv_gemm = GpuCorrMM_gradInputs(subsample=subsample)(
                kern=filters, topgrad=inputs, shape=bottom_shape)
        f_ref = theano.function([], conv_ref, mode=mode_without_gpu)
        f = theano.function([], conv_gemm, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)

    def test_gradinput(self):
        self.run_gradinput(inputs_shape=(16, 15, 12, 10),
                           filters_shape=(10, 6, 12, 1))
        self.run_gradinput(inputs_shape=(16, 15, 12, 10),
                           filters_shape=(10, 6, 12, 1),
                           subsample=(2, 2))
        self.run_gradinput(inputs_shape=(16, 15, 12, 10),
                           filters_shape=(10, 6, 12, 1),
                           subsample=(3, 3))
        self.run_gradinput(inputs_shape=(16, 15, 12, 10),
                           filters_shape=(10, 6, 12, 1),
                           subsample=(3, 1))
from __future__ import absolute_import, print_function, division
import unittest
import numpy
import theano
from theano.tests import unittest_tools as utt
from theano.tensor.nnet.corr3d import Corr3dMM, Corr3dMM_gradWeights, Corr3dMM_gradInputs
from ..type import gpuarray_shared_constructor
from ..blas import GpuCorr3dMM, GpuCorr3dMM_gradWeights, GpuCorr3dMM_gradInputs
from .config import mode_with_gpu, mode_without_gpu
class TestCorr3dMM(unittest.TestCase):
    """Compare GpuCorr3dMM and its gradient ops against the CPU Corr3dMM
    reference implementation for various shapes, border modes, subsampling
    and filter-dilation settings.

    Shape arguments are given in (batch, d1, d2, d3, channels) order and
    transposed internally to the (batch, channels, d1, d2, d3) layout that
    the ops expect.
    """

    def run_conv_valid(self, inputs_shape, filters_shape,
                       border_mode='valid',
                       filter_dilation=(1, 1, 1),
                       subsample=(1, 1, 1),
                       verify_grad=False):
        """Run one forward 3d convolution on GPU and CPU and compare outputs.

        If ``verify_grad`` is True, also numerically check the gradient of
        the GPU op.
        """
        inputs_shape = [inputs_shape[i] for i in (0, 4, 1, 2, 3)]
        filters_shape = [filters_shape[i] for i in (0, 4, 1, 2, 3)]

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = gpuarray_shared_constructor(inputs_val)
        filters = gpuarray_shared_constructor(filters_val)

        # CPU reference implementation.
        conv_ref = Corr3dMM(border_mode=border_mode,
                            filter_dilation=filter_dilation,
                            subsample=subsample)(inputs, filters)
        f_ref = theano.function([], conv_ref, mode=mode_without_gpu)
        # GPU implementation under test.
        conv = GpuCorr3dMM(border_mode=border_mode,
                           filter_dilation=filter_dilation,
                           subsample=subsample)(inputs, filters)
        f = theano.function([], conv, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)
        if verify_grad:
            utt.verify_grad(GpuCorr3dMM(border_mode=border_mode,
                                        filter_dilation=filter_dilation,
                                        subsample=subsample),
                            [inputs_val, filters_val])

    def test_valid(self):
        self.run_conv_valid(inputs_shape=(16, 20, 12, 16, 1),
                            filters_shape=(10, 6, 12, 4, 1))
        # One case per distinct subsampling setting; the original list
        # repeated (2, 2, 2) and (3, 3, 3) twice, which only duplicated
        # work.
        for subsample in [(2, 2, 2), (3, 3, 3), (3, 2, 1), (1, 2, 3)]:
            self.run_conv_valid(inputs_shape=(16, 20, 12, 15, 1),
                                filters_shape=(10, 6, 12, 4, 1),
                                subsample=subsample)

    def test_border_mode(self):
        # Cover the symbolic modes and a few explicit padding tuples.
        for border_mode in ['valid', 'half', 'full',
                            (0, 0, 0), (1, 2, 3), (3, 2, 1)]:
            self.run_conv_valid(inputs_shape=(16, 20, 12, 15, 1),
                                filters_shape=(10, 6, 12, 4, 1),
                                border_mode=border_mode)

    def test_filter_dilation(self):
        inputs_shape = [16, 20, 12, 15, 1]
        filters_shape = [10, 6, 5, 4, 1]
        for filter_dilation in [(2, 1, 1), (1, 2, 1), (1, 1, 2)]:
            for border_mode in ['valid', 'half', 'full']:
                self.run_conv_valid(inputs_shape=inputs_shape,
                                    filters_shape=filters_shape,
                                    filter_dilation=filter_dilation,
                                    border_mode=border_mode)

    def test_verify_gradients(self):
        # Use a small example to keep the numeric gradient check fast.
        inputs_shape = [2, 7, 9, 6, 1]
        filters_shape = [1, 3, 3, 2, 1]
        for filter_dilation in [(2, 1, 1), (1, 2, 1), (1, 1, 2)]:
            for border_mode in ['valid', 'half', 'full', (2, 1, 3)]:
                self.run_conv_valid(inputs_shape=inputs_shape,
                                    filters_shape=filters_shape,
                                    filter_dilation=filter_dilation,
                                    border_mode=border_mode,
                                    verify_grad=True)

    def run_gradweight(self, inputs_shape, filters_shape, dCdH_shape,
                       subsample=(1, 1, 1)):
        """Compare the gradient w.r.t. the weights on GPU and CPU."""
        inputs_shape = [inputs_shape[i] for i in (0, 4, 1, 2, 3)]
        filters_shape = [filters_shape[i] for i in (0, 4, 1, 2, 3)]
        dCdH_shape = [dCdH_shape[i] for i in (0, 4, 1, 2, 3)]

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        dCdH_val = numpy.random.random(dCdH_shape).astype('float32')

        inputs = gpuarray_shared_constructor(inputs_val)
        dCdH = gpuarray_shared_constructor(dCdH_val)

        if subsample == (1, 1, 1):
            conv_ref = Corr3dMM_gradWeights(subsample=subsample)(
                inputs, dCdH)
            conv_gemm = GpuCorr3dMM_gradWeights(subsample=subsample)(
                inputs, dCdH)
        else:
            # With subsampling the filter shape is ambiguous, so it must be
            # supplied explicitly.  Only allocate the shared variable on
            # this branch; the original built it unconditionally.
            shape = gpuarray_shared_constructor(
                numpy.array(filters_shape[2:]))
            conv_ref = Corr3dMM_gradWeights(subsample=subsample)(
                inputs, dCdH, shape=shape)
            conv_gemm = GpuCorr3dMM_gradWeights(subsample=subsample)(
                inputs, dCdH, shape=shape)
        f_ref = theano.function([], conv_ref, mode=mode_without_gpu)
        f = theano.function([], conv_gemm, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)

    def test_gradweight(self):
        self.run_gradweight(inputs_shape=(16, 10, 12, 16, 1),
                            filters_shape=(10, 6, 12, 4, 1),
                            dCdH_shape=(16, 5, 1, 13, 10),
                            subsample=(1, 1, 1))
        self.run_gradweight(inputs_shape=(16, 20, 10, 16, 1),
                            filters_shape=(10, 6, 4, 4, 1),
                            dCdH_shape=(16, 8, 4, 7, 10),
                            subsample=(2, 2, 2))
        self.run_gradweight(inputs_shape=(16, 20, 10, 16, 1),
                            filters_shape=(10, 6, 3, 4, 1),
                            dCdH_shape=(16, 5, 3, 5, 10),
                            subsample=(3, 3, 3))
        self.run_gradweight(inputs_shape=(16, 20, 12, 16, 1),
                            filters_shape=(10, 6, 12, 4, 1),
                            dCdH_shape=(16, 8, 1, 5, 10),
                            subsample=(2, 1, 3))

    def run_gradinput(self, inputs_shape, filters_shape,
                      subsample=(1, 1, 1)):
        """Compare the gradient w.r.t. the inputs on GPU and CPU."""
        inputs_shape = [inputs_shape[i] for i in (0, 4, 1, 2, 3)]
        filters_shape = [filters_shape[i] for i in (0, 4, 1, 2, 3)]

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = gpuarray_shared_constructor(inputs_val)
        filters = gpuarray_shared_constructor(filters_val)

        if subsample == (1, 1, 1):
            conv_ref = Corr3dMM_gradInputs(subsample=subsample)(
                kern=filters, topgrad=inputs)
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
                kern=filters, topgrad=inputs)
        else:
            # With subsampling the bottom (input) shape is ambiguous, so
            # reconstruct it and supply it explicitly.  Only allocate the
            # shared variable on this branch; the original built it
            # unconditionally.
            bottom_height = ((inputs_shape[2] - 1) * subsample[0] +
                             filters_shape[2])
            bottom_width = ((inputs_shape[3] - 1) * subsample[1] +
                            filters_shape[3])
            bottom_depth = ((inputs_shape[4] - 1) * subsample[2] +
                            filters_shape[4])
            bottom_shape = gpuarray_shared_constructor(
                numpy.array([bottom_height, bottom_width, bottom_depth]))
            conv_ref = Corr3dMM_gradInputs(subsample=subsample)(
                kern=filters, topgrad=inputs, shape=bottom_shape)
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
                kern=filters, topgrad=inputs, shape=bottom_shape)
        f_ref = theano.function([], conv_ref, mode=mode_without_gpu)
        f = theano.function([], conv_gemm, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)

    def test_gradinput(self):
        self.run_gradinput(inputs_shape=(16, 15, 12, 12, 10),
                           filters_shape=(10, 6, 12, 4, 1))
        self.run_gradinput(inputs_shape=(16, 15, 12, 12, 10),
                           filters_shape=(10, 6, 12, 4, 1),
                           subsample=(2, 2, 2))
        self.run_gradinput(inputs_shape=(16, 15, 12, 12, 10),
                           filters_shape=(10, 6, 12, 4, 1),
                           subsample=(3, 3, 3))
        self.run_gradinput(inputs_shape=(16, 15, 12, 12, 10),
                           filters_shape=(10, 6, 12, 4, 1),
                           subsample=(3, 1, 2))
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论