提交 6178a7a9 authored 作者: Nicolas Ballas's avatar Nicolas Ballas 提交者: Pascal Lamblin

update tests, fix cudnn call in abstract op

上级 7cc4e783
...@@ -1279,7 +1279,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1279,7 +1279,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
def dnn_gradweight(img, topgrad, def dnn_gradweight(img, topgrad,
kerns_shp, kerns_shp,
border_mode='valid', subsample=(1, 1), border_mode='valid', subsample=(1, 1),
conv_mode='conv', workmem=None): conv_mode='conv'):
""" """
GPU convolution gradient with respect to weight using cuDNN from NVIDIA. GPU convolution gradient with respect to weight using cuDNN from NVIDIA.
...@@ -1295,16 +1295,16 @@ def dnn_gradweight(img, topgrad, ...@@ -1295,16 +1295,16 @@ def dnn_gradweight(img, topgrad,
img = gpu_contiguous(img) img = gpu_contiguous(img)
topgrad = gpu_contiguous(topgrad) topgrad = gpu_contiguous(topgrad)
kerns_shp = theano.tensor.as_tensor_variable(kerns_shp)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(img.shape, kerns_shp) conv_mode=conv_mode)(img.shape, kerns_shp)
out = gpu_alloc_empty(*kerns_shp)
out = gpu_alloc_empty(*kern_shp) return GpuDnnConvGradW()(img, topgrad, out, desc)
return GpuDnnConvGradW(workmem=workmem)(img, topgrad, out, desc)
def dnn_gradinput(kerns, topgrad, def dnn_gradinput(kerns, topgrad,
img_shape, img_shp,
border_mode='valid', subsample=(1, 1), border_mode='valid', subsample=(1, 1),
conv_mode='conv', workmem=None): conv_mode='conv'):
""" """
GPU convolution gradient with respect to input using cuDNN from NVIDIA. GPU convolution gradient with respect to input using cuDNN from NVIDIA.
...@@ -1320,11 +1320,12 @@ def dnn_gradinput(kerns, topgrad, ...@@ -1320,11 +1320,12 @@ def dnn_gradinput(kerns, topgrad,
kerns = gpu_contiguous(kerns) kerns = gpu_contiguous(kerns)
topgrad = gpu_contiguous(topgrad) topgrad = gpu_contiguous(topgrad)
img_shp = theano.tensor.as_tensor_variable(img_shp)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(img_shp, kerns.shape) conv_mode=conv_mode)(img_shp, kerns.shape)
out = gpu_alloc_empty(*img_shp) out = gpu_alloc_empty(*img_shp)
return GpuDnnConvGradI(workmem=workmem)(kerns, topgrad, out, desc) return GpuDnnConvGradI()(kerns, topgrad, out, desc)
class GpuDnnPoolDesc(GpuOp): class GpuDnnPoolDesc(GpuOp):
......
...@@ -25,7 +25,7 @@ from theano.sandbox.cuda.basic_ops import ( ...@@ -25,7 +25,7 @@ from theano.sandbox.cuda.basic_ops import (
GpuFromHost, HostFromGpu GpuFromHost, HostFromGpu
) )
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.dnn import dnn_available, dnn_conv from theano.sandbox.cuda.dnn import dnn_available, dnn_conv, dnn_gradweight, dnn_gradinput
from theano.sandbox.cuda.blas import GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs from theano.sandbox.cuda.blas import GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs
from theano.sandbox.cuda.opt import values_eq_approx_high_tol from theano.sandbox.cuda.opt import values_eq_approx_high_tol
...@@ -45,7 +45,7 @@ def conv2d(inputs, ...@@ -45,7 +45,7 @@ def conv2d(inputs,
batch_size=None, batch_size=None,
border_mode='valid', border_mode='valid',
subsample=(1, 1), subsample=(1, 1),
filter_flip=True): filters_flip=True):
""" """
This function will build the symbolic graph for convolving a mini-batch of a This function will build the symbolic graph for convolving a mini-batch of a
stack of 2D inputs with a set of 2D filters. The implementation is modelled stack of 2D inputs with a set of 2D filters. The implementation is modelled
...@@ -92,8 +92,8 @@ def conv2d(inputs, ...@@ -92,8 +92,8 @@ def conv2d(inputs,
:param subsample: factor by which to subsample the output. :param subsample: factor by which to subsample the output.
Also called strides elsewhere. Also called strides elsewhere.
:type filter_flip: bool :type filters_flip: bool
:param filter_flip: If ``True``, will flip the filter rows and columns :param filters_flip: If ``True``, will flip the filter rows and columns
before sliding them over the input. This operation is normally referred before sliding them over the input. This operation is normally referred
to as a convolution, and this is the default. If ``False``, the filters to as a convolution, and this is the default. If ``False``, the filters
are not flipped and the operation is referred to as a cross-correlation. are not flipped and the operation is referred to as a cross-correlation.
...@@ -109,7 +109,7 @@ def conv2d(inputs, ...@@ -109,7 +109,7 @@ def conv2d(inputs,
bsize=batch_size, bsize=batch_size,
border_mode=border_mode, border_mode=border_mode,
subsample=subsample, subsample=subsample,
filter_flip = filter_flip) filters_flip = filters_flip)
return conv_op(inputs, filters) return conv_op(inputs, filters)
...@@ -120,12 +120,12 @@ class BaseAbstractConv2d(Op): ...@@ -120,12 +120,12 @@ class BaseAbstractConv2d(Op):
FIXME FIXME
""" """
check_broadcast = False check_broadcast = False
__props__ = ('border_mode', 'subsample', 'filter_flip', 'imshp', 'kshp', 'bsize') __props__ = ('border_mode', 'subsample', 'filters_flip', 'imshp', 'kshp', 'bsize')
def __init__(self, def __init__(self,
imshp=None, kshp=None, bsize=None, imshp=None, kshp=None, bsize=None,
border_mode="valid", subsample=(1, 1), border_mode="valid", subsample=(1, 1),
filter_flip = True): filters_flip = True):
if isinstance(border_mode, int): if isinstance(border_mode, int):
border_mode = (border_mode, border_mode) border_mode = (border_mode, border_mode)
if isinstance(border_mode, tuple): if isinstance(border_mode, tuple):
...@@ -142,7 +142,7 @@ class BaseAbstractConv2d(Op): ...@@ -142,7 +142,7 @@ class BaseAbstractConv2d(Op):
self.kshp = kshp self.kshp = kshp
self.bsize = bsize self.bsize = bsize
self.border_mode = border_mode self.border_mode = border_mode
self.filter_flip = filter_flip self.filters_flip = filters_flip
if len(subsample) != 2: if len(subsample) != 2:
raise ValueError("subsample must have two elements") raise ValueError("subsample must have two elements")
...@@ -175,9 +175,9 @@ class AbstractConv2d(BaseAbstractConv2d): ...@@ -175,9 +175,9 @@ class AbstractConv2d(BaseAbstractConv2d):
bsize=None, bsize=None,
border_mode="valid", border_mode="valid",
subsample=(1, 1), subsample=(1, 1),
filter_flip = True): filters_flip = True):
super(AbstractConv2d, self).__init__(imshp, kshp, bsize, super(AbstractConv2d, self).__init__(imshp, kshp, bsize,
border_mode, subsample, filter_flip) border_mode, subsample, filters_flip)
def make_node(self, img, kern): def make_node(self, img, kern):
if img.type.ndim != 4: if img.type.ndim != 4:
...@@ -203,13 +203,13 @@ class AbstractConv2d(BaseAbstractConv2d): ...@@ -203,13 +203,13 @@ class AbstractConv2d(BaseAbstractConv2d):
self.bsize, self.bsize,
self.border_mode, self.border_mode,
self.subsample, self.subsample,
self.filter_flip)( self.filters_flip)(
weights, top, bottom.shape[-2:]) weights, top, bottom.shape[-2:])
d_weights = AbstractConv2d_gradWeights(self.imshp, self.kshp, d_weights = AbstractConv2d_gradWeights(self.imshp, self.kshp,
self.bsize, self.bsize,
self.border_mode, self.border_mode,
self.subsample, self.subsample,
self.filter_flip)( self.filters_flip)(
bottom, top, weights.shape[-2:]) bottom, top, weights.shape[-2:])
return d_bottom, d_weights return d_bottom, d_weights
...@@ -222,16 +222,15 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d): ...@@ -222,16 +222,15 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
use it as needed. use it as needed.
""" """
def __init__(self, def __init__(self,
imshp=None, imshp=None,
kshp=None, kshp=None,
bsize=None, bsize=None,
border_mode="valid", border_mode="valid",
subsample=(1, 1), subsample=(1, 1),
filter_flip=True): filters_flip=True):
super(AbstractConv2d_gradWeights, self).__init__(imshp, kshp, bsize, super(AbstractConv2d_gradWeights, self).__init__(imshp, kshp, bsize,
border_mode, subsample, filter_flip) border_mode, subsample, filters_flip)
## Update shape/height_width ## Update shape/height_width
def make_node(self, img, topgrad, shape): def make_node(self, img, topgrad, shape):
...@@ -261,13 +260,13 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d): ...@@ -261,13 +260,13 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
self.bsize, self.bsize,
self.border_mode, self.border_mode,
self.subsample, self.subsample,
self.filter_flip)(weights, top, bottom.shape[-2:]) self.filters_flip)(weights, top, bottom.shape[-2:])
d_top = AbstractConv2d(self.imshp, d_top = AbstractConv2d(self.imshp,
self.kshp, self.kshp,
self.bsize, self.bsize,
self.border_mode, self.border_mode,
self.subsample, self.subsample,
self.filter_flip)(bottom, weights) self.filters_flip)(bottom, weights)
d_height_width = (theano.gradient.DisconnectedType()(),) d_height_width = (theano.gradient.DisconnectedType()(),)
return (d_bottom, d_top) + d_height_width return (d_bottom, d_top) + d_height_width
...@@ -290,9 +289,9 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d): ...@@ -290,9 +289,9 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
bsize=None, bsize=None,
border_mode="valid", border_mode="valid",
subsample=(1, 1), subsample=(1, 1),
filter_flip=True): filters_flip=True):
super(AbstractConv2d_gradInputs, self).__init__(imshp, kshp, bsize, super(AbstractConv2d_gradInputs, self).__init__(imshp, kshp, bsize,
border_mode, subsample, filter_flip) border_mode, subsample, filters_flip)
## Update shape/height_width ## Update shape/height_width
def make_node(self, kern, topgrad, shape): def make_node(self, kern, topgrad, shape):
...@@ -336,7 +335,8 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d): ...@@ -336,7 +335,8 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
### move to Gpu optimization ### move to Gpu optimization
### Do not replace the AbstractOpt only the inputs ### Do not replace the AbstractOpt only the inputs
### Abstract Ops is replaced layer by device_specialized opt ### Abstract Ops is replaced layer by device_specialized opt
@local_optimizer([gpu_from_host, BaseAbstractConv2d]) @local_optimizer([gpu_from_host,
AbstractConv2d, AbstractConv2d_gradWeights, AbstractConv2d_gradInputs])
def local_conv2d_gpu_conv(node): def local_conv2d_gpu_conv(node):
""" """
gpu_from_host(AbstractConv) -> AbstractConv(gpu_from_host) gpu_from_host(AbstractConv) -> AbstractConv(gpu_from_host)
...@@ -381,13 +381,12 @@ def local_conv2d_gpu_conv(node): ...@@ -381,13 +381,12 @@ def local_conv2d_gpu_conv(node):
node.outputs[0].broadcastable) node.outputs[0].broadcastable)
out.values_eq_approx = values_eq_approx_high_tol out.values_eq_approx = values_eq_approx_high_tol
return [as_tensor_variable(out)] return [as_tensor_variable(out)]
# We register the optimizer that moves convolutions to the GPU.
register_gpu()(local_conv2d_gpu_conv) register_gpu()(local_conv2d_gpu_conv)
### Call dnn conv class directly ### Call dnn conv class directly
@local_optimizer([BaseAbstractConv2d]) @local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights, AbstractConv2d_gradInputs])
def local_conv2d_cudnn(node): def local_conv2d_cudnn(node):
inp1 = node.inputs[0] inp1 = node.inputs[0]
...@@ -399,7 +398,7 @@ def local_conv2d_cudnn(node): ...@@ -399,7 +398,7 @@ def local_conv2d_cudnn(node):
if not dnn_available(): if not dnn_available():
return None return None
if node.op.filter_flip: if node.op.filters_flip:
conv_mode = 'conv' conv_mode = 'conv'
else: else:
conv_mode = 'cross' conv_mode = 'cross'
...@@ -411,20 +410,20 @@ def local_conv2d_cudnn(node): ...@@ -411,20 +410,20 @@ def local_conv2d_cudnn(node):
conv_mode = conv_mode) conv_mode = conv_mode)
return [rval] return [rval]
if (isinstance(node.op, AbstractConv2d_gradWeights)): if (isinstance(node.op, AbstractConv2d_gradWeights)):
shape = node.inputs[2] shape = (inp2.shape[1], inp1.shape[1], node.inputs[2][0], node.inputs[2][1])
rval = dnn_gradweight(inp1, inp2, shape, rval = dnn_gradweight(inp1, inp2, shape,
border_mode=node.op.border_mode, border_mode=node.op.border_mode,
subsample=node.op.subsample, subsample=node.op.subsample,
conv_mode = conv_mode) conv_mode = conv_mode)
return [rval] return [rval]
if (isinstance(node.op, AbstractConv2d_gradInputs)): if (isinstance(node.op, AbstractConv2d_gradInputs)):
shape = node.inputs[2] shape = (inp2.shape[0], inp1.shape[1], node.inputs[2][0], node.inputs[2][1])
rval = dnn_gradinput(inp1, inp2, shape rval = dnn_gradinput(inp1, inp2, shape,
border_mode=node.op.border_mode, border_mode=node.op.border_mode,
subsample=node.op.subsample, subsample=node.op.subsample,
conv_mode = conv_mode) conv_mode = conv_mode)
return [rval] return [rval]
register_specialize_device(local_conv2d_cudnn) register_specialize_device(local_conv2d_cudnn, 'cudnn')
@local_optimizer([AbstractConv2d]) @local_optimizer([AbstractConv2d])
...@@ -441,7 +440,7 @@ def local_conv2d_corrmm(node): ...@@ -441,7 +440,7 @@ def local_conv2d_corrmm(node):
subsample = node.op.subsample subsample = node.op.subsample
if (border_mode == 'valid') or (subsample != (1,1)): if (border_mode == 'valid') or (subsample != (1,1)):
# need to flip the kernel for valid convolution # need to flip the kernel for valid convolution
if node.op.filter_flip: if node.op.filters_flip:
kern = kern[:, :, ::-1, ::-1] kern = kern[:, :, ::-1, ::-1]
# By default use GpuCorrMM # By default use GpuCorrMM
rval = GpuCorrMM(border_mode, subsample)(gpu_contiguous(img), rval = GpuCorrMM(border_mode, subsample)(gpu_contiguous(img),
...@@ -484,7 +483,7 @@ def local_conv2d_corrmm(node): ...@@ -484,7 +483,7 @@ def local_conv2d_corrmm(node):
rval = GpuCorrMM_gradInputs('valid', subsample)( rval = GpuCorrMM_gradInputs('valid', subsample)(
gpu_contiguous(kern), gpu_contiguous(img)) gpu_contiguous(kern), gpu_contiguous(img))
return [rval] return [rval]
#register_specialize_device(local_conv2d_corrmm) register_specialize_device(local_conv2d_corrmm, 'conv_gemm')
@local_optimizer([AbstractConv2d_gradWeights]) @local_optimizer([AbstractConv2d_gradWeights])
def local_conv2d_gradweight_corrmm(node): def local_conv2d_gradweight_corrmm(node):
...@@ -494,13 +493,13 @@ def local_conv2d_gradweight_corrmm(node): ...@@ -494,13 +493,13 @@ def local_conv2d_gradweight_corrmm(node):
if not isinstance(img.type, CudaNdarrayType) or \ if not isinstance(img.type, CudaNdarrayType) or \
not isinstance(topgrad.type, CudaNdarrayType): not isinstance(topgrad.type, CudaNdarrayType):
return None return None
if node.op.filter_flip: if node.op.filters_flip:
img = img[:, :, ::-1, ::-1] img = img[:, :, ::-1, ::-1]
rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode, rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode,
subsample=node.op.subsample)( subsample=node.op.subsample)(
gpu_contiguous(img), gpu_contiguous(topgrad), shape) gpu_contiguous(img), gpu_contiguous(topgrad), shape)
return [rval] return [rval]
#register_specialize_device(local_conv2d_gradweight_corrmm) register_specialize_device(local_conv2d_gradweight_corrmm, 'conv_gemm')
@local_optimizer([AbstractConv2d_gradInputs]) @local_optimizer([AbstractConv2d_gradInputs])
def local_conv2d_gradinputs_corrmm(node): def local_conv2d_gradinputs_corrmm(node):
...@@ -510,14 +509,14 @@ def local_conv2d_gradinputs_corrmm(node): ...@@ -510,14 +509,14 @@ def local_conv2d_gradinputs_corrmm(node):
not isinstance(topgrad.type, CudaNdarrayType): not isinstance(topgrad.type, CudaNdarrayType):
return None return None
if node.op.filter_flip: if node.op.filters_flip:
kern = kern[:, :, ::-1, ::-1] kern = kern[:, :, ::-1, ::-1]
rval = GpuCorrMM_gradInputs(border_mode=node.op.border_mode, rval = GpuCorrMM_gradInputs(border_mode=node.op.border_mode,
subsample=node.op.subsample)( subsample=node.op.subsample)(
gpu_contiguous(kern), gpu_contiguous(topgrad), shape) gpu_contiguous(kern), gpu_contiguous(topgrad), shape)
return [rval] return [rval]
#register_specialize_device(local_conv2d_gradinputs_corrmm) register_specialize_device(local_conv2d_gradinputs_corrmm, 'conv_gemm')
...@@ -553,7 +552,7 @@ def local_conv2d_gradweight_cpu(node): ...@@ -553,7 +552,7 @@ def local_conv2d_gradweight_cpu(node):
if node.op.border_mode not in ['full', 'valid']: if node.op.border_mode not in ['full', 'valid']:
return None return None
if not node.op.filter_flip: if not node.op.filters_flip:
# Not tested yet # Not tested yet
return return
...@@ -617,7 +616,7 @@ def local_conv2d_gradweight_cpu(node): ...@@ -617,7 +616,7 @@ def local_conv2d_gradweight_cpu(node):
raise NotImplementedError( raise NotImplementedError(
'Only [full,valid] modes are currently supported.') 'Only [full,valid] modes are currently supported.')
if node.op.filter_flip: if node.op.filters_flip:
filters = filters[:, :, ::-1, ::-1] # flip them filters = filters[:, :, ::-1, ::-1] # flip them
dw = ConvOp(imshp, kshp, nkern, bsize, 1, 1, output_mode='valid', dw = ConvOp(imshp, kshp, nkern, bsize, 1, 1, output_mode='valid',
...@@ -645,7 +644,7 @@ def local_conv2d_gradinputs_cpu(node): ...@@ -645,7 +644,7 @@ def local_conv2d_gradinputs_cpu(node):
if node.op.border_mode not in ['full', 'valid']: if node.op.border_mode not in ['full', 'valid']:
return None return None
if not node.op.filter_flip: if not node.op.filters_flip:
# Not tested yet # Not tested yet
return None return None
......
...@@ -3,6 +3,7 @@ import numpy ...@@ -3,6 +3,7 @@ import numpy
import copy import copy
import theano import theano
import theano.tensor as T
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
...@@ -13,7 +14,7 @@ from theano.sandbox.cuda import float32_shared_constructor as gpu_shared ...@@ -13,7 +14,7 @@ from theano.sandbox.cuda import float32_shared_constructor as gpu_shared
from theano.compile import shared as cpu_shared from theano.compile import shared as cpu_shared
from theano.sandbox.cuda.tests.test_conv_cuda_ndarray import py_conv from theano.sandbox.cuda.tests.test_conv_cuda_ndarray import py_conv
#from theano.sandbox.cuda.dnn import dnn_available from theano.sandbox.cuda.dnn import dnn_available, dnn_conv, dnn_gradweight, dnn_gradinput
if theano.config.mode == 'FAST_COMPILE': if theano.config.mode == 'FAST_COMPILE':
...@@ -26,249 +27,239 @@ else: ...@@ -26,249 +27,239 @@ else:
class TestConv2d(unittest.TestCase): class TestConv2d(unittest.TestCase):
def run_fwd(self, def setUp(self):
inputs_shape, super(TestConv2d, self).setUp()
filters_shape,
subsample=(1, 1), self.inputs_shapes = [(16, 1, 12, 12), (16, 1, 18, 18), (16, 1, 24, 24),
verify_grad=True, (16, 1, 20, 20), (16, 1, 32, 20), (10, 5, 32, 32)]
mode=mode_without_gpu, self.filters_shapes = [(10, 1, 2, 2), (10, 1, 3, 3), (10, 1, 2, 2),
border_mode='valid', (1, 1, 2, 5), (5, 1, 2, 2), (15, 5, 2, 2)]
device='gpu', self.subsamples = [(1, 1), (2, 2), (2, 4)]
provide_shape=False): self.border_modes = ["valid", "full", (0, 0), (1, 1), (5, 5), (5, 2)]
def get_output_shape(self, inputs_shape, filters_shape, subsample, border_mode):
if border_mode == "valid":
border_mode = (0, 0)
if border_mode == "full":
border_mode = (filters_shape[2] - 1, filters_shape[3] - 1)
batch_size = inputs_shape[0]
num_filters = filters_shape[1]
return (batch_size, num_filters,) + \
tuple(None if i is None or k is None
else ((i + 2*pad - k) // d + 1)
for i, k, d, pad in zip(inputs_shape[2:],
filters_shape[2:],
subsample,
border_mode))
def run_fwd(self, inputs_shape, filters_shape, ref=dnn_conv,
subsample=(1, 1), verify_grad=True, mode=mode_without_gpu,
border_mode='valid', filters_flip=True, device='cpu', provide_shape=False):
inputs_val = numpy.random.random(inputs_shape).astype('float32') inputs_val = numpy.random.random(inputs_shape).astype('float32')
filters_val = numpy.random.random(filters_shape).astype('float32') filters_val = numpy.random.random(filters_shape).astype('float32')
if device == 'gpu': if device == 'gpu':
inputs = gpu_shared(inputs_val) inputs = gpu_shared(inputs_val)
filters = gpu_shared(filters_val) filters = gpu_shared(filters_val)
else: else:
inputs = cpu_shared(inputs_val) inputs = theano.tensor.as_tensor_variable(cpu_shared(inputs_val))
filters = cpu_shared(filters_val) filters = theano.tensor.as_tensor_variable(cpu_shared(filters_val))
if provide_shape: if provide_shape:
imshp = inputs_shape imshp = inputs_shape
kshp = filters_shape kshp = filters_shape
else: else:
imshp = None imshp = None
kshp = None kshp = None
if filters_flip:
conv_mode = 'conv'
else:
conv_mode = 'cross'
c_ref = conv_ref.conv2d(inputs, filters, c_ref = ref(inputs, filters,
border_mode=border_mode, border_mode=border_mode,
subsample=subsample) subsample=subsample,
conv_mode = conv_mode)
c = conv.conv2d(inputs, filters, c = conv.conv2d(inputs, filters,
border_mode=border_mode, subsample=subsample) border_mode=border_mode,
subsample=subsample,
filters_flip=filters_flip,
inputs_shape=imshp,
filters_shape=kshp)
f_ref = theano.function([], c_ref, mode=mode) f_ref = theano.function([], c_ref, mode=mode)
f = theano.function([], c, mode) f = theano.function([], c, mode)
res_ref = f_ref() res_ref = f_ref()
res = f() res = f()
print res_ref.shape, res.shape
utt.assert_allclose(res_ref, res) utt.assert_allclose(res_ref, res)
if verify_grad: if verify_grad:
utt.verify_grad(conv.AbstractConv2d(border_mode="valid", utt.verify_grad(conv.AbstractConv2d(border_mode="valid", imshp=imshp, kshp=kshp,
imshp=imshp, bsize=inputs_shape[0], subsample=subsample),
kshp=kshp,
bsize=inputs_shape[0],
subsample=subsample),
[inputs_val, filters_val]) [inputs_val, filters_val])
def run_gradweight(self, inputs_shape, filters_shape, output_shape,
def run_gradweight(self, ref=dnn_gradweight, subsample=(1, 1), filters_flip=True,
inputs_shape, verify_grad=True, mode=mode_without_gpu, border_mode='valid',
filters_shape, device='cpu', provide_shape = False):
output_shape,
subsample=(1, 1),
verify_grad=True,
mode=mode_without_gpu,
border_mode='valid',
device='gpu',
provide_shape = False):
inputs_val = numpy.random.random(inputs_shape).astype('float32') inputs_val = numpy.random.random(inputs_shape).astype('float32')
output_val = numpy.random.random(output_shape).astype('float32') output_val = numpy.random.random(output_shape).astype('float32')
if device == 'gpu': if device == 'gpu':
inputs = gpu_shared(inputs_val) inputs = gpu_shared(inputs_val)
output = gpu_shared(output_val) output = gpu_shared(output_val)
else: else:
inputs = cpu_shared(inputs_val) inputs = theano.tensor.as_tensor_variable(cpu_shared(inputs_val))
output = cpu_shared(output_val) output = theano.tensor.as_tensor_variable(cpu_shared(output_val))
if provide_shape: if provide_shape:
imshp = inputs_shape imshp = inputs_shape
kshp = filters_shape kshp = filters_shape
else: else:
imshp = None imshp = None
kshp = None kshp = None
if filters_flip:
conv_mode = 'conv'
else:
conv_mode = 'cross'
c = conv.AbstractConv2d_gradWeights(border_mode=border_mode, c = conv.AbstractConv2d_gradWeights(border_mode=border_mode,
filters_flip=filters_flip,
subsample=subsample, subsample=subsample,
imshp = imshp, kshp = kshp) imshp = imshp, kshp = kshp)
c = c(inputs, output, filters_shape[-2:]) c = c(inputs, output, filters_shape[-2:])
c_ref = ref(inputs, output,
filters_shape,
border_mode=border_mode,
subsample=subsample,
conv_mode=conv_mode)
f = theano.function([], c, mode) f = theano.function([], c, mode)
res_ref = py_conv(inputs_val.transpose((1, 0, 2, 3)), f_ref = theano.function([], c_ref, mode)
output_val.transpose((1, 0, 2, 3))[:, :, ::-1, ::-1], res = f()
'valid', subsample).transpose((1, 0, 2, 3))[:, :, ::-1, ::-1] res_ref = f_ref()
res = numpy.array(f())
print res_ref.shape, res.shape
utt.assert_allclose(res_ref, res) utt.assert_allclose(res_ref, res)
def abstract_conv2d_gradweight(inputs_val, output_val): def abstract_conv2d_gradweight(inputs_val, output_val):
conv_op = conv.AbstractConv2d_gradInputs(border_mode=border_mode, conv_op = conv.AbstractConv2d_gradInputs(border_mode=border_mode, subsample=subsample)
subsample=subsample)
return conv_op(inputs_val, output_val, filters_shape[-2:]) return conv_op(inputs_val, output_val, filters_shape[-2:])
if verify_grad: if verify_grad:
utt.verify_grad(abstract_conv2d_gradweight, utt.verify_grad(abstract_conv2d_gradweight, [inputs_val, output_val])
[inputs_val, output_val])
def run_gradinput(self, def run_gradinput(self, inputs_shape, filters_shape, output_shape, ref=dnn_gradweight,
inputs_shape, subsample=(1, 1), filters_flip=True, verify_grad=True, mode=mode_without_gpu,
filters_shape, border_mode='valid', device='cpu', provide_shape = False):
output_shape,
subsample=(1, 1),
verify_grad=True,
mode=mode_without_gpu,
border_mode='valid',
device='gpu',
provide_shape = False):
output_val = numpy.random.random(output_shape).astype('float32') output_val = numpy.random.random(output_shape).astype('float32')
filters_val = numpy.random.random(filters_shape).astype('float32') filters_val = numpy.random.random(filters_shape).astype('float32')
if device == 'gpu': if device == 'gpu':
output = gpu_shared(output_val) output = gpu_shared(output_val)
filters = gpu_shared(filters_val) filters = gpu_shared(filters_val)
else: else:
output = cpu_shared(output_val) output = theano.tensor.as_tensor_variable(cpu_shared(output_val))
filters = cpu_shared(filters_val) filters = theano.tensor.as_tensor_variable(cpu_shared(filters_val))
if provide_shape: if provide_shape:
imshp = inputs_shape imshp = inputs_shape
kshp = filters_shape kshp = filters_shape
else: else:
imshp = None imshp = None
kshp = None kshp = None
if filters_flip:
c = conv.AbstractConv2d_gradInputs(border_mode="valid", conv_mode = 'conv'
else:
conv_mode = 'cross'
c = conv.AbstractConv2d_gradInputs(border_mode=border_mode,
subsample=subsample, subsample=subsample,
imshp = imshp, kshp = kshp) filters_flip=filters_flip,
imshp=imshp, kshp=kshp)
c = c(filters, output, inputs_shape[-2:]) c = c(filters, output, inputs_shape[-2:])
c_ref = ref(filters, output, inputs_shape,
border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)
f = theano.function([], c, mode) f = theano.function([], c, mode)
res_ref = py_conv(output_val, f_ref = theano.function([], c_ref, mode)
filters_val.transpose(1, 0, 2, 3)[:, :, ::-1, ::-1], res = f()
'full', subsample) res_ref = f_ref()
print filters_val.shape, output_val.shape, inputs_shape
res = numpy.array(f())
print "2, ", res_ref.shape, res.shape
utt.assert_allclose(res_ref, res) utt.assert_allclose(res_ref, res)
def abstract_conv2d_gradinputs(filters_val, output_val): def abstract_conv2d_gradinputs(filters_val, output_val):
conv_op = conv.AbstractConv2d_gradInputs(border_mode=border_mode, conv_op = conv.AbstractConv2d_gradInputs(border_mode=border_mode, subsample=subsample)
subsample=subsample)
return conv_op(filters_val, output_val, inputs_shape[-2:]) return conv_op(filters_val, output_val, inputs_shape[-2:])
if verify_grad: if verify_grad:
utt.verify_grad(abstract_conv2d_gradinputs, utt.verify_grad(abstract_conv2d_gradinputs, [filters_val, output_val])
[filters_val, output_val])
def test_corrmm(self):
mode = mode_with_gpu
mode = mode.excluding('cudnn')
self.run_fwd(inputs_shape=(16, 1, 2, 2),
filters_shape=(10, 1, 2, 2),
verify_grad=False, mode=mode)
self.run_gradweight(inputs_shape=(16, 1, 2, 2),
filters_shape=(10, 1, 2, 2),
verify_grad=False, mode=mode)
self.run_gradinput(inputs_shape=(1, 1, 2, 2),
filters_shape=(10, 1, 2, 2),
verify_grad=False, mode=mode)
def test_dnn_conv(self):
if not dnn_available():
def test_cpu_conv(self): return
mode=mode_with_gpu
inputs_shapes = [(16, 1, 2, 2), (16, 1, 8, 8), (16, 1, 4, 4)]
filters_shapes = [(10, 1, 2, 2), (10, 1, 2, 2), (10, 1, 2, 2),] inputs_shapes = self.inputs_shapes
output_shapes = [(16, 10, 1, 1), (16, 10, 7, 7), (16, 10, 3, 3)] filters_shapes = self.filters_shapes
subsamples = [(1, 1), (1, 1), (1, 1)] subsamples = self.subsamples
border_modes = self.border_modes
border_mode= 'valid' for i, f in zip(inputs_shapes[0:1], filters_shapes[0:1]):
for i, f, o, s in zip(inputs_shapes[0:1], filters_shapes[0:1], output_shapes[0:1], subsamples[0:1]): for s in subsamples:
for b in border_modes:
o = self.get_output_shape(i, f, s, b)
for provide_shape in [False, True]: for provide_shape in [False, True]:
self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s, self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
verify_grad=True, mode=mode_without_gpu, device='cpu', verify_grad=True, mode=mode, device='gpu',
provide_shape=provide_shape, border_mode=b)
self.run_gradweight(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=True, mode=mode, device='gpu',
provide_shape=provide_shape, border_mode=b)
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode, device='gpu',
provide_shape=provide_shape, border_mode=border_mode) provide_shape=provide_shape, border_mode=border_mode)
return
### No reference implementation of full available yet def test_cormm_conv(self):
border_mode= 'full' mode = mode_with_gpu.excluding('cudnn')
provide_shape = True
self.run_gradweight(inputs_shape=(16, 1, 2, 2), inputs_shapes = self.inputs_shapes
filters_shape=(10, 1, 2, 2), filters_shapes = self.filters_shapes
output_shape=(16, 10, 3, 3), subsamples = self.subsamples
subsample=(1, 1), border_modes = self.border_modes
verify_grad=True, mode=mode_without_gpu, device='cpu', for i, f in zip(inputs_shapes, filters_shapes):
for s in subsamples:
for b in border_modes:
o = self.get_output_shape(i, f, s, b)
for provide_shape in [False, True]:
self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
verify_grad=True, mode=mode, device='gpu',
provide_shape=provide_shape, border_mode=b)
self.run_gradweight(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=True, mode=mode, device='gpu',
provide_shape=provide_shape, border_mode=b)
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=True, mode=mode, device='gpu',
provide_shape=provide_shape, border_mode=border_mode) provide_shape=provide_shape, border_mode=border_mode)
def test_cpu_grad_weight(self):
### FIXME subsample def test_cpu_conv(self):
inputs_shapes = [(16, 1, 2, 2), (16, 1, 8, 8), (16, 1, 4, 4)] mode = mode_without_gpu
filters_shapes = [(10, 1, 2, 2), (10, 1, 2, 2), (10, 1, 2, 2),]
output_shapes = [(16, 10, 1, 1), (16, 10, 7, 7), (16, 10, 3, 3)] inputs_shapes = self.inputs_shapes
subsamples = [(1, 1), (1, 1), (1, 1)] filters_shapes = self.filters_shapes
subsamples = self.subsamples
border_modes = self.border_modes[:2] # only valid and full are supported
border_mode = 'valid' for i, f in zip(inputs_shapes, filters_shapes):
for i, f, o, s in zip(inputs_shapes[:], filters_shapes[:], output_shapes[:], subsamples[:]): for s in subsamples:
for b in border_modes:
o = self.get_output_shape(i, f, s, b)
for provide_shape in [False, True]: for provide_shape in [False, True]:
self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
verify_grad=True, mode=mode, device='cpu',
provide_shape=provide_shape, border_mode=b)
self.run_gradweight(inputs_shape=i, filters_shape=f, self.run_gradweight(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, output_shape=o, subsample=s,
verify_grad=True, mode=mode_without_gpu, device='cpu', verify_grad=True, mode=mode, device='cpu',
provide_shape=provide_shape, border_mode=border_mode) provide_shape=provide_shape, border_mode=b)
return
### No reference implementation of full available yet
border_mode= 'full'
provide_shape = True
self.run_gradweight(inputs_shape=(16, 1, 2, 2),
filters_shape=(10, 1, 2, 2),
output_shape=(16, 10, 3, 3),
subsample=(1, 1),
verify_grad=True, mode=mode_without_gpu, device='cpu',
provide_shape=provide_shape, border_mode=border_mode)
def test_cpu_grad_input(self):
### FIXME subsample
inputs_shapes = [(16, 1, 2, 2), (16, 1, 8, 8), (16, 1, 4, 4)]
filters_shapes = [(10, 1, 2, 2), (10, 1, 2, 2), (10, 1, 2, 2),]
output_shapes = [(16, 10, 1, 1), (16, 10, 7, 7), (16, 10, 3, 3)]
subsamples = [(1, 1), (1, 1), (1, 1)]
border_mode= 'valid'
for i, f, o, s in zip(inputs_shapes[:], filters_shapes[:], output_shapes[:], subsamples[:]):
for provide_shape in [True, False]:
self.run_gradinput(inputs_shape=i, filters_shape=f, self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, output_shape=o, subsample=s,
verify_grad=True, mode=mode_without_gpu, device='cpu', verify_grad=True, mode=mode, device='cpu',
provide_shape=provide_shape, border_mode=border_mode) provide_shape=provide_shape, border_mode=border_mode)
return
### No reference implementation of full available yet
border_mode= 'full'
provide_shape = True
self.run_gradweight(inputs_shape=(16, 1, 2, 2),
filters_shape=(10, 1, 2, 2),
output_shape=(16, 10, 3, 3),
subsample=(1, 1),
verify_grad=True, mode=mode_without_gpu, device='cpu',
provide_shape=provide_shape, border_mode=border_mode)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论