Commit 12cc6f02 authored by Nicolas Ballas, committed by Pascal Lamblin

update cpu gradweight

Parent 3937acc7
@@ -32,6 +32,9 @@ from theano.sandbox.cuda.opt import values_eq_approx_high_tol
## Cpu implementation
from theano.tensor.nnet import conv2d as cpu_conv2d, ConvOp
from theano.tensor.nnet.ConvGrad3D import convGrad3D
from theano.tensor.nnet.ConvTransp3D import convTransp3D
_logger = logging.getLogger("theano.tensor.nnet.conv2d")
@@ -434,7 +437,7 @@ def local_conv2d_cudnn(node):
direction_hint='bprop inputs',
conv_mode = conv_mode)
return [rval]
register_specialize_device(local_conv2d_cudnn)
#register_specialize_device(local_conv2d_cudnn)
@local_optimizer([AbstractConv2d])
@@ -445,7 +448,6 @@ def local_conv2d_corrmm(node):
not isinstance(kern.type, CudaNdarrayType)):
return None
print "here"
if node.op.border_mode in ['full', 'valid']:
border_mode = node.op.border_mode
@@ -495,7 +497,7 @@ def local_conv2d_corrmm(node):
rval = GpuCorrMM_gradInputs('valid', subsample)(
gpu_contiguous(kern), gpu_contiguous(img))
return [rval]
register_specialize_device(local_conv2d_corrmm)
#register_specialize_device(local_conv2d_corrmm)
@local_optimizer([AbstractConv2d_gradWeights])
def local_conv2d_gradweight_corrmm(node):
@@ -511,7 +513,7 @@ def local_conv2d_gradweight_corrmm(node):
subsample=node.op.subsample)(
gpu_contiguous(img), gpu_contiguous(topgrad), shape)
return [rval]
register_specialize_device(local_conv2d_gradweight_corrmm)
#register_specialize_device(local_conv2d_gradweight_corrmm)
@local_optimizer([AbstractConv2d_gradInputs])
def local_conv2d_gradinputs_corrmm(node):
@@ -528,7 +530,7 @@ def local_conv2d_gradinputs_corrmm(node):
subsample=node.op.subsample)(
gpu_contiguous(kern), gpu_contiguous(topgrad), shape)
return [rval]
register_specialize_device(local_conv2d_gradinputs_corrmm)
#register_specialize_device(local_conv2d_gradinputs_corrmm)
@@ -537,6 +539,7 @@ register_specialize_device(local_conv2d_gradinputs_corrmm)
@local_optimizer([AbstractConv2d])
def local_conv2d_cpu(node):
if not isinstance(node.op, AbstractConv2d):
return None
@@ -556,33 +559,36 @@ register_specialize_device(local_conv2d_cpu)
@local_optimizer([AbstractConv2d_gradWeights])
def local_conv2d_gradweight_cpu(node):
## shape (the third input) always has length 4: the full kernel shape
img, topgrad, shape = node.inputs
if isinstance(img.type, CudaNdarrayType) or \
isinstance(topgrad.type, CudaNdarrayType):
return None
if node.op.border_mode == 'valid' and node.op.subsample != (1, 1):
if (node.op.border_mode == 'valid' and node.op.subsample != (1, 1)) or \
node.op.imshp is None or node.op.kshp is None:
# Use the gradient as defined in conv3D, because the implementation
# by Conv is slow (about 3x slower than conv3D, and probably 10x
# slower than it could be), and incorrect when dx or dy > 2.
# slower than it could be), and incorrect when subsample > 2.
# build a "node", that should be equivalent to the one given by
# self.make_node, but using convGrad3D instead.
shuffled_img = img.dimshuffle(0, 2, 3, 'x', 1)
shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
rval = ConvGrad3D(V=shuffled_img,
d=(op.subsample[0], op.subsample[1], 1),
WShape=(self.kshp[0], self.kshp[1], 1),
dCdH_=shuffled_topgrad)
rval = convGrad3D(V=shuffled_img,
d=(node.op.subsample[0], node.op.subsample[1], 1),
WShape=(shape[0], shape[2], shape[3], 1, shape[1]),
dCdH=shuffled_topgrad)
rval = theano.tensor.addbroadcast(rval, 3)
return [rval.dimshuffle(0, 4, 1, 2)]
if node.op.imshp is None or node.op.kshp is None:
return None
####### Determine gradient on kernels ########
assert len(node.op.imshp) == 4 and len(node.op.kshp) == 4
print "here0", node.op.imshp[2:], node.op.kshp[2:]
import pdb; pdb.set_trace()
outshp = ConvOp.getOutputShape(node.op.imshp[2:],
node.op.kshp[2:], node.op.subsample,
@@ -592,9 +598,15 @@ def local_conv2d_gradweight_cpu(node):
node.op.border_mode)
#newimg = img.dimshuffle((1, 0, 2, 3))
#newtopgrad = topgrad.dimshuffle((1, 0, 2, 3))
newimg = img
newtopgrad = topgrad
if node.op.border_mode == 'valid':
print "here1", node.op.imshp, node.op.kshp, fulloutshp
(img, filters) = (img, topgrad)
(img, filters) = (newimg, newtopgrad)
kshp_logical = fulloutshp
kshp_logical_top_aligned = False
imshp_logical = None
@@ -602,15 +614,15 @@ def local_conv2d_gradweight_cpu(node):
imshp = (bsize, node.op.imshp[1], node.op.imshp[2])
kshp = node.op.kshp[2:]
elif node.op.border_mode == 'full':
(img, filters) = (topgrad, img)
(img, filters) = (newtopgrad, newimg)
kshp_logical = None
kshp_logical_top_aligned = True
imshp_logical = (node.op.imshp[0],
fulloutshp[0],
fulloutshp[1]) ## FIXME
fulloutshp[1])
(bsize, nkern) = (node.op.kshp[0], node.op.imshp[1])
imshp = (node.op.imshp[0], outshp[0], outshp[1]) ## FIXME
kshp = node.op.imshp[1:] ## FIXME
imshp = (node.op.imshp[0], outshp[0], outshp[1])
kshp = node.op.imshp[1:]
else:
raise NotImplementedError(
'Only [full,valid] modes are currently supported.')
@@ -629,26 +641,46 @@ def local_conv2d_gradweight_cpu(node):
#dw = ConvOp(output_mode='valid')
res = dw(img, filters)
print "here3", node.op.imshp, node.op.kshp, fulloutshp
res = res.dimshuffle((1, 0, 2, 3))
return [res]
register_specialize_device(local_conv2d_gradweight_cpu)
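The dimshuffle round-trip in the convGrad3D fallback above is easy to get wrong. Here is a minimal NumPy sketch of the shape bookkeeping, with hypothetical sizes taken from the test case (not part of the commit):

import numpy as np

b, c, h, w = 16, 1, 2, 2    # image: (batch, channels, rows, cols)
nkern, kh, kw = 10, 2, 2    # kernel count and spatial size

img = np.zeros((b, c, h, w), dtype='float32')
# dimshuffle(0, 2, 3, 'x', 1): append a dummy depth axis for the 3D op
shuffled = img.transpose(0, 2, 3, 1)[:, :, :, np.newaxis, :]
assert shuffled.shape == (b, h, w, 1, c)

# convGrad3D returns dCdW laid out as WShape = (nkern, kh, kw, 1, c)
dCdW = np.zeros((nkern, kh, kw, 1, c), dtype='float32')
# addbroadcast(rval, 3) plus dimshuffle(0, 4, 1, 2) drops the depth axis
grad_w = dCdW[:, :, :, 0, :].transpose(0, 3, 1, 2)
assert grad_w.shape == (nkern, c, kh, kw)  # the usual 2d kernel layout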
@local_optimizer([AbstractConv2d_gradInputs])
def local_conv2d_gradinputs_cpu(node):
kern, topgrad, shape = node.inputs
if isinstance(kern.type, CudaNdarrayType) or \
isinstance(topgrad.type, CudaNdarrayType):
return None
print "here4a", node.op.imshp, node.op.kshp
if node.op.border_mode == 'valid' and node.op.subsample != (1, 1):
# Use the gradient as defined in conv3D, because the implementation
# by Conv is slow (about 3x slower than conv3D, and probably 10x
# slower than it could be), and incorrect when subsample > 2.
# build a "node", that should be equivalent to the one given by
# self.make_node, but using convGrad3D instead.
shuffled_kern = kern.dimshuffle(0, 2, 3, 'x', 1)
shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
b = T.zeros((kern.shape[1]))
rval = convTransp3D(W=shuffled_kern, b=b,
d=(node.op.subsample[0], node.op.subsample[1], 1),
H=shuffled_topgrad,
RShape=(shape[0], shape[1], 1))
return [rval.dimshuffle(0, 4, 1, 2)]
####### Determine gradient on inputs ########
mode = 'valid'
if node.op.border_mode != 'full':
mode = 'full'
filters = kern.dimshuffle((1, 0, 2, 3))
if node.op.filter_flip:
filters = filters[:, :, ::-1, ::-1]
filters = kern.dimshuffle((1, 0, 2, 3))
outshp = ConvOp.getOutputShape(node.op.imshp[2:],
node.op.kshp[2:], node.op.subsample,
node.op.border_mode)
@@ -659,6 +691,10 @@ def local_conv2d_gradinputs_cpu(node):
imshp = (nkern, outshp[0], outshp[1])
imshp_logical = (nkern, fulloutshp[0], fulloutshp[1])
if node.op.filter_flip:
filters = filters[:, :, ::-1, ::-1]
print "here4", imshp, node.op.kshp, nkern
din = ConvOp(imshp,
node.op.kshp[2:],
@@ -671,16 +707,6 @@ def local_conv2d_gradinputs_cpu(node):
kshp_logical=None,
version=-1,
direction_hint='bprop inputs')
din = din(topgrad, filters)
# din and dw should have the same broadcasting pattern as the
# parameters they are the gradient of (resp. inputs and kerns).
return [din]
register_specialize_device(local_conv2d_gradinputs_cpu)
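local_conv2d_gradinputs_cpu relies on the standard identity that the input gradient of a 'valid' convolution is a 'full' convolution of the output gradient with the channel-swapped (and, when filter_flip is set, re-flipped) filters; dimshuffle((1, 0, 2, 3)) performs the channel swap. A quick arithmetic sanity check of the mode swap (illustrative only, hypothetical sizes):

h, w, kh, kw = 5, 5, 3, 3                          # input and kernel sizes
out_h, out_w = h - kh + 1, w - kw + 1              # 'valid' forward: (3, 3)
back_h, back_w = out_h + kh - 1, out_w + kw - 1    # 'full' backward pass
assert (back_h, back_w) == (h, w)                  # recovers the input shape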
@@ -34,8 +34,11 @@ class TestConv2d(unittest.TestCase):
inputs_val = numpy.random.random(inputs_shape).astype('float32')
filters_val = numpy.random.random(filters_shape).astype('float32')
inputs = shared(inputs_val)
filters = shared(filters_val)
### FIXME (CPU vs GPU)
inputs = theano.tensor._shared(inputs_val)
filters = theano.tensor._shared(filters_val)
c_ref = conv_ref.conv2d(inputs, filters,
border_mode="valid",
subsample=subsample)
@@ -63,23 +66,40 @@ class TestConv2d(unittest.TestCase):
def run_gradweight(self,
inputs_shape,
filters_shape,
output_shape,
subsample=(1, 1),
verify_grad=True,
mode=mode_without_gpu):
mode=mode_without_gpu,
device='gpu',
provide_shape=False):
inputs_val = numpy.random.random(inputs_shape).astype('float32')
filters_val = numpy.random.random(filters_shape).astype('float32')
inputs = shared(inputs_val.transpose((1, 0, 2, 3)))
filters = shared(filters_val.transpose((1, 0, 2, 3))[:,:,:,:])
output_val = numpy.random.random(output_shape).astype('float32')
if device == 'gpu':
inputs = shared(inputs_val)
output = shared(output_val)
else:
inputs = theano.tensor._shared(inputs_val)
output = theano.tensor._shared(output_val)
if provide_shape:
imshp = inputs_shape
kshp = filters_shape
else:
imshp = None
kshp = None
c = conv.AbstractConv2d_gradWeights(border_mode="valid",
subsample=subsample)
c = c(inputs, filters, inputs_shape)
subsample=subsample,
imshp=imshp, kshp=kshp)
c = c(inputs, output, filters_shape)
f = theano.function([], c, mode)
res_ref = py_conv(inputs_val, filters_val, 'valid', subsample)
res_ref = py_conv(inputs_val.transpose((1, 0, 2, 3)),
output_val.transpose((1, 0, 2, 3)),
'valid', subsample).transpose((1, 0, 2, 3))
res = numpy.array(f()).transpose((1, 0, 2, 3))
res = numpy.array(f())
utt.assert_allclose(res_ref, res)
if verify_grad:
utt.verify_grad(conv.AbstractConv2d(border_mode="valid",
@@ -129,17 +149,14 @@ class TestConv2d(unittest.TestCase):
# verify_grad=False, mode=mode)
def test_cpu(self):
self.run_conv(inputs_shape=(16, 1, 2, 2),
filters_shape=(10, 1, 2, 2),
verify_grad=True,
mode=mode_without_gpu)
# self.run_gradweight(inputs_shape=(16, 1, 2, 2),
# filters_shape=(10, 1, 2, 2),
# verify_grad=False, mode=mode_without_gpu)
#self.run_gradinput(inputs_shape=(1, 1, 2, 2),
# filters_shape=(10, 1, 2, 2),
# verify_grad=False, mode=mode_without_gpu)
#def test_cpu(self):
#self.run_conv(inputs_shape=(16, 1, 2, 2),
# filters_shape=(10, 1, 2, 2),
# verify_grad=False,
# mode=mode_without_gpu)
# self.run_gradinput(inputs_shape=(1, 1, 2, 2),
# filters_shape=(10, 1, 2, 2),
# verify_grad=False, mode=mode_without_gpu)
# mode = mode_without_gpu
# self.run_conv(inputs_shape=(16, 1, 2, 2),
@@ -166,4 +183,16 @@ class TestConv2d(unittest.TestCase):
# # subsample=(2, 2),
# # verify_grad=True,mode=mode)
def test_cpu_grad_weight(self):
self.run_gradweight(inputs_shape=(16, 1, 2, 2),
filters_shape=(10, 1, 2, 2),
output_shape=(16, 10, 1, 1),
verify_grad=False, mode=mode_without_gpu, device='cpu')
self.run_gradweight(inputs_shape=(16, 1, 2, 2),
filters_shape=(10, 1, 2, 2),
output_shape=(16, 10, 1, 1),
verify_grad=False,
mode=mode_without_gpu, device='cpu',
provide_shape=True)
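For reference, the output_shape arguments above follow from the valid-mode output formula. A small illustrative helper (valid_out_shape is not part of the test file):

def valid_out_shape(in_shape, k_shape, subsample=(1, 1)):
    # per spatial axis: out = (in - k) // stride + 1 (valid mode)
    return tuple((i - k) // s + 1
                 for i, k, s in zip(in_shape, k_shape, subsample))

# inputs (16, 1, 2, 2) with filters (10, 1, 2, 2): 2 - 2 + 1 = 1 per axis,
# so the expected output is (batch, nkern, 1, 1) = (16, 10, 1, 1).
assert valid_out_shape((2, 2), (2, 2)) == (1, 1)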