Fix the ConvOp gradient when dx != 1 or dy != 1

上级 b0c67648
......@@ -55,9 +55,8 @@ class ConvOp(Op):
else:
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a multiple of nkern(%s)We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern))
self.unroll_kern=1
if (self.dx!=1 or self.dy!=1):
print "WARNING: dx(%d)!=1 or dy(%d)!=1. The gradient is not implemented for those case."
self.outshp = getFilterOutShp(self.imshp, kshp, (dx,dy), output_mode)
self.fulloutshp = getFilterOutShp(self.imshp, kshp, (1,1), output_mode)
self.out_mode = output_mode
if not self.out_mode in ["valid", "full"]:
raise Exception("Mode %s not implemented"%self.out_mode)
......@@ -130,9 +129,14 @@ class ConvOp(Op):
* inputs needs to be a 4D tensor. Couldn't get 3D to work
* will crash if filter the same size as input image
"""
outshp = self.fulloutshp
if self.dx!=1 or self.dy!=1:
raise NotImplementedError("I don't know how to implement the grad when dx!=1 or dy!=1! Is this possible?")
upgz = T.as_tensor(N.zeros((self.bsize,self.nkern)+tuple(self.fulloutshp),
dtype=gz.type.dtype))
gz = T.SetSubtensor([slice(self.bsize), slice(self.nkern),
slice(0,outshp[0],self.dy),
slice(0,outshp[1],self.dx)])(upgz,gz)
####### Determine gradient on kernels ########
if inputs.ndim == 3:
inputs = tensor.shape_padleft(inputs,1)
......@@ -145,13 +149,13 @@ class ConvOp(Op):
(img, filters) = (newin, newgz)
(bsize, nkern) = (self.imshp[0], self.nkern)
imshp = N.hstack((self.bsize, self.imshp[1:]))
kshp = self.outshp
kshp = outshp
un_b = self.unroll_batch
un_k = self.unroll_kern
elif self.out_mode == 'full':
(img, filters) = (newgz, newin)
(bsize, nkern) = (self.nkern, self.imshp[0])
imshp = N.hstack((self.bsize, self.outshp))
imshp = N.hstack((self.bsize, outshp))
kshp = self.imshp[1:]
un_b = self.unroll_kern
un_k = self.unroll_batch
......@@ -186,11 +190,11 @@ class ConvOp(Op):
filters = tensor.DimShuffle(gz.broadcastable, (1,0,2,3))(kerns)
filters = filters[:,:,::-1,::-1]
nkern = self.imshp[0]
imshp = N.hstack((self.nkern,self.outshp))
imshp = N.hstack((self.nkern,outshp))
din = ConvOp(imshp, self.kshp, nkern, self.bsize,
1,1, output_mode=mode,
unroll_batch=un_b, unroll_kern=un_k)(gz,filters)
assert (din.owner.op.outshp==self.imshp[1:]).all()
return [din, dw]
#def c():
......
......@@ -428,11 +428,12 @@ class TestConvOp(unittest.TestCase):
nkern = 4
bsize = 3
types = ["float32", "float64"]
kshps = [(5,5), (6,7)]
imshps = [(1,5,5), (2,8,7)]
kshps = [(3,4)]
imshps = [(2,8,7)]
modes = ['valid', 'full']
unroll_batch=[0,1,3]
unroll_kern=[0,1,4]
ssizes = [(1,1),(2,2)]
for typ in types:
imgs = T.TensorType(typ, (False, False, False, False),'imgs')
......@@ -447,27 +448,23 @@ class TestConvOp(unittest.TestCase):
continue
for un_b in unroll_batch:
for un_k in unroll_kern:
imgvals = N.array(N.random.random(N.hstack((bsize,imshp))),dtype=imgs.dtype)
# print 'imgvals.shape = ', imgvals.shape, imgvals.dtype
# imgvals = imgvals.reshape(bsize,-1)
kernvals = N.array(N.random.rand(nkern,visdim,kshp[0],
kshp[1]),dtype=kerns.dtype)
# print 'kernvals.shape = ', kernvals.shape, kernvals.dtype
# kernvals = kernvals.reshape(nkern,-1)
def testf(imgs, kerns):
out, outshp = convolve2(kerns, kshp, nkern,
imgs, imshp, bsize,
mode=mode,
unroll_batch=un_b,
unroll_kern=un_k)
return out
#TODO the tolerance needed to pass is very high for float32(0.16). Is this acceptable? Expected?
utt.verify_grad(testf, [imgvals, kernvals],
cast_to_output_type=True,
tol=None if typ!="float32" else 0.16)
for ss in ssizes:
imgvals = N.array(N.random.random(N.hstack((bsize,imshp))),dtype=imgs.dtype)
kernvals = N.array(N.random.rand(nkern,visdim,kshp[0],
kshp[1]),dtype=kerns.dtype)
def testf(imgs, kerns):
out, outshp = convolve2(kerns, kshp, nkern,
imgs, imshp, bsize,
mode=mode, step=ss,
unroll_batch=un_b,
unroll_kern=un_k)
return out
#TODO the tolerance needed to pass is very high for float32(0.16). Is this acceptable? Expected?
utt.verify_grad(testf, [imgvals, kernvals],
cast_to_output_type=True,
tol=None if typ!="float32" else 0.16)
if __name__ == '__main__':
t = TestConvOp("test_convolution")
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论