Fix for ConvOp gradient when dx,dy != 1

上级 b0c67648
...@@ -55,9 +55,8 @@ class ConvOp(Op): ...@@ -55,9 +55,8 @@ class ConvOp(Op):
else: else:
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a multiple of nkern(%s)We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern)) print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a multiple of nkern(%s)We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern))
self.unroll_kern=1 self.unroll_kern=1
if (self.dx!=1 or self.dy!=1):
print "WARNING: dx(%d)!=1 or dy(%d)!=1. The gradient is not implemented for those case."
self.outshp = getFilterOutShp(self.imshp, kshp, (dx,dy), output_mode) self.outshp = getFilterOutShp(self.imshp, kshp, (dx,dy), output_mode)
self.fulloutshp = getFilterOutShp(self.imshp, kshp, (1,1), output_mode)
self.out_mode = output_mode self.out_mode = output_mode
if not self.out_mode in ["valid", "full"]: if not self.out_mode in ["valid", "full"]:
raise Exception("Mode %s not implemented"%self.out_mode) raise Exception("Mode %s not implemented"%self.out_mode)
...@@ -130,8 +129,13 @@ class ConvOp(Op): ...@@ -130,8 +129,13 @@ class ConvOp(Op):
* inputs needs to be a 4D tensor. Couldn't get 3D to work * inputs needs to be a 4D tensor. Couldn't get 3D to work
* will crash if filter the same size as input image * will crash if filter the same size as input image
""" """
outshp = self.fulloutshp
if self.dx!=1 or self.dy!=1: if self.dx!=1 or self.dy!=1:
raise NotImplementedError("I don't know how to implement the grad when dx!=1 or dy!=1! Is this possible?") upgz = T.as_tensor(N.zeros((self.bsize,self.nkern)+tuple(self.fulloutshp),
dtype=gz.type.dtype))
gz = T.SetSubtensor([slice(self.bsize), slice(self.nkern),
slice(0,outshp[0],self.dy),
slice(0,outshp[1],self.dx)])(upgz,gz)
####### Determine gradient on kernels ######## ####### Determine gradient on kernels ########
if inputs.ndim == 3: if inputs.ndim == 3:
...@@ -145,13 +149,13 @@ class ConvOp(Op): ...@@ -145,13 +149,13 @@ class ConvOp(Op):
(img, filters) = (newin, newgz) (img, filters) = (newin, newgz)
(bsize, nkern) = (self.imshp[0], self.nkern) (bsize, nkern) = (self.imshp[0], self.nkern)
imshp = N.hstack((self.bsize, self.imshp[1:])) imshp = N.hstack((self.bsize, self.imshp[1:]))
kshp = self.outshp kshp = outshp
un_b = self.unroll_batch un_b = self.unroll_batch
un_k = self.unroll_kern un_k = self.unroll_kern
elif self.out_mode == 'full': elif self.out_mode == 'full':
(img, filters) = (newgz, newin) (img, filters) = (newgz, newin)
(bsize, nkern) = (self.nkern, self.imshp[0]) (bsize, nkern) = (self.nkern, self.imshp[0])
imshp = N.hstack((self.bsize, self.outshp)) imshp = N.hstack((self.bsize, outshp))
kshp = self.imshp[1:] kshp = self.imshp[1:]
un_b = self.unroll_kern un_b = self.unroll_kern
un_k = self.unroll_batch un_k = self.unroll_batch
...@@ -186,11 +190,11 @@ class ConvOp(Op): ...@@ -186,11 +190,11 @@ class ConvOp(Op):
filters = tensor.DimShuffle(gz.broadcastable, (1,0,2,3))(kerns) filters = tensor.DimShuffle(gz.broadcastable, (1,0,2,3))(kerns)
filters = filters[:,:,::-1,::-1] filters = filters[:,:,::-1,::-1]
nkern = self.imshp[0] nkern = self.imshp[0]
imshp = N.hstack((self.nkern,self.outshp)) imshp = N.hstack((self.nkern,outshp))
din = ConvOp(imshp, self.kshp, nkern, self.bsize, din = ConvOp(imshp, self.kshp, nkern, self.bsize,
1,1, output_mode=mode, 1,1, output_mode=mode,
unroll_batch=un_b, unroll_kern=un_k)(gz,filters) unroll_batch=un_b, unroll_kern=un_k)(gz,filters)
assert (din.owner.op.outshp==self.imshp[1:]).all()
return [din, dw] return [din, dw]
#def c(): #def c():
......
...@@ -428,11 +428,12 @@ class TestConvOp(unittest.TestCase): ...@@ -428,11 +428,12 @@ class TestConvOp(unittest.TestCase):
nkern = 4 nkern = 4
bsize = 3 bsize = 3
types = ["float32", "float64"] types = ["float32", "float64"]
kshps = [(5,5), (6,7)] kshps = [(3,4)]
imshps = [(1,5,5), (2,8,7)] imshps = [(2,8,7)]
modes = ['valid', 'full'] modes = ['valid', 'full']
unroll_batch=[0,1,3] unroll_batch=[0,1,3]
unroll_kern=[0,1,4] unroll_kern=[0,1,4]
ssizes = [(1,1),(2,2)]
for typ in types: for typ in types:
imgs = T.TensorType(typ, (False, False, False, False),'imgs') imgs = T.TensorType(typ, (False, False, False, False),'imgs')
...@@ -447,20 +448,16 @@ class TestConvOp(unittest.TestCase): ...@@ -447,20 +448,16 @@ class TestConvOp(unittest.TestCase):
continue continue
for un_b in unroll_batch: for un_b in unroll_batch:
for un_k in unroll_kern: for un_k in unroll_kern:
for ss in ssizes:
imgvals = N.array(N.random.random(N.hstack((bsize,imshp))),dtype=imgs.dtype) imgvals = N.array(N.random.random(N.hstack((bsize,imshp))),dtype=imgs.dtype)
# print 'imgvals.shape = ', imgvals.shape, imgvals.dtype
# imgvals = imgvals.reshape(bsize,-1)
kernvals = N.array(N.random.rand(nkern,visdim,kshp[0], kernvals = N.array(N.random.rand(nkern,visdim,kshp[0],
kshp[1]),dtype=kerns.dtype) kshp[1]),dtype=kerns.dtype)
# print 'kernvals.shape = ', kernvals.shape, kernvals.dtype
# kernvals = kernvals.reshape(nkern,-1)
def testf(imgs, kerns): def testf(imgs, kerns):
out, outshp = convolve2(kerns, kshp, nkern, out, outshp = convolve2(kerns, kshp, nkern,
imgs, imshp, bsize, imgs, imshp, bsize,
mode=mode, mode=mode, step=ss,
unroll_batch=un_b, unroll_batch=un_b,
unroll_kern=un_k) unroll_kern=un_k)
return out return out
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论