Commit b18ac4ac, authored by Frederic

Move convolution to the GPU when the image shape and logical image shape differ.

Parent commit: 05e72586
...@@ -1011,11 +1011,7 @@ def local_gpu_conv(node): ...@@ -1011,11 +1011,7 @@ def local_gpu_conv(node):
""" """
def GpuConvOp_from_ConvOp(op): def GpuConvOp_from_ConvOp(op):
logical_img_hw = None logical_img_hw = None
if op.imshp_logical is not None:
logical_img_hw = op.imshp_logical[1:3]
if logical_img_hw != op.imshp[1:3]:
# this case is not implemented
return None
if op.kshp_logical is not None and op.kshp_logical != op.kshp: if op.kshp_logical is not None and op.kshp_logical != op.kshp:
return None return None
#print op.kshp, op.imshp[1:3] #print op.kshp, op.imshp[1:3]
...@@ -1033,6 +1029,23 @@ def local_gpu_conv(node): ...@@ -1033,6 +1029,23 @@ def local_gpu_conv(node):
#HACK to print the number of MFlops in the profiler output. #HACK to print the number of MFlops in the profiler output.
if hasattr(op, 'flops'): if hasattr(op, 'flops'):
ret.flops = op.flops ret.flops = op.flops
if op.imshp_logical is not None:
    logical_img_hw = op.imshp_logical[1:3]
    if logical_img_hw != op.imshp[1:3]:
        # The logical image shape differs from the physical one:
        # emulate the logical (larger) image by upsampling the
        # physical image with zero-filled gaps, then run the GPU
        # convolution on that buffer.
        # NOTE(review): assumes imshp_logical dims are >= imshp dims
        # (ceil of the ratio is used as the scatter stride) -- confirm
        # against the ConvOp contract.
        rstride = int(numpy.ceil(op.imshp_logical[1] /
                                 float(op.imshp[1])))
        cstride = int(numpy.ceil(op.imshp_logical[2] /
                                 float(op.imshp[2])))

        def make_graph(img, kern):
            # Zero buffer with the logical shape (batch dim taken
            # from the runtime image), same dtype as the input image.
            buf = tensor.alloc(numpy.asarray(0, dtype=img.dtype),
                               img.shape[0], *op.imshp_logical)
            # Scatter the physical image into the buffer every
            # (rstride, cstride) positions along the spatial dims.
            img = tensor.set_subtensor(buf[:, :, ::rstride, ::cstride],
                                       img)
            img = gpu_from_host(img)
            return ret(img, kern)
        return make_graph
return ret return ret
if node.op == gpu_from_host: if node.op == gpu_from_host:
......
...@@ -17,6 +17,7 @@ except ImportError: ...@@ -17,6 +17,7 @@ except ImportError:
pass pass
import theano import theano
from theano import tensor
# Skip test if cuda_ndarray is not available. # Skip test if cuda_ndarray is not available.
import theano.sandbox.cuda as cuda_ndarray import theano.sandbox.cuda as cuda_ndarray
...@@ -707,14 +708,41 @@ def test_subsample(): ...@@ -707,14 +708,41 @@ def test_subsample():
exec_conv(version_full, shapes, verbose, random, 'full', exec_conv(version_full, shapes, verbose, random, 'full',
print_=print_, ones=ones) print_=print_, ones=ones)
## See #616
#def test_logical_shapes():
# # implement when
# print >> sys.stderr, ("WARNING TODO: test_logical_shapes not implemented"
# " (i.e. imshp_logical, kshp_logical, kshp_logical_top_aligned)")
class TestConv2DGPU(unittest.TestCase): class TestConv2DGPU(unittest.TestCase):
def test_logical_shapes(self):
    """Regression test for #616.

    A conv2d whose logical image shape differs from its physical
    image shape (imshp_logical != imshp) must still be moved to the
    GPU (a GpuConv node must appear in the compiled graph) and must
    execute without error.
    """
    for stride in range(1, 4):
        kshp = (10, 2, 10, 10)
        featshp = (3, 10, 11, 11)

        a = tensor.ftensor4()
        A = tensor.ftensor4()

        # Need to transpose first two dimensions of kernel, and
        # reverse index kernel image dims (for correlation).
        kernel_rotated = tensor.transpose(A, axes=[1, 0, 2, 3])

        featshp_logical = (featshp[0], featshp[1],
                           featshp[2] * stride, featshp[3] * stride)
        kshp_rotated = (kshp[1], kshp[0], kshp[2], kshp[3])

        image_estimate = tensor.nnet.conv2d(a, kernel_rotated,
                                            border_mode='full',
                                            image_shape=featshp,
                                            filter_shape=kshp_rotated,
                                            imshp_logical=featshp_logical[1:],
                                            kshp_logical=kshp[2:])

        func = theano.function([a, A], image_estimate, mode=theano_mode)
        # The optimization under test must have placed the
        # convolution on the GPU.
        assert any([isinstance(node.op, theano.sandbox.cuda.blas.GpuConv)
                    for node in func.maker.fgraph.toposort()])

        a_in = numpy.random.randn(*featshp).astype("float32")
        A_in = numpy.random.randn(*kshp).astype("float32")
        # Execution itself must not raise.
        func(a_in, A_in)
def test_invalid_input_shape(self): def test_invalid_input_shape(self):
""" """
Tests that when the shape gived at build time is not the same as Tests that when the shape gived at build time is not the same as
......
Markdown format supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment