Commit 1e126b35 authored by lamblin

Merge pull request #1044 from nouiz/gpu_conv_logical

Move convolution to the GPU when the image shape and logical image shape...
......@@ -128,11 +128,12 @@ compile_cuda_ndarray = True
if not compile_cuda_ndarray:
compile_cuda_ndarray = not try_import()
if not nvcc_compiler.is_nvcc_available():
# It can happen that there the file cuda_ndarray.so is already compiled
if not nvcc_compiler.is_nvcc_available() or not theano.config.cxx:
# It can happen that the file cuda_ndarray.so is already compiled
# but nvcc is not available. In that case we need to disable the CUDA
# back-end as we won't be able to compile any new op and we can't only
# use already compiled GPU op and not the others.
# Also, if cxx is not available, we need to disable all GPU code.
set_cuda_disabled()
if compile_cuda_ndarray and cuda_available:
......
......@@ -1011,11 +1011,7 @@ def local_gpu_conv(node):
"""
def GpuConvOp_from_ConvOp(op):
logical_img_hw = None
if op.imshp_logical is not None:
logical_img_hw = op.imshp_logical[1:3]
if logical_img_hw != op.imshp[1:3]:
# this case is not implemented
return None
if op.kshp_logical is not None and op.kshp_logical != op.kshp:
return None
#print op.kshp, op.imshp[1:3]
......@@ -1033,6 +1029,23 @@ def local_gpu_conv(node):
#HACK to print the number of MFlops in the profiler output.
if hasattr(op, 'flops'):
ret.flops = op.flops
if op.imshp_logical is not None:
logical_img_hw = op.imshp_logical[1:3]
if logical_img_hw != op.imshp[1:3]:
# this case is not implemented
#return None
rstride = int(numpy.ceil(op.imshp_logical[1] /
float(op.imshp[1])))
cstride = int(numpy.ceil(op.imshp_logical[2] /
float(op.imshp[2])))
def make_graph(img, kern):
buf = tensor.alloc(numpy.asarray(0, dtype=img.dtype),
img.shape[0], *op.imshp_logical)
img = tensor.set_subtensor(buf[:, :, ::rstride, ::cstride],
img)
img = gpu_from_host(img)
return ret(img, kern)
return make_graph
return ret
if node.op == gpu_from_host:
......
......@@ -17,6 +17,7 @@ except ImportError:
pass
import theano
from theano import tensor
# Skip test if cuda_ndarray is not available.
import theano.sandbox.cuda as cuda_ndarray
......@@ -707,14 +708,41 @@ def test_subsample():
exec_conv(version_full, shapes, verbose, random, 'full',
print_=print_, ones=ones)
## See #616
#def test_logical_shapes():
# # implement when
# print >> sys.stderr, ("WARNING TODO: test_logical_shapes not implemented"
# " (i.e. imshp_logical, kshp_logical, kshp_logical_top_aligned)")
class TestConv2DGPU(unittest.TestCase):
def test_logical_shapes(self):
    """Regression test for #616: conv2d with logical shapes on the GPU.

    When ``imshp_logical`` differs from the physical image shape by an
    integer upsampling factor (``stride``), the ``local_gpu_conv``
    optimization should still be able to move the convolution to the
    GPU instead of bailing out.  We check that a ``GpuConv`` node
    appears in the compiled graph and that the function runs.
    """
    # Try several upsampling factors; stride == 1 is the degenerate
    # case where logical and physical shapes coincide.
    for stride in range(1, 4):
        kshp = (10, 2, 10, 10)
        featshp = (3, 10, 11, 11)
        a = tensor.ftensor4()
        A = tensor.ftensor4()
        # Need to transpose first two dimensions of kernel, and reverse
        # index kernel image dims (for correlation)
        kernel_rotated = tensor.transpose(A, axes=[1, 0, 2, 3])
        # Logical feature-map shape: physical spatial dims upsampled
        # by `stride` along both rows and columns.
        featshp_logical = (featshp[0], featshp[1], featshp[2] * stride,
                           featshp[3] * stride)
        kshp_rotated = (kshp[1], kshp[0], kshp[2], kshp[3])
        print featshp, kshp_rotated, featshp_logical[1:], kshp[2:]
        # conv2d is given the physical shapes plus the logical ones;
        # imshp_logical/kshp_logical drop the batch/stack dimension.
        image_estimate = tensor.nnet.conv2d(a, kernel_rotated,
                                            border_mode='full',
                                            image_shape=featshp,
                                            filter_shape=kshp_rotated,
                                            imshp_logical=featshp_logical[1:],
                                            kshp_logical=kshp[2:])
        # NOTE(review): `theano_mode` is a module-level name defined
        # outside this view — presumably a GPU-enabled compilation mode.
        func = theano.function([a, A], image_estimate, mode=theano_mode)
        theano.printing.debugprint(func,)
        # The point of the test: the convolution must have been moved
        # to the GPU, i.e. a GpuConv node exists in the final graph.
        assert any([isinstance(node.op, theano.sandbox.cuda.blas.GpuConv)
                    for node in func.maker.fgraph.toposort()])
        # Finally make sure the compiled function actually executes.
        a_in = numpy.random.randn(*featshp).astype("float32")
        A_in = numpy.random.randn(*kshp).astype("float32")
        func(a_in, A_in)
def test_invalid_input_shape(self):
"""
Tests that when the shape gived at build time is not the same as
......
Markdown 格式
0%
You are adding 0 people to this discussion. Please proceed with caution.
请先完成此评论的编辑!
注册 或者 后发表评论