Commit 1e126b35, authored by lamblin

Merge pull request #1044 from nouiz/gpu_conv_logical

Move convolution to the GPU when the image shape and logical image shape...
...@@ -128,11 +128,12 @@ compile_cuda_ndarray = True ...@@ -128,11 +128,12 @@ compile_cuda_ndarray = True
if not compile_cuda_ndarray: if not compile_cuda_ndarray:
compile_cuda_ndarray = not try_import() compile_cuda_ndarray = not try_import()
if not nvcc_compiler.is_nvcc_available(): if not nvcc_compiler.is_nvcc_available() or not theano.config.cxx:
# It can happen that there the file cuda_ndarray.so is already compiled # It can happen that the file cuda_ndarray.so is already compiled
# but nvcc is not available. In that case we need to disable the CUDA # but nvcc is not available. In that case we need to disable the CUDA
# back-end as we won't be able to compile any new op and we can't only # back-end as we won't be able to compile any new op and we can't only
# use already compiled GPU op and not the others. # use already compiled GPU op and not the others.
# Also, if cxx is not available, we need to disable all GPU code.
set_cuda_disabled() set_cuda_disabled()
if compile_cuda_ndarray and cuda_available: if compile_cuda_ndarray and cuda_available:
......
...@@ -1011,11 +1011,7 @@ def local_gpu_conv(node): ...@@ -1011,11 +1011,7 @@ def local_gpu_conv(node):
""" """
def GpuConvOp_from_ConvOp(op): def GpuConvOp_from_ConvOp(op):
logical_img_hw = None logical_img_hw = None
if op.imshp_logical is not None:
logical_img_hw = op.imshp_logical[1:3]
if logical_img_hw != op.imshp[1:3]:
# this case is not implemented
return None
if op.kshp_logical is not None and op.kshp_logical != op.kshp: if op.kshp_logical is not None and op.kshp_logical != op.kshp:
return None return None
#print op.kshp, op.imshp[1:3] #print op.kshp, op.imshp[1:3]
...@@ -1033,6 +1029,23 @@ def local_gpu_conv(node): ...@@ -1033,6 +1029,23 @@ def local_gpu_conv(node):
#HACK to print the number of MFlops in the profiler output. #HACK to print the number of MFlops in the profiler output.
if hasattr(op, 'flops'): if hasattr(op, 'flops'):
ret.flops = op.flops ret.flops = op.flops
if op.imshp_logical is not None:
logical_img_hw = op.imshp_logical[1:3]
if logical_img_hw != op.imshp[1:3]:
# this case is not implemented
#return None
rstride = int(numpy.ceil(op.imshp_logical[1] /
float(op.imshp[1])))
cstride = int(numpy.ceil(op.imshp_logical[2] /
float(op.imshp[2])))
def make_graph(img, kern):
buf = tensor.alloc(numpy.asarray(0, dtype=img.dtype),
img.shape[0], *op.imshp_logical)
img = tensor.set_subtensor(buf[:, :, ::rstride, ::cstride],
img)
img = gpu_from_host(img)
return ret(img, kern)
return make_graph
return ret return ret
if node.op == gpu_from_host: if node.op == gpu_from_host:
......
...@@ -17,6 +17,7 @@ except ImportError: ...@@ -17,6 +17,7 @@ except ImportError:
pass pass
import theano import theano
from theano import tensor
# Skip test if cuda_ndarray is not available. # Skip test if cuda_ndarray is not available.
import theano.sandbox.cuda as cuda_ndarray import theano.sandbox.cuda as cuda_ndarray
...@@ -707,14 +708,41 @@ def test_subsample(): ...@@ -707,14 +708,41 @@ def test_subsample():
exec_conv(version_full, shapes, verbose, random, 'full', exec_conv(version_full, shapes, verbose, random, 'full',
print_=print_, ones=ones) print_=print_, ones=ones)
## See #616
#def test_logical_shapes():
# # implement when
# print >> sys.stderr, ("WARNING TODO: test_logical_shapes not implemented"
# " (i.e. imshp_logical, kshp_logical, kshp_logical_top_aligned)")
class TestConv2DGPU(unittest.TestCase): class TestConv2DGPU(unittest.TestCase):
def test_logical_shapes(self):
    """Check that a conv with logical shapes is still moved to the GPU.

    Regression test for #616 (imshp_logical / kshp_logical support):
    builds a 'full' convolution whose logical image shape is the
    physical feature-map shape upsampled by ``stride`` along both
    spatial axes, compiles it, and asserts that the optimizer inserted
    a ``GpuConv`` node.  Finally runs the compiled function on random
    float32 data to make sure it executes.
    """
    for stride in range(1, 4):
        kshp = (10, 2, 10, 10)
        featshp = (3, 10, 11, 11)

        a = tensor.ftensor4()
        A = tensor.ftensor4()
        # Need to transpose first two dimensions of kernel, and
        # reverse index kernel image dims (for correlation).
        kernel_rotated = tensor.transpose(A, axes=[1, 0, 2, 3])

        # Logical image shape: spatial dims upsampled by `stride`.
        featshp_logical = (featshp[0], featshp[1],
                           featshp[2] * stride, featshp[3] * stride)
        kshp_rotated = (kshp[1], kshp[0], kshp[2], kshp[3])

        image_estimate = tensor.nnet.conv2d(a, kernel_rotated,
                                            border_mode='full',
                                            image_shape=featshp,
                                            filter_shape=kshp_rotated,
                                            imshp_logical=featshp_logical[1:],
                                            kshp_logical=kshp[2:])

        func = theano.function([a, A], image_estimate, mode=theano_mode)
        # The point of the test: the optimization must have produced a
        # GpuConv node despite the logical/physical shape mismatch.
        assert any([isinstance(node.op, theano.sandbox.cuda.blas.GpuConv)
                    for node in func.maker.fgraph.toposort()])

        a_in = numpy.random.randn(*featshp).astype("float32")
        A_in = numpy.random.randn(*kshp).astype("float32")
        # Make sure the compiled graph actually runs on GPU inputs.
        func(a_in, A_in)
def test_invalid_input_shape(self): def test_invalid_input_shape(self):
""" """
Tests that when the shape gived at build time is not the same as Tests that when the shape gived at build time is not the same as
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论