提交 674ee45b authored 作者: David Warde-Farley's avatar David Warde-Farley

Merge pull request #123 from nouiz/check_conv_const_shape_gpu

Check compile/run-time kernel dim mismatches raise an error.
...@@ -456,7 +456,7 @@ class GpuConv(Op): ...@@ -456,7 +456,7 @@ class GpuConv(Op):
return ['cuda_ndarray.cuh','<stdio.h>'] return ['cuda_ndarray.cuh','<stdio.h>']
def c_code_cache_version(self): def c_code_cache_version(self):
return (0,15) # raise this whenever modifying any of the support_code_files return (0, 16) # raise this whenever modifying any of the support_code_files
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of these files # REMEMBER TO RAISE c_code_cache_version when changing any of these files
......
...@@ -82,7 +82,12 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern, ...@@ -82,7 +82,12 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
const int img_size_byte = img_size*sizeof(float); const int img_size_byte = img_size*sizeof(float);
const int kern_size_byte = kern_size*sizeof(float); const int kern_size_byte = kern_size*sizeof(float);
const int out_size_byte = out_size*sizeof(float); const int out_size_byte = out_size*sizeof(float);
assert((THEANO_KERN_WID == CudaNdarray_HOST_DIMS(kern)[3]) || (THEANO_KERN_WID==0)); if (!((THEANO_KERN_WID == CudaNdarray_HOST_DIMS(kern)[3]) || (THEANO_KERN_WID==0))){
PyErr_Format(PyExc_ValueError, "ERROR: This GpuConv code was compiled for"
" %d kernel columns, but the kernel we received had %d columns!",
THEANO_KERN_WID, CudaNdarray_HOST_DIMS(kern)[3]);
return -1;
}
bool subsample = subsample_rows!=1 || subsample_cols!=1; bool subsample = subsample_rows!=1 || subsample_cols!=1;
bool img_contiguous = CudaNdarray_is_c_contiguous(img); bool img_contiguous = CudaNdarray_is_c_contiguous(img);
...@@ -761,7 +766,12 @@ CudaNdarray_conv_full(const CudaNdarray *img, const CudaNdarray * kern, CudaNdar ...@@ -761,7 +766,12 @@ CudaNdarray_conv_full(const CudaNdarray *img, const CudaNdarray * kern, CudaNdar
//const int out_size_byte = out_size*sizeof(float); // unused //const int out_size_byte = out_size*sizeof(float); // unused
assert((THEANO_KERN_WID == CudaNdarray_HOST_DIMS(kern)[3]) || (THEANO_KERN_WID==0)); if (!((THEANO_KERN_WID == CudaNdarray_HOST_DIMS(kern)[3]) || (THEANO_KERN_WID==0))){
PyErr_Format(PyExc_ValueError, "ERROR: This GpuConv code was compiled for"
" %d kernel columns, but the kernel we received had %d columns!",
THEANO_KERN_WID, CudaNdarray_HOST_DIMS(kern)[3]);
return -1;
}
bool subsample = subsample_rows!=1 || subsample_cols!=1; bool subsample = subsample_rows!=1 || subsample_cols!=1;
bool img_contiguous = CudaNdarray_is_c_contiguous(img); bool img_contiguous = CudaNdarray_is_c_contiguous(img);
......
import sys, time import sys
import time
import unittest
import numpy import numpy
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
...@@ -86,7 +90,7 @@ def _params_allgood_header(): ...@@ -86,7 +90,7 @@ def _params_allgood_header():
def _params_allgood(ishape, kshape, mode, subsample=(1,1), img_stride=(1,1), def _params_allgood(ishape, kshape, mode, subsample=(1,1), img_stride=(1,1),
kern_stride=(1,1), version=-1, verbose=0, random=True, print_=None, kern_stride=(1,1), version=-1, verbose=0, random=True, print_=None,
id=None, rtol=1e-5, atol = 1e-8, nb_iter=0, ones=False): id=None, rtol=1e-5, atol = 1e-8, nb_iter=0, ones=False, compile_kshp=None):
# #
# This function is the core of several of the big unit-test drivers, # This function is the core of several of the big unit-test drivers,
# but it can also be used very directly on its own to test a specific # but it can also be used very directly on its own to test a specific
...@@ -94,6 +98,11 @@ def _params_allgood(ishape, kshape, mode, subsample=(1,1), img_stride=(1,1), ...@@ -94,6 +98,11 @@ def _params_allgood(ishape, kshape, mode, subsample=(1,1), img_stride=(1,1),
# #
# See `test_example` (above) for an example of how to use this directly. # See `test_example` (above) for an example of how to use this directly.
# #
# :param kshape: (4d) The shape of the kernel at run time.
# :param compile_kshp: (2d) Hardcode the shape of the kernel in the generated code.
# This is supposed to be faster, but we need to check
# that we raise an error if the input has the wrong shape.
#
if ones: if ones:
assert not random assert not random
npy_img = theano._asarray(numpy.ones(ishape), dtype='float32') npy_img = theano._asarray(numpy.ones(ishape), dtype='float32')
...@@ -124,7 +133,10 @@ def _params_allgood(ishape, kshape, mode, subsample=(1,1), img_stride=(1,1), ...@@ -124,7 +133,10 @@ def _params_allgood(ishape, kshape, mode, subsample=(1,1), img_stride=(1,1),
t1 = time.time() t1 = time.time()
i = cuda_tensor4() i = cuda_tensor4()
k = cuda_tensor4() k = cuda_tensor4()
op = theano.sandbox.cuda.blas.GpuConv(border_mode=mode,subsample=subsample, version=version, verbose=verbose)(i,k) op = theano.sandbox.cuda.blas.GpuConv(border_mode=mode,
subsample=subsample,
version=version,
verbose=verbose, kshp=compile_kshp)(i,k)
f=theano.function([i,k],op, mode=theano_mode) f=theano.function([i,k],op, mode=theano_mode)
gpuval = f(img,kern) gpuval = f(img,kern)
t2 = time.time() t2 = time.time()
...@@ -180,7 +192,8 @@ def _params_allgood(ishape, kshape, mode, subsample=(1,1), img_stride=(1,1), ...@@ -180,7 +192,8 @@ def _params_allgood(ishape, kshape, mode, subsample=(1,1), img_stride=(1,1),
return rval return rval
def exec_conv(version, shapes, verbose, random, mode, print_=None, rtol=1e-5, ones=False): def exec_conv(version, shapes, verbose, random, mode,
print_=None, rtol=1e-5, ones=False):
if verbose>0: if verbose>0:
_params_allgood_header() _params_allgood_header()
nb_failed = 0 nb_failed = 0
...@@ -550,7 +563,7 @@ def test_full(): ...@@ -550,7 +563,7 @@ def test_full():
def test_subsample(): def test_subsample():
# implement when # implement when
shapes = [ shapes = [
((1, 1, 1, 1), (1, 1, 1, 1), (1,1), (1,1), (1,1)) ((1, 1, 1, 1), (1, 1, 1, 1), (1,1), (1,1), (1,1))
, ((1, 1, 1, 1), (1, 1, 1, 1), (2,2), (1,1), (1,1)) , ((1, 1, 1, 1), (1, 1, 1, 1), (2,2), (1,1), (1,1))
, ((4, 2, 10, 10), (3, 2, 2, 2), (1, 3), (1,1), (1,1)) , ((4, 2, 10, 10), (3, 2, 2, 2), (1, 3), (1,1), (1,1))
...@@ -571,9 +584,7 @@ def test_subsample(): ...@@ -571,9 +584,7 @@ def test_subsample():
ones = False ones = False
if ones: if ones:
random = False random = False
#test
random = False
exec_conv(version_valid, shapes, verbose, random, 'valid', print_=print_, ones=ones) exec_conv(version_valid, shapes, verbose, random, 'valid', print_=print_, ones=ones)
exec_conv(version_full, shapes, verbose, random, 'full', print_=print_, ones=ones) exec_conv(version_full, shapes, verbose, random, 'full', print_=print_, ones=ones)
...@@ -583,6 +594,36 @@ def test_subsample(): ...@@ -583,6 +594,36 @@ def test_subsample():
# print >> sys.stderr, "WARNING TODO: test_logical_shapes not implemented (i.e. imshp_logical, kshp_logical, kshp_logical_top_aligned)" # print >> sys.stderr, "WARNING TODO: test_logical_shapes not implemented (i.e. imshp_logical, kshp_logical, kshp_logical_top_aligned)"
class TestConv2DGPU(unittest.TestCase):
    """unittest-based checks for the GPU convolution op."""

    def test_invalid_input_shape(self):
        """
        Check that a ValueError is raised when the kernel shape given at
        build time is not the same as the kernel shape received at run time.

        When the configured mode is DebugMode, the module-level
        `theano_mode` is temporarily replaced by FAST_RUN including the
        'gpu' optimizations; it is always restored before returning.
        """
        global theano_mode
        saved_mode = theano_mode

        # Each entry is (image shape, run-time kernel shape,
        # compile-time kernel shape).
        mismatched_cases = [
            ((3, 2, 8, 8), (4, 2, 5, 5), (8, 8)),
            ((3, 2, 8, 8), (4, 2, 5, 5), (5, 8)),
            # ((3, 2, 8, 8), (4, 2, 5, 5), (8, 5)),
            # We use only the number of columns.
        ]

        try:
            if theano.config.mode in ['DebugMode', 'DEBUG_MODE']:
                fast_run = theano.compile.mode.get_mode('FAST_RUN')
                theano_mode = fast_run.including('gpu')
            for border_mode in ['valid', 'full']:
                for ishape, kshape, compile_kshp in mismatched_cases:
                    self.assertRaises(ValueError, _params_allgood,
                                      ishape, kshape,
                                      verbose=0, random=True,
                                      mode=border_mode,
                                      print_=False, ones=False,
                                      compile_kshp=compile_kshp)
        finally:
            theano_mode = saved_mode
def _test_dummy(): def _test_dummy():
ishape = (1, 1, 5, 5) ishape = (1, 1, 5, 5)
kshape = (1, 1, 3, 3) kshape = (1, 1, 3, 3)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论