提交 887a56c5 作者: Frederic Bastien

merge

...@@ -130,7 +130,8 @@ class GpuConv(Op): ...@@ -130,7 +130,8 @@ class GpuConv(Op):
subsample=(1,1), subsample=(1,1),
logical_img_hw=None, logical_img_hw=None,
logical_kern_hw=None, logical_kern_hw=None,
logical_kern_align_top=True): logical_kern_align_top=True,
verbose=0):
self.border_mode = border_mode self.border_mode = border_mode
self.subsample = subsample self.subsample = subsample
if logical_img_hw is not None: if logical_img_hw is not None:
...@@ -146,6 +147,7 @@ class GpuConv(Op): ...@@ -146,6 +147,7 @@ class GpuConv(Op):
# grid # grid
self.logical_kern_hw = tuple(logical_kern_hw) self.logical_kern_hw = tuple(logical_kern_hw)
self.logical_kern_align_top = logical_kern_align_top self.logical_kern_align_top = logical_kern_align_top
self.verbose=verbose
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) \ return type(self) == type(other) \
...@@ -188,7 +190,7 @@ class GpuConv(Op): ...@@ -188,7 +190,7 @@ class GpuConv(Op):
logical_img_shape=self.logical_img_hw, logical_img_shape=self.logical_img_hw,
logical_kern_shape=self.logical_kern_hw, logical_kern_shape=self.logical_kern_hw,
kern_align=self.logical_kern_align_top, kern_align=self.logical_kern_align_top,
verbose=0) verbose=self.verbose)
class GpuDownsampleFactorMax(Op): class GpuDownsampleFactorMax(Op):
def __init__(self, ds, ignore_border=False): def __init__(self, ds, ignore_border=False):
......
...@@ -301,7 +301,8 @@ def local_gpu_conv(node): ...@@ -301,7 +301,8 @@ def local_gpu_conv(node):
subsample=(op.dx, op.dy), subsample=(op.dx, op.dy),
logical_img_hw=op.imshp_logical[1:3], logical_img_hw=op.imshp_logical[1:3],
logical_kern_hw=op.kshp_logical, logical_kern_hw=op.kshp_logical,
logical_kern_align_top=op.kshp_logical_top_aligned logical_kern_align_top=op.kshp_logical_top_aligned,
verbose=op.verbose
) )
#HACK to print the number of MFlops in the profiler output. #HACK to print the number of MFlops in the profiler output.
if hasattr(op,'flops'): if hasattr(op,'flops'):
......
...@@ -238,7 +238,7 @@ def test_conv_nnet2(): ...@@ -238,7 +238,7 @@ def test_conv_nnet2():
assert numpy.allclose(rval_cpu, rval_gpu,rtol=1e-4,atol=1e-4) assert numpy.allclose(rval_cpu, rval_gpu,rtol=1e-4,atol=1e-4)
def run_conv_nnet2_classif(shared_fn, isize, ksize, n_batch, n_iter, def run_conv_nnet2_classif(shared_fn, isize, ksize, n_batch, n_iter,
downsample_ops=True): downsample_ops=True, verbose=0):
isize1=isize isize1=isize
isize2=isize isize2=isize
if isinstance(isize,(tuple,)): if isinstance(isize,(tuple,)):
...@@ -275,8 +275,11 @@ def run_conv_nnet2_classif(shared_fn, isize, ksize, n_batch, n_iter, ...@@ -275,8 +275,11 @@ def run_conv_nnet2_classif(shared_fn, isize, ksize, n_batch, n_iter,
y = tensor.fmatrix('y') y = tensor.fmatrix('y')
lr = tensor.fscalar('lr') lr = tensor.fscalar('lr')
conv_op = theano.sandbox.conv.ConvOp(shape_img[2:], shape_kern[2:], n_kern, n_batch, 1, 1) conv_op = theano.sandbox.conv.ConvOp(shape_img[2:], shape_kern[2:], n_kern,
conv_op1 = theano.sandbox.conv.ConvOp((n_kern,logical_hid_shape[0]/2, logical_hid_shape[1]/2), shape_kern1[2:], n_kern1, n_batch, 1, 1) n_batch, 1, 1, verbose=verbose)
conv_op1 = theano.sandbox.conv.ConvOp(
(n_kern,logical_hid_shape[0]/2, logical_hid_shape[1]/2),
shape_kern1[2:], n_kern1, n_batch, 1, 1,verbose=verbose)
conv_op.set_flops() conv_op.set_flops()
conv_op1.set_flops() conv_op1.set_flops()
...@@ -319,8 +322,11 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize, ...@@ -319,8 +322,11 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
ignore_error=False, ignore_error=False,
n_iter=10, n_iter=10,
gpu_only=False, gpu_only=False,
cpu_only=False,
float_atol=1e-08, float_atol=1e-08,
check_isfinite=True): check_isfinite=True,
pickle=False,
verbose=0):
""" """
float_atol: None mean use the default value. float_atol: None mean use the default value.
check_isfinite: the debug mode option. We forward this value to debug mode. check_isfinite: the debug mode option. We forward this value to debug mode.
...@@ -336,22 +342,27 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize, ...@@ -336,22 +342,27 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
try: try:
predefined_modes["DEBUG_MODE"].check_isfinite = check_isfinite predefined_modes["DEBUG_MODE"].check_isfinite = check_isfinite
if gpu_only:
tcn.use()
if float_atol: if float_atol:
print "float_atol",float_atol print "float_atol",float_atol
theano.tensor.basic.float32_atol=float_atol theano.tensor.basic.float32_atol=float_atol
rval_gpu, tg, gpu_mode = run_conv_nnet2_classif(tcn.shared_constructor, isize, ksize, bsize, n_iter) if not cpu_only:
rval_gpu, tg, gpu_mode = run_conv_nnet2_classif(
tcn.shared_constructor, isize, ksize, bsize, n_iter, verbose)
finally: finally:
predefined_modes["DEBUG_MODE"].check_isfinite = orig_check_isfinite predefined_modes["DEBUG_MODE"].check_isfinite = orig_check_isfinite
theano.tensor.basic.float32_atol=orig_float32_atol theano.tensor.basic.float32_atol=orig_float32_atol
if gpu_only: if gpu_only:
print "time gpu: %.3f"%(tg)
return return
try: try:
predefined_modes["DEBUG_MODE"].check_isfinite = check_isfinite predefined_modes["DEBUG_MODE"].check_isfinite = check_isfinite
numpy.random.seed(seed) numpy.random.seed(seed)
rval_cpu, tc, cpu_mode = run_conv_nnet2_classif(shared, isize, ksize, bsize, n_iter) rval_cpu, tc, cpu_mode = run_conv_nnet2_classif(shared, isize, ksize, bsize, n_iter, verbose)
if isinstance(cpu_mode,(theano.compile.ProfileMode,)): if pickle and isinstance(cpu_mode,(theano.compile.ProfileMode,)):
import pickle import pickle
print "BEGIN GPU profile mode dump" print "BEGIN GPU profile mode dump"
#print pickle.dumps(gpu_mode) #print pickle.dumps(gpu_mode)
...@@ -364,35 +375,44 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize, ...@@ -364,35 +375,44 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
predefined_modes["DEBUG_MODE"].check_isfinite = orig_check_isfinite predefined_modes["DEBUG_MODE"].check_isfinite = orig_check_isfinite
theano.tensor.basic.float32_atol=orig_float32_atol theano.tensor.basic.float32_atol=orig_float32_atol
print "cpu:", rval_cpu if not cpu_only:
print "gpu:", rval_gpu print "cpu:", rval_cpu
print "abs diff:", numpy.absolute(rval_gpu-rval_cpu) print "gpu:", rval_gpu
print "time cpu: %f, time gpu: %f, speed up %f"%(tc, tg, tc/tg) print "abs diff:", numpy.absolute(rval_gpu-rval_cpu)
print "estimated time for one pass through MNIST with cpu: %f" % (tc * (60000.0 / (n_iter*bsize))) print "time cpu: %.3f, time gpu: %.3f, speed up %f"%(tc, tg, tc/tg)
print "estimated time for one pass through MNIST with gpu: %f" % (tg * (60000.0 / (n_iter*bsize))) print "estimated time for one pass through MNIST with cpu: %f" % (tc * (60000.0 / (n_iter*bsize)))
print "estimated time for one pass through MNIST with gpu: %f" % (tg * (60000.0 / (n_iter*bsize)))
else:
print "time cpu: %.3f"%(tc)
print "estimated time for one pass through MNIST with cpu: %f" % (tc * (60000.0 / (n_iter*bsize)))
if not ignore_error: if not ignore_error and not cpu_only and not gpu_only:
assert numpy.allclose(rval_cpu, rval_gpu,rtol=1e-3,atol=float_atol) assert numpy.allclose(rval_cpu, rval_gpu,rtol=1e-3,atol=float_atol)
gpu_only=False gpu_only=False
cpu_only=False
ignore_error=False ignore_error=False
verbose=0
def test_lenet_28(): #MNIST def test_lenet_28(): #MNIST
cmp_run_conv_nnet2_classif(23485, 28, 5, 60, n_iter=10, cmp_run_conv_nnet2_classif(23485, 28, 5, 60, n_iter=10,
ignore_error=ignore_error, gpu_only=gpu_only) ignore_error=ignore_error, gpu_only=gpu_only,
cpu_only=cpu_only, verbose=verbose)
def test_lenet_32(): #CIFAR10 / Shapeset def test_lenet_32(): #CIFAR10 / Shapeset
cmp_run_conv_nnet2_classif(23485, 32, 5, 60, n_iter=10, cmp_run_conv_nnet2_classif(23485, 32, 5, 60, n_iter=10,
ignore_error=ignore_error, gpu_only=gpu_only) ignore_error=ignore_error, gpu_only=gpu_only,
verbose=verbose)
def test_lenet_32_long(): #CIFAR10 / Shapeset def test_lenet_32_long(): #CIFAR10 / Shapeset
# this tests the gradient of downsample on the GPU, # this tests the gradient of downsample on the GPU,
# which does not receive specific testing # which does not receive specific testing
cmp_run_conv_nnet2_classif(23485, 32, 5, 30, n_iter=50, cmp_run_conv_nnet2_classif(23485, 32, 5, 30, n_iter=50,
ignore_error=ignore_error, gpu_only=gpu_only) ignore_error=ignore_error, gpu_only=gpu_only,
cpu_only=cpu_only, verbose=verbose)
def test_lenet_64(): # ??? def test_lenet_64(): # ???
#float_atol needd to pass in debug mode #float_atol need to pass in debug mode
#needed as cpu use extended precision and gpu don't #needed as cpu use extended precision and gpu don't
cmp_run_conv_nnet2_classif(23485, 64, 7, 10, n_iter=10, cmp_run_conv_nnet2_classif(23485, 64, 7, 10, n_iter=10,
ignore_error=ignore_error, gpu_only=gpu_only, ignore_error=ignore_error, gpu_only=gpu_only,
...@@ -401,6 +421,7 @@ def test_lenet_64(): # ??? ...@@ -401,6 +421,7 @@ def test_lenet_64(): # ???
def test_lenet_108(): # NORB def test_lenet_108(): # NORB
cmp_run_conv_nnet2_classif(23485, 108, 7, 10, n_iter=5, cmp_run_conv_nnet2_classif(23485, 108, 7, 10, n_iter=5,
ignore_error=ignore_error, gpu_only=gpu_only, ignore_error=ignore_error, gpu_only=gpu_only,
cpu_only=cpu_only, verbose=verbose,
check_isfinite=True) check_isfinite=True)
def test_lenet_256(): # ImageNet def test_lenet_256(): # ImageNet
...@@ -412,10 +433,12 @@ def test_lenet_256(): # ImageNet ...@@ -412,10 +433,12 @@ def test_lenet_256(): # ImageNet
def tes_lenet_hd(): #HD 720p: 1280(wid)x720(len) def tes_lenet_hd(): #HD 720p: 1280(wid)x720(len)
cmp_run_conv_nnet2_classif(23485, (720,1280), 9, 2, n_iter=3, cmp_run_conv_nnet2_classif(23485, (720,1280), 9, 2, n_iter=3,
ignore_error=ignore_error, gpu_only=gpu_only, ignore_error=ignore_error, gpu_only=gpu_only,
cpu_only=cpu_only, verbose=verbose,
check_isfinite=True) check_isfinite=True)
#The name is intentionally misspelled (tes_ instead of test_) so that it is not executed automatically for now, as it does not work yet #The name is intentionally misspelled (tes_ instead of test_) so that it is not executed automatically for now, as it does not work yet
def tes_lenet_full_hd(): #HD 1080p: 1920(wid)x1080(len) def tes_lenet_full_hd(): #HD 1080p: 1920(wid)x1080(len)
cmp_run_conv_nnet2_classif(23485, (1080,1920), 9, 2, n_iter=3, cmp_run_conv_nnet2_classif(23485, (1080,1920), 9, 2, n_iter=3,
ignore_error=ignore_error, gpu_only=gpu_only, ignore_error=ignore_error, gpu_only=gpu_only,
cpu_only=cpu_only, verbose=verbose,
check_isfinite=True) check_isfinite=True)
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论