提交 5890e98e authored 作者: lamblin's avatar lamblin

Merge pull request #580 from nouiz/gpu_setsubtensor

Gpu setsubtensor
...@@ -9,6 +9,13 @@ Bug fixes ...@@ -9,6 +9,13 @@ Bug fixes
(both in Python and Cython) since April 2011. (Pascal L.) (both in Python and Cython) since April 2011. (Pascal L.)
* In Sparse sandbox, fix the grad of theano.sparse.sandbox.sp.row_scale. * In Sparse sandbox, fix the grad of theano.sparse.sandbox.sp.row_scale.
It did not return the right number of elements. (Frederic B.) It did not return the right number of elements. (Frederic B.)
* set_subtensor(x[int vector], new_value), when moved to the GPU,
was transformed into inc_subtensor on the GPU. Now we have a slow
GPU implementation.
Note: set_subtensor(x[slice[,...]], new_value) was working correctly
in all cases, as well as inc_subtensor(*, *).
Note2: If your code has this behavior, we print a warning by default.
(Frederic B.)
Documentation Documentation
* Added in the tutorial documentation on how to extend Theano. * Added in the tutorial documentation on how to extend Theano.
...@@ -81,6 +88,8 @@ Crash Fix ...@@ -81,6 +88,8 @@ Crash Fix
element-wise fusion optimization when upcasting some inputs to element-wise fusion optimization when upcasting some inputs to
float32 (to compute them on the GPU). float32 (to compute them on the GPU).
(Frederic B., reported by Sander Dieleman) (Frederic B., reported by Sander Dieleman)
* GpuReshape in some particular cases when the input is not contiguous
(Frederic B., reported by Sander Dieleman)
* GpuSoftmaxWithBias with shape (0, N) with N > 1. * GpuSoftmaxWithBias with shape (0, N) with N > 1.
(Frédéric B., reported by Razvan P.) (Frédéric B., reported by Razvan P.)
* Fix crash under 64-bit Windows, when taking subtensors of the form a[n:] * Fix crash under 64-bit Windows, when taking subtensors of the form a[n:]
...@@ -89,6 +98,7 @@ Crash Fix ...@@ -89,6 +98,7 @@ Crash Fix
dimensions, which could typically result in optimization crashes (Olivier D.) dimensions, which could typically result in optimization crashes (Olivier D.)
* Fixed crash when concatenating some arrays with specific broadcasting * Fixed crash when concatenating some arrays with specific broadcasting
patterns (Olivier D.) patterns (Olivier D.)
* Work around a known issue with nvcc 4.1 on MacOS X. (Graham Taylor)
============= =============
Release Notes Release Notes
......
...@@ -315,6 +315,13 @@ AddConfigVar('warn.subtensor_merge_bug', ...@@ -315,6 +315,13 @@ AddConfigVar('warn.subtensor_merge_bug',
BoolParam(warn_default('0.5')), BoolParam(warn_default('0.5')),
in_c_key=False) in_c_key=False)
AddConfigVar('warn.gpu_set_subtensor1',
"Warn if previous versions of Theano (before 0.6) could have given "
"incorrect results when moving to the gpu "
"set_subtensor(x[int vector], new_value)",
BoolParam(warn_default('0.6')),
in_c_key=False)
AddConfigVar('compute_test_value', AddConfigVar('compute_test_value',
("If 'True', Theano will run each op at graph build time, using " ("If 'True', Theano will run each op at graph build time, using "
"Constants, SharedVariables and the tag 'test_value' as inputs " "Constants, SharedVariables and the tag 'test_value' as inputs "
......
...@@ -82,17 +82,24 @@ class InputToGpuOptimizer(Optimizer): ...@@ -82,17 +82,24 @@ class InputToGpuOptimizer(Optimizer):
def apply(self, env): def apply(self, env):
for input in env.inputs: for input in env.inputs:
if not isinstance(input.type, CudaNdarrayType): if isinstance(input.type, CudaNdarrayType):
try: return
new_input = host_from_gpu(gpu_from_host(input))
if new_input.type == input.type: # This happen frequently as we do 2 pass of the gpu optimizations
env.replace_validate(input, new_input, if (len(input.clients) == 1 and
"InputToGpuOptimizer") input.clients[0][0].op == gpu_from_host):
except TypeError, e: return
#as we currently only support float32, this can fail.
#Using try except make that we won't need try:
pass new_input = host_from_gpu(gpu_from_host(input))
if new_input.type == input.type:
env.replace_validate(input, new_input,
"InputToGpuOptimizer")
except TypeError, e:
#as we currently only support float32, this can fail.
#Using try except make that we won't need
pass
# we register it before all other gpu optimizer to be sure that the input # we register it before all other gpu optimizer to be sure that the input
# are on the gpu. # are on the gpu.
...@@ -753,11 +760,11 @@ def local_gpu_advanced_incsubtensor1(node): ...@@ -753,11 +760,11 @@ def local_gpu_advanced_incsubtensor1(node):
warnings.warn( warnings.warn(
'Although your current code is fine, please note that ' 'Although your current code is fine, please note that '
'Theano versions prior to 0.6 (more specifically, ' 'Theano versions prior to 0.6 (more specifically, '
'prior to commit XXXX on DATE) may have ' 'prior to commit d2240bddd on March 29, 2012) may have '
'yielded an incorrect result. To remove this warning, ' 'yielded an incorrect result. To remove this warning, '
'either set the `warn.gpu_set_subtensor1` config ' 'either set the `warn.gpu_set_subtensor1` config '
'option to False, or `warn.ignore_bug_before` to at ' 'option to False, or `warn.ignore_bug_before` to at '
'least \'0.6\'.') 'least \'0.6\'.', stacklevel=1)
if set_instead_of_inc: if set_instead_of_inc:
return return
...@@ -787,7 +794,7 @@ def local_gpu_advanced_incsubtensor1(node): ...@@ -787,7 +794,7 @@ def local_gpu_advanced_incsubtensor1(node):
warnings.warn( warnings.warn(
'Although your current code is fine, please note that ' 'Although your current code is fine, please note that '
'Theano versions prior to 0.6 (more specifically, ' 'Theano versions prior to 0.6 (more specifically, '
'prior to commit XXXX on DATE) may have ' 'prior to commit d2240bddd on March 29, 2012) may have '
'yielded an incorrect result. To remove this warning, ' 'yielded an incorrect result. To remove this warning, '
'either set the `warn.gpu_set_subtensor1` config ' 'either set the `warn.gpu_set_subtensor1` config '
'option to False, or `warn.ignore_bug_before` to at ' 'option to False, or `warn.ignore_bug_before` to at '
......
...@@ -2100,23 +2100,20 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -2100,23 +2100,20 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
return super(T_subtensor, self).__init__(name) return super(T_subtensor, self).__init__(name)
def function(self, inputs, outputs, accept_inplace=False, def function(self, inputs, outputs, accept_inplace=False,
op=None, mode=None, N=1): op=None, mode=None, N=1, N_fast=None):
""" wrapper around theano.function that also check the output """ wrapper around theano.function that also check the output
:param N: the number of op expected in the toposort :param N: the number of op expected in the toposort
if tuple of length 2, (expected if fast_compile, if tuple of length 2, (expected if fast_compile,
if not fast_compile) if not fast_compile)
""" """
if isinstance(N, tuple): if self.fast_compile and N_fast is not None:
assert len(N) == 2 N = N_fast
if self.fast_compile:
N = N[0]
else:
N = N[1]
if mode is None: if mode is None:
mode = self.mode mode = self.mode
if op is None: if op is None:
op = self.sub op = self.sub
f = theano.function(inputs, outputs, mode=mode, f = theano.function(inputs, outputs, mode=mode,
accept_inplace=accept_inplace) accept_inplace=accept_inplace)
self.assertFunctionContainsClassN(f, op, N) self.assertFunctionContainsClassN(f, op, N)
...@@ -2694,7 +2691,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -2694,7 +2691,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
if idx is idxs[0]: if idx is idxs[0]:
f = self.function([], [gn.shape, n[idx_].shape], f = self.function([], [gn.shape, n[idx_].shape],
op=ops, op=ops,
N=(2, 0)) N=0, N_fast=2)
f() f()
def test_wrong_exception_regression(self): def test_wrong_exception_regression(self):
...@@ -2747,7 +2744,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -2747,7 +2744,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
data = numpy.asarray(data, dtype=self.dtype) data = numpy.asarray(data, dtype=self.dtype)
n = self.shared(data) n = self.shared(data)
t = n[idx] t = n[idx]
f = self.function([], t.shape, op=self.ops, N=(1, 0)) f = self.function([], t.shape, op=self.ops, N=0, N_fast=1)
val = f() val = f()
self.assertTrue(numpy.allclose(val, data[idx].shape)) self.assertTrue(numpy.allclose(val, data[idx].shape))
...@@ -2850,6 +2847,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -2850,6 +2847,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
data_copy[idx] = inc_num data_copy[idx] = inc_num
else: else:
data_copy[idx] += inc_num data_copy[idx] += inc_num
data_var = theano.In(data_var, mutable=True)
# Remember data for the Theano function (see below). # Remember data for the Theano function (see below).
all_inputs_var += [data_var, idx_var, inc_var] all_inputs_var += [data_var, idx_var, inc_var]
all_inputs_num += [data_num, idx_num, inc_num] all_inputs_num += [data_num, idx_num, inc_num]
...@@ -2869,9 +2868,16 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -2869,9 +2868,16 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
assert (data_num == data_num_init).all() assert (data_num == data_num_init).all()
# Actual test (we compile a single Theano function to make it faster). # Actual test (we compile a single Theano function to make it faster).
f = self.function(all_inputs_var, all_outputs_var, orig_warn = theano.config.warn.gpu_set_subtensor1
accept_inplace=True, op=self.adv_incsub1, try:
N=len(all_outputs_var)) theano.config.warn.gpu_set_subtensor1 = False
f = self.function(all_inputs_var, all_outputs_var,
accept_inplace=True,
op=self.adv_incsub1,
N=len(all_outputs_var))
finally:
theano.config.warn.gpu_set_subtensor1 = orig_warn
f_outs = f(*all_inputs_num) f_outs = f(*all_inputs_num)
assert len(f_outs) == len(all_outputs_num) assert len(f_outs) == len(all_outputs_num)
for f_out, output_num in izip(f_outs, all_outputs_num): for f_out, output_num in izip(f_outs, all_outputs_num):
......
...@@ -93,7 +93,8 @@ class TestOptimizationMixin(object): ...@@ -93,7 +93,8 @@ class TestOptimizationMixin(object):
def assertFunctionContains(self, f, op, min=1, max=sys.maxint): def assertFunctionContains(self, f, op, min=1, max=sys.maxint):
toposort = f.maker.env.toposort() toposort = f.maker.env.toposort()
matches = [node for node in toposort if node.op == op] matches = [node for node in toposort if node.op == op]
assert (min <= len(matches) <= max), (toposort, matches, str(op), min, max) assert (min <= len(matches) <= max), (toposort, matches,
str(op), len(matches), min, max)
def assertFunctionContains0(self, f, op): def assertFunctionContains0(self, f, op):
return self.assertFunctionContains(f, op, min=0, max=0) return self.assertFunctionContains(f, op, min=0, max=0)
...@@ -104,6 +105,15 @@ class TestOptimizationMixin(object): ...@@ -104,6 +105,15 @@ class TestOptimizationMixin(object):
def assertFunctionContainsN(self, f, op, N): def assertFunctionContainsN(self, f, op, N):
return self.assertFunctionContains(f, op, min=N, max=N) return self.assertFunctionContains(f, op, min=N, max=N)
def assertFunctionContainsClass(self, f, op, min=1, max=sys.maxint):
toposort = f.maker.env.toposort()
matches = [node for node in toposort if isinstance(node.op, op)]
assert (min <= len(matches) <= max), (toposort, matches,
str(op), len(matches), min, max)
def assertFunctionContainsClassN(self, f, op, N):
return self.assertFunctionContainsClass(f, op, min=N, max=N)
def SkipTest(self, msg='Skip this test'): def SkipTest(self, msg='Skip this test'):
raise SkipTest(msg) raise SkipTest(msg)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论