提交 5890e98e authored 作者: lamblin's avatar lamblin

Merge pull request #580 from nouiz/gpu_setsubtensor

Gpu setsubtensor
......@@ -9,6 +9,13 @@ Bug fixes
(both in Python and Cython) since April 2011. (Pascal L.)
* In Sparse sandbox, fix the grad of theano.sparse.sandbox.sp.row_scale.
It did not return the right number of elements. (Frederic B.)
* set_subtensor(x[int vector], new_value), when moved to the GPU,
was transformed into inc_subtensor on the GPU. Now we have a slow
GPU implementation.
Note: set_subtensor(x[slice[,...]], new_value) was working correctly
in all cases, as was inc_subtensor(*, *).
Note2: If your code has this behavior, we print a warning by default.
(Frederic B.)
Documentation
* Added in the tutorial documentation on how to extend Theano.
......@@ -81,6 +88,8 @@ Crash Fix
element-wise fusion optimization when upcasting some inputs to
float32 (to compute them on the GPU).
(Frederic B., reported by Sander Dieleman)
* GpuReshape in some particular case when the input is not contiguous
(Frederic B., reported by Sander Dieleman)
* GpuSoftmaxWithBias with shape (0, N) with N > 1.
(Frédéric B., reported by Razvan P.)
* Fix crash under 64-bit Windows, when taking subtensors of the form a[n:]
......@@ -89,6 +98,7 @@ Crash Fix
dimensions, which could typically result in optimization crashes (Olivier D.)
* Fixed crash when concatenating some arrays with specific broadcasting
patterns (Olivier D.)
* Work around a known issue with nvcc 4.1 on MacOS X. (Graham Taylor)
=============
Release Notes
......
......@@ -315,6 +315,13 @@ AddConfigVar('warn.subtensor_merge_bug',
BoolParam(warn_default('0.5')),
in_c_key=False)
# Config flag for the warning emitted by local_gpu_advanced_incsubtensor1
# when a set_subtensor(x[int vector], new_value) graph is moved to the GPU
# (Theano versions before 0.6 could silently compute inc_subtensor instead).
# NOTE(review): fixed a missing separator space between the adjacent string
# literals, which previously rendered as "...moving to the
# gpuset_subtensor(...)" in the help text.
AddConfigVar('warn.gpu_set_subtensor1',
             "Warn if previous versions of Theano (before 0.6) could have "
             "given incorrect results when moving to the gpu "
             "set_subtensor(x[int vector], new_value)",
             BoolParam(warn_default('0.6')),
             in_c_key=False)
AddConfigVar('compute_test_value',
("If 'True', Theano will run each op at graph build time, using "
"Constants, SharedVariables and the tag 'test_value' as inputs "
......
......@@ -82,17 +82,24 @@ class InputToGpuOptimizer(Optimizer):
def apply(self, env):
for input in env.inputs:
if not isinstance(input.type, CudaNdarrayType):
try:
new_input = host_from_gpu(gpu_from_host(input))
if isinstance(input.type, CudaNdarrayType):
return
if new_input.type == input.type:
env.replace_validate(input, new_input,
"InputToGpuOptimizer")
except TypeError, e:
#as we currently only support float32, this can fail.
#Using try except make that we won't need
pass
# This happen frequently as we do 2 pass of the gpu optimizations
if (len(input.clients) == 1 and
input.clients[0][0].op == gpu_from_host):
return
try:
new_input = host_from_gpu(gpu_from_host(input))
if new_input.type == input.type:
env.replace_validate(input, new_input,
"InputToGpuOptimizer")
except TypeError, e:
#as we currently only support float32, this can fail.
#Using try except make that we won't need
pass
# we register it before all other gpu optimizer to be sure that the input
# are on the gpu.
......@@ -753,11 +760,11 @@ def local_gpu_advanced_incsubtensor1(node):
warnings.warn(
'Although your current code is fine, please note that '
'Theano versions prior to 0.6 (more specifically, '
'prior to commit XXXX on DATE) may have '
'prior to commit d2240bddd on March 29, 2012) may have '
'yielded an incorrect result. To remove this warning, '
'either set the `warn.gpu_set_subtensor1` config '
'option to False, or `warn.ignore_bug_before` to at '
'least \'0.6\'.')
'least \'0.6\'.', stacklevel=1)
if set_instead_of_inc:
return
......@@ -787,7 +794,7 @@ def local_gpu_advanced_incsubtensor1(node):
warnings.warn(
'Although your current code is fine, please note that '
'Theano versions prior to 0.6 (more specifically, '
'prior to commit XXXX on DATE) may have '
'prior to commit d2240bddd on March 29, 2012) may have '
'yielded an incorrect result. To remove this warning, '
'either set the `warn.gpu_set_subtensor1` config '
'option to False, or `warn.ignore_bug_before` to at '
......
......@@ -2100,23 +2100,20 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
return super(T_subtensor, self).__init__(name)
def function(self, inputs, outputs, accept_inplace=False,
op=None, mode=None, N=1):
op=None, mode=None, N=1, N_fast=None):
""" wrapper around theano.function that also check the output
:param N: the number of op expected in the toposort
if tuple of length 2, (expected if fast_compile,
if not fast_compile)
"""
if isinstance(N, tuple):
assert len(N) == 2
if self.fast_compile:
N = N[0]
else:
N = N[1]
if self.fast_compile and N_fast is not None:
N = N_fast
if mode is None:
mode = self.mode
if op is None:
op = self.sub
f = theano.function(inputs, outputs, mode=mode,
accept_inplace=accept_inplace)
self.assertFunctionContainsClassN(f, op, N)
......@@ -2694,7 +2691,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
if idx is idxs[0]:
f = self.function([], [gn.shape, n[idx_].shape],
op=ops,
N=(2, 0))
N=0, N_fast=2)
f()
def test_wrong_exception_regression(self):
......@@ -2747,7 +2744,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
data = numpy.asarray(data, dtype=self.dtype)
n = self.shared(data)
t = n[idx]
f = self.function([], t.shape, op=self.ops, N=(1, 0))
f = self.function([], t.shape, op=self.ops, N=0, N_fast=1)
val = f()
self.assertTrue(numpy.allclose(val, data[idx].shape))
......@@ -2850,6 +2847,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
data_copy[idx] = inc_num
else:
data_copy[idx] += inc_num
data_var = theano.In(data_var, mutable=True)
# Remember data for the Theano function (see below).
all_inputs_var += [data_var, idx_var, inc_var]
all_inputs_num += [data_num, idx_num, inc_num]
......@@ -2869,9 +2868,16 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
assert (data_num == data_num_init).all()
# Actual test (we compile a single Theano function to make it faster).
f = self.function(all_inputs_var, all_outputs_var,
accept_inplace=True, op=self.adv_incsub1,
N=len(all_outputs_var))
orig_warn = theano.config.warn.gpu_set_subtensor1
try:
theano.config.warn.gpu_set_subtensor1 = False
f = self.function(all_inputs_var, all_outputs_var,
accept_inplace=True,
op=self.adv_incsub1,
N=len(all_outputs_var))
finally:
theano.config.warn.gpu_set_subtensor1 = orig_warn
f_outs = f(*all_inputs_num)
assert len(f_outs) == len(all_outputs_num)
for f_out, output_num in izip(f_outs, all_outputs_num):
......
......@@ -93,7 +93,8 @@ class TestOptimizationMixin(object):
def assertFunctionContains(self, f, op, min=1, max=sys.maxint):
toposort = f.maker.env.toposort()
matches = [node for node in toposort if node.op == op]
assert (min <= len(matches) <= max), (toposort, matches, str(op), min, max)
assert (min <= len(matches) <= max), (toposort, matches,
str(op), len(matches), min, max)
def assertFunctionContains0(self, f, op):
    # Assert that `op` appears nowhere in the compiled function's
    # optimized graph (zero nodes whose .op == op).
    return self.assertFunctionContains(f, op, min=0, max=0)
......@@ -104,6 +105,15 @@ class TestOptimizationMixin(object):
def assertFunctionContainsN(self, f, op, N):
    # Assert that exactly N nodes in the compiled function's optimized
    # graph have .op == op (equality check, not isinstance).
    return self.assertFunctionContains(f, op, min=N, max=N)
def assertFunctionContainsClass(self, f, op, min=1, max=sys.maxint):
    """Assert that the number of nodes in `f`'s optimized graph whose op
    is an *instance* of class `op` lies in the inclusive range
    [`min`, `max`]. On failure, the assertion message carries the full
    toposort, the matching nodes, and the counts for debugging."""
    nodes = f.maker.env.toposort()
    hits = [node for node in nodes if isinstance(node.op, op)]
    count = len(hits)
    assert min <= count <= max, (nodes, hits, str(op), count, min, max)
def assertFunctionContainsClassN(self, f, op, N):
    """Assert that exactly `N` nodes of `f`'s optimized graph carry an
    op that is an instance of class `op`."""
    return self.assertFunctionContainsClass(f, op, N, N)
def SkipTest(self, msg='Skip this test'):
    # Convenience wrapper: raise the SkipTest exception (imported at
    # module level, not visible in this chunk) so the test runner marks
    # the current test as skipped with message `msg`.
    raise SkipTest(msg)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论