提交 d2c753f8 作者: Pascal Lamblin 提交者: GitHub

Merge pull request #5865 from nouiz/ifelse

fix opt warning in the ifelse lifter
...@@ -1341,6 +1341,8 @@ class GpuSplit(HideC, Split): ...@@ -1341,6 +1341,8 @@ class GpuSplit(HideC, Split):
Split for GPU. Split for GPU.
""" """
_f16_ok = True
def __init__(self, len_splits): def __init__(self, len_splits):
super(GpuSplit, self).__init__(len_splits) super(GpuSplit, self).__init__(len_splits)
# The GPU version of Split returns splits as views of the input. # The GPU version of Split returns splits as views of the input.
......
...@@ -931,11 +931,20 @@ def local_gpua_lazy_ifelse(op, context_name, inputs, outputs): ...@@ -931,11 +931,20 @@ def local_gpua_lazy_ifelse(op, context_name, inputs, outputs):
return return
c = inputs[0] c = inputs[0]
inps = [] inps = []
for v in inputs[1:]: falses = []
if isinstance(v.type, tensor.TensorType) and move_to_gpu(v): # ifelse needs the corresponding true/false input variables to be of the same type.
inps.append(as_gpuarray_variable(v, context_name)) # But we can't rely on the inputs to respect that, as GraphToGPU doesn't enforce it.
# So we need to take care of this here.
for v1, v2 in zip(inputs[1:1 + op.n_outs], inputs[1 + op.n_outs:]):
if ((isinstance(v1.type, tensor.TensorType) and move_to_gpu(v1)) or
isinstance(v1.type, GpuArrayType) or
isinstance(v2.type, GpuArrayType)):
inps.append(as_gpuarray_variable(v1, context_name))
falses.append(as_gpuarray_variable(v2, context_name))
else: else:
inps.append(v) inps.append(v1)
falses.append(v2)
inps.extend(falses)
return IfElse(op.n_outs, gpu=True)(c, *inps, return_list=True) return IfElse(op.n_outs, gpu=True)(c, *inps, return_list=True)
......
...@@ -351,12 +351,19 @@ class G_Join_and_Split(test_basic.T_Join_and_Split): ...@@ -351,12 +351,19 @@ class G_Join_and_Split(test_basic.T_Join_and_Split):
# this is to avoid errors with limited devices # this is to avoid errors with limited devices
self.floatX = 'float32' self.floatX = 'float32'
self.hide_error = theano.config.mode not in ['DebugMode', 'DEBUG_MODE'] self.hide_error = theano.config.mode not in ['DebugMode', 'DEBUG_MODE']
self.shared = gpuarray_shared_constructor
def shared(x, **kwargs):
return gpuarray_shared_constructor(x, target=test_ctx_name,
**kwargs)
self.shared = shared
def test_gpusplit_opt(self): def test_gpusplit_opt(self):
# Test that we move the node to the GPU
# Also test float16 computation at the same time.
rng = np.random.RandomState(seed=utt.fetch_seed()) rng = np.random.RandomState(seed=utt.fetch_seed())
m = self.shared(rng.rand(4, 6).astype(self.floatX)) m = self.shared(rng.rand(4, 6).astype('float16'))
o = T.Split(2)(m, 0, [2, 2]) o = T.Split(2)(m, 0, [2, 2])
assert o[0].dtype == 'float16'
f = theano.function([], o, mode=self.mode) f = theano.function([], o, mode=self.mode)
assert any([isinstance(node.op, self.split_op_class) assert any([isinstance(node.op, self.split_op_class)
for node in f.maker.fgraph.toposort()]) for node in f.maker.fgraph.toposort()])
......
...@@ -11,7 +11,7 @@ from theano.tensor.blas import gemv_inplace, gemm_inplace, _dot22, batched_dot ...@@ -11,7 +11,7 @@ from theano.tensor.blas import gemv_inplace, gemm_inplace, _dot22, batched_dot
from theano.tensor.tests.test_blas import TestGer, BaseGemv from theano.tensor.tests.test_blas import TestGer, BaseGemv
from .. import gpuarray_shared_constructor from .. import gpuarray_shared_constructor
from .config import mode_with_gpu from .config import mode_with_gpu, test_ctx_name
from .test_basic_ops import makeTester, rand from .test_basic_ops import makeTester, rand
from ..blas import (gpugemv_inplace, gpugemv_no_inplace, from ..blas import (gpugemv_inplace, gpugemv_no_inplace,
...@@ -48,7 +48,7 @@ def test_float16(): ...@@ -48,7 +48,7 @@ def test_float16():
rand(3, 3).astype('float16'), rand(3, 3).astype('float16'),
rand(3, 3).astype('float16'), rand(3, 3).astype('float16'),
np.asarray(0.5, dtype=np.float32)] np.asarray(0.5, dtype=np.float32)]
float16_shared = [gpuarray_shared_constructor(val) float16_shared = [gpuarray_shared_constructor(val, target=test_ctx_name)
for val in float16_data] for val in float16_data]
o = gpugemm_no_inplace(*float16_shared) o = gpugemm_no_inplace(*float16_shared)
f = theano.function([], o) f = theano.function([], o)
......
...@@ -267,6 +267,16 @@ class test_gpu_ifelse(test_ifelse.test_ifelse): ...@@ -267,6 +267,16 @@ class test_gpu_ifelse(test_ifelse.test_ifelse):
mode=mode_with_gpu) mode=mode_with_gpu)
assert f(np.float32([1, 2, 3]), 0) == 6 assert f(np.float32([1, 2, 3]), 0) == 6
def test_lifter_with_shared_var(self):
    """Compile an ifelse that mixes a GPU shared scalar with a tensor constant.

    The true branch is a float32 scalar created through
    ``gpuarray_shared_constructor`` with an explicit ``target``; the false
    branch is a ``tensor.constant``. The function is only compiled, never
    called.
    """
    cond = tensor.lscalar('x')
    on_gpu = gpuarray_shared_constructor(
        np.asarray(1, dtype='float32'), target=test_ctx_name)
    plain_const = tensor.constant(2.)
    result = theano.ifelse.ifelse(cond, on_gpu, plain_const)
    # NOTE(review): on_opt_error='raise' presumably turns an optimizer
    # failure during compilation into an exception, so the test fails
    # instead of only emitting a warning -- confirm against the config docs.
    with theano.configparser.change_flags(on_opt_error='raise'):
        theano.function([cond], [result], mode=mode_with_gpu)
def test_print_op(): def test_print_op():
""" Test that print ops don't block gpu optimization""" """ Test that print ops don't block gpu optimization"""
......
...@@ -19,7 +19,7 @@ from ..subtensor import (GpuIncSubtensor, GpuSubtensor, ...@@ -19,7 +19,7 @@ from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAllocDiag) GpuAllocDiag)
from ..type import gpuarray_shared_constructor from ..type import gpuarray_shared_constructor
from .config import mode_with_gpu from .config import mode_with_gpu, test_ctx_name
class G_subtensor(test_subtensor.T_subtensor): class G_subtensor(test_subtensor.T_subtensor):
...@@ -27,9 +27,13 @@ class G_subtensor(test_subtensor.T_subtensor): ...@@ -27,9 +27,13 @@ class G_subtensor(test_subtensor.T_subtensor):
return None return None
def __init__(self, name): def __init__(self, name):
def shared(x, **kwargs):
return gpuarray_shared_constructor(x, target=test_ctx_name,
**kwargs)
test_subtensor.T_subtensor.__init__( test_subtensor.T_subtensor.__init__(
self, name, self, name,
shared=gpuarray_shared_constructor, shared=shared,
sub=GpuSubtensor, sub=GpuSubtensor,
inc_sub=GpuIncSubtensor, inc_sub=GpuIncSubtensor,
adv_sub1=GpuAdvancedSubtensor1, adv_sub1=GpuAdvancedSubtensor1,
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import os import os
import nose
import numpy as np import numpy as np
import theano import theano
...@@ -60,6 +61,16 @@ def test_filter_float(): ...@@ -60,6 +61,16 @@ def test_filter_float():
del theano.compile.sharedvalue.shared.constructors[-1] del theano.compile.sharedvalue.shared.constructors[-1]
def test_gpuarray_shared_scalar():
    """Check how ``gpuarray_shared_constructor`` treats a 0-d float32 array."""
    scalar = np.asarray(1, dtype='float32')

    # Without an explicit target the constructor is expected to refuse the
    # scalar by raising TypeError.
    nose.tools.assert_raises(TypeError, gpuarray_shared_constructor, scalar)

    # With an explicit target the same value must be accepted.
    gpuarray_shared_constructor(scalar, target=test_ctx_name)
def test_unpickle_gpuarray_as_numpy_ndarray_flag0(): def test_unpickle_gpuarray_as_numpy_ndarray_flag0():
""" Test when pygpu isn't there for unpickle are in test_pickle.py""" """ Test when pygpu isn't there for unpickle are in test_pickle.py"""
oldflag = config.experimental.unpickle_gpu_on_cpu oldflag = config.experimental.unpickle_gpu_on_cpu
......
...@@ -658,7 +658,7 @@ def gpuarray_shared_constructor(value, name=None, strict=False, ...@@ -658,7 +658,7 @@ def gpuarray_shared_constructor(value, name=None, strict=False,
notset object. notset object.
""" """
if target == 'gpu' or target == 'cpu': if target == 'cpu':
raise TypeError('not for me') raise TypeError('not for me')
if not isinstance(value, (np.ndarray, pygpu.gpuarray.GpuArray)): if not isinstance(value, (np.ndarray, pygpu.gpuarray.GpuArray)):
...@@ -667,6 +667,8 @@ def gpuarray_shared_constructor(value, name=None, strict=False, ...@@ -667,6 +667,8 @@ def gpuarray_shared_constructor(value, name=None, strict=False,
if target is notset: if target is notset:
target = None target = None
if not gpu_supported(value): if not gpu_supported(value):
raise TypeError('The GPU do not support that value.')
if not move_to_gpu(value):
raise TypeError('We do not move that data by default to the GPU') raise TypeError('We do not move that data by default to the GPU')
try: try:
get_context(target) get_context(target)
......
...@@ -74,7 +74,7 @@ compile.optdb.register('local_inplace_sparse_block_outer', ...@@ -74,7 +74,7 @@ compile.optdb.register('local_inplace_sparse_block_outer',
def local_abstractconv_gemm(node): def local_abstractconv_gemm(node):
# If theano.config.blas.ldflags is empty, Theano will use # If theano.config.blas.ldflags is empty, Theano will use
# a NumPy C implementation of [sd]gemm_. # a NumPy C implementation of [sd]gemm_.
if theano.config.cxx == "": if theano.config.cxx == "" or node.inputs[0].dtype == 'float16':
return return
if not isinstance(node.op, AbstractConv2d): if not isinstance(node.op, AbstractConv2d):
return None return None
...@@ -98,7 +98,7 @@ def local_abstractconv_gemm(node): ...@@ -98,7 +98,7 @@ def local_abstractconv_gemm(node):
def local_abstractconv3d_gemm(node): def local_abstractconv3d_gemm(node):
# If theano.config.blas.ldflags is empty, Theano will use # If theano.config.blas.ldflags is empty, Theano will use
# a NumPy C implementation of [sd]gemm_. # a NumPy C implementation of [sd]gemm_.
if theano.config.cxx == "": if theano.config.cxx == "" or node.inputs[0].dtype == 'float16':
return return
if not isinstance(node.op, AbstractConv3d): if not isinstance(node.op, AbstractConv3d):
return None return None
...@@ -122,7 +122,7 @@ def local_abstractconv3d_gemm(node): ...@@ -122,7 +122,7 @@ def local_abstractconv3d_gemm(node):
def local_abstractconv_gradweight_gemm(node): def local_abstractconv_gradweight_gemm(node):
# If theano.config.blas.ldflags is empty, Theano will use # If theano.config.blas.ldflags is empty, Theano will use
# a NumPy C implementation of [sd]gemm_. # a NumPy C implementation of [sd]gemm_.
if theano.config.cxx == "": if theano.config.cxx == "" or node.inputs[0].dtype == 'float16':
return return
if not isinstance(node.op, AbstractConv2d_gradWeights): if not isinstance(node.op, AbstractConv2d_gradWeights):
return None return None
...@@ -149,7 +149,7 @@ def local_abstractconv_gradweight_gemm(node): ...@@ -149,7 +149,7 @@ def local_abstractconv_gradweight_gemm(node):
def local_abstractconv3d_gradweight_gemm(node): def local_abstractconv3d_gradweight_gemm(node):
# If theano.config.blas.ldflags is empty, Theano will use # If theano.config.blas.ldflags is empty, Theano will use
# a NumPy C implementation of [sd]gemm_. # a NumPy C implementation of [sd]gemm_.
if theano.config.cxx == "": if theano.config.cxx == "" or node.inputs[0].dtype == 'float16':
return return
if not isinstance(node.op, AbstractConv3d_gradWeights): if not isinstance(node.op, AbstractConv3d_gradWeights):
return None return None
...@@ -176,7 +176,7 @@ def local_abstractconv3d_gradweight_gemm(node): ...@@ -176,7 +176,7 @@ def local_abstractconv3d_gradweight_gemm(node):
def local_abstractconv_gradinputs_gemm(node): def local_abstractconv_gradinputs_gemm(node):
# If theano.config.blas.ldflags is empty, Theano will use # If theano.config.blas.ldflags is empty, Theano will use
# a NumPy C implementation of [sd]gemm_. # a NumPy C implementation of [sd]gemm_.
if theano.config.cxx == "": if theano.config.cxx == "" or node.inputs[0].dtype == 'float16':
return return
if not isinstance(node.op, AbstractConv2d_gradInputs): if not isinstance(node.op, AbstractConv2d_gradInputs):
return None return None
...@@ -201,7 +201,7 @@ def local_abstractconv_gradinputs_gemm(node): ...@@ -201,7 +201,7 @@ def local_abstractconv_gradinputs_gemm(node):
def local_abstractconv3d_gradinputs_gemm(node): def local_abstractconv3d_gradinputs_gemm(node):
# If theano.config.blas.ldflags is empty, Theano will use # If theano.config.blas.ldflags is empty, Theano will use
# a NumPy C implementation of [sd]gemm_. # a NumPy C implementation of [sd]gemm_.
if theano.config.cxx == "": if theano.config.cxx == "" or node.inputs[0].dtype == 'float16':
return return
if not isinstance(node.op, AbstractConv3d_gradInputs): if not isinstance(node.op, AbstractConv3d_gradInputs):
return None return None
...@@ -225,7 +225,8 @@ def local_abstractconv3d_gradinputs_gemm(node): ...@@ -225,7 +225,8 @@ def local_abstractconv3d_gradinputs_gemm(node):
@local_optimizer([AbstractConv2d]) @local_optimizer([AbstractConv2d])
def local_conv2d_cpu(node): def local_conv2d_cpu(node):
if not isinstance(node.op, AbstractConv2d): if (not isinstance(node.op, AbstractConv2d) or
node.inputs[0].dtype == 'float16'):
return None return None
img, kern = node.inputs img, kern = node.inputs
...@@ -280,7 +281,8 @@ def local_conv3d_cpu(node): ...@@ -280,7 +281,8 @@ def local_conv3d_cpu(node):
@local_optimizer([AbstractConv2d_gradWeights]) @local_optimizer([AbstractConv2d_gradWeights])
def local_conv2d_gradweight_cpu(node): def local_conv2d_gradweight_cpu(node):
if not isinstance(node.op, AbstractConv2d_gradWeights): if (not isinstance(node.op, AbstractConv2d_gradWeights) or
node.inputs[0].dtype == 'float16'):
return None return None
img, topgrad, shape = node.inputs img, topgrad, shape = node.inputs
...@@ -431,7 +433,8 @@ def local_conv3d_gradweight_cpu(node): ...@@ -431,7 +433,8 @@ def local_conv3d_gradweight_cpu(node):
@local_optimizer([AbstractConv2d_gradInputs]) @local_optimizer([AbstractConv2d_gradInputs])
def local_conv2d_gradinputs_cpu(node): def local_conv2d_gradinputs_cpu(node):
if not isinstance(node.op, AbstractConv2d_gradInputs): if (not isinstance(node.op, AbstractConv2d_gradInputs) or
node.inputs[0].dtype == 'float16'):
return None return None
kern, topgrad, shape = node.inputs kern, topgrad, shape = node.inputs
...@@ -611,7 +614,8 @@ def local_abstractconv_check(node): ...@@ -611,7 +614,8 @@ def local_abstractconv_check(node):
'available supporting the requested options. Did you exclude ' 'available supporting the requested options. Did you exclude '
'both "conv_dnn" and "conv_gemm" from the optimizer? If on GPU, ' 'both "conv_dnn" and "conv_gemm" from the optimizer? If on GPU, '
'is cuDNN available and does the GPU support it? If on CPU, ' 'is cuDNN available and does the GPU support it? If on CPU, '
'do you have a BLAS library installed Theano can link against?' % 'do you have a BLAS library installed Theano can link against? '
'On the CPU we do not support float16.' %
node.op.__class__.__name__) node.op.__class__.__name__)
optdb.register('AbstractConvCheck', optdb.register('AbstractConvCheck',
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论