提交 edd1c456 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5164 from abergeron/dlt_f16_2

Fix some problems in float16.
......@@ -275,6 +275,7 @@ class GpuDot22(BlasOp):
Dot22 on the GPU.
"""
_f16_ok = True
__props__ = ()
def make_node(self, x, y):
......
......@@ -1134,27 +1134,6 @@ def local_gpua_gemmbatch(op, context_name, inputs, outputs):
return gpugemmbatch_no_inplace(c, 1.0, a, b, 0.0)
@register_opt('fast_compile')
@op_lifter([tensor.basic.Dot])
@register_opt2([tensor.basic.Dot], 'fast_compile')
def local_gpua_hgemm(op, context_name, inputs, outputs):
from theano.sandbox.cuda import nvcc_compiler
if nvcc_compiler.nvcc_version < '7.5':
_logger.warning("Not performing dot of float16 on the GPU since "
"cuda 7.5 is not available. Updating could speed up "
"your code.")
return
A = inputs[0]
B = inputs[1]
if (A.ndim == 2 and B.ndim == 2 and
A.dtype == 'float16' and B.dtype == 'float16'):
fgraph = outputs[0].fgraph
C = gpu_alloc_empty(context_name, dtype='float16')(
shape_i(A, 0, fgraph),
shape_i(B, 1, fgraph))
return gpugemm_no_inplace(C, 1.0, A, B, 0.0)
@register_opt()
@alpha_merge(GpuGemm, alpha_in=1, beta_in=4)
def local_gpua_gemm_alpha_merge(node, *inputs):
......
......@@ -3,8 +3,6 @@ from unittest import TestCase
from nose.plugins.skip import SkipTest
import itertools
import numpy
import theano
from theano import tensor
from theano.tests import unittest_tools as utt
......@@ -18,7 +16,7 @@ from .test_basic_ops import makeTester, rand
from ..blas import (gpugemv_inplace, gpugemv_no_inplace,
gpugemm_inplace, gpugemmbatch_no_inplace,
gpuger_inplace, gpuger_no_inplace,
GpuGer, gpu_dot22, GpuGemm)
GpuGer, gpu_dot22)
GpuGemvTester = makeTester(
......@@ -130,52 +128,3 @@ GpuDot22Tester = makeTester(
# test9=[rand(0, 0), rand(0, 0)],
)
)
def test_hgemm_swap():
from theano.sandbox.cuda import nvcc_compiler
if nvcc_compiler.nvcc_version < '7.5':
raise SkipTest("SgemmEx is only avaialble on cuda 7.5+")
v = tensor.vector(dtype='float16')
m = tensor.matrix(dtype='float16')
m2 = tensor.matrix(dtype='float16')
m32 = tensor.matrix(dtype='float32')
# test that we don't try to replace anything but matrix x matrix in float16
f = theano.function([v, m], tensor.dot(v, m), mode=mode_with_gpu)
assert len([node for node in f.maker.fgraph.apply_nodes
if isinstance(node.op, GpuGemm)]) == 0
f = theano.function([m32, m], tensor.dot(m32, m), mode=mode_with_gpu)
assert len([node for node in f.maker.fgraph.apply_nodes
if isinstance(node.op, GpuGemm)]) == 0
f = theano.function([m, m2], tensor.dot(m, m2), mode=mode_with_gpu)
assert len([node for node in f.maker.fgraph.apply_nodes
if isinstance(node.op, GpuGemm)]) == 1
v1 = numpy.random.random((3, 4)).astype('float16')
v2 = numpy.random.random((4, 2)).astype('float16')
of = f(v1, v2)
on = numpy.dot(v1, v2)
utt.assert_allclose(of, on)
def test_hgemm_alpha_output_merge():
from theano.sandbox.cuda import nvcc_compiler
if nvcc_compiler.nvcc_version < '7.5':
raise SkipTest("SgemmEx is only avaialble on cuda 7.5+")
m1 = tensor.matrix(dtype='float16')
m2 = tensor.matrix(dtype='float16')
b = tensor.matrix(dtype='float16')
hgemm = numpy.asarray(0.05, dtype='float16') * (tensor.dot(m1, m2) + b)
f = theano.function([m1, m2, b], hgemm, mode=mode_with_gpu)
# there should be 3 gpu_from_host, 1 hgemm and 1 host_from_gpu
assert len(f.maker.fgraph.apply_nodes) == 5
......@@ -18,6 +18,7 @@ from copy import copy
from textwrap import dedent
import numpy
import six
from six.moves import xrange
import theano
......@@ -121,33 +122,165 @@ def as_scalar(x, name=None):
raise TypeError("Cannot convert %s to Scalar" % x, type(x))
def constant(x):
# pass through numpy scalars, since they are already typed on
# purpose typically.
if hasattr(x, 'dtype'):
assert x.ndim == 0
return ScalarConstant(get_scalar_type(str(x.dtype)), x)
if isinstance(x, builtin_float):
for dtype in ['float32', 'float64']:
x_ = theano._asarray(x, dtype=dtype)
if numpy.all(x == x_):
break
x_ = None
assert x_ is not None
return ScalarConstant(get_scalar_type(str(x_.dtype)), x)
if isinstance(x, builtin_int):
for dtype in ['int8', 'int16', 'int32', 'int64']:
class NumpyAutocaster(object):
"""
This class is used to cast python ints and floats to numpy arrays.
The behavior when called on scalar `x` depends on `config.cast_policy`:
- 'numpy' will simply use the same type as found by `numpy.asarray(x)`.
- 'numpy+floatX' will do the same, except it will use float32 instead
of float64 if `x` is a Python float and `config.floatX` is set to
'float32' (note that if `x` is a numpy scalar whose data type is
float64, it is not modified since we assume the user is purposedly
using float64).
- 'custom' lets one define a tuple of data types such that:
- if `x` is already a numpy scalar and its data type is in this
tuple, then it is returned unchanged;
- otherwise, the first data type in this tuple that can represent
`x` without loss of precision will be used, unless `x` is a float
and 'float32' is in the tuple (in which case `x` is cast as a
float32);
- if no data type can represent `x` without loss of precision, then
the last data type in the tuple will be used.
Parameters
----------
dtypes: tuple of strings
The ordered list of preferred data types (only used when
`config.cast_policy` is set to 'custom', see the `NumpyAutocaster`
help for details).
"""
def __init__(self, dtypes):
self.dtypes = tuple(dtypes)
def __call__(self, x):
# Make sure we only deal with scalars.
assert (isinstance(x, six.integer_types) or
isinstance(x, builtin_float) or
(isinstance(x, numpy.ndarray) and x.ndim == 0))
if config.cast_policy == 'numpy':
return numpy.asarray(x)
elif config.cast_policy == 'numpy+floatX':
rval = numpy.asarray(x)
if ((not hasattr(x, 'dtype') and
rval.dtype in ('float64', 'float32') and
rval.dtype != config.floatX)):
rval = theano._asarray(rval, dtype=config.floatX)
return rval
# The following is the original code, corresponding to the 'custom'
# option for `config.cast_policy`.
assert config.cast_policy == 'custom'
try:
# Pass through numpy scalars, since they are already typed on
# purpose typically.
if str(x.dtype) in self.dtypes:
# No need to cast `x` into a new dtype. Note that we still
# need to convert it into an array, because it may not be
# one already (e.g. if x == numpy.float64(1.1)).
return numpy.asarray(x)
except AttributeError:
# Means `x` has no 'dtype' attribute.
pass
# unsafe downcast of float64 variables when config.floatX == 'float32'
# recall: float is numpy.float
if ((isinstance(x, float) and
config.floatX in self.dtypes and
config.floatX != 'float64')):
return theano._asarray(x, dtype=config.floatX)
# Don't autocast to float16 unless config.floatX is float16
try_dtypes = [d for d in self.dtypes
if config.floatX == 'float16' or d != 'float16']
for dtype in try_dtypes:
x_ = theano._asarray(x, dtype=dtype)
if numpy.all(x == x_):
break
x_ = None
assert x_ is not None
return ScalarConstant(get_scalar_type(str(x_.dtype)), x)
if isinstance(x, builtin_complex):
# TODO: We have added the complex type, so this should be tested
raise NotImplementedError()
raise TypeError(x)
# return ScalarConstant(float64, float(x))
# returns either an exact x_==x, or the last cast x_
return x_
autocast_int = NumpyAutocaster(('int8', 'int16', 'int32', 'int64'))
# autocast_float dtypes might be manipulated in tensor.*
autocast_float = NumpyAutocaster(('float16', 'float32', 'float64'))
class autocast_float_as(object):
"""
Temporarily adjust autocasting behavior.
This class makes it possible to temporarily and locally adjust autocasting
behavior when `config.cast_policy` is set to 'custom'.
If `config.cast_policy` is not 'custom', an exception is raised.
This class might be convenient in some code, but it definitely
helps to test the autocasting mechanism.
Examples
--------
>>> with autocast_float_as('float32'):
... assert (fvector() + 1.1).dtype == 'float32' # temporary downcasting
>>> assert (fvector() + 1.1).dtype == 'float64' # back to default behaviour
"""
def __init__(self, *dtypes):
self.dtypes = dtypes
assert config.cast_policy == 'custom'
def __enter__(self):
assert config.cast_policy == 'custom'
self.old_dtypes = autocast_float.dtypes
autocast_float.dtypes = self.dtypes
def __exit__(self, *args):
assert config.cast_policy == 'custom'
autocast_float.dtypes = self.old_dtypes
def convert(x, dtype=None):
"""
Convert the input to a properly typed numpy value according to the
current casting policy. Work with scalars and tensors.
"""
if dtype is not None:
# in this case, the semantics are that the caller is forcing the dtype
x_ = theano._asarray(x, dtype=dtype)
else:
# In this case, this function should infer the dtype according to the
# autocasting rules. See autocasting above.
x_ = None
if isinstance(x, six.integer_types):
try:
x_ = autocast_int(x)
except OverflowError:
# This is to imitate numpy behavior which tries to fit
# bigger numbers into a uint64.
x_ = theano._asarray(x, dtype='uint64')
elif isinstance(x, builtin_float):
x_ = autocast_float(x)
elif isinstance(x, numpy.ndarray):
x_ = x
else:
# Here x is probably a list or a tuple. If it contains a
# long, we will behave like the current NumPy version: it
# will work if the long fits in int64 or uint64.
x_ = numpy.asarray(x)
if x_.size == 0 and not hasattr(x, 'dtype'):
x_ = numpy.asarray(x, dtype=config.floatX)
assert type(x_) in [numpy.ndarray, numpy.memmap]
return x_
def constant(x):
x = convert(x)
assert x.ndim == 0
return ScalarConstant(get_scalar_type(str(x.dtype)), x)
class Scalar(Type):
......
......@@ -219,138 +219,6 @@ _as_tensor_variable = as_tensor_variable
as_tensor = as_tensor_variable
class NumpyAutocaster(object):
"""
This class is used to cast python ints and floats to numpy arrays.
The behavior when called on scalar `x` depends on `config.cast_policy`:
- 'numpy' will simply use the same type as found by `numpy.asarray(x)`.
- 'numpy+floatX' will do the same, except it will use float32 instead
of float64 if `x` is a Python float and `config.floatX` is set to
'float32' (note that if `x` is a numpy scalar whose data type is
float64, it is not modified since we assume the user is purposedly
using float64).
- 'custom' lets one define a tuple of data types such that:
- if `x` is already a numpy scalar and its data type is in this
tuple, then it is returned unchanged;
- otherwise, the first data type in this tuple that can represent
`x` without loss of precision will be used, unless `x` is a float
and 'float32' is in the tuple (in which case `x` is cast as a
float32);
- if no data type can represent `x` without loss of precision, then
the last data type in the tuple will be used.
Parameters
----------
dtypes: tuple of strings
The ordered list of preferred data types (only used when
`config.cast_policy` is set to 'custom', see the `NumpyAutocaster`
help for details).
"""
def __init__(self, dtypes):
self.dtypes = tuple(dtypes)
def __call__(self, x):
# Make sure we only deal with scalars.
assert (isinstance(x, integer_types) or
isinstance(x, float) or
(isinstance(x, numpy.ndarray) and x.ndim == 0))
if config.cast_policy == 'numpy':
return numpy.asarray(x)
elif config.cast_policy == 'numpy+floatX':
rval = numpy.asarray(x)
if ((not hasattr(x, 'dtype') and
rval.dtype in ('float64', 'float32') and
rval.dtype != config.floatX)):
rval = theano._asarray(rval, dtype=config.floatX)
return rval
# The following is the original code, corresponding to the 'custom'
# option for `config.cast_policy`.
assert config.cast_policy == 'custom'
try:
# Pass through numpy scalars, since they are already typed on
# purpose typically.
if str(x.dtype) in self.dtypes:
# No need to cast `x` into a new dtype. Note that we still
# need to convert it into an array, because it may not be
# one already (e.g. if x == numpy.float64(1.1)).
return numpy.asarray(x)
except AttributeError:
# Means `x` has no 'dtype' attribute.
pass
# unsafe downcast of float64 variables when config.floatX == 'float32'
# recall: float is numpy.float
if ((isinstance(x, float) and
config.floatX in self.dtypes and
config.floatX != 'float64')):
return theano._asarray(x, dtype=config.floatX)
# Don't autocast to float16 unless config.floatX is float16
try_dtypes = [d for d in self.dtypes
if config.floatX == 'float16' or d != 'float16']
for dtype in try_dtypes:
x_ = theano._asarray(x, dtype=dtype)
if numpy.all(x == x_):
break
# returns either an exact x_==x, or the last cast x_
return x_
autocast_int = NumpyAutocaster(('int8', 'int16', 'int32', 'int64'))
autocast_float = NumpyAutocaster(('float16', 'float32', 'float64'))
# autocast_float dtypes might be manipulated in tensor.__init__
#
# Note: it's a bit weird for a compiler to automatically downcast
# literals like this, and it might have implications for efficiency
# when mixing types. For example when you add 1.0 + dmatrix(), the
# 1.0 could be converted to float32, and require upcasting for the +
# operation at every position in the dmatrix. using
# theano._asarray(1.0, dtype='float64') will circumvent this
# autocasting, and in future, our ops might be smarter about factoring
# out upcasts. The advantage of this mechanism is to combine it with
# floatX so that 1.0 + xmatrix() will always have the same type as the
# xmatrix().
#
class autocast_float_as(object):
"""
Temporarily adjust autocasting behavior.
This class makes it possible to temporarily and locally adjust autocasting
behavior when `config.cast_policy` is set to 'custom'.
If `config.cast_policy` is not 'custom', an exception is raised.
This class might be convenient in some code, but it definitely
helps to test the autocasting mechanism.
Examples
--------
>>> with autocast_float_as('float32'):
... assert (fvector() + 1.1).dtype == 'float32' # temporary downcasting
>>> assert (fvector() + 1.1).dtype == 'float64' # back to default behaviour
"""
def __init__(self, *dtypes):
self.dtypes = dtypes
assert config.cast_policy == 'custom'
def __enter__(self):
assert config.cast_policy == 'custom'
self.old_dtypes = autocast_float.dtypes
autocast_float.dtypes = self.dtypes
def __exit__(self, *args):
assert config.cast_policy == 'custom'
autocast_float.dtypes = self.old_dtypes
def constant_or_value(x, rtype, name=None, ndim=None, dtype=None):
"""Return a symbolic `Constant` with value `x`.
......@@ -362,32 +230,7 @@ def constant_or_value(x, rtype, name=None, ndim=None, dtype=None):
`x` could not be expanded to have ndim dimensions.
"""
if dtype is not None:
# in this case, the semantics are that the caller is forcing the dtype
x_ = theano._asarray(x, dtype=dtype)
else:
# In this case, this function should infer the dtype according to the
# autocasting rules. See autocasting above.
x_ = None
if rtype is TensorConstant and isinstance(x, integer_types):
try:
x_ = autocast_int(x)
except OverflowError:
# This is to imitate numpy behavior which tries to fit
# bigger numbers into a uint64.
x_ = theano._asarray(x, dtype='uint64')
elif rtype is TensorConstant and isinstance(x, float):
x_ = autocast_float(x)
elif isinstance(x, numpy.ndarray):
x_ = x
else:
# Here x is probably a list or a tuple. If it contains a
# long, we will behave like the current NumPy version: it
# will work if the long fits in int64 or uint64.
x_ = numpy.asarray(x)
if x_.size == 0 and not hasattr(x, 'dtype'):
x_ = numpy.asarray(x, dtype=config.floatX)
assert type(x_) in [numpy.ndarray, numpy.memmap]
x_ = scal.convert(x, dtype=dtype)
bcastable = [d == 1 for d in x_.shape]
if ndim is not None:
......@@ -3155,11 +2998,9 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
sum_dtype = dtype
else:
sum_dtype = None
# float16 overflows way too fast for sum
if ((sum_dtype == 'float16' or input.dtype == 'float16') and
acc_dtype != 'float16'):
sum_dtype == 'float32'
# float16 overflows on the cast way too often
if input.dtype == 'float16':
sum_dtype = 'float32'
s = sum(input, axis=axis, dtype=sum_dtype, keepdims=keepdims,
acc_dtype=acc_dtype)
......
......@@ -1093,14 +1093,14 @@ def _as_scalar(res, dtype=None):
def _is_real_matrix(res):
return (res.type.dtype in ('float32', 'float64') and
return (res.type.dtype in ('float16', 'float32', 'float64') and
res.type.ndim == 2 and
res.type.broadcastable[0] is False and
res.type.broadcastable[1] is False) # cope with tuple vs. list
def _is_real_vector(res):
return (res.type.dtype in ('float32', 'float64') and
return (res.type.dtype in ('float16', 'float32', 'float64') and
res.type.ndim == 1 and
res.type.broadcastable[0] is False)
......@@ -1195,7 +1195,7 @@ def _gemm_canonicalize(r, scale, rval, maxclients):
return None
if ((r.type.ndim not in (1, 2)) or
r.type.dtype not in ('float32', 'float64',
r.type.dtype not in ('float16', 'float32', 'float64',
'complex64', 'complex128')):
rval.append(scaled(r))
return rval
......@@ -1528,7 +1528,7 @@ class Dot22(GemmRelated):
"""
def make_node(self, x, y):
dtypes = ('float32', 'float64', 'complex64', 'complex128')
dtypes = ('float16', 'float32', 'float64', 'complex64', 'complex128')
if x.type.ndim != 2 or x.type.dtype not in dtypes:
raise TypeError(x)
if y.type.ndim != 2 or y.type.dtype not in dtypes:
......@@ -1621,7 +1621,7 @@ def local_dot_to_dot22(node):
x, y, x.type, y.type)
return
if y.type.dtype in ['float32', 'float64', 'complex64', 'complex128']:
if y.type.dtype in ['float16', 'float32', 'float64', 'complex64', 'complex128']:
if x.ndim == 2 and y.ndim == 2:
# print "local_dot_to_dot22: MM"
return [_dot22(*node.inputs)]
......
......@@ -26,11 +26,12 @@ from six.moves import StringIO, reduce
from theano import compile, config, function, gof, tensor, shared
from theano.compile import DeepCopyOp
from theano.compile.mode import get_default_mode
from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
from theano.scalar import autocast_float_as, autocast_float
from theano.tensor import (_shared, wvector, bvector,
argmin, max_and_argmax, cscalar, ctensor3, join,
horizontal_stack, vertical_stack, argmax, get_vector_length,
fscalar, zeros_like, sum, tensor3, vector, add, addbroadcast,
alloc, as_tensor_variable, tensor_from_scalar, ARange, autocast_float,
alloc, as_tensor_variable, tensor_from_scalar, ARange,
clip, constant, default, dot, batched_dot,
dmatrix, dscalar, dvector, eq, eye, fill, flatten, inverse_permutation,
tensor4, permute_row_elements, Flatten, fmatrix, fscalars, grad,
......@@ -4595,6 +4596,12 @@ class T_mean(unittest.TestCase):
except AttributeError:
self.fail()
def test_mean_f16(self):
x = tensor.vector(dtype='float16')
y = x.mean()
f = theano.function([x], y)
utt.assert_allclose(f(numpy.ones((100000,), dtype='float16')), 1.0)
def test0(self):
# Simple test...
x = tensor.vector()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论