提交 234ffeab authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add the float16 dtype and make sure it isn't lost to casting.

上级 fd7655aa
......@@ -142,7 +142,7 @@ class GpuElemwise(HideC, Elemwise):
code.append('ga_float %s;' % (f[0],))
# XXX: The replace is an ugly hack to make sure temp
# variables in the middle are float32
code.append(kop.replace('npy_uint16', 'ga_float'))
code.append(kop.replace('npy_float16', 'ga_float'))
for f in scal_f16:
code.append('%s[i] = __float2half_rn(%s);' % (f[1].name, f[0]))
code.append('}')
......@@ -195,6 +195,7 @@ class GpuElemwise(HideC, Elemwise):
("npy_int16", "ga_short"),
("npy_int32", "ga_int"),
("npy_int64", "ga_long"),
("npy_float16", "ga_half"),
("npy_float32", "ga_float"),
("npy_float64", "ga_double"),
]:
......
from theano import scalar
def work_dtype(dtype):
if dtype == 'float16':
......@@ -5,14 +7,21 @@ def work_dtype(dtype):
else:
return dtype
def load_w(dtype):
    """Return the CUDA conversion wrapper used when *reading* `dtype`.

    float16 data is stored as half-precision on the GPU, so loads must
    be widened with ``__half2float``; all other dtypes are read as-is,
    so the empty string (no wrapper) is returned.
    """
    return '__half2float' if dtype == 'float16' else ''
def write_w(dtype):
    """Return the CUDA conversion wrapper used when *writing* `dtype`.

    Stores into float16 buffers must narrow float32 values with
    ``__float2half_rn`` (round-to-nearest-even); every other dtype is
    written directly, so the empty string (no wrapper) is returned.
    """
    return '__float2half_rn' if dtype == 'float16' else ''
class Cast16(scalar.Cast):
    # Cast variant that emits a plain C assignment with no "(dtype)" cast.
    # Used for casts where one side is float16: the generated GPU kernel
    # already converts half values to/from float32 (see load_w/write_w),
    # so no explicit C-level cast is needed or correct here.
    def c_code(self, node, name, inputs, outputs, sub):
        # NOTE(review): intentionally drops the cast that scalar.Cast
        # would emit; assumes the surrounding kernel code has already
        # widened/narrowed the half value -- confirm against the
        # GpuElemwise code generator.
        return "%s = %s;\n" % (outputs[0], inputs[0])
......@@ -31,6 +31,7 @@ from .nnet import (GpuCrossentropySoftmaxArgmax1HotWithBias,
from .elemwise import (GpuElemwise, _is_scalar,
GpuDimShuffle, GpuCAReduceCuda,
GpuCAReduceCPY)
from . import fp16_help
from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20)
......@@ -253,10 +254,25 @@ def local_gpuflatten(node):
@op_lifter([tensor.Elemwise])
def local_gpu_elemwise(node):
    # Lift a host Elemwise op to GpuElemwise.  Casts touching float16 are
    # replaced by fp16_help.Cast16 (plain assignment) because the generated
    # kernel handles the half<->float32 conversion itself.
    op = node.op
    scal_op = op.scalar_op
    name = op.name
    if name:
        name = 'Gpu'+name
    # NOTE(review): the next line appears to be the *pre-change* side of
    # the diff this text was rendered from -- it duplicates the
    # GpuElemwise construction below and the call is never closed; it
    # should not survive in the final code.
    res = GpuElemwise(op.scalar_op, name=name,
    if (type(scal_op) == scalar.Cast and
            (node.inputs[0].dtype == 'float16' or
             node.outputs[0].dtype == 'float16')):
        # Direct Cast on a float16 edge: swap in the no-op C cast.
        scal_op = fp16_help.Cast16(scal_op.o_type, name=scal_op.name)
    # NOTE(review): "and True" is a no-op leftover in this condition, and
    # `variables` is not defined in this fragment -- presumably a scalar
    # graph traversal helper; verify against the module's imports.
    if (type(scal_op) == scalar.Composite and
            True):
        inputs, outputs = gof.graph.clone(scal_op.inputs, scal_op.outputs)
        for v in variables(inputs, outputs):
            if (type(v.op) == scalar.Cast and
                    (v.inputs[0].dtype == 'float16' or
                     v.outputs[0].dtype == 'float16')):
                # We cloned the graph before so this is ok
                v.op = fp16_help.Cast16(v.op.o_type, name=v.op.name)
        # Rebuild the Composite from the rewritten cloned graph.
        scal_op = scalar.Composite(inputs, outputs)
    res = GpuElemwise(scal_op, name=name,
                      inplace_pattern=copy.copy(op.inplace_pattern),
                      nfunc_spec=op.nfunc_spec)
    return res
......
......@@ -172,6 +172,7 @@ class GpuArrayType(Type):
# complex64, etc.
try:
return {
'float16': (float, 'npy_float16', 'NPY_FLOAT16'),
'float32': (float, 'npy_float32', 'NPY_FLOAT32'),
'float64': (float, 'npy_float64', 'NPY_FLOAT64'),
'uint8': (int, 'npy_uint8', 'NPY_UINT8'),
......
......@@ -1972,6 +1972,8 @@ class Cast(UnaryScalarOp):
def c_code(self, node, name, inputs, outputs, sub):
    """Emit C code assigning input `x` to output `z` with a C-level cast.

    Raises NotImplementedError when either side of the cast is float16,
    since a plain C cast cannot handle half-precision storage.
    """
    (x,) = inputs
    (z,) = outputs
    # BUG FIX: the original compared the output *Variable* itself to the
    # string 'float16' (node.outputs[0] == 'float16'), which is always
    # False; compare its dtype so float16 outputs are rejected too.
    if node.inputs[0].dtype == 'float16' or node.outputs[0].dtype == 'float16':
        raise NotImplementedError("C code doesn't work for float16")
    # dtype_specs()[1] is the C type name for the output (e.g. "npy_float32").
    return "%s = (%s)%s;" % (z, node.outputs[0].type.dtype_specs()[1], x)
def grad(self, inputs, gout):
......@@ -1997,6 +1999,7 @@ convert_to_uint8 = Cast(uint8, name='convert_to_uint8')
convert_to_uint16 = Cast(uint16, name='convert_to_uint16')
convert_to_uint32 = Cast(uint32, name='convert_to_uint32')
convert_to_uint64 = Cast(uint64, name='convert_to_uint64')
convert_to_float16 = Cast(float16, name='convert_to_float16')
convert_to_float32 = Cast(float32, name='convert_to_float32')
convert_to_float64 = Cast(float64, name='convert_to_float64')
convert_to_complex64 = Cast(complex64, name='convert_to_complex64')
......@@ -2011,6 +2014,7 @@ _cast_mapping = {
'uint16': convert_to_uint16,
'uint32': convert_to_uint32,
'uint64': convert_to_uint64,
'float16': convert_to_float16,
'float32': convert_to_float32,
'float64': convert_to_float64,
'complex64': convert_to_complex64,
......
......@@ -277,10 +277,8 @@ class NumpyAutocaster(object):
# unsafe downcast of float64 variables when config.floatX == 'float32'
# recall: float is numpy.float
if ((isinstance(x, float) and
config.floatX in self.dtypes and
config.floatX == 'float32')):
return theano._asarray(x, dtype='float32')
config.floatX in self.dtypes)):
return theano._asarray(x, dtype=config.floatX)
for dtype in self.dtypes:
x_ = theano._asarray(x, dtype=dtype)
......@@ -290,7 +288,7 @@ class NumpyAutocaster(object):
return x_
autocast_int = NumpyAutocaster(('int8', 'int16', 'int32', 'int64'))
autocast_float = NumpyAutocaster(('float32', 'float64'))
autocast_float = NumpyAutocaster(('float16', 'float32', 'float64'))
# autocast_float dtypes might be manipulated in tensor.__init__
......@@ -313,7 +311,7 @@ class autocast_float_as(object):
If `config.cast_policy` is not 'custom', an exception is raised.
For example:
>>> with autocast_float_as('float32') as _dummy:
>>> with autocast_float_as('float32'):
... assert (fvector() + 1.1).dtype == 'float32' # temporary downcasting
>>> assert (fvector() + 1.1).dtype == 'float64' # back to default behaviour
......@@ -1137,6 +1135,10 @@ _convert_to_uint64 = _conversion(
elemwise.Elemwise(scal.convert_to_uint64), 'uint64')
"""Cast to unsigned 64-bit integer"""
_convert_to_float16 = _conversion(
elemwise.Elemwise(scal.convert_to_float16), 'float16')
"""Cast to half-precision floating point"""
_convert_to_float32 = _conversion(
elemwise.Elemwise(scal.convert_to_float32), 'float32')
"""Cast to single-precision floating point"""
......@@ -1162,6 +1164,7 @@ _cast_mapping = {
'uint16': _convert_to_uint16,
'uint32': _convert_to_uint32,
'uint64': _convert_to_uint64,
'float16': _convert_to_float16,
'float32': _convert_to_float32,
'float64': _convert_to_float64,
'complex64': _convert_to_complex64,
......@@ -2752,6 +2755,9 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
out = makeKeepDims(input, out, axis)
return out
# float16 has very low precision so we do some things differently
f16 = (input.dtype == 'float16')
if dtype is not None:
# The summation will be done with the specified dtype.
# sum() will complain if it is not suitable.
......@@ -2760,6 +2766,9 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
# Let sum() infer the appropriate dtype.
sum_dtype = None
if f16 and sum_dtype is None and acc_dtype != 'float16':
sum_dtype = 'float32'
s = sum(input, axis=axis, dtype=sum_dtype, keepdims=keepdims,
acc_dtype=acc_dtype)
shp = shape(input)
......@@ -2785,6 +2794,9 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
for i in axis:
s = true_div(s, shp[i])
if f16:
s = cast(s, 'float16')
return s
......
......@@ -1806,6 +1806,7 @@ class CAReduceDtype(CAReduce):
uint8='uint64',
uint16='uint64',
uint32='uint64',
float16='float32',
float32='float64',
complex64='complex128',
).get(idtype, idtype)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论