提交 234ffeab authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add the float16 dtype and make sure it isn't lost to casting.

上级 fd7655aa
...@@ -142,7 +142,7 @@ class GpuElemwise(HideC, Elemwise): ...@@ -142,7 +142,7 @@ class GpuElemwise(HideC, Elemwise):
code.append('ga_float %s;' % (f[0],)) code.append('ga_float %s;' % (f[0],))
# XXX: The replace is an ugly hack to make sure temp # XXX: The replace is an ugly hack to make sure temp
# variables in the middle are float32 # variables in the middle are float32
code.append(kop.replace('npy_uint16', 'ga_float')) code.append(kop.replace('npy_float16', 'ga_float'))
for f in scal_f16: for f in scal_f16:
code.append('%s[i] = __float2half_rn(%s);' % (f[1].name, f[0])) code.append('%s[i] = __float2half_rn(%s);' % (f[1].name, f[0]))
code.append('}') code.append('}')
...@@ -195,6 +195,7 @@ class GpuElemwise(HideC, Elemwise): ...@@ -195,6 +195,7 @@ class GpuElemwise(HideC, Elemwise):
("npy_int16", "ga_short"), ("npy_int16", "ga_short"),
("npy_int32", "ga_int"), ("npy_int32", "ga_int"),
("npy_int64", "ga_long"), ("npy_int64", "ga_long"),
("npy_float16", "ga_half"),
("npy_float32", "ga_float"), ("npy_float32", "ga_float"),
("npy_float64", "ga_double"), ("npy_float64", "ga_double"),
]: ]:
......
from theano import scalar
def work_dtype(dtype): def work_dtype(dtype):
if dtype == 'float16': if dtype == 'float16':
...@@ -5,14 +7,21 @@ def work_dtype(dtype): ...@@ -5,14 +7,21 @@ def work_dtype(dtype):
else: else:
return dtype return dtype
def load_w(dtype):
    """Return the CUDA wrapper used when *loading* a value of `dtype`
    into the float32 working precision.

    float16 storage must be widened with `__half2float`; every other
    dtype is used as-is, so the wrapper is the empty string.
    """
    if dtype == 'float16':
        return '__half2float'
    else:
        return ''
def write_w(dtype):
    """Return the CUDA wrapper used when *writing* a float32 working
    value back to `dtype` storage.

    float16 storage needs `__float2half_rn` (round-to-nearest); every
    other dtype is written directly, so the wrapper is the empty string.
    """
    if dtype == 'float16':
        return '__float2half_rn'
    else:
        return ''
class Cast16(scalar.Cast):
    """Cast specialization substituted on GPU when float16 is involved.

    The generic ``scalar.Cast.c_code`` refuses float16 (no C-level half
    type), so for GPU kernels the cast is emitted as a plain assignment:
    the surrounding elemwise kernel already converts float16 storage
    to/from float32 working values (presumably via the load_w/write_w
    wrappers — confirm in the kernel generator), so no explicit C cast
    is needed here.
    """

    def c_code(self, node, name, inputs, outputs, sub):
        # Direct assignment; conversion is handled by the kernel wrappers.
        return "%s = %s;\n" % (outputs[0], inputs[0])
...@@ -31,6 +31,7 @@ from .nnet import (GpuCrossentropySoftmaxArgmax1HotWithBias, ...@@ -31,6 +31,7 @@ from .nnet import (GpuCrossentropySoftmaxArgmax1HotWithBias,
from .elemwise import (GpuElemwise, _is_scalar, from .elemwise import (GpuElemwise, _is_scalar,
GpuDimShuffle, GpuCAReduceCuda, GpuDimShuffle, GpuCAReduceCuda,
GpuCAReduceCPY) GpuCAReduceCPY)
from . import fp16_help
from .subtensor import (GpuIncSubtensor, GpuSubtensor, from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20) GpuAdvancedIncSubtensor1_dev20)
...@@ -253,10 +254,25 @@ def local_gpuflatten(node): ...@@ -253,10 +254,25 @@ def local_gpuflatten(node):
# NOTE(review): this span is a side-by-side diff scrape -- most lines show
# the old and the new revision of the same source line concatenated.  The
# comments below describe the NEW (right-hand) code path.
@op_lifter([tensor.Elemwise]) @op_lifter([tensor.Elemwise])
# Lift a CPU tensor.Elemwise node to its GpuElemwise equivalent,
# substituting fp16_help.Cast16 for plain Cast ops that touch float16.
def local_gpu_elemwise(node): def local_gpu_elemwise(node):
op = node.op op = node.op
scal_op = op.scalar_op
name = op.name name = op.name
if name: if name:
name = 'Gpu'+name name = 'Gpu'+name
# New in this commit: a bare Cast involving float16 on either side is
# replaced by fp16_help.Cast16, whose c_code emits a plain assignment
# instead of a C-level cast (which would fail for float16).
res = GpuElemwise(op.scalar_op, name=name, if (type(scal_op) == scalar.Cast and
(node.inputs[0].dtype == 'float16' or
node.outputs[0].dtype == 'float16')):
scal_op = fp16_help.Cast16(scal_op.o_type, name=scal_op.name)
# NOTE(review): the trailing `True` in this guard looks like either a
# scraping artifact or a leftover debug condition -- confirm against the
# upstream commit before trusting this transcription.
if (type(scal_op) == scalar.Composite and
True):
inputs, outputs = gof.graph.clone(scal_op.inputs, scal_op.outputs)
# NOTE(review): the loop variable comes from `variables(...)` but
# `.op`, `.inputs` and `.outputs` below are Apply-node attributes, not
# Variable attributes; presumably the real code iterates the cloned
# graph's Apply nodes -- verify against the upstream source.
for v in variables(inputs, outputs):
if (type(v.op) == scalar.Cast and
(v.inputs[0].dtype == 'float16' or
v.outputs[0].dtype == 'float16')):
# We cloned the graph before so this is ok
v.op = fp16_help.Cast16(v.op.o_type, name=v.op.name)
scal_op = scalar.Composite(inputs, outputs)
res = GpuElemwise(scal_op, name=name,
inplace_pattern=copy.copy(op.inplace_pattern), inplace_pattern=copy.copy(op.inplace_pattern),
nfunc_spec=op.nfunc_spec) nfunc_spec=op.nfunc_spec)
return res return res
......
...@@ -172,6 +172,7 @@ class GpuArrayType(Type): ...@@ -172,6 +172,7 @@ class GpuArrayType(Type):
# complex64, etc. # complex64, etc.
try: try:
return { return {
'float16': (float, 'npy_float16', 'NPY_FLOAT16'),
'float32': (float, 'npy_float32', 'NPY_FLOAT32'), 'float32': (float, 'npy_float32', 'NPY_FLOAT32'),
'float64': (float, 'npy_float64', 'NPY_FLOAT64'), 'float64': (float, 'npy_float64', 'NPY_FLOAT64'),
'uint8': (int, 'npy_uint8', 'NPY_UINT8'), 'uint8': (int, 'npy_uint8', 'NPY_UINT8'),
......
...@@ -1972,6 +1972,8 @@ class Cast(UnaryScalarOp): ...@@ -1972,6 +1972,8 @@ class Cast(UnaryScalarOp):
def c_code(self, node, name, inputs, outputs, sub):
    """Emit the C assignment casting the input to the output dtype.

    Raises NotImplementedError when float16 appears on either side,
    since the generated C-style cast has no half-precision support.
    """
    (x,) = inputs
    (z,) = outputs
    # BUG FIX: the original compared the output *Variable* itself to the
    # string ('node.outputs[0] == "float16"'), which is always False, so
    # float16 outputs slipped through to the broken C cast.  Compare the
    # dtype instead, matching the input-side check.
    if (node.inputs[0].dtype == 'float16' or
            node.outputs[0].dtype == 'float16'):
        raise NotImplementedError("C code doesn't work for float16")
    return "%s = (%s)%s;" % (z, node.outputs[0].type.dtype_specs()[1], x)
def grad(self, inputs, gout): def grad(self, inputs, gout):
...@@ -1997,6 +1999,7 @@ convert_to_uint8 = Cast(uint8, name='convert_to_uint8') ...@@ -1997,6 +1999,7 @@ convert_to_uint8 = Cast(uint8, name='convert_to_uint8')
convert_to_uint16 = Cast(uint16, name='convert_to_uint16') convert_to_uint16 = Cast(uint16, name='convert_to_uint16')
convert_to_uint32 = Cast(uint32, name='convert_to_uint32') convert_to_uint32 = Cast(uint32, name='convert_to_uint32')
convert_to_uint64 = Cast(uint64, name='convert_to_uint64') convert_to_uint64 = Cast(uint64, name='convert_to_uint64')
convert_to_float16 = Cast(float16, name='convert_to_float16')
convert_to_float32 = Cast(float32, name='convert_to_float32') convert_to_float32 = Cast(float32, name='convert_to_float32')
convert_to_float64 = Cast(float64, name='convert_to_float64') convert_to_float64 = Cast(float64, name='convert_to_float64')
convert_to_complex64 = Cast(complex64, name='convert_to_complex64') convert_to_complex64 = Cast(complex64, name='convert_to_complex64')
...@@ -2011,6 +2014,7 @@ _cast_mapping = { ...@@ -2011,6 +2014,7 @@ _cast_mapping = {
'uint16': convert_to_uint16, 'uint16': convert_to_uint16,
'uint32': convert_to_uint32, 'uint32': convert_to_uint32,
'uint64': convert_to_uint64, 'uint64': convert_to_uint64,
'float16': convert_to_float16,
'float32': convert_to_float32, 'float32': convert_to_float32,
'float64': convert_to_float64, 'float64': convert_to_float64,
'complex64': convert_to_complex64, 'complex64': convert_to_complex64,
......
...@@ -277,10 +277,8 @@ class NumpyAutocaster(object): ...@@ -277,10 +277,8 @@ class NumpyAutocaster(object):
# unsafe downcast of float64 variables when config.floatX == 'float32' # unsafe downcast of float64 variables when config.floatX == 'float32'
# recall: float is numpy.float # recall: float is numpy.float
if ((isinstance(x, float) and if ((isinstance(x, float) and
config.floatX in self.dtypes and config.floatX in self.dtypes)):
config.floatX == 'float32')): return theano._asarray(x, dtype=config.floatX)
return theano._asarray(x, dtype='float32')
for dtype in self.dtypes: for dtype in self.dtypes:
x_ = theano._asarray(x, dtype=dtype) x_ = theano._asarray(x, dtype=dtype)
...@@ -290,7 +288,7 @@ class NumpyAutocaster(object): ...@@ -290,7 +288,7 @@ class NumpyAutocaster(object):
return x_ return x_
autocast_int = NumpyAutocaster(('int8', 'int16', 'int32', 'int64')) autocast_int = NumpyAutocaster(('int8', 'int16', 'int32', 'int64'))
autocast_float = NumpyAutocaster(('float32', 'float64')) autocast_float = NumpyAutocaster(('float16', 'float32', 'float64'))
# autocast_float dtypes might be manipulated in tensor.__init__ # autocast_float dtypes might be manipulated in tensor.__init__
...@@ -313,7 +311,7 @@ class autocast_float_as(object): ...@@ -313,7 +311,7 @@ class autocast_float_as(object):
If `config.cast_policy` is not 'custom', an exception is raised. If `config.cast_policy` is not 'custom', an exception is raised.
For example: For example:
>>> with autocast_float_as('float32') as _dummy: >>> with autocast_float_as('float32'):
... assert (fvector() + 1.1).dtype == 'float32' # temporary downcasting ... assert (fvector() + 1.1).dtype == 'float32' # temporary downcasting
>>> assert (fvector() + 1.1).dtype == 'float64' # back to default behaviour >>> assert (fvector() + 1.1).dtype == 'float64' # back to default behaviour
...@@ -1137,6 +1135,10 @@ _convert_to_uint64 = _conversion( ...@@ -1137,6 +1135,10 @@ _convert_to_uint64 = _conversion(
elemwise.Elemwise(scal.convert_to_uint64), 'uint64') elemwise.Elemwise(scal.convert_to_uint64), 'uint64')
"""Cast to unsigned 64-bit integer""" """Cast to unsigned 64-bit integer"""
_convert_to_float16 = _conversion(
elemwise.Elemwise(scal.convert_to_float16), 'float16')
"""Cast to half-precision floating point"""
_convert_to_float32 = _conversion( _convert_to_float32 = _conversion(
elemwise.Elemwise(scal.convert_to_float32), 'float32') elemwise.Elemwise(scal.convert_to_float32), 'float32')
"""Cast to single-precision floating point""" """Cast to single-precision floating point"""
...@@ -1162,6 +1164,7 @@ _cast_mapping = { ...@@ -1162,6 +1164,7 @@ _cast_mapping = {
'uint16': _convert_to_uint16, 'uint16': _convert_to_uint16,
'uint32': _convert_to_uint32, 'uint32': _convert_to_uint32,
'uint64': _convert_to_uint64, 'uint64': _convert_to_uint64,
'float16': _convert_to_float16,
'float32': _convert_to_float32, 'float32': _convert_to_float32,
'float64': _convert_to_float64, 'float64': _convert_to_float64,
'complex64': _convert_to_complex64, 'complex64': _convert_to_complex64,
...@@ -2752,6 +2755,9 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False, ...@@ -2752,6 +2755,9 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
out = makeKeepDims(input, out, axis) out = makeKeepDims(input, out, axis)
return out return out
# float16 has very low precision so we do some things differently
f16 = (input.dtype == 'float16')
if dtype is not None: if dtype is not None:
# The summation will be done with the specified dtype. # The summation will be done with the specified dtype.
# sum() will complain if it is not suitable. # sum() will complain if it is not suitable.
...@@ -2760,6 +2766,9 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False, ...@@ -2760,6 +2766,9 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
# Let sum() infer the appropriate dtype. # Let sum() infer the appropriate dtype.
sum_dtype = None sum_dtype = None
if f16 and sum_dtype is None and acc_dtype != 'float16':
sum_dtype = 'float32'
s = sum(input, axis=axis, dtype=sum_dtype, keepdims=keepdims, s = sum(input, axis=axis, dtype=sum_dtype, keepdims=keepdims,
acc_dtype=acc_dtype) acc_dtype=acc_dtype)
shp = shape(input) shp = shape(input)
...@@ -2785,6 +2794,9 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False, ...@@ -2785,6 +2794,9 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
for i in axis: for i in axis:
s = true_div(s, shp[i]) s = true_div(s, shp[i])
if f16:
s = cast(s, 'float16')
return s return s
......
...@@ -1806,6 +1806,7 @@ class CAReduceDtype(CAReduce): ...@@ -1806,6 +1806,7 @@ class CAReduceDtype(CAReduce):
uint8='uint64', uint8='uint64',
uint16='uint64', uint16='uint64',
uint32='uint64', uint32='uint64',
float16='float32',
float32='float64', float32='float64',
complex64='complex128', complex64='complex128',
).get(idtype, idtype) ).get(idtype, idtype)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论