提交 8d0b398d authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add a check at C code generation time for ops that are not explicitly

tagged for float16 support and disable their C code.
上级 708efc22
...@@ -215,6 +215,8 @@ class Shape(gof.Op): ...@@ -215,6 +215,8 @@ class Shape(gof.Op):
@note: Non-differentiable. @note: Non-differentiable.
""" """
_f16_ok = True
# Mapping from Type to C code (and version) to use. # Mapping from Type to C code (and version) to use.
# In the C code, the name of the input variable is %(iname)s, # In the C code, the name of the input variable is %(iname)s,
# the output variable is %(oname)s. # the output variable is %(oname)s.
...@@ -308,6 +310,8 @@ class Shape_i(gof.Op): ...@@ -308,6 +310,8 @@ class Shape_i(gof.Op):
@note: Non-differentiable. @note: Non-differentiable.
""" """
_f16_ok = True
# Mapping from Type to C code (and version) to use. # Mapping from Type to C code (and version) to use.
# In the C code, the name of the input variable is %(iname)s, # In the C code, the name of the input variable is %(iname)s,
# the output variable is %(oname)s. # the output variable is %(oname)s.
......
...@@ -726,9 +726,20 @@ class Op(utils.object2, PureOp, CLinkerOp): ...@@ -726,9 +726,20 @@ class Op(utils.object2, PureOp, CLinkerOp):
node_output_storage = [storage_map[r] for r in node.outputs] node_output_storage = [storage_map[r] for r in node.outputs]
node_input_compute = [compute_map[r] for r in node.inputs] node_input_compute = [compute_map[r] for r in node.inputs]
node_output_compute = [compute_map[r] for r in node.outputs] node_output_compute = [compute_map[r] for r in node.outputs]
#logger.debug('Compiling node %i of graph' % node_idx)
if self._op_use_c_code: if self._op_use_c_code:
try: try:
# float16 gets special treatment since running
# unprepared C code will get bad results.
if not getattr(self, '_f16_ok', False):
def is_f16(t):
return getattr(t, 'dtype', '') == 'float16'
if (any(is_f16(i.type) for i in node.inputs) or
any(is_f16(o.type) for o in node.outputs)):
print ("Disabling C code for %s due to unsupported "
"float16" % (self,))
raise NotImplementedError("float16")
e = FunctionGraph(node.inputs, node.outputs) e = FunctionGraph(node.inputs, node.outputs)
e_no_recycling = [new_o e_no_recycling = [new_o
......
...@@ -187,11 +187,8 @@ class GpuKernelBase(object): ...@@ -187,11 +187,8 @@ class GpuKernelBase(object):
class HostFromGpu(Op): class HostFromGpu(Op):
def __eq__(self, other): __props__ = ()
return type(self) == type(other) _f16_ok = True
def __hash__(self):
return hash(type(self))
def __str__(self): def __str__(self):
return 'HostFromGpu(gpuarray)' return 'HostFromGpu(gpuarray)'
...@@ -270,11 +267,8 @@ host_from_gpu = HostFromGpu() ...@@ -270,11 +267,8 @@ host_from_gpu = HostFromGpu()
class GpuFromHost(Op): class GpuFromHost(Op):
def __eq__(self, other): __props__ = ()
return type(self) == type(other) _f16_ok = True
def __hash__(self):
return hash(type(self))
def __str__(self): def __str__(self):
return 'GpuFromHost(gpuarray)' return 'GpuFromHost(gpuarray)'
...@@ -574,18 +568,15 @@ cuda_from_gpu = CudaFromGpu() ...@@ -574,18 +568,15 @@ cuda_from_gpu = CudaFromGpu()
class GpuAlloc(HideC, Alloc): class GpuAlloc(HideC, Alloc):
__props__ = ('memset_0',)
_f16_ok = True
def __init__(self, memset_0=False): def __init__(self, memset_0=False):
"""memset_0 is only an optimized version. True, it mean the """memset_0 is only an optimized version. True, it mean the
value is always 0, so the c code call memset as it is faster. value is always 0, so the c code call memset as it is faster.
""" """
self.memset_0 = memset_0 self.memset_0 = memset_0
def __eq__(self, other):
return type(self) == type(other) and self.memset_0 == other.memset_0
def __hash__(self):
return hash(type(self)) ^ hash(self.memset_0)
def __str__(self): def __str__(self):
# Hide the memset parameter when not used to prevent confusion. # Hide the memset parameter when not used to prevent confusion.
if self.memset_0: if self.memset_0:
...@@ -729,25 +720,17 @@ class GpuContiguous(Op): ...@@ -729,25 +720,17 @@ class GpuContiguous(Op):
Always return a c contiguous output. Copy the input only if it is Always return a c contiguous output. Copy the input only if it is
not already c contiguous. not already c contiguous.
""" """
__props__ = ()
view_map = {0: [0]} view_map = {0: [0]}
_f16_ok = True
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def grad(self, inputs, dout): def grad(self, inputs, dout):
x, = inputs x, = inputs
dout, = dout dout, = dout
dout = as_gpuarray_variable(dout) dout = as_gpuarray_variable(dout)
return [dout] return [dout]
def __str__(self):
return self.__class__.__name__
def make_node(self, input): def make_node(self, input):
input = as_gpuarray_variable(input) input = as_gpuarray_variable(input)
return Apply(self, [input], [input.type()]) return Apply(self, [input], [input.type()])
...@@ -795,6 +778,8 @@ class GpuReshape(HideC, tensor.Reshape): ...@@ -795,6 +778,8 @@ class GpuReshape(HideC, tensor.Reshape):
""" """
Implement Reshape on the gpu. Implement Reshape on the gpu.
""" """
_f16_ok = True
# __hash__, __eq__, __str__ come from tensor.Reshape # __hash__, __eq__, __str__ come from tensor.Reshape
def make_node(self, x, shp): def make_node(self, x, shp):
x = as_gpuarray_variable(x) x = as_gpuarray_variable(x)
...@@ -832,6 +817,8 @@ class GpuReshape(HideC, tensor.Reshape): ...@@ -832,6 +817,8 @@ class GpuReshape(HideC, tensor.Reshape):
class GpuJoin(HideC, Join): class GpuJoin(HideC, Join):
_f16_ok = True
def make_node(self, axis, *tensors): def make_node(self, axis, *tensors):
node = Join.make_node(self, axis, *tensors) node = Join.make_node(self, axis, *tensors)
...@@ -890,6 +877,7 @@ class GpuSplit(HideC, Split): ...@@ -890,6 +877,7 @@ class GpuSplit(HideC, Split):
class GpuEye(GpuKernelBase, Op): class GpuEye(GpuKernelBase, Op):
__props__ = ('dtype',) __props__ = ('dtype',)
_f16_ok = True
def __init__(self, dtype=None): def __init__(self, dtype=None):
if dtype is None: if dtype is None:
......
...@@ -61,6 +61,7 @@ def as_C_string_const(s): ...@@ -61,6 +61,7 @@ def as_C_string_const(s):
class GpuElemwise(HideC, Elemwise): class GpuElemwise(HideC, Elemwise):
nin = property(lambda self: self.scalar_op.nin) nin = property(lambda self: self.scalar_op.nin)
nout = property(lambda self: self.scalar_op.nout) nout = property(lambda self: self.scalar_op.nout)
_f16_ok = True
def __str__(self): def __str__(self):
if self.name is not None: if self.name is not None:
...@@ -475,6 +476,8 @@ class SupportCodeError(Exception): ...@@ -475,6 +476,8 @@ class SupportCodeError(Exception):
class GpuDimShuffle(HideC, DimShuffle): class GpuDimShuffle(HideC, DimShuffle):
_f16_ok = True
def make_node(self, input): def make_node(self, input):
res = DimShuffle.make_node(self, input) res = DimShuffle.make_node(self, input)
otype = GpuArrayType(dtype=res.outputs[0].type.dtype, otype = GpuArrayType(dtype=res.outputs[0].type.dtype,
...@@ -602,8 +605,8 @@ class GpuCAReduceCuda(HideC, CAReduceDtype): ...@@ -602,8 +605,8 @@ class GpuCAReduceCuda(HideC, CAReduceDtype):
pre_scalar_op: if present, must be a scalar op with only 1 pre_scalar_op: if present, must be a scalar op with only 1
input. We will execute it on the input value before reduction. input. We will execute it on the input value before reduction.
""" """
_f16_ok = True
def __init__(self, scalar_op, axis=None, def __init__(self, scalar_op, axis=None,
reduce_mask=None, dtype=None, acc_dtype=None, reduce_mask=None, dtype=None, acc_dtype=None,
......
...@@ -25,15 +25,8 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op): ...@@ -25,15 +25,8 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
""" """
nin = 3 nin = 3
nout = 3 nout = 3
__props__ = ()
def __eq__(self, other): _f16_ok = True
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def __str__(self):
return self.__class__.__name__
def make_node(self, x, b, y_idx): def make_node(self, x, b, y_idx):
# N.B. won't work when we don't cast y_idx to float anymore # N.B. won't work when we don't cast y_idx to float anymore
...@@ -282,21 +275,12 @@ gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1Ho ...@@ -282,21 +275,12 @@ gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1Ho
class GpuCrossentropySoftmax1HotWithBiasDx(Op): class GpuCrossentropySoftmax1HotWithBiasDx(Op):
""" """
Implement CrossentropySoftmax1HotWithBiasDx on the gpu. Implement CrossentropySoftmax1HotWithBiasDx on the gpu.
Gradient wrt x of the CrossentropySoftmax1Hot Op
""" """
nin = 3 nin = 3
nout = 1 nout = 1
"""Gradient wrt x of the CrossentropySoftmax1Hot Op""" __props__ = ()
def __init__(self, **kwargs):
Op.__init__(self, **kwargs)
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def __str__(self):
return self.__class__.__name__
def make_node(self, dnll, sm, y_idx): def make_node(self, dnll, sm, y_idx):
dnll = as_gpuarray_variable(dnll) dnll = as_gpuarray_variable(dnll)
......
...@@ -21,6 +21,8 @@ from .comp import NVCC_compiler ...@@ -21,6 +21,8 @@ from .comp import NVCC_compiler
class GpuSubtensor(HideC, Subtensor): class GpuSubtensor(HideC, Subtensor):
_f16_ok = True
def make_node(self, x, *inputs): def make_node(self, x, *inputs):
rval = tensor.Subtensor.make_node(self, x, *inputs) rval = tensor.Subtensor.make_node(self, x, *inputs)
otype = GpuArrayType(dtype=rval.outputs[0].type.dtype, otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
......
...@@ -95,6 +95,7 @@ class DimShuffle(Op): ...@@ -95,6 +95,7 @@ class DimShuffle(Op):
transpose function. transpose function.
Adding, subtracting dimensions can be done with reshape. Adding, subtracting dimensions can be done with reshape.
""" """
_f16_ok = True
check_input = False check_input = False
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论