Commit 8d0b398d authored by Arnaud Bergeron

Add a check at C code generation time for ops that are not explicitly tagged for float16 support, and disable their C code.
Parent 708efc22
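The mechanism is a plain class attribute: ops whose C code has been audited for float16 set `_f16_ok = True`, and every other op is treated as unsafe by default via `getattr`. A minimal standalone sketch of that convention (the classes below are illustrative stand-ins, not the real `theano.gof.Op` hierarchy):

```python
# Standalone sketch of the opt-in convention; UntaggedOp/TaggedOp are
# hypothetical stand-ins, not classes touched by this commit.
class UntaggedOp(object):
    """An op that never declared float16 support."""

class TaggedOp(object):
    """An op whose C code was audited for float16."""
    _f16_ok = True

# The check never requires the attribute to exist: getattr with a
# False default makes "C code unsafe on float16" the status quo.
print(getattr(TaggedOp(), '_f16_ok', False))    # True  -> C code kept
print(getattr(UntaggedOp(), '_f16_ok', False))  # False -> C code disabled
```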
......@@ -215,6 +215,8 @@ class Shape(gof.Op):
@note: Non-differentiable.
"""
_f16_ok = True
# Mapping from Type to C code (and version) to use.
# In the C code, the name of the input variable is %(iname)s,
# the output variable is %(oname)s.
......@@ -308,6 +310,8 @@ class Shape_i(gof.Op):
@note: Non-differentiable.
"""
_f16_ok = True
# Mapping from Type to C code (and version) to use.
# In the C code, the name of the input variable is %(iname)s,
# the output variable is %(oname)s.
......
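The `%(iname)s` / `%(oname)s` placeholders mentioned in the comments above are ordinary Python %-style dict formatting: the C code template is expanded with the generated variable names at compile time. A trivial sketch of that substitution (the template string and names are made up for illustration):

```python
# %-dict substitution as used for the Type -> C code mapping above;
# the template and variable names here are hypothetical examples.
template = "%(oname)s = PyArray_SHAPE(%(iname)s);"
print(template % {'iname': 'V3', 'oname': 'V5'})
# -> V5 = PyArray_SHAPE(V3);
```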
......@@ -726,9 +726,20 @@ class Op(utils.object2, PureOp, CLinkerOp):
node_output_storage = [storage_map[r] for r in node.outputs]
node_input_compute = [compute_map[r] for r in node.inputs]
node_output_compute = [compute_map[r] for r in node.outputs]
#logger.debug('Compiling node %i of graph' % node_idx)
if self._op_use_c_code:
try:
# float16 gets special treatment since running
# unprepared C code on it will produce bad results.
if not getattr(self, '_f16_ok', False):
def is_f16(t):
return getattr(t, 'dtype', '') == 'float16'
if (any(is_f16(i.type) for i in node.inputs) or
any(is_f16(o.type) for o in node.outputs)):
print ("Disabling C code for %s due to unsupported "
"float16" % (self,))
raise NotImplementedError("float16")
e = FunctionGraph(node.inputs, node.outputs)
e_no_recycling = [new_o
......
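Read in isolation, the guard added above is a small predicate: keep the C code when the op is tagged, otherwise refuse as soon as any input or output type carries a float16 dtype and fall back to the Python `perform`. A runnable sketch under that reading (`FakeType` and `c_code_allowed` are illustrative helpers, not Theano APIs):

```python
# Illustrative restatement of the float16 guard in Op.make_thunk;
# FakeType and c_code_allowed are hypothetical helpers.
class FakeType(object):
    def __init__(self, dtype):
        self.dtype = dtype

def c_code_allowed(op, in_types, out_types):
    if getattr(op, '_f16_ok', False):
        return True  # op opted in: its C code handles float16
    def is_f16(t):
        return getattr(t, 'dtype', '') == 'float16'
    # Untagged op: any float16 input or output disables the C path,
    # mirroring the NotImplementedError("float16") raised above.
    return not (any(is_f16(t) for t in in_types) or
                any(is_f16(t) for t in out_types))

class Untagged(object):
    pass

class Tagged(object):
    _f16_ok = True

f16 = [FakeType('float16')]
f32 = [FakeType('float32')]
assert c_code_allowed(Untagged(), f32, f32)      # no float16: C code stays
assert not c_code_allowed(Untagged(), f16, f32)  # falls back to Python
assert c_code_allowed(Tagged(), f16, f16)        # tagged op keeps C code
```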
......@@ -187,11 +187,8 @@ class GpuKernelBase(object):
class HostFromGpu(Op):
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
__props__ = ()
_f16_ok = True
def __str__(self):
return 'HostFromGpu(gpuarray)'
......@@ -270,11 +267,8 @@ host_from_gpu = HostFromGpu()
class GpuFromHost(Op):
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
__props__ = ()
_f16_ok = True
def __str__(self):
return 'GpuFromHost(gpuarray)'
......@@ -574,18 +568,15 @@ cuda_from_gpu = CudaFromGpu()
class GpuAlloc(HideC, Alloc):
__props__ = ('memset_0',)
_f16_ok = True
def __init__(self, memset_0=False):
"""memset_0 is only an optimized version. True, it mean the
value is always 0, so the c code call memset as it is faster.
"""
self.memset_0 = memset_0
def __eq__(self, other):
return type(self) == type(other) and self.memset_0 == other.memset_0
def __hash__(self):
return hash(type(self)) ^ hash(self.memset_0)
def __str__(self):
# Hide the memset parameter when not used to prevent confusion.
if self.memset_0:
......@@ -729,25 +720,17 @@ class GpuContiguous(Op):
Always return a c contiguous output. Copy the input only if it is
not already c contiguous.
"""
__props__ = ()
view_map = {0: [0]}
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
_f16_ok = True
def grad(self, inputs, dout):
x, = inputs
dout, = dout
dout = as_gpuarray_variable(dout)
return [dout]
def __str__(self):
return self.__class__.__name__
def make_node(self, input):
input = as_gpuarray_variable(input)
return Apply(self, [input], [input.type()])
......@@ -795,6 +778,8 @@ class GpuReshape(HideC, tensor.Reshape):
"""
Implement Reshape on the gpu.
"""
_f16_ok = True
# __hash__, __eq__, __str__ come from tensor.Reshape
def make_node(self, x, shp):
x = as_gpuarray_variable(x)
......@@ -832,6 +817,8 @@ class GpuReshape(HideC, tensor.Reshape):
class GpuJoin(HideC, Join):
_f16_ok = True
def make_node(self, axis, *tensors):
node = Join.make_node(self, axis, *tensors)
......@@ -890,6 +877,7 @@ class GpuSplit(HideC, Split):
class GpuEye(GpuKernelBase, Op):
__props__ = ('dtype',)
_f16_ok = True
def __init__(self, dtype=None):
if dtype is None:
......
......@@ -61,6 +61,7 @@ def as_C_string_const(s):
class GpuElemwise(HideC, Elemwise):
nin = property(lambda self: self.scalar_op.nin)
nout = property(lambda self: self.scalar_op.nout)
_f16_ok = True
def __str__(self):
if self.name is not None:
......@@ -475,6 +476,8 @@ class SupportCodeError(Exception):
class GpuDimShuffle(HideC, DimShuffle):
_f16_ok = True
def make_node(self, input):
res = DimShuffle.make_node(self, input)
otype = GpuArrayType(dtype=res.outputs[0].type.dtype,
......@@ -602,8 +605,8 @@ class GpuCAReduceCuda(HideC, CAReduceDtype):
pre_scalar_op: if present, must be a scalar op with only 1
input. It is applied to each input value before the reduction.
"""
_f16_ok = True
def __init__(self, scalar_op, axis=None,
reduce_mask=None, dtype=None, acc_dtype=None,
......
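The `pre_scalar_op` described in the `GpuCAReduceCuda` docstring composes an elementwise transform with the reduction, so e.g. a sum of squares is an `add` reduction whose pre_scalar_op squares each element. A plain-Python model of that behaviour (conceptual only, not the CUDA kernel):

```python
# Conceptual model of pre_scalar_op (plain Python, not the CUDA code):
# the scalar op is applied to every element before values are folded.
def ca_reduce(values, reduce_fn, pre_scalar_op=None):
    if pre_scalar_op is not None:
        values = [pre_scalar_op(v) for v in values]
    acc = values[0]
    for v in values[1:]:
        acc = reduce_fn(acc, v)
    return acc

# Sum of squares: add-reduce after squaring each input element.
assert ca_reduce([1.0, 2.0, 3.0], lambda a, b: a + b,
                 pre_scalar_op=lambda v: v * v) == 14.0
```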
......@@ -25,15 +25,8 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
"""
nin = 3
nout = 3
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def __str__(self):
return self.__class__.__name__
__props__ = ()
_f16_ok = True
def make_node(self, x, b, y_idx):
# N.B. won't work when we don't cast y_idx to float anymore
......@@ -282,21 +275,12 @@ gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1Ho
class GpuCrossentropySoftmax1HotWithBiasDx(Op):
"""
Implement CrossentropySoftmax1HotWithBiasDx on the gpu.
Gradient wrt x of the CrossentropySoftmax1Hot Op
"""
nin = 3
nout = 1
"""Gradient wrt x of the CrossentropySoftmax1Hot Op"""
def __init__(self, **kwargs):
Op.__init__(self, **kwargs)
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def __str__(self):
return self.__class__.__name__
__props__ = ()
def make_node(self, dnll, sm, y_idx):
dnll = as_gpuarray_variable(dnll)
......
......@@ -21,6 +21,8 @@ from .comp import NVCC_compiler
class GpuSubtensor(HideC, Subtensor):
_f16_ok = True
def make_node(self, x, *inputs):
rval = tensor.Subtensor.make_node(self, x, *inputs)
otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
......
......@@ -95,6 +95,7 @@ class DimShuffle(Op):
transpose function.
Adding, subtracting dimensions can be done with reshape.
"""
_f16_ok = True
check_input = False
......