提交 e04074a9 作者: Frederic Bastien

Disable c code for some float16 GPU ops.

上级 ee0051da
...@@ -1060,7 +1060,7 @@ def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs): ...@@ -1060,7 +1060,7 @@ def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
set_instead_of_inc = op.set_instead_of_inc set_instead_of_inc = op.set_instead_of_inc
compute_capability = int(context.bin_id[-2]) compute_capability = int(context.bin_id[-2])
if compute_capability >= 2 and x.ndim == 1 and y.ndim == 0: if compute_capability >= 2 and x.ndim == 1 and y.ndim == 0 and inputs[0].dtype != 'float16':
x = x.dimshuffle(0, 'x') x = x.dimshuffle(0, 'x')
y = y.dimshuffle('x', 'x') y = y.dimshuffle('x', 'x')
ret = GpuAdvancedIncSubtensor1_dev20( ret = GpuAdvancedIncSubtensor1_dev20(
......
...@@ -589,7 +589,7 @@ class GpuAdvancedIncSubtensor1(Op): ...@@ -589,7 +589,7 @@ class GpuAdvancedIncSubtensor1(Op):
Implement AdvancedIncSubtensor1 on the gpu. Implement AdvancedIncSubtensor1 on the gpu.
""" """
_f16_ok = True _f16_ok = False
__props__ = ('inplace', 'set_instead_of_inc') __props__ = ('inplace', 'set_instead_of_inc')
params_type = gpu_context_type params_type = gpu_context_type
...@@ -799,7 +799,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC, ...@@ -799,7 +799,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
only avail on compute capability 2.0 and more recent. only avail on compute capability 2.0 and more recent.
""" """
_f16_ok = True _f16_ok = False
def make_node(self, x, y, ilist): def make_node(self, x, y, ilist):
""" """
......
...@@ -136,7 +136,8 @@ def test_advinc_subtensor1_vector_scalar(): ...@@ -136,7 +136,8 @@ def test_advinc_subtensor1_vector_scalar():
name='y') name='y')
expr = tensor.advanced_inc_subtensor1(x, y, [0, 2]) expr = tensor.advanced_inc_subtensor1(x, y, [0, 2])
f = theano.function([y], expr, mode=mode_with_gpu) f = theano.function([y], expr, mode=mode_with_gpu)
assert sum([isinstance(node.op, GpuAdvancedIncSubtensor1_dev20) assert sum([isinstance(node.op, (GpuAdvancedIncSubtensor1_dev20,
GpuAdvancedIncSubtensor1))
for node in f.maker.fgraph.toposort()]) == 1 for node in f.maker.fgraph.toposort()]) == 1
rval = f(yval) rval = f(yval)
rep = xval.copy() rep = xval.copy()
......
Markdown 格式
0%
您即将向此讨论添加 0 人。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论