提交 e04074a9 作者: Frederic Bastien

Disable C code for some float16 GPU ops.

上级 ee0051da
......@@ -1060,7 +1060,7 @@ def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
set_instead_of_inc = op.set_instead_of_inc
compute_capability = int(context.bin_id[-2])
if compute_capability >= 2 and x.ndim == 1 and y.ndim == 0:
if compute_capability >= 2 and x.ndim == 1 and y.ndim == 0 and inputs[0].dtype != 'float16':
x = x.dimshuffle(0, 'x')
y = y.dimshuffle('x', 'x')
ret = GpuAdvancedIncSubtensor1_dev20(
......
......@@ -589,7 +589,7 @@ class GpuAdvancedIncSubtensor1(Op):
Implement AdvancedIncSubtensor1 on the gpu.
"""
_f16_ok = True
_f16_ok = False
__props__ = ('inplace', 'set_instead_of_inc')
params_type = gpu_context_type
......@@ -799,7 +799,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
only avail on compute capability 2.0 and more recent.
"""
_f16_ok = True
_f16_ok = False
def make_node(self, x, y, ilist):
"""
......
......@@ -136,7 +136,8 @@ def test_advinc_subtensor1_vector_scalar():
name='y')
expr = tensor.advanced_inc_subtensor1(x, y, [0, 2])
f = theano.function([y], expr, mode=mode_with_gpu)
assert sum([isinstance(node.op, GpuAdvancedIncSubtensor1_dev20)
assert sum([isinstance(node.op, (GpuAdvancedIncSubtensor1_dev20,
GpuAdvancedIncSubtensor1))
for node in f.maker.fgraph.toposort()]) == 1
rval = f(yval)
rep = xval.copy()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论