提交 5922a930 authored 作者: Frederic's avatar Frederic 提交者: Arnaud Bergeron

Update following code review

上级 5f3b4fab
...@@ -49,7 +49,7 @@ if __name__ == '__main__': ...@@ -49,7 +49,7 @@ if __name__ == '__main__':
else: else:
costlySpeed = costlyTimeOpenmp / costlyTime costlySpeed = costlyTimeOpenmp / costlyTime
costlySpeedstring = "slowdown" costlySpeedstring = "slowdown"
print("Timmed with vector of %d elements" % options.N) print("Timed with vector of %d elements" % options.N)
print("Fast op time without openmp %fs with openmp %fs %s %2.2f" % ( print("Fast op time without openmp %fs with openmp %fs %s %2.2f" % (
cheapTime, cheapTimeOpenmp, cheapSpeedstring, cheapSpeed)) cheapTime, cheapTimeOpenmp, cheapSpeedstring, cheapSpeed))
......
...@@ -1116,8 +1116,13 @@ def local_gpu_incsubtensor(node): ...@@ -1116,8 +1116,13 @@ def local_gpu_incsubtensor(node):
incsubt = host_output.owner.op incsubt = host_output.owner.op
x, y = host_output.owner.inputs[0:2] x, y = host_output.owner.inputs[0:2]
coords = host_output.owner.inputs[2:] coords = host_output.owner.inputs[2:]
if x.dtype != "float32" or y.dtype != "float32": if x.dtype != "float32":
return return
if y.dtype != "float32":
# IncSubtensor upcasts y to float32, so we do the cast
# explicitly in order to move it to the GPU.
y = y.astype('float32')
return [GpuIncSubtensor( return [GpuIncSubtensor(
incsubt.idx_list, incsubt.idx_list,
inplace=incsubt.inplace, inplace=incsubt.inplace,
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论