提交 78aa6276 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Remove the special code that would move float16 dot to the GPU, since
we do it the normal way now.
上级 d3cb54fa
......@@ -1134,27 +1134,6 @@ def local_gpua_gemmbatch(op, context_name, inputs, outputs):
return gpugemmbatch_no_inplace(c, 1.0, a, b, 0.0)
@register_opt('fast_compile')
@op_lifter([tensor.basic.Dot])
@register_opt2([tensor.basic.Dot], 'fast_compile')
def local_gpua_hgemm(op, context_name, inputs, outputs):
    """Replace a float16 matrix-by-matrix ``Dot`` with a GPU gemm call.

    The rewrite only applies when both inputs are 2-d and have dtype
    ``'float16'``; for any other ``Dot`` nothing is returned.  When it
    applies but the detected nvcc version is older than 7.5, a warning is
    logged and nothing is returned.

    Otherwise an empty float16 buffer ``C`` is created with
    ``gpu_alloc_empty`` from ``shape_i(A, 0, fgraph)`` and
    ``shape_i(B, 1, fgraph)``, and the result of
    ``gpugemm_no_inplace(C, 1.0, A, B, 0.0)`` is returned.
    """
    from theano.sandbox.cuda import nvcc_compiler

    A = inputs[0]
    B = inputs[1]
    # Check applicability first so that the "cuda 7.5" warning below is
    # only emitted for dots this rewrite would actually have handled,
    # instead of for every Dot node in the graph.
    if not (A.ndim == 2 and B.ndim == 2 and
            A.dtype == 'float16' and B.dtype == 'float16'):
        return

    version = nvcc_compiler.nvcc_version
    try:
        # Compare (major, minor) numerically: as plain strings,
        # '10.0' < '7.5' is True, which would reject newer releases.
        too_old = tuple(int(part)
                        for part in version.split('.')[:2]) < (7, 5)
    except (AttributeError, ValueError):
        # Version is not a dotted-number string; keep the plain comparison.
        too_old = version < '7.5'
    if too_old:
        _logger.warning("Not performing dot of float16 on the GPU since "
                        "cuda 7.5 is not available. Updating could speed up "
                        "your code.")
        return

    fgraph = outputs[0].fgraph
    C = gpu_alloc_empty(context_name, dtype='float16')(
        shape_i(A, 0, fgraph),
        shape_i(B, 1, fgraph))
    return gpugemm_no_inplace(C, 1.0, A, B, 0.0)
@register_opt()
@alpha_merge(GpuGemm, alpha_in=1, beta_in=4)
def local_gpua_gemm_alpha_merge(node, *inputs):
......
......@@ -130,52 +130,3 @@ GpuDot22Tester = makeTester(
# test9=[rand(0, 0), rand(0, 0)],
)
)
def test_hgemm_swap():
    """Check that only float16 matrix x matrix dots are turned into GpuGemm.

    Compiles three functions with ``mode_with_gpu`` and counts the
    ``GpuGemm`` nodes in each graph: vector x matrix and float32 x float16
    must yield none, float16 matrix x matrix must yield exactly one.  The
    last function is also run and compared against ``numpy.dot``.

    Raises ``SkipTest`` when the detected nvcc version is older than 7.5.
    """
    from theano.sandbox.cuda import nvcc_compiler

    version = nvcc_compiler.nvcc_version
    try:
        # Compare (major, minor) numerically: as plain strings,
        # '10.0' < '7.5' is True, which would skip on newer releases.
        too_old = tuple(int(part)
                        for part in version.split('.')[:2]) < (7, 5)
    except (AttributeError, ValueError):
        # Version is not a dotted-number string; keep the plain comparison.
        too_old = version < '7.5'
    if too_old:
        raise SkipTest("SgemmEx is only avaialble on cuda 7.5+")

    def count_gemm(fn):
        # Number of GpuGemm apply nodes in the compiled graph of `fn`.
        return len([node for node in fn.maker.fgraph.apply_nodes
                    if isinstance(node.op, GpuGemm)])

    v = tensor.vector(dtype='float16')
    m = tensor.matrix(dtype='float16')
    m2 = tensor.matrix(dtype='float16')
    m32 = tensor.matrix(dtype='float32')

    # test that we don't try to replace anything but matrix x matrix in float16
    f = theano.function([v, m], tensor.dot(v, m), mode=mode_with_gpu)
    assert count_gemm(f) == 0

    f = theano.function([m32, m], tensor.dot(m32, m), mode=mode_with_gpu)
    assert count_gemm(f) == 0

    f = theano.function([m, m2], tensor.dot(m, m2), mode=mode_with_gpu)
    assert count_gemm(f) == 1

    # Numerical check of the float16 matrix x matrix case.
    v1 = numpy.random.random((3, 4)).astype('float16')
    v2 = numpy.random.random((4, 2)).astype('float16')

    of = f(v1, v2)
    on = numpy.dot(v1, v2)

    utt.assert_allclose(of, on)
def test_hgemm_alpha_output_merge():
    """Check that a scaled ``dot(m1, m2) + b`` in float16 compiles to 5 nodes.

    Builds ``0.05 * (dot(m1, m2) + b)`` with float16 matrices, compiles it
    with ``mode_with_gpu`` and asserts on the number of apply nodes in the
    resulting graph.

    Raises ``SkipTest`` when the detected nvcc version is older than 7.5.
    """
    from theano.sandbox.cuda import nvcc_compiler

    version = nvcc_compiler.nvcc_version
    try:
        # Compare (major, minor) numerically: as plain strings,
        # '10.0' < '7.5' is True, which would skip on newer releases.
        too_old = tuple(int(part)
                        for part in version.split('.')[:2]) < (7, 5)
    except (AttributeError, ValueError):
        # Version is not a dotted-number string; keep the plain comparison.
        too_old = version < '7.5'
    if too_old:
        raise SkipTest("SgemmEx is only avaialble on cuda 7.5+")

    m1 = tensor.matrix(dtype='float16')
    m2 = tensor.matrix(dtype='float16')

    b = tensor.matrix(dtype='float16')

    hgemm = numpy.asarray(0.05, dtype='float16') * (tensor.dot(m1, m2) + b)

    f = theano.function([m1, m2, b], hgemm, mode=mode_with_gpu)
    # there should be 3 gpu_from_host, 1 hgemm and 1 host_from_gpu
    assert len(f.maker.fgraph.apply_nodes) == 5
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论