Commit 3173f02c authored by Frederic Bastien

Make float16 support correct: only test ops that support it, and disable the GPU optimization when an op does not support it.

Parent 05c424e7
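The pattern common to the three hunks below is a dtype guard at the top of each GPU op lifter: when the input dtype is not float32 or float64, the lifter returns early (returns None), which tells the optimizer to make no replacement and leave the original CPU op in place. Below is a minimal sketch of the complete gemv lifter, assuming the decorators and gpugemv ops visible in the diff; the else branch is filled in for illustration and is not part of the hunk shown.

def local_gpua_gemv(op, context_name, inputs, outputs):
    # GpuGemv has no float16 kernel; returning None means "no replacement",
    # so a float16 gemv stays on the CPU instead of being lifted to the GPU.
    if inputs[0].dtype not in ['float32', 'float64']:
        return
    if op.inplace:
        return gpugemv_inplace
    else:
        return gpugemv_no_inplace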
@@ -1163,6 +1163,8 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
 @op_lifter([tensor.blas.Gemv, tensor.blas_c.CGemv])
 @register_opt2([tensor.blas.Gemv], 'fast_compile')
 def local_gpua_gemv(op, context_name, inputs, outputs):
+    if inputs[0].dtype not in ['float32', 'float64']:
+        return
     if op.inplace:
         return gpugemv_inplace
     else:
@@ -1183,6 +1185,8 @@ def local_gpua_gemm(op, context_name, inputs, outputs):
 @op_lifter([tensor.blas.BatchedDot])
 @register_opt2([tensor.blas.BatchedDot], 'fast_compile')
 def local_gpua_gemmbatch(op, context_name, inputs, outputs):
+    if inputs[0].dtype not in ['float32', 'float64']:
+        return
     a, b = inputs
     c = tensor.AllocEmpty(a.dtype)(a.shape[0], a.shape[1], b.shape[2])
     return gpugemmbatch_no_inplace(c, np.asarray(1.0, dtype=a.dtype),
@@ -1217,6 +1221,8 @@ def local_gpua_gemmbatch_output_merge(node, *inputs):
 @op_lifter([tensor.blas.Ger, tensor.blas_c.CGer, tensor.blas_scipy.ScipyGer])
 @register_opt2([tensor.blas.Ger, tensor.blas_c.CGer, tensor.blas_scipy.ScipyGer], 'fast_compile')
 def local_gpua_ger(op, context_name, inputs, outputs):
+    if inputs[0].dtype not in ['float32', 'float64']:
+        return
     return GpuGer(inplace=op.destructive)
@@ -15,7 +15,8 @@ from .config import mode_with_gpu
 from .test_basic_ops import makeTester, rand
 from ..blas import (gpugemv_inplace, gpugemv_no_inplace,
-                    gpugemm_inplace, gpugemmbatch_no_inplace,
+                    gpugemm_inplace, gpugemm_no_inplace,
+                    gpugemmbatch_no_inplace,
                     gpuger_inplace, gpuger_no_inplace,
                     GpuGer, gpu_dot22)
@@ -23,11 +24,9 @@ from ..blas import (gpugemv_inplace, gpugemv_no_inplace,
 GpuGemvTester = makeTester(
     'GpuGemvTester',
     op=gemv_inplace, gpu_op=gpugemv_inplace,
+    # It don't support float16
     cases=dict(dot_vv=[rand(1), 1, rand(1, 2), rand(2), 0],
                dot_vm=[rand(3), 1, rand(3, 2), rand(2), 0],
-               # float16=[rand(3).astype('float16'), np.float32(1),
-               #          rand(3, 2).astype('float16'),
-               #          rand(2).astype('float16'), np.float32(0)],
                float32=[rand(3).astype('float32'), np.float32(1),
                         rand(3, 2).astype('float32'),
                         rand(2).astype('float32'), np.float32(0)],
@@ -42,19 +41,32 @@ GpuGemvTester = makeTester(
 )
-def test_gemv_float16():
-    float16 = [rand(3).astype('float16'),
-               np.asarray(1, dtype=np.float32),
-               rand(3, 2).astype('float16'),
-               rand(2).astype('float16'),
-               np.asarray(0.5, dtype=np.float32)]
-    float16 = [gpuarray_shared_constructor(val)
-               for val in float16]
-    o = gpugemv_no_inplace(*float16)
+def test_float16():
+    # gemm
+    float16_data = [rand(3, 3).astype('float16'),
+                    np.asarray(1, dtype=np.float32),
+                    rand(3, 3).astype('float16'),
+                    rand(3, 3).astype('float16'),
+                    np.asarray(0.5, dtype=np.float32)]
+    float16_shared = [gpuarray_shared_constructor(val)
+                      for val in float16_data]
+    o = gpugemm_no_inplace(*float16_shared)
     f = theano.function([], o)
-    y, alpha, A, x, beta = float16
+    y, alpha, A, x, beta = float16_data
     out = f()
-    utt.assert_asclose(out, alpha * np.dot(A, x) + beta * y)
+    utt.assert_allclose(np.asarray(out), alpha * np.dot(A, x) + beta * y)
+    # dot22
+    float16_data = [rand(3, 3).astype('float16'),
+                    rand(3, 3).astype('float16')]
+    float16_shared = [gpuarray_shared_constructor(val)
+                      for val in float16_data]
+    o = gpu_dot22(*float16_shared)
+    f = theano.function([], o)
+    x, y = float16_data
+    out = f()
+    utt.assert_allclose(np.asarray(out), np.dot(x, y))
 class TestGpuSgemv(TestCase, BaseGemv, utt.TestOptimizationMixin):
@@ -75,6 +87,7 @@ class TestGpuSgemv(TestCase, BaseGemv, utt.TestOptimizationMixin):
 GpuGemmTester = makeTester(
     'GpuGemmTester',
     op=gemm_inplace, gpu_op=gpugemm_inplace,
+    # float16 tested in test_float16
     cases=dict(test1=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), 0.0],
                test2=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), 1.0],
                test3=[rand(3, 4), 1.0, rand(3, 5), rand(5, 4), -1.0],
@@ -83,9 +96,6 @@ GpuGemmTester = makeTester(
                test6=[rand(3, 4), 0.0, rand(3, 5), rand(5, 4), -1.0],
                test7=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 0.0],
                test8=[rand(3, 4), -1.0, rand(3, 5), rand(5, 4), 1.1],
-               # float16=[rand(3, 4).astype('float16'), np.float32(-1.0),
-               #          rand(3, 5).astype('float16'),
-               #          rand(5, 4).astype('float16'), np.float32(-1.1)],
                float32=[rand(3, 4).astype('float32'), np.float32(-1.0),
                         rand(3, 5).astype('float32'),
                         rand(5, 4).astype('float32'), np.float32(-1.1)],
@@ -104,11 +114,7 @@ gemm_batched_tests = dict(
     ("test_b%im%ik%in%i" % (b, m, k, n),
      [rand(b, m, n), rand(), rand(b, m, k), rand(b, k, n), rand()])
     for b, m, k, n in itertools.combinations([2, 3, 5, 7, 11, 13], 4))
-#gemm_batched_tests['float16'] = [rand(3, 4, 7).astype('float16'),
-#                                 rand().astype('float32'),
-#                                 rand(3, 4, 4).astype('float16'),
-#                                 rand(3, 4, 7).astype('float16'),
-#                                 rand().astype('float32')]
+# float16 not supported
 gemm_batched_tests['float32'] = [rand(3, 4, 7).astype('float32'),
                                  rand().astype('float32'),
                                  rand(3, 4, 4).astype('float32'),
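For reference, the gemm half of the new test_float16 can be reproduced as a standalone script. This is a sketch under assumptions: the theano.gpuarray.* import paths depend on where the gpuarray backend lives in the branch being used (it may be theano.sandbox.gpuarray instead), and a GPU context must already be configured (e.g. device=cuda).

import numpy as np
import theano
from theano.gpuarray.type import gpuarray_shared_constructor  # assumed path
from theano.gpuarray.blas import gpugemm_no_inplace           # assumed path

# Same layout as test_float16: (y, alpha, A, x, beta) with float16 matrices
# and float32 scalars, all wrapped as GPU shared variables.
data = [np.random.rand(3, 3).astype('float16'),
        np.asarray(1, dtype=np.float32),
        np.random.rand(3, 3).astype('float16'),
        np.random.rand(3, 3).astype('float16'),
        np.asarray(0.5, dtype=np.float32)]
shared = [gpuarray_shared_constructor(v) for v in data]

f = theano.function([], gpugemm_no_inplace(*shared))
y, alpha, A, x, beta = data
out = np.asarray(f())
# Loose tolerance: float16 accumulation is far less precise than float64.
assert np.allclose(out, alpha * np.dot(A, x) + beta * y, atol=1e-2)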