Commit 9067f773 authored by Frederic Bastien

Add more float16 ops. Test to refactor.

Parent 81245414
...@@ -403,6 +403,7 @@ class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1): ...@@ -403,6 +403,7 @@ class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):
""" """
AdvancedSubtensor1 on the GPU. AdvancedSubtensor1 on the GPU.
""" """
_f16_ok = True
def make_node(self, x, ilist): def make_node(self, x, ilist):
ctx_name = infer_context_name(x, ilist) ctx_name = infer_context_name(x, ilist)
x_ = as_gpuarray_variable(x, ctx_name) x_ = as_gpuarray_variable(x, ctx_name)
...@@ -1088,6 +1089,7 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) { ...@@ -1088,6 +1089,7 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) {
class GpuExtractDiag(Op): class GpuExtractDiag(Op):
__props__ = ("offset", "axis1", "axis2", "view") __props__ = ("offset", "axis1", "axis2", "view")
_f16_ok = True
def __init__(self, offset=0, axis1=0, axis2=1, view=False): def __init__(self, offset=0, axis1=0, axis2=1, view=False):
self.view = view self.view = view
......
...@@ -49,6 +49,33 @@ class G_subtensor(test_subtensor.T_subtensor): ...@@ -49,6 +49,33 @@ class G_subtensor(test_subtensor.T_subtensor):
assert self.sub == GpuSubtensor assert self.sub == GpuSubtensor
class G_subtensorF16(test_subtensor.T_subtensor):
    """Run the generic subtensor test suite on the GPU with float16 data.

    Same coverage as ``G_subtensor`` but with ``dtype='float16'`` so the
    half-precision code paths of the GPU subtensor ops are exercised.
    """

    def shortDescription(self):
        # Return None so the test runner shows the test name,
        # not the docstring.
        return None

    def __init__(self, name):
        def shared_on_gpu(value, **kwargs):
            # Force shared variables onto the test GPU context.
            return gpuarray_shared_constructor(value, target=test_ctx_name,
                                               **kwargs)

        test_subtensor.T_subtensor.__init__(
            self, name,
            shared=shared_on_gpu,
            sub=GpuSubtensor,
            inc_sub=GpuIncSubtensor,
            adv_sub1=GpuAdvancedSubtensor1,
            adv_incsub1=GpuAdvancedIncSubtensor1,
            dimshuffle=GpuDimShuffle,
            mode=mode_with_gpu,
            # avoid errors with limited devices
            dtype='float16',  # TODO(review): should this follow floatX?
            ignore_topo=(HostFromGpu, GpuFromHost,
                         DeepCopyOp, GpuContiguous))
        # GPU opt can't run in fast_compile only.
        self.fast_compile = False
        assert self.sub == GpuSubtensor
def test_advinc_subtensor1(): def test_advinc_subtensor1():
# Test the second case in the opt local_gpu_advanced_incsubtensor1 # Test the second case in the opt local_gpu_advanced_incsubtensor1
for shp in [(3, 3), (3, 3, 3)]: for shp in [(3, 3), (3, 3, 3)]:
...@@ -73,7 +100,9 @@ def test_advinc_subtensor1(): ...@@ -73,7 +100,9 @@ def test_advinc_subtensor1():
def test_advinc_subtensor1_dtype(): def test_advinc_subtensor1_dtype():
# Test the mixed dtype case # Test the mixed dtype case
shp = (3, 4) shp = (3, 4)
for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64')]: for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64'),
('float16', 'int8'), ('float16', 'float64'),
('float16', 'float16')]:
shared = gpuarray_shared_constructor shared = gpuarray_shared_constructor
xval = np.arange(np.prod(shp), dtype=dtype1).reshape(shp) + 1 xval = np.arange(np.prod(shp), dtype=dtype1).reshape(shp) + 1
yval = np.empty((2,) + shp[1:], dtype=dtype2) yval = np.empty((2,) + shp[1:], dtype=dtype2)
...@@ -95,7 +124,9 @@ def test_advinc_subtensor1_dtype(): ...@@ -95,7 +124,9 @@ def test_advinc_subtensor1_dtype():
def test_advinc_subtensor1_vector_scalar(): def test_advinc_subtensor1_vector_scalar():
# Test the case where x is a vector and y a scalar # Test the case where x is a vector and y a scalar
shp = (3,) shp = (3,)
for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64')]: for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64'),
('float16', 'int8'), ('float16', 'float64'),
('float16', 'float16')]:
shared = gpuarray_shared_constructor shared = gpuarray_shared_constructor
xval = np.arange(np.prod(shp), dtype=dtype1).reshape(shp) + 1 xval = np.arange(np.prod(shp), dtype=dtype1).reshape(shp) + 1
yval = np.asarray(10, dtype=dtype2) yval = np.asarray(10, dtype=dtype2)
...@@ -169,7 +200,26 @@ class G_advancedsubtensor(test_subtensor.TestAdvancedSubtensor): ...@@ -169,7 +200,26 @@ class G_advancedsubtensor(test_subtensor.TestAdvancedSubtensor):
sub=GpuAdvancedSubtensor, sub=GpuAdvancedSubtensor,
mode=mode_with_gpu, mode=mode_with_gpu,
# avoid errors with limited devices # avoid errors with limited devices
dtype='float32', dtype='float32', # floatX?
ignore_topo=(HostFromGpu, GpuFromHost,
DeepCopyOp))
# GPU opt can't run in fast_compile only.
self.fast_compile = False
assert self.sub == GpuAdvancedSubtensor
class G_advancedsubtensorF16(test_subtensor.TestAdvancedSubtensor):
def shortDescription(self):
return None
def __init__(self, name):
test_subtensor.TestAdvancedSubtensor.__init__(
self, name,
shared=gpuarray_shared_constructor,
sub=GpuAdvancedSubtensor,
mode=mode_with_gpu,
# avoid errors with limited devices
dtype='float16', # floatX?
ignore_topo=(HostFromGpu, GpuFromHost, ignore_topo=(HostFromGpu, GpuFromHost,
DeepCopyOp)) DeepCopyOp))
# GPU opt can't run in fast_compile only. # GPU opt can't run in fast_compile only.
...@@ -218,6 +268,17 @@ class test_gpuextractdiag(unittest.TestCase): ...@@ -218,6 +268,17 @@ class test_gpuextractdiag(unittest.TestCase):
GpuExtractDiag(offset, axis1, axis2)(x).eval({x: np_x}), GpuExtractDiag(offset, axis1, axis2)(x).eval({x: np_x}),
np_x.diagonal(offset, axis1, axis2)) np_x.diagonal(offset, axis1, axis2))
def test_tensor_float16(self):
x = tensor.tensor4()
np_x = np.arange(30107).reshape(7, 11, 17, 23).astype('float16')
for offset, axis1, axis2 in [
(1, 0, 1), (-1, 0, 1), (0, 1, 0), (-2, 1, 0),
(-3, 1, 0), (-2, 2, 0), (3, 3, 0), (-1, 3, 2),
(2, 2, 3), (-1, 2, 1), (1, 3, 1), (-1, 1, 3)]:
assert np.allclose(
GpuExtractDiag(offset, axis1, axis2)(x).eval({x: np_x}),
np_x.diagonal(offset, axis1, axis2))
class test_gpuallocdiag(unittest.TestCase): class test_gpuallocdiag(unittest.TestCase):
def test_matrix(self): def test_matrix(self):
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment