提交 59a5dfbb authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #4556 from abergeron/faster_incsub

Don't rebuild inplace add kernels all the time for GpuIncSubtensor.
...@@ -50,6 +50,15 @@ def init_dev(dev, name=None): ...@@ -50,6 +50,15 @@ def init_dev(dev, name=None):
if v[1] < 0: if v[1] < 0:
raise RuntimeError("Wrong minor API version for gpuarray:", v[1], raise RuntimeError("Wrong minor API version for gpuarray:", v[1],
"Please update libgpuarray/pygpu.") "Please update libgpuarray/pygpu.")
if len(v) < 3:
vpy = -1
else:
vpy = v[2]
vpye = 0
if vpy < vpye:
print("Wrong python API version for gpuarray:", vpy, "expected:", vpye,
"Some python ops may not work correctly and/or crash. "
"Consider updating pygpu.", file=sys.stderr)
global pygpu_activated global pygpu_activated
if dev not in init_dev.devmap: if dev not in init_dev.devmap:
ctx = pygpu.init(dev, ctx = pygpu.init(dev,
......
...@@ -56,3 +56,32 @@ def test_advinc_subtensor1(): ...@@ -56,3 +56,32 @@ def test_advinc_subtensor1():
rep = xval.copy() rep = xval.copy()
rep[[0, 2]] += yval rep[[0, 2]] += yval
assert numpy.allclose(rval, rep) assert numpy.allclose(rval, rep)
def test_incsub_f16():
    """Check that both inc_subtensor variants work on float16 data.

    Builds a float16 shared matrix, applies an advanced (fancy-indexed)
    increment and a basic slice increment, verifies the corresponding GPU
    op appears exactly once in each compiled graph, and compares results
    against the equivalent numpy in-place adds.
    """
    shape = (3, 3)
    shared = gpuarray_shared_constructor
    # Values 1..9 laid out as a 3x3 float16 matrix.
    x_data = numpy.arange(numpy.prod(shape), dtype='float16').reshape(shape) + 1
    # Two rows of constant 2s, matching x's trailing dimensions.
    y_data = numpy.full((2,) + shape[1:], 2, dtype='float16')
    x = shared(x_data, name='x')
    y = tensor.tensor(dtype='float16',
                      broadcastable=(False,) * len(shape),
                      name='y')

    def count_ops(fn, op_class):
        # Number of nodes of the given op class in fn's compiled graph.
        return sum(isinstance(node.op, op_class)
                   for node in fn.maker.fgraph.toposort())

    # Advanced increment: x[[0, 2]] += y.
    graph = tensor.advanced_inc_subtensor1(x, y, [0, 2])
    fn = theano.function([y], graph, mode=mode_with_gpu)
    assert count_ops(fn, GpuAdvancedIncSubtensor1) == 1
    result = fn(y_data)
    expected = x_data.copy()
    expected[[0, 2]] += y_data
    assert numpy.allclose(result, expected)

    # Basic slice increment: x[1:] += y.
    graph = tensor.inc_subtensor(x[1:], y)
    fn = theano.function([y], graph, mode=mode_with_gpu)
    assert count_ops(fn, GpuIncSubtensor) == 1
    result = fn(y_data)
    expected = x_data.copy()
    expected[1:] += y_data
    assert numpy.allclose(result, expected)
...@@ -301,20 +301,14 @@ class GpuArrayType(Type): ...@@ -301,20 +301,14 @@ class GpuArrayType(Type):
raise NotImplementedError( raise NotImplementedError(
"GpuArrayType.values_eq_approx() don't implemented the" "GpuArrayType.values_eq_approx() don't implemented the"
" allow_remove_inf and allow_remove_nan parameter") " allow_remove_inf and allow_remove_nan parameter")
if a.dtype == 'float16' or b.dtype == 'float16':
an = numpy.asarray(a)
bn = numpy.asarray(b)
return tensor.TensorType.values_eq_approx(
an, bn, allow_remove_inf=allow_remove_inf,
allow_remove_nan=allow_remove_nan, rtol=rtol, atol=atol)
atol_, rtol_ = theano.tensor.basic._get_atol_rtol(a, b) atol_, rtol_ = theano.tensor.basic._get_atol_rtol(a, b)
if rtol is not None: if rtol is not None:
rtol_ = rtol rtol_ = rtol
if atol is not None: if atol is not None:
atol_ = atol atol_ = atol
res = elemwise2(a, '', b, a, odtype=numpy.dtype('bool'), res = elemwise2(a, '', b, a, odtype=numpy.dtype('bool'),
op_tmpl="res[i] = (fabs(%%(a)s - %%(b)s) <" op_tmpl="res = (fabs(a - b) <"
"(%(atol_)s + %(rtol_)s * fabs(%%(b)s)))" % "(%(atol_)s + %(rtol_)s * fabs(b)))" %
locals()) locals())
ret = numpy.asarray(res).all() ret = numpy.asarray(res).all()
if ret: if ret:
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论