提交 3d41cfaa　作者：lamblin

Merge pull request #1457 from nouiz/gpu_min_max

Gpu min max, more pattern implemented
差异被折叠。
...@@ -602,7 +602,7 @@ def local_gpu_careduce(node): ...@@ -602,7 +602,7 @@ def local_gpu_careduce(node):
scalar_op = node.op.scalar_op scalar_op = node.op.scalar_op
# currently, only these two ops are supported at all, # currently, only these two ops are supported at all,
# and max does not support all combinations of axes # and max does not support all combinations of axes
if node.op.scalar_op in [scal.add, scal.maximum, scal.minimum]: if node.op.scalar_op in [scal.add, scal.mul, scal.maximum, scal.minimum]:
x, = node.inputs x, = node.inputs
if x.owner and x.owner.op == host_from_gpu: if x.owner and x.owner.op == host_from_gpu:
if node.op.axis is None: if node.op.axis is None:
......
...@@ -65,9 +65,16 @@ def test_careduce(): ...@@ -65,9 +65,16 @@ def test_careduce():
TODO: test with broadcast TODO: test with broadcast
""" """
for scalar_op, careduce_op in [ for scalar_op, careduce_op in [
(theano.scalar.mul, tensor.elemwise.CAReduceDtype),
(theano.scalar.add, tensor.elemwise.CAReduceDtype), (theano.scalar.add, tensor.elemwise.CAReduceDtype),
(theano.scalar.maximum, tensor.CAReduce), (theano.scalar.maximum, tensor.CAReduce),
(theano.scalar.minimum, tensor.CAReduce)]: (theano.scalar.minimum, tensor.CAReduce)
#The following 2 cases could work if the scalar_op.c_code work with float* dtype.
#Currently we have this error:
#error: invalid operands of types 'npy_float32' and 'npy_float32' to binary 'operator&'
#(theano.scalar.and_, tensor.elemwise.CAReduce),
#(theano.scalar.or_, tensor.elemwise.CAReduce),
]:
for shape, pattern in [((1,1),(1,)), for shape, pattern in [((1,1),(1,)),
((1,0),(1,)), ((1,0),(1,)),
((0,1),(1,)), ((0,1),(1,)),
...@@ -124,11 +131,6 @@ def test_careduce(): ...@@ -124,11 +131,6 @@ def test_careduce():
op = careduce_op(scalar_op, axis=pattern) op = careduce_op(scalar_op, axis=pattern)
pat = tensor_pattern_to_gpu_pattern(shape, pattern) pat = tensor_pattern_to_gpu_pattern(shape, pattern)
#GpuCAReduce{maximum/minimum} support only those patterns
if scalar_op in [theano.scalar.maximum,
theano.scalar.minimum] and pat not in [
(0, 1), (0, 1, 1), (0, 1, 1), (1, 0)]:
continue
a = tensor.TensorType('float32', (False,) * len(shape))() a = tensor.TensorType('float32', (False,) * len(shape))()
b = op(a) b = op(a)
...@@ -139,15 +141,22 @@ def test_careduce(): ...@@ -139,15 +141,22 @@ def test_careduce():
f = theano.function([a], b, mode=mode_with_gpu) f = theano.function([a], b, mode=mode_with_gpu)
f2 = theano.function([a], b, mode=mode_without_gpu) f2 = theano.function([a], b, mode=mode_without_gpu)
assert tcn.GpuCAReduce in [x.op.__class__ assert tcn.GpuCAReduce in [x.op.__class__
for x in f.maker.fgraph.toposort()] for x in f.maker.fgraph.toposort()], (
scalar_op, shape, pattern)
assert op.__class__ in [x.op.__class__ assert op.__class__ in [x.op.__class__
for x in f2.maker.fgraph.toposort()] for x in f2.maker.fgraph.toposort()], (
scalar_op, shape, pattern)
f_caused_value_error = False f_caused_value_error = False
try: try:
f_out = f(val) f_out = f(val)
except ValueError, e: except ValueError, e:
exc = e exc = e
f_caused_value_error = True f_caused_value_error = True
except NotImplementedError:
if (numpy.prod(shape) == 0 and
getattr(scalar_op, 'identity', None) != 0):
continue
raise
f2_caused_value_error = False f2_caused_value_error = False
try: try:
...@@ -179,6 +188,7 @@ def test_careduce(): ...@@ -179,6 +188,7 @@ def test_careduce():
theano.tensor.basic.float32_rtol = 2e-5 theano.tensor.basic.float32_rtol = 2e-5
assert _allclose(f_out, f2_out), ('shape', shape, assert _allclose(f_out, f2_out), ('shape', shape,
'pattern', pattern, 'pattern', pattern,
scalar_op,
sum([shape[i] for i in pattern]), sum([shape[i] for i in pattern]),
f2(val), f(val), val) f2(val), f(val), val)
finally: finally:
...@@ -193,11 +203,6 @@ def test_careduce(): ...@@ -193,11 +203,6 @@ def test_careduce():
((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]: ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
op = careduce_op(scalar_op, axis=pattern) op = careduce_op(scalar_op, axis=pattern)
pat = tensor_pattern_to_gpu_pattern(shape, pattern) pat = tensor_pattern_to_gpu_pattern(shape, pattern)
#GpuCAReduce{maximum/minimum} support only those patterns
if scalar_op in [theano.scalar.maximum,
theano.scalar.minimum] and pat not in [
(0, 1), (0, 1, 1), (0, 1, 1), (1, 0)]:
continue
a = tensor.TensorType('float32', (False,) * len(shape))() a = tensor.TensorType('float32', (False,) * len(shape))()
dim_pattern = range(len(shape)) dim_pattern = range(len(shape))
...@@ -212,11 +217,14 @@ def test_careduce(): ...@@ -212,11 +217,14 @@ def test_careduce():
f = theano.function([a], b, mode=mode_with_gpu) f = theano.function([a], b, mode=mode_with_gpu)
f2 = theano.function([a], b, mode=mode_without_gpu) f2 = theano.function([a], b, mode=mode_without_gpu)
assert tcn.GpuCAReduce in [x.op.__class__ assert tcn.GpuCAReduce in [x.op.__class__
for x in f.maker.fgraph.toposort()] for x in f.maker.fgraph.toposort()], (
scalar_op, shape, pattern)
assert op.__class__ in [x.op.__class__ assert op.__class__ in [x.op.__class__
for x in f2.maker.fgraph.toposort()] for x in f2.maker.fgraph.toposort()], (
scalar_op, shape, pattern)
assert _allclose(f2(val), f(val)), ('shape', shape, assert _allclose(f2(val), f(val)), ('shape', shape,
'pattern', pattern, 'pattern', pattern,
scalar_op,
sum([shape[i] for i in pattern])) sum([shape[i] for i in pattern]))
#test with broadcast #test with broadcast
...@@ -227,11 +235,6 @@ def test_careduce(): ...@@ -227,11 +235,6 @@ def test_careduce():
((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]: ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
op = careduce_op(scalar_op, axis=pattern) op = careduce_op(scalar_op, axis=pattern)
pat = tensor_pattern_to_gpu_pattern(shape, pattern) pat = tensor_pattern_to_gpu_pattern(shape, pattern)
#GpuCAReduce{maximum/minimum} support only those patterns
if scalar_op in [theano.scalar.maximum,
theano.scalar.minimum] and pat not in [
(0, 1), (0, 1, 1), (0, 1, 1), (1, 0)]:
continue
shape = numpy.asarray(shape) * 2 shape = numpy.asarray(shape) * 2
a = tensor.TensorType('float32', (False,) * len(shape))() a = tensor.TensorType('float32', (False,) * len(shape))()
...@@ -258,9 +261,11 @@ def test_careduce(): ...@@ -258,9 +261,11 @@ def test_careduce():
f = theano.function([a], b, mode=mode_without_gpu) f = theano.function([a], b, mode=mode_without_gpu)
f2 = theano.function([a2], b2, mode=mode_with_gpu) f2 = theano.function([a2], b2, mode=mode_with_gpu)
assert tcn.GpuCAReduce in [x.op.__class__ assert tcn.GpuCAReduce in [x.op.__class__
for x in f2.maker.fgraph.toposort()] for x in f2.maker.fgraph.toposort()], (
scalar_op, shape, pattern)
assert op.__class__ in [x.op.__class__ assert op.__class__ in [x.op.__class__
for x in f.maker.fgraph.toposort()] for x in f.maker.fgraph.toposort()], (
scalar_op, shape, pattern)
assert _allclose(f2(val2), f(val)), ('shape', shape, assert _allclose(f2(val2), f(val)), ('shape', shape,
'pattern', pattern, 'pattern', pattern,
sum([shape[i] for i in pattern])) sum([shape[i] for i in pattern]))
......
...@@ -2721,7 +2721,7 @@ class TrueDotTester(utt.InferShapeTester): ...@@ -2721,7 +2721,7 @@ class TrueDotTester(utt.InferShapeTester):
assert tested.format == format assert tested.format == format
assert tested.dtype == expected.dtype assert tested.dtype == expected.dtype
tested = tested.toarray() tested = tested.toarray()
assert numpy.allclose(tested, expected) utt.assert_allclose(tested, expected)
def test_op_sd(self): def test_op_sd(self):
for format in sparse.sparse_formats: for format in sparse.sparse_formats:
...@@ -2743,7 +2743,7 @@ class TrueDotTester(utt.InferShapeTester): ...@@ -2743,7 +2743,7 @@ class TrueDotTester(utt.InferShapeTester):
assert tested.format == format assert tested.format == format
assert tested.dtype == expected.dtype assert tested.dtype == expected.dtype
tested = tested.toarray() tested = tested.toarray()
assert numpy.allclose(tested, expected) utt.assert_allclose(tested, expected)
def test_infer_shape(self): def test_infer_shape(self):
for format in sparse.sparse_formats: for format in sparse.sparse_formats:
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册或登录后发表评论