提交 e5978249 · 作者: Ian Goodfellow

changed references to GpuSum to now use GpuCAReduce

上级 2a1aae56
...@@ -270,7 +270,7 @@ if cuda_available: ...@@ -270,7 +270,7 @@ if cuda_available:
import basic_ops import basic_ops
from basic_ops import (GpuFromHost, HostFromGpu, GpuElemwise, from basic_ops import (GpuFromHost, HostFromGpu, GpuElemwise,
GpuDimShuffle, GpuSum, GpuReshape, GpuContiguous, GpuDimShuffle, GpuCAReduce, GpuReshape, GpuContiguous,
GpuSubtensor, GpuIncSubtensor, GpuSubtensor, GpuIncSubtensor,
GpuAdvancedSubtensor1, GpuAdvancedIncSubtensor1, GpuAdvancedSubtensor1, GpuAdvancedIncSubtensor1,
GpuFlatten, GpuShape, GpuAlloc, GpuFlatten, GpuShape, GpuAlloc,
......
...@@ -593,7 +593,7 @@ def local_gpu_sum(node): ...@@ -593,7 +593,7 @@ def local_gpu_sum(node):
for a in node.op.axis: for a in node.op.axis:
assert reduce_mask[a] == 0 assert reduce_mask[a] == 0
reduce_mask[a] = 1 reduce_mask[a] = 1
gsum = GpuSum(reduce_mask) gsum = GpuCAReduce(reduce_mask, theano.scalar.basic.add)
pattern = (''.join(str(i) for i in reduce_mask)) pattern = (''.join(str(i) for i in reduce_mask))
if hasattr(gsum, 'c_code_reduce_%s' % pattern): if hasattr(gsum, 'c_code_reduce_%s' % pattern):
rval = host_from_gpu(gsum(gpu_from_host(x))) rval = host_from_gpu(gsum(gpu_from_host(x)))
...@@ -625,7 +625,7 @@ def local_gpu_sum(node): ...@@ -625,7 +625,7 @@ def local_gpu_sum(node):
new_in_shp.append(x_shape[i]) new_in_shp.append(x_shape[i])
pattern = (''.join(str(i) for i in new_mask)) pattern = (''.join(str(i) for i in new_mask))
new_gsum = GpuSum(new_mask) new_gsum = GpuCAReduce(new_mask, theano.scalar.basic.add)
if hasattr(new_gsum, 'c_code_reduce_%s' % pattern): if hasattr(new_gsum, 'c_code_reduce_%s' % pattern):
reshaped_x = x.reshape(tensor.stack(*new_in_shp)) reshaped_x = x.reshape(tensor.stack(*new_in_shp))
sum_reshaped_x = host_from_gpu( sum_reshaped_x = host_from_gpu(
...@@ -644,7 +644,7 @@ def local_gpu_sum(node): ...@@ -644,7 +644,7 @@ def local_gpu_sum(node):
return None return None
raise Exception( raise Exception(
"GpuSum don't have implemented the pattern", "GpuCAReduce don't have implemented the pattern",
pattern) pattern)
return False return False
......
...@@ -108,7 +108,7 @@ def test_sum(): ...@@ -108,7 +108,7 @@ def test_sum():
val = theano._asarray(val, dtype='float32') val = theano._asarray(val, dtype='float32')
f = theano.function([a], b, mode=mode_with_gpu) f = theano.function([a], b, mode=mode_with_gpu)
f2 = theano.function([a], b, mode=mode_without_gpu) f2 = theano.function([a], b, mode=mode_without_gpu)
assert tcn.GpuSum in [x.op.__class__ for x in f.maker.fgraph.toposort()] assert tcn.GpuCAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()]
assert T.Sum in [x.op.__class__ for x in f2.maker.fgraph.toposort()] assert T.Sum in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
if val.size == 0: if val.size == 0:
assert f2(val) == f(val), ('shape', shape, 'pattern', pattern) assert f2(val) == f(val), ('shape', shape, 'pattern', pattern)
...@@ -145,7 +145,7 @@ def test_sum(): ...@@ -145,7 +145,7 @@ def test_sum():
val = theano._asarray(val, dtype='float32') val = theano._asarray(val, dtype='float32')
f = theano.function([a], b, mode=mode_with_gpu) f = theano.function([a], b, mode=mode_with_gpu)
f2 = theano.function([a], b, mode=mode_without_gpu) f2 = theano.function([a], b, mode=mode_without_gpu)
assert tcn.GpuSum in [x.op.__class__ for x in f.maker.fgraph.toposort()] assert tcn.GpuCAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()]
assert T.Sum in [x.op.__class__ for x in f2.maker.fgraph.toposort()] assert T.Sum in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
assert _allclose(f2(val), f(val)), ('shape', shape, assert _allclose(f2(val), f(val)), ('shape', shape,
'pattern', pattern, 'pattern', pattern,
...@@ -181,7 +181,7 @@ def test_sum(): ...@@ -181,7 +181,7 @@ def test_sum():
val2 = val2[::2, ::2, ::2, ::2] val2 = val2[::2, ::2, ::2, ::2]
f = theano.function([a], b, mode=mode_without_gpu) f = theano.function([a], b, mode=mode_without_gpu)
f2 = theano.function([a2], b2, mode=mode_with_gpu) f2 = theano.function([a2], b2, mode=mode_with_gpu)
assert tcn.GpuSum in [x.op.__class__ for x in f2.maker.fgraph.toposort()] assert tcn.GpuCAReduce in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
assert T.Sum in [x.op.__class__ for x in f.maker.fgraph.toposort()] assert T.Sum in [x.op.__class__ for x in f.maker.fgraph.toposort()]
assert _allclose(f2(val2), f(val)), ('shape', shape, assert _allclose(f2(val2), f(val)), ('shape', shape,
'pattern', pattern, 'pattern', pattern,
......
...@@ -28,7 +28,7 @@ def test_nvidia_driver1(): ...@@ -28,7 +28,7 @@ def test_nvidia_driver1():
profile=False) profile=False)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 2 assert len(topo) == 2
assert sum(isinstance(node.op, B.GpuSum) for node in topo) == 1 assert sum(isinstance(node.op, B.GpuCAReduce) for node in topo) == 1
if not numpy.allclose(f(), a.sum()): if not numpy.allclose(f(), a.sum()):
raise Exception("The nvidia driver version installed with this OS " raise Exception("The nvidia driver version installed with this OS "
"does not give good results for reduction." "does not give good results for reduction."
......
...@@ -44,11 +44,11 @@ def test_int_pow(): ...@@ -44,11 +44,11 @@ def test_int_pow():
f = theano.function([a], (a*4).sum(), mode=mode_with_gpu) f = theano.function([a], (a*4).sum(), mode=mode_with_gpu)
op_names = [n.op.__class__.__name__ for n in f.maker.fgraph.toposort()] op_names = [n.op.__class__.__name__ for n in f.maker.fgraph.toposort()]
assert op_names == ['GpuSum', 'GpuElemwise', 'HostFromGpu'] assert op_names == ['GpuCAReduce', 'GpuElemwise', 'HostFromGpu']
f = theano.function([a], tensor.pow(a,4).sum(), mode=mode_with_gpu) f = theano.function([a], tensor.pow(a,4).sum(), mode=mode_with_gpu)
op_names = [n.op.__class__.__name__ for n in f.maker.fgraph.toposort()] op_names = [n.op.__class__.__name__ for n in f.maker.fgraph.toposort()]
assert op_names == ['GpuElemwise', 'GpuSum', 'HostFromGpu'] assert op_names == ['GpuElemwise', 'GpuCAReduce', 'HostFromGpu']
#theano.printing.debugprint(f) #theano.printing.debugprint(f)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论