Commit 68bf7ddb authored by Frederic

Make op_lifter take a list of ops, and lift the Sum op again.

Add a test for it. Do not include the Prod op for now, as there is a failing test.
Parent 98c61e15
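The mechanical change runs through the whole optimization file: every `@op_lifter(SomeOp)` decoration becomes `@op_lifter([SomeOp])`, and the dispatch test inside `op_lifter` switches from an identity check to a membership check. A minimal sketch of the resulting pattern; the GPU-placement logic of the real optimizer is omitted, and the return convention (applying the new op to the node's inputs) is my assumption for illustration:

```python
def op_lifter(OP):
    """Decorator factory: fire the wrapped maker for any node whose op
    class appears in the list OP (a sketch, not the full optimizer)."""
    def f(maker):
        def local_opt(node):
            # Membership test replaces the old `type(node.op) is OP`,
            # so OP can now name several op classes at once.
            if type(node.op) in OP:
                # This does not support nodes with more than one output.
                assert len(node.outputs) == 1
                new_op = maker(node)
                if new_op:
                    # Assumed convention: replace the node by applying
                    # the lifted op to the same inputs.
                    return [new_op(*node.inputs)]
        return local_opt
    return f
```

With this in place, a single maker can serve several ops; for example, `@op_lifter([tensor.CAReduce, tensor.Sum])` below registers `local_gpua_careduce` for both reductions.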
@@ -3,7 +3,8 @@ import theano
 import numpy
 from theano import tensor, scalar
 from theano.compile import optdb
-from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, ProxyDB,
+from theano.gof import (local_optimizer, EquilibriumDB,
+                        SequenceDB, ProxyDB,
                         Optimizer, toolbox, DestroyHandler,
                         InconsistencyError, EquilibriumOptimizer)
@@ -52,7 +53,7 @@ def op_lifter(OP):
     """
     def f(maker):
         def local_opt(node):
-            if type(node.op) is OP:
+            if type(node.op) in OP:
                 # This does not support nodes that have more than one output.
                 assert len(node.outputs) == 1
                 # either one of our inputs is on the gpu or
@@ -120,13 +121,13 @@ optdb['canonicalize'].register('local_cut_gpua_host_gpua',
 @register_opt()
-@op_lifter(tensor.Alloc)
+@op_lifter([tensor.Alloc])
 def local_gpualloc(node):
     return gpu_alloc


 @register_opt()
-@op_lifter(tensor.Reshape)
+@op_lifter([tensor.Reshape])
 def local_gpureshape(node):
     op = node.op
     name = op.name
@@ -137,7 +138,7 @@ def local_gpureshape(node):
 @register_opt()
-@op_lifter(tensor.Flatten)
+@op_lifter([tensor.Flatten])
 def local_gpuflatten(node):
     op = node.op
     shp = []
@@ -150,7 +151,7 @@ def local_gpuflatten(node):
 @register_opt()
-@op_lifter(tensor.Elemwise)
+@op_lifter([tensor.Elemwise])
 def local_gpu_elemwise(node):
     op = node.op
     name = op.name
@@ -193,26 +194,26 @@ optdb.register('gpua_inplace_opt', inplace_gpu_elemwise_opt, 75,
 @register_opt()
-@op_lifter(tensor.DimShuffle)
+@op_lifter([tensor.DimShuffle])
 def local_gpua_dimshuffle(node):
     return GpuDimShuffle(node.op.input_broadcastable,
                          node.op.new_order)


 @register_opt()
-@op_lifter(tensor.SpecifyShape)
+@op_lifter([tensor.SpecifyShape])
 def local_gpua_specifyShape(node):
     return tensor.specify_shape


 @register_opt()
-@op_lifter(tensor.Subtensor)
+@op_lifter([tensor.Subtensor])
 def local_gpua_subtensor(node):
     return GpuSubtensor(node.op.idx_list)


 @register_opt()
-@op_lifter(tensor.CAReduce)
+@op_lifter([tensor.CAReduce, tensor.Sum])
 def local_gpua_careduce(node):
     if (isinstance(node.op.scalar_op, scalar.basic.Add) or
         isinstance(node.op.scalar_op, scalar.basic.Mul)):
@@ -220,23 +221,24 @@ def local_gpua_careduce(node):
                        dtype=getattr(node.op, 'dtype', None),
                        acc_dtype=getattr(node.op, 'acc_dtype', None))


 @register_opt()
-@op_lifter(tensor.blas.Gemv)
+@op_lifter([tensor.blas.Gemv])
 def local_gpua_gemv(node):
     return GpuGemv(inplace=node.op.inplace)


 @register_opt()
-@op_lifter(tensor.blas_c.CGemv)
+@op_lifter([tensor.blas_c.CGemv])
 def local_gpua_gemv2(node):
     return GpuGemv(inplace=node.op.inplace)


 @register_opt()
-@op_lifter(tensor.blas.Gemm)
+@op_lifter([tensor.blas.Gemm])
 def local_gpua_gemm(node):
     return GpuGemm(inplace=node.op.inplace)


 @register_opt()
-@op_lifter(tensor.basic.Eye)
+@op_lifter([tensor.basic.Eye])
 def local_gpua_eye(node):
     return GpuEye(dtype=node.op.dtype)
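A note on why `Sum` has to be spelled out next to `CAReduce` in the hunk above: `op_lifter` matches with an exact type check (`type(node.op) in OP`), not `isinstance`, and in Theano `Sum` is a subclass of `CAReduce`, so a `Sum` node never matched the old `@op_lifter(tensor.CAReduce)`. A small illustration of the difference, hedged as my reading of the check and using only names the diff itself uses:

```python
import theano
from theano import tensor

s = tensor.fmatrix().sum()   # the graph node's op is a Sum instance
op = s.owner.op

print(type(op) is tensor.CAReduce)                # False: exact type, subclass ignored
print(isinstance(op, tensor.CAReduce))            # True: Sum derives from CAReduce
print(type(op) in [tensor.CAReduce, tensor.Sum])  # True: the new list form catches it
```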
@@ -29,7 +29,7 @@ else:
 def test_flatten():
     m = theano.tensor.fmatrix()
     f = theano.function([m], m.flatten(), mode=mode_with_gpu)
-    val = numpy.random.rand(10,11).astype("float32")
+    val = numpy.random.rand(10, 11).astype("float32")
     res = f(val)
     utt.assert_allclose(res, val.flatten())
     assert res.shape == val.flatten().shape
@@ -58,3 +58,15 @@ def test_flatten():
     assert res.shape == val.reshape(10, -1).shape
     assert GpuReshape in [type(node.op)
                           for node in f.maker.fgraph.toposort()]
+
+
+def test_sum_prod():
+    for method in ['sum']:
+        m = theano.tensor.fmatrix()
+        f = theano.function([m], getattr(m, method)(), mode=mode_with_gpu)
+        val = numpy.random.rand(10, 11).astype("float32")
+        res = f(val)
+        utt.assert_allclose(res, val.sum())
+        assert res.shape == ()
+        assert GpuCAReduce in [type(node.op)
+                               for node in f.maker.fgraph.toposort()]
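The new test is named `test_sum_prod` but, per the commit message, only exercises `sum` for now because of a failing `prod` case. A hypothetical sketch of where the loop is presumably headed once that is fixed; it reuses the test module's existing imports and helpers, and the `getattr(val, method)()` comparison generalizes the hard-coded `val.sum()` (none of this is part of this commit):

```python
def test_sum_prod():
    # Hypothetical extension, not in this commit: cover both reductions.
    for method in ['sum', 'prod']:
        m = theano.tensor.fmatrix()
        f = theano.function([m], getattr(m, method)(), mode=mode_with_gpu)
        val = numpy.random.rand(10, 11).astype("float32")
        res = f(val)
        # Compare against the matching numpy reduction instead of the
        # hard-coded val.sum(), so the loop stays correct for 'prod'.
        utt.assert_allclose(res, getattr(val, method)())
        assert res.shape == ()
        assert GpuCAReduce in [type(node.op)
                               for node in f.maker.fgraph.toposort()]
```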