提交 9bc05a38 authored 作者: Tim Cooijmans's avatar Tim Cooijmans 提交者: Reyhane Askari

define and use with_stack_trace

上级 592e7c75
...@@ -2948,6 +2948,34 @@ def copy_stack_trace(from_var, to_var): ...@@ -2948,6 +2948,34 @@ def copy_stack_trace(from_var, to_var):
to_var.tag.trace = getattr(to_var.tag, 'trace', []) + tr to_var.tag.trace = getattr(to_var.tag, 'trace', []) + tr
def with_stack_trace(from_var, to_var):
    """
    Copies the stack trace from one or more tensor variables to
    one or more tensor variables and returns the destination variables.

    Parameters
    ----------
    from_var
        Tensor variable or list of tensor variables to copy stack traces from.
    to_var
        Tensor variable or list of tensor variables to copy stack traces to.

    Returns
    -------
    tensor variable or list of tensor variables
        `to_var`, augmented with the stack traces from `from_var`.

    Notes
    -----
    The stacktrace is assumed to be of the form of a list of lists
    of tuples. Each tuple contains the filename, line number, function name
    and so on. Each list of tuples contains the tuples belonging to a
    particular variable.
    """
    # Delegate the actual tag.trace merging to copy_stack_trace; returning
    # to_var lets callers wrap an expression inline, e.g.
    # `return with_stack_trace(old, new_expr)`.
    copy_stack_trace(from_var, to_var)
    return to_var
def check_stack_trace(f_or_fgraph, ops_to_check='last', bug_print='raise'): def check_stack_trace(f_or_fgraph, ops_to_check='last', bug_print='raise'):
""" """
This function checks if the outputs of specific ops of a compiled graph This function checks if the outputs of specific ops of a compiled graph
......
...@@ -15,6 +15,7 @@ from theano.tensor.basic import ( ...@@ -15,6 +15,7 @@ from theano.tensor.basic import (
from theano.gof import HideC, COp, ParamsType from theano.gof import HideC, COp, ParamsType
from theano.gof.utils import MethodNotDefined from theano.gof.utils import MethodNotDefined
from theano.gof.opt import with_stack_trace
from collections import deque from collections import deque
...@@ -75,11 +76,11 @@ def as_gpuarray_variable(x, context_name): ...@@ -75,11 +76,11 @@ def as_gpuarray_variable(x, context_name):
# If we couldn't deal with transfers, then maybe it's a tensor # If we couldn't deal with transfers, then maybe it's a tensor
if isinstance(x.type, tensor.TensorType): if isinstance(x.type, tensor.TensorType):
return GpuFromHost(context_name)(x) return with_stack_trace(x, GpuFromHost(context_name)(x))
# Try _as_GpuArrayVariable if possible # Try _as_GpuArrayVariable if possible
if hasattr(x, '_as_GpuArrayVariable'): if hasattr(x, '_as_GpuArrayVariable'):
return x._as_GpuArrayVariable(context_name) return with_stack_trace(x, x._as_GpuArrayVariable(context_name))
# If it didn't work try for a constant # If it didn't work try for a constant
ctx = get_context(context_name) ctx = get_context(context_name)
...@@ -88,13 +89,13 @@ def as_gpuarray_variable(x, context_name): ...@@ -88,13 +89,13 @@ def as_gpuarray_variable(x, context_name):
if x.context.ptr != ctx.ptr: if x.context.ptr != ctx.ptr:
x = x.transfer(ctx) x = x.transfer(ctx)
x = gpuarray.asarray(x, context=ctx) x = with_stack_trace(x, gpuarray.asarray(x, context=ctx))
bcast = [(s == 1) for s in x.shape] bcast = [(s == 1) for s in x.shape]
return GpuArrayConstant(GpuArrayType(dtype=x.dtype, return with_stack_trace(x, GpuArrayConstant(GpuArrayType(dtype=x.dtype,
broadcastable=bcast, broadcastable=bcast,
context_name=context_name), context_name=context_name),
x) x))
def infer_context_name(*vars): def infer_context_name(*vars):
......
...@@ -15,7 +15,7 @@ from theano.compile.ops import shape_i ...@@ -15,7 +15,7 @@ from theano.compile.ops import shape_i
from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer, from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer,
LocalGroupDB, LocalGroupDB,
SequenceDB, Optimizer, DB, toolbox, graph) SequenceDB, Optimizer, DB, toolbox, graph)
from theano.gof.opt import LocalMetaOptimizer, copy_stack_trace from theano.gof.opt import LocalMetaOptimizer, copy_stack_trace, with_stack_trace
from theano.ifelse import IfElse from theano.ifelse import IfElse
from theano.misc.ordered_set import OrderedSet from theano.misc.ordered_set import OrderedSet
...@@ -421,6 +421,8 @@ class GraphToGPU(Optimizer): ...@@ -421,6 +421,8 @@ class GraphToGPU(Optimizer):
if isinstance(new_ops, theano.Op): if isinstance(new_ops, theano.Op):
outputs = new_ops(*[mapping[i] for i in node.inputs], return_list=True) outputs = new_ops(*[mapping[i] for i in node.inputs], return_list=True)
for old_output, new_output in zip(node.outputs, outputs):
copy_stack_trace(old_output, new_output)
elif not new_ops: elif not new_ops:
newnode = node.clone_with_new_inputs([mapping.get(i) for i in node.inputs]) newnode = node.clone_with_new_inputs([mapping.get(i) for i in node.inputs])
outputs = newnode.outputs outputs = newnode.outputs
...@@ -461,7 +463,7 @@ class GraphToGPU(Optimizer): ...@@ -461,7 +463,7 @@ class GraphToGPU(Optimizer):
new_o.owner.inputs[0].type == o.type): new_o.owner.inputs[0].type == o.type):
new_o = new_o.owner.inputs[0] new_o = new_o.owner.inputs[0]
else: else:
new_o = safe_to_cpu(new_o) new_o = with_stack_trace(o, safe_to_cpu(new_o))
new_nodes.append(new_o) new_nodes.append(new_o)
fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes), fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes),
reason=self.__class__.__name__) reason=self.__class__.__name__)
...@@ -692,8 +694,6 @@ def local_gpu_contiguous_gpu_contiguous(node): ...@@ -692,8 +694,6 @@ def local_gpu_contiguous_gpu_contiguous(node):
if isinstance(node.op, GpuContiguous): if isinstance(node.op, GpuContiguous):
inp = node.inputs[0] inp = node.inputs[0]
if inp.owner and isinstance(inp.owner.op, GpuContiguous): if inp.owner and isinstance(inp.owner.op, GpuContiguous):
if not getattr(inp.tag, 'trace', None):
copy_stack_trace(node.outputs[0], inp)
return [inp] return [inp]
...@@ -1220,7 +1220,7 @@ def local_gpua_careduce(op, context_name, inputs, outputs): ...@@ -1220,7 +1220,7 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
op.scalar_op, axis=op.axis, op.scalar_op, axis=op.axis,
dtype=odtype, dtype=odtype,
acc_dtype=adtype) acc_dtype=adtype)
gvar = greduce(x) gvar = with_stack_trace(outputs, greduce(x))
# We need to have the make node called, otherwise the mask can # We need to have the make node called, otherwise the mask can
# be None # be None
if (op2 is GpuCAReduceCPY or if (op2 is GpuCAReduceCPY or
...@@ -1260,22 +1260,27 @@ def local_gpua_careduce(op, context_name, inputs, outputs): ...@@ -1260,22 +1260,27 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
dtype=getattr(op, 'dtype', outputs[0].dtype), dtype=getattr(op, 'dtype', outputs[0].dtype),
acc_dtype=getattr(op, 'acc_dtype', None)) acc_dtype=getattr(op, 'acc_dtype', None))
reshaped_x = x.reshape(tensor.stack(new_in_shp)) reshaped_x = with_stack_trace(
gpu_reshaped_x = as_gpuarray_variable(reshaped_x, context_name) outputs, x.reshape(tensor.stack(new_in_shp)))
gvar = greduce(gpu_reshaped_x) gpu_reshaped_x = with_stack_trace(
outputs, as_gpuarray_variable(reshaped_x, context_name))
gvar = with_stack_trace(outputs, greduce(gpu_reshaped_x))
# We need to have the make node called, otherwise the mask can # We need to have the make node called, otherwise the mask can
# be None # be None
reshaped_gpu_inputs = [gpu_reshaped_x] reshaped_gpu_inputs = [gpu_reshaped_x]
if greduce.supports_c_code(reshaped_gpu_inputs): if greduce.supports_c_code(reshaped_gpu_inputs):
reduce_reshaped_x = greduce(gpu_reshaped_x) reduce_reshaped_x = with_stack_trace(
outputs, greduce(gpu_reshaped_x))
if reduce_reshaped_x.ndim != outputs[0].ndim: if reduce_reshaped_x.ndim != outputs[0].ndim:
out_shp = [] out_shp = []
for i in range(x.ndim): for i in range(x.ndim):
if i not in op.axis: if i not in op.axis:
out_shp.append(shape_i(x, i)) out_shp.append(shape_i(x, i))
unreshaped_reduce = GpuReshape(len(out_shp))(reduce_reshaped_x, unreshaped_reduce = with_stack_trace(
tensor.stack(out_shp)) outputs, GpuReshape(len(out_shp))(
reduce_reshaped_x,
tensor.stack(out_shp)))
else: else:
unreshaped_reduce = reduce_reshaped_x unreshaped_reduce = reduce_reshaped_x
return [unreshaped_reduce] return [unreshaped_reduce]
...@@ -2398,7 +2403,8 @@ def local_gpu_elemwise_careduce(node): ...@@ -2398,7 +2403,8 @@ def local_gpu_elemwise_careduce(node):
props = node.op._props_dict() props = node.op._props_dict()
props["pre_scalar_op"] = scalar.basic.sqr props["pre_scalar_op"] = scalar.basic.sqr
out = GpuCAReduceCuda(**props)(inp) out = GpuCAReduceCuda(**props)(inp)
return [out] return with_stack_trace(
node.outputs, out)
@local_optimizer(None) @local_optimizer(None)
......
...@@ -14,7 +14,7 @@ import theano.gpuarray ...@@ -14,7 +14,7 @@ import theano.gpuarray
from .. import basic_ops from .. import basic_ops
from ..type import GpuArrayType, gpuarray_shared_constructor, get_context from ..type import GpuArrayType, gpuarray_shared_constructor, get_context
from ..basic_ops import ( from ..basic_ops import (
GpuAlloc, GpuAllocEmpty, GpuReshape, GpuFromHost, host_from_gpu) GpuAlloc, GpuAllocEmpty, GpuReshape, GpuFromHost, HostFromGpu, host_from_gpu)
from ..blas import GpuGemm from ..blas import GpuGemm
from ..elemwise import ( from ..elemwise import (
GpuCAReduceCuda, GpuCAReduceCPY, GpuElemwise, Elemwise, max_inputs_to_GpuElemwise) GpuCAReduceCuda, GpuCAReduceCPY, GpuElemwise, Elemwise, max_inputs_to_GpuElemwise)
...@@ -28,6 +28,16 @@ from theano.tensor.nnet import abstract_conv ...@@ -28,6 +28,16 @@ from theano.tensor.nnet import abstract_conv
from theano.gpuarray import dnn, blas from theano.gpuarray import dnn, blas
def _check_stack_trace(thing):
    """
    Run check_stack_trace on `thing`, skipping ops that are not
    expected to carry stack traces in these tests (shape inference,
    ifelse, host/GPU transfers and elemwise).
    """
    # Ops excluded from the stack-trace check.
    skipped_op_types = (theano.compile.ops.Shape_i,
                        theano.ifelse.IfElse,
                        GpuFromHost, HostFromGpu,
                        GpuElemwise)

    def _ops_to_check(node_or_op):
        # Accept either an Op or an Apply node; unwrap the node's op.
        op = (node_or_op if isinstance(node_or_op, theano.gof.Op)
              else node_or_op.op)
        return not isinstance(op, skipped_op_types)

    return check_stack_trace(thing, ops_to_check=_ops_to_check)
def test_local_assert(): def test_local_assert():
x = theano.tensor.fmatrix() x = theano.tensor.fmatrix()
a = theano.tensor.opt.assert_op(x, theano.tensor.eq(x, 0).any()) a = theano.tensor.opt.assert_op(x, theano.tensor.eq(x, 0).any())
...@@ -71,8 +81,8 @@ def test_local_gpu_contiguous_gpu_contiguous(): ...@@ -71,8 +81,8 @@ def test_local_gpu_contiguous_gpu_contiguous():
if isinstance(node.op, basic_ops.GpuContiguous)]) if isinstance(node.op, basic_ops.GpuContiguous)])
assert 1 == len([node for node in f2.maker.fgraph.toposort() assert 1 == len([node for node in f2.maker.fgraph.toposort()
if isinstance(node.op, basic_ops.GpuContiguous)]) if isinstance(node.op, basic_ops.GpuContiguous)])
assert check_stack_trace(f1, ops_to_check='all') assert _check_stack_trace(f1)
assert check_stack_trace(f2, ops_to_check='all') assert _check_stack_trace(f2)
def test_local_gpu_contiguous(): def test_local_gpu_contiguous():
...@@ -82,7 +92,7 @@ def test_local_gpu_contiguous(): ...@@ -82,7 +92,7 @@ def test_local_gpu_contiguous():
assert 1 == len([node for node in f.maker.fgraph.toposort() assert 1 == len([node for node in f.maker.fgraph.toposort()
if isinstance(node.op, basic_ops.GpuContiguous)]) if isinstance(node.op, basic_ops.GpuContiguous)])
f([[2.]]) f([[2.]])
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
def test_flatten(): def test_flatten():
...@@ -100,7 +110,7 @@ def test_flatten(): ...@@ -100,7 +110,7 @@ def test_flatten():
assert res.shape == val.flatten().shape assert res.shape == val.flatten().shape
assert GpuReshape in [type(node.op) assert GpuReshape in [type(node.op)
for node in f.maker.fgraph.toposort()] for node in f.maker.fgraph.toposort()]
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
f = theano.function([m], m.flatten(ndim=2), f = theano.function([m], m.flatten(ndim=2),
mode=mode_with_gpu.excluding("local_useless_reshape")) mode=mode_with_gpu.excluding("local_useless_reshape"))
...@@ -110,7 +120,7 @@ def test_flatten(): ...@@ -110,7 +120,7 @@ def test_flatten():
assert res.shape == val.shape assert res.shape == val.shape
assert GpuReshape in [type(node.op) assert GpuReshape in [type(node.op)
for node in f.maker.fgraph.toposort()] for node in f.maker.fgraph.toposort()]
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
m = theano.tensor.tensor3() m = theano.tensor.tensor3()
f = theano.function([m], m.flatten(ndim=2), mode=mode_with_gpu) f = theano.function([m], m.flatten(ndim=2), mode=mode_with_gpu)
...@@ -120,7 +130,7 @@ def test_flatten(): ...@@ -120,7 +130,7 @@ def test_flatten():
assert res.shape == val.reshape(10, -1).shape assert res.shape == val.reshape(10, -1).shape
assert GpuReshape in [type(node.op) assert GpuReshape in [type(node.op)
for node in f.maker.fgraph.toposort()] for node in f.maker.fgraph.toposort()]
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
def test_reduce(): def test_reduce():
...@@ -133,7 +143,7 @@ def test_reduce(): ...@@ -133,7 +143,7 @@ def test_reduce():
f = theano.function([m], getattr(m, method)(axis=0, f = theano.function([m], getattr(m, method)(axis=0,
**param), **param),
mode=mode_with_gpu) mode=mode_with_gpu)
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
val = np.random.rand(10, 11).astype("float32") val = np.random.rand(10, 11).astype("float32")
res = f(val) res = f(val)
utt.assert_allclose(res, getattr(val, method)(axis=0)) utt.assert_allclose(res, getattr(val, method)(axis=0))
...@@ -165,7 +175,7 @@ def test_local_gpualloc_memset_0(): ...@@ -165,7 +175,7 @@ def test_local_gpualloc_memset_0():
assert len(topo) == 1 assert len(topo) == 1
assert isinstance(topo[0].op, theano.tensor.Alloc) assert isinstance(topo[0].op, theano.tensor.Alloc)
assert (np.asarray(f(6)) == 0).all() assert (np.asarray(f(6)) == 0).all()
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
# Test with 0 from CPU op. # Test with 0 from CPU op.
# Should be transfered as it is used by another op. # Should be transfered as it is used by another op.
...@@ -175,7 +185,7 @@ def test_local_gpualloc_memset_0(): ...@@ -175,7 +185,7 @@ def test_local_gpualloc_memset_0():
assert len(topo) == 3 assert len(topo) == 3
assert isinstance(topo[0].op, GpuAlloc) assert isinstance(topo[0].op, GpuAlloc)
assert (np.asarray(f(6)) == 0).all() assert (np.asarray(f(6)) == 0).all()
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
# Test with 0 # Test with 0
a = GpuAlloc(test_ctx_name)(z, i) a = GpuAlloc(test_ctx_name)(z, i)
...@@ -184,7 +194,7 @@ def test_local_gpualloc_memset_0(): ...@@ -184,7 +194,7 @@ def test_local_gpualloc_memset_0():
assert len(topo) == 1 assert len(topo) == 1
assert isinstance(topo[0].op, GpuAlloc) and topo[0].op.memset_0 assert isinstance(topo[0].op, GpuAlloc) and topo[0].op.memset_0
assert (np.asarray(f(6)) == 0).all() assert (np.asarray(f(6)) == 0).all()
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
# Test with 1 # Test with 1
a = GpuAlloc(test_ctx_name)(o, i) a = GpuAlloc(test_ctx_name)(o, i)
...@@ -194,7 +204,7 @@ def test_local_gpualloc_memset_0(): ...@@ -194,7 +204,7 @@ def test_local_gpualloc_memset_0():
assert isinstance(topo[0].op, GpuAlloc) assert isinstance(topo[0].op, GpuAlloc)
assert not topo[0].op.memset_0 assert not topo[0].op.memset_0
assert (np.asarray(f(6)) == 1).all() assert (np.asarray(f(6)) == 1).all()
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
# Test with 1, 1 # Test with 1, 1
a = GpuAlloc(test_ctx_name)(ones, i) a = GpuAlloc(test_ctx_name)(ones, i)
...@@ -204,7 +214,7 @@ def test_local_gpualloc_memset_0(): ...@@ -204,7 +214,7 @@ def test_local_gpualloc_memset_0():
assert isinstance(topo[0].op, GpuAlloc) assert isinstance(topo[0].op, GpuAlloc)
assert not topo[0].op.memset_0 assert not topo[0].op.memset_0
assert (np.asarray(f(2)) == 1).all() assert (np.asarray(f(2)) == 1).all()
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
def test_local_gpualloc_empty(): def test_local_gpualloc_empty():
...@@ -220,7 +230,7 @@ def test_local_gpualloc_empty(): ...@@ -220,7 +230,7 @@ def test_local_gpualloc_empty():
assert isinstance(topo[0].op, theano.tensor.AllocEmpty) assert isinstance(topo[0].op, theano.tensor.AllocEmpty)
# This return not initilized data, so we can only check the shape # This return not initilized data, so we can only check the shape
assert f(3).shape == (3,) assert f(3).shape == (3,)
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
# Test with vector # Test with vector
# Should be moved # Should be moved
...@@ -231,7 +241,7 @@ def test_local_gpualloc_empty(): ...@@ -231,7 +241,7 @@ def test_local_gpualloc_empty():
assert isinstance(topo[0].op, GpuAllocEmpty) assert isinstance(topo[0].op, GpuAllocEmpty)
# This return not initilized data, so we can only check the shape # This return not initilized data, so we can only check the shape
assert f(3).shape == (3,) assert f(3).shape == (3,)
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
# Test with matrix # Test with matrix
a = tensor.AllocEmpty('float32')(i, ii) a = tensor.AllocEmpty('float32')(i, ii)
...@@ -241,7 +251,7 @@ def test_local_gpualloc_empty(): ...@@ -241,7 +251,7 @@ def test_local_gpualloc_empty():
assert isinstance(topo[0].op, GpuAllocEmpty) assert isinstance(topo[0].op, GpuAllocEmpty)
# This return not initilized data, so we can only check the shape # This return not initilized data, so we can only check the shape
assert f(3, 4).shape == (3, 4) assert f(3, 4).shape == (3, 4)
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
def test_rebroadcast(): def test_rebroadcast():
...@@ -259,7 +269,7 @@ def test_rebroadcast(): ...@@ -259,7 +269,7 @@ def test_rebroadcast():
assert isinstance(rebr.inputs[0].type, GpuArrayType) assert isinstance(rebr.inputs[0].type, GpuArrayType)
assert isinstance(rebr.outputs[0].type, GpuArrayType) assert isinstance(rebr.outputs[0].type, GpuArrayType)
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
class TestSpecifyShape(test_basic.TestSpecifyShape): class TestSpecifyShape(test_basic.TestSpecifyShape):
mode = mode_with_gpu mode = mode_with_gpu
...@@ -284,7 +294,7 @@ class test_gpu_ifelse(test_ifelse.test_ifelse): ...@@ -284,7 +294,7 @@ class test_gpu_ifelse(test_ifelse.test_ifelse):
theano.ifelse.ifelse(cond, x.mean(), x.sum()), theano.ifelse.ifelse(cond, x.mean(), x.sum()),
mode=mode_with_gpu) mode=mode_with_gpu)
assert f(np.float32([1, 2, 3]), 0) == 6 assert f(np.float32([1, 2, 3]), 0) == 6
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
x = tensor.vector() x = tensor.vector()
cond = tensor.scalar() cond = tensor.scalar()
...@@ -292,7 +302,7 @@ class test_gpu_ifelse(test_ifelse.test_ifelse): ...@@ -292,7 +302,7 @@ class test_gpu_ifelse(test_ifelse.test_ifelse):
theano.ifelse.ifelse(cond, x.mean(), x.sum()), theano.ifelse.ifelse(cond, x.mean(), x.sum()),
mode=mode_with_gpu) mode=mode_with_gpu)
assert f(np.float32([1, 2, 3]), 0) == 6 assert f(np.float32([1, 2, 3]), 0) == 6
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
def test_lifter_with_shared_var(self): def test_lifter_with_shared_var(self):
x = tensor.lscalar('x') x = tensor.lscalar('x')
...@@ -315,7 +325,7 @@ def test_print_op(): ...@@ -315,7 +325,7 @@ def test_print_op():
assert isinstance(topo[1].op, theano.printing.Print) assert isinstance(topo[1].op, theano.printing.Print)
assert isinstance(topo[2].op, GpuElemwise) assert isinstance(topo[2].op, GpuElemwise)
assert topo[3].op == host_from_gpu assert topo[3].op == host_from_gpu
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
f(np.random.random((5, 5)).astype('float32')) f(np.random.random((5, 5)).astype('float32'))
...@@ -336,7 +346,7 @@ def test_pdbbreakpoint_op(): ...@@ -336,7 +346,7 @@ def test_pdbbreakpoint_op():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert isinstance(topo[-2].op, GpuElemwise) assert isinstance(topo[-2].op, GpuElemwise)
assert topo[-1].op == host_from_gpu assert topo[-1].op == host_from_gpu
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
def test_local_gpu_elemwise_careduce(): def test_local_gpu_elemwise_careduce():
...@@ -346,7 +356,7 @@ def test_local_gpu_elemwise_careduce(): ...@@ -346,7 +356,7 @@ def test_local_gpu_elemwise_careduce():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 3 assert len(topo) == 3
assert topo[1].op.pre_scalar_op == theano.scalar.sqr assert topo[1].op.pre_scalar_op == theano.scalar.sqr
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
data = np.random.rand(3, 4).astype(theano.config.floatX) data = np.random.rand(3, 4).astype(theano.config.floatX)
utt.assert_allclose(f(data), (data * data).sum()) utt.assert_allclose(f(data), (data * data).sum())
...@@ -355,7 +365,7 @@ def test_local_gpu_elemwise_careduce(): ...@@ -355,7 +365,7 @@ def test_local_gpu_elemwise_careduce():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 3 assert len(topo) == 3
assert topo[1].op.pre_scalar_op == theano.scalar.sqr assert topo[1].op.pre_scalar_op == theano.scalar.sqr
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
utt.assert_allclose(f(data), (data * data).sum(axis=1)) utt.assert_allclose(f(data), (data * data).sum(axis=1))
...@@ -374,7 +384,7 @@ def test_local_lift_dot22scalar(): ...@@ -374,7 +384,7 @@ def test_local_lift_dot22scalar():
y_val = np.random.random((3, 4)).astype(theano.config.floatX) y_val = np.random.random((3, 4)).astype(theano.config.floatX)
a_val = 0.5 a_val = 0.5
utt.assert_allclose(f_cpu(x_val, y_val, a_val), f_gpu(x_val, y_val, a_val)) utt.assert_allclose(f_cpu(x_val, y_val, a_val), f_gpu(x_val, y_val, a_val))
assert check_stack_trace(f_gpu, ops_to_check='all') assert _check_stack_trace(f_gpu)
def test_local_gpu_subtensor(): def test_local_gpu_subtensor():
...@@ -384,7 +394,7 @@ def test_local_gpu_subtensor(): ...@@ -384,7 +394,7 @@ def test_local_gpu_subtensor():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert any([type(node.op) is tensor.Subtensor for node in topo]) assert any([type(node.op) is tensor.Subtensor for node in topo])
assert not any([isinstance(node.op, GpuSubtensor) for node in topo]) assert not any([isinstance(node.op, GpuSubtensor) for node in topo])
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
# Test graph input. # Test graph input.
t = tensor.fmatrix() t = tensor.fmatrix()
...@@ -392,7 +402,7 @@ def test_local_gpu_subtensor(): ...@@ -392,7 +402,7 @@ def test_local_gpu_subtensor():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert any([type(node.op) is tensor.Subtensor for node in topo]) assert any([type(node.op) is tensor.Subtensor for node in topo])
assert not any([isinstance(node.op, GpuSubtensor) for node in topo]) assert not any([isinstance(node.op, GpuSubtensor) for node in topo])
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
# Test multiple use of the input # Test multiple use of the input
# We want the subtensor to be on the GPU to prevent multiple transfer. # We want the subtensor to be on the GPU to prevent multiple transfer.
...@@ -401,7 +411,7 @@ def test_local_gpu_subtensor(): ...@@ -401,7 +411,7 @@ def test_local_gpu_subtensor():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert not any([type(node.op) is tensor.Subtensor for node in topo]) assert not any([type(node.op) is tensor.Subtensor for node in topo])
assert any([isinstance(node.op, GpuSubtensor) for node in topo]) assert any([isinstance(node.op, GpuSubtensor) for node in topo])
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
# Test multiple use of the input + input as output # Test multiple use of the input + input as output
# We want the subtensor to be on the GPU to prevent multiple transfer. # We want the subtensor to be on the GPU to prevent multiple transfer.
...@@ -410,7 +420,7 @@ def test_local_gpu_subtensor(): ...@@ -410,7 +420,7 @@ def test_local_gpu_subtensor():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert not any([type(node.op) is tensor.Subtensor for node in topo]) assert not any([type(node.op) is tensor.Subtensor for node in topo])
assert any([isinstance(node.op, GpuSubtensor) for node in topo]) assert any([isinstance(node.op, GpuSubtensor) for node in topo])
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
# Test shared forced on CPU end we do computation on the output of # Test shared forced on CPU end we do computation on the output of
# the subtensor. # the subtensor.
...@@ -423,7 +433,7 @@ def test_local_gpu_subtensor(): ...@@ -423,7 +433,7 @@ def test_local_gpu_subtensor():
# If it where just a little bit smarter, it could wrongly move it to the GPU. # If it where just a little bit smarter, it could wrongly move it to the GPU.
# If it where super smart, it would know it should not move it to the GPU. # If it where super smart, it would know it should not move it to the GPU.
assert any([isinstance(node.op, tensor.Elemwise) for node in topo]) assert any([isinstance(node.op, tensor.Elemwise) for node in topo])
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
def test_local_gpu_elemwise(): def test_local_gpu_elemwise():
...@@ -445,7 +455,7 @@ def test_local_gpu_elemwise(): ...@@ -445,7 +455,7 @@ def test_local_gpu_elemwise():
assert sum(isinstance(node.op, GpuElemwise) for node in topo) == 1 assert sum(isinstance(node.op, GpuElemwise) for node in topo) == 1
assert sum(type(node.op) == tensor.Elemwise for node in topo) == 0 assert sum(type(node.op) == tensor.Elemwise for node in topo) == 0
utt.assert_allclose(f(a_v, b_v, c_v), a_v + b_v + c_v) utt.assert_allclose(f(a_v, b_v, c_v), a_v + b_v + c_v)
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
# Now test with the composite already on the cpu before we move it # Now test with the composite already on the cpu before we move it
# to the gpu # to the gpu
...@@ -459,7 +469,7 @@ def test_local_gpu_elemwise(): ...@@ -459,7 +469,7 @@ def test_local_gpu_elemwise():
assert sum(isinstance(node.op, GpuElemwise) for node in topo) == 1 assert sum(isinstance(node.op, GpuElemwise) for node in topo) == 1
assert sum(type(node.op) == tensor.Elemwise for node in topo) == 0 assert sum(type(node.op) == tensor.Elemwise for node in topo) == 0
utt.assert_allclose(f(a_v, b_v, c_v), a_v + b_v + c_v) utt.assert_allclose(f(a_v, b_v, c_v), a_v + b_v + c_v)
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
return # Not yet implemeted return # Not yet implemeted
# Test multiple output # Test multiple output
...@@ -477,7 +487,7 @@ def test_local_gpu_elemwise(): ...@@ -477,7 +487,7 @@ def test_local_gpu_elemwise():
utt.assert_allclose(out[0], a_v) utt.assert_allclose(out[0], a_v)
utt.assert_allclose(out[1], c_v) utt.assert_allclose(out[1], c_v)
utt.assert_allclose(out[2], b_v) utt.assert_allclose(out[2], b_v)
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
# Test multiple output # Test multiple output
out_s = theano.scalar.Composite([a_s, b_s, c_s], [a_s + b_s, a_s * b_s]) out_s = theano.scalar.Composite([a_s, b_s, c_s], [a_s + b_s, a_s * b_s])
...@@ -489,7 +499,7 @@ def test_local_gpu_elemwise(): ...@@ -489,7 +499,7 @@ def test_local_gpu_elemwise():
out = f(a_v, b_v, c_v) out = f(a_v, b_v, c_v)
utt.assert_allclose(out[0], a_v + b_v) utt.assert_allclose(out[0], a_v + b_v)
utt.assert_allclose(out[1], a_v * c_v) utt.assert_allclose(out[1], a_v * c_v)
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
# Test non-contiguous input # Test non-contiguous input
c = gpuarray_shared_constructor(np.asarray(c_v, dtype='float32')) c = gpuarray_shared_constructor(np.asarray(c_v, dtype='float32'))
...@@ -498,7 +508,7 @@ def test_local_gpu_elemwise(): ...@@ -498,7 +508,7 @@ def test_local_gpu_elemwise():
out = f(a_v, b_v) out = f(a_v, b_v)
utt.assert_allclose(out[0], a_v[::2] + b_v[::2]) utt.assert_allclose(out[0], a_v[::2] + b_v[::2])
utt.assert_allclose(out[1], a_v[::2] * c_v[::2]) utt.assert_allclose(out[1], a_v[::2] * c_v[::2])
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
def test_many_arg_elemwise(): def test_many_arg_elemwise():
...@@ -575,7 +585,7 @@ def test_local_lift_abstractconv_gpu_shape(): ...@@ -575,7 +585,7 @@ def test_local_lift_abstractconv_gpu_shape():
b = tensor.ftensor4() b = tensor.ftensor4()
c = tensor.nnet.abstract_conv.AbstractConv2d_gradWeights()(a, b, s) c = tensor.nnet.abstract_conv.AbstractConv2d_gradWeights()(a, b, s)
f = theano.function([s, a, b], c, mode=mode_with_gpu) f = theano.function([s, a, b], c, mode=mode_with_gpu)
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
finally: finally:
theano.config.on_opt_error = prev theano.config.on_opt_error = prev
...@@ -606,7 +616,7 @@ def test_local_assert_no_cpu_op(): ...@@ -606,7 +616,7 @@ def test_local_assert_no_cpu_op():
try: try:
theano.config.assert_no_cpu_op = 'ignore' theano.config.assert_no_cpu_op = 'ignore'
f = theano.function([], out, mode=mode_local_assert) f = theano.function([], out, mode=mode_local_assert)
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
finally: finally:
theano.config.assert_no_cpu_op = old theano.config.assert_no_cpu_op = old
...@@ -618,7 +628,7 @@ def test_no_complex(): ...@@ -618,7 +628,7 @@ def test_no_complex():
stft_out = tensor.exp(width_var * freq_var) * signal_var stft_out = tensor.exp(width_var * freq_var) * signal_var
f = theano.function([width_var, freq_var, signal_var], stft_out, f = theano.function([width_var, freq_var, signal_var], stft_out,
mode=mode_with_gpu) mode=mode_with_gpu)
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
@utt.assertFailure_fast @utt.assertFailure_fast
...@@ -637,7 +647,7 @@ def test_local_lift_solve(): ...@@ -637,7 +647,7 @@ def test_local_lift_solve():
A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32") A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
b_val = np.random.uniform(-0.4, 0.4, (5, 3)).astype("float32") b_val = np.random.uniform(-0.4, 0.4, (5, 3)).astype("float32")
utt.assert_allclose(f_cpu(A_val, b_val), f_gpu(A_val, b_val)) utt.assert_allclose(f_cpu(A_val, b_val), f_gpu(A_val, b_val))
assert check_stack_trace(f_gpu, ops_to_check='all') assert _check_stack_trace(f_gpu)
def test_gpu_solve_not_inplace(): def test_gpu_solve_not_inplace():
...@@ -703,7 +713,7 @@ def test_local_gpua_advanced_incsubtensor(): ...@@ -703,7 +713,7 @@ def test_local_gpua_advanced_incsubtensor():
w = tensor.set_subtensor(w[tensor.eq(y, 1.0).nonzero()], 100) w = tensor.set_subtensor(w[tensor.eq(y, 1.0).nonzero()], 100)
w = tensor.set_subtensor(w[tensor.eq(y, -1.0).nonzero()], 0) w = tensor.set_subtensor(w[tensor.eq(y, -1.0).nonzero()], 0)
f = theano.function([target], w) f = theano.function([target], w)
assert check_stack_trace(f, ops_to_check='all') assert _check_stack_trace(f)
def test_batched_dot_lifter(): def test_batched_dot_lifter():
......
...@@ -43,6 +43,7 @@ from theano.tensor import DimShuffle, Subtensor ...@@ -43,6 +43,7 @@ from theano.tensor import DimShuffle, Subtensor
from theano.tensor.opt import register_uncanonicalize from theano.tensor.opt import register_uncanonicalize
from theano import scalar as scal from theano import scalar as scal
from theano.gof.opt import copy_stack_trace, with_stack_trace
_logger = logging.getLogger('theano.tensor.opt') _logger = logging.getLogger('theano.tensor.opt')
...@@ -57,10 +58,13 @@ def local_max_and_argmax(node): ...@@ -57,10 +58,13 @@ def local_max_and_argmax(node):
axis = node.op.get_params(node) axis = node.op.get_params(node)
if len(node.outputs[1].clients) == 0: if len(node.outputs[1].clients) == 0:
new = CAReduce(scal.maximum, axis)(node.inputs[0]) new = CAReduce(scal.maximum, axis)(node.inputs[0])
copy_stack_trace(node.outputs[0], new)
return [new, None] return [new, None]
if len(node.outputs[0].clients) == 0: if len(node.outputs[0].clients) == 0:
return [None, T.Argmax(axis)(node.inputs[0])] new = T.Argmax(axis)(node.inputs[0])
copy_stack_trace(node.outputs[0], new)
return [None, new]
@register_uncanonicalize @register_uncanonicalize
...@@ -84,8 +88,8 @@ def local_max_to_min(node): ...@@ -84,8 +88,8 @@ def local_max_to_min(node):
max.owner.op.scalar_op == scal.maximum): max.owner.op.scalar_op == scal.maximum):
neg = max.owner.inputs[0] neg = max.owner.inputs[0]
if neg.owner and neg.owner.op == T.neg: if neg.owner and neg.owner.op == T.neg:
return [CAReduce(scal.minimum, new = CAReduce(scal.minimum, max.owner.op.axis)(neg.owner.inputs[0])
max.owner.op.axis)(neg.owner.inputs[0])] return [with_stack_trace(node.outputs[0], new)]
return False return False
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论