提交 9bc05a38 authored 作者: Tim Cooijmans's avatar Tim Cooijmans 提交者: Reyhane Askari

define and use with_stack_trace

上级 592e7c75
......@@ -2948,6 +2948,34 @@ def copy_stack_trace(from_var, to_var):
to_var.tag.trace = getattr(to_var.tag, 'trace', []) + tr
def with_stack_trace(from_var, to_var):
    """
    Copies the stack trace from one or more tensor variables to
    one or more tensor variables and returns the destination variables.

    This is a convenience wrapper around `copy_stack_trace` that returns
    `to_var`, so it can be used inline around an expression.

    Parameters
    ----------
    from_var
        Tensor variable or list of tensor variables to copy stack traces from.
    to_var
        Tensor variable or list of tensor variables to copy stack traces to.

    Returns
    -------
    tensor variable or list of tensor variables
        `to_var`, augmented with the stack traces from `from_var`.

    Notes
    -----
    The stacktrace is assumed to be of the form of a list of lists
    of tuples. Each tuple contains the filename, line number, function name
    and so on. Each list of tuples contains the tuples belonging to a
    particular variable.
    """
    copy_stack_trace(from_var, to_var)
    return to_var
def check_stack_trace(f_or_fgraph, ops_to_check='last', bug_print='raise'):
"""
This function checks if the outputs of specific ops of a compiled graph
......
......@@ -15,6 +15,7 @@ from theano.tensor.basic import (
from theano.gof import HideC, COp, ParamsType
from theano.gof.utils import MethodNotDefined
from theano.gof.opt import with_stack_trace
from collections import deque
......@@ -75,11 +76,11 @@ def as_gpuarray_variable(x, context_name):
# If we couldn't deal with transfers, then maybe it's a tensor
if isinstance(x.type, tensor.TensorType):
return GpuFromHost(context_name)(x)
return with_stack_trace(x, GpuFromHost(context_name)(x))
# Try _as_GpuArrayVariable if possible
if hasattr(x, '_as_GpuArrayVariable'):
return x._as_GpuArrayVariable(context_name)
return with_stack_trace(x, x._as_GpuArrayVariable(context_name))
# If it didn't work try for a constant
ctx = get_context(context_name)
......@@ -88,13 +89,13 @@ def as_gpuarray_variable(x, context_name):
if x.context.ptr != ctx.ptr:
x = x.transfer(ctx)
x = gpuarray.asarray(x, context=ctx)
x = with_stack_trace(x, gpuarray.asarray(x, context=ctx))
bcast = [(s == 1) for s in x.shape]
return GpuArrayConstant(GpuArrayType(dtype=x.dtype,
broadcastable=bcast,
context_name=context_name),
x)
return with_stack_trace(x, GpuArrayConstant(GpuArrayType(dtype=x.dtype,
broadcastable=bcast,
context_name=context_name),
x))
def infer_context_name(*vars):
......
......@@ -15,7 +15,7 @@ from theano.compile.ops import shape_i
from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer,
LocalGroupDB,
SequenceDB, Optimizer, DB, toolbox, graph)
from theano.gof.opt import LocalMetaOptimizer, copy_stack_trace
from theano.gof.opt import LocalMetaOptimizer, copy_stack_trace, with_stack_trace
from theano.ifelse import IfElse
from theano.misc.ordered_set import OrderedSet
......@@ -421,6 +421,8 @@ class GraphToGPU(Optimizer):
if isinstance(new_ops, theano.Op):
outputs = new_ops(*[mapping[i] for i in node.inputs], return_list=True)
for old_output, new_output in zip(node.outputs, outputs):
copy_stack_trace(old_output, new_output)
elif not new_ops:
newnode = node.clone_with_new_inputs([mapping.get(i) for i in node.inputs])
outputs = newnode.outputs
......@@ -461,7 +463,7 @@ class GraphToGPU(Optimizer):
new_o.owner.inputs[0].type == o.type):
new_o = new_o.owner.inputs[0]
else:
new_o = safe_to_cpu(new_o)
new_o = with_stack_trace(o, safe_to_cpu(new_o))
new_nodes.append(new_o)
fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes),
reason=self.__class__.__name__)
......@@ -692,8 +694,6 @@ def local_gpu_contiguous_gpu_contiguous(node):
if isinstance(node.op, GpuContiguous):
inp = node.inputs[0]
if inp.owner and isinstance(inp.owner.op, GpuContiguous):
if not getattr(inp.tag, 'trace', None):
copy_stack_trace(node.outputs[0], inp)
return [inp]
......@@ -1220,7 +1220,7 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
op.scalar_op, axis=op.axis,
dtype=odtype,
acc_dtype=adtype)
gvar = greduce(x)
gvar = with_stack_trace(outputs, greduce(x))
# We need to have the make node called, otherwise the mask can
# be None
if (op2 is GpuCAReduceCPY or
......@@ -1260,22 +1260,27 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
dtype=getattr(op, 'dtype', outputs[0].dtype),
acc_dtype=getattr(op, 'acc_dtype', None))
reshaped_x = x.reshape(tensor.stack(new_in_shp))
gpu_reshaped_x = as_gpuarray_variable(reshaped_x, context_name)
gvar = greduce(gpu_reshaped_x)
reshaped_x = with_stack_trace(
outputs, x.reshape(tensor.stack(new_in_shp)))
gpu_reshaped_x = with_stack_trace(
outputs, as_gpuarray_variable(reshaped_x, context_name))
gvar = with_stack_trace(outputs, greduce(gpu_reshaped_x))
# We need to have the make node called, otherwise the mask can
# be None
reshaped_gpu_inputs = [gpu_reshaped_x]
if greduce.supports_c_code(reshaped_gpu_inputs):
reduce_reshaped_x = greduce(gpu_reshaped_x)
reduce_reshaped_x = with_stack_trace(
outputs, greduce(gpu_reshaped_x))
if reduce_reshaped_x.ndim != outputs[0].ndim:
out_shp = []
for i in range(x.ndim):
if i not in op.axis:
out_shp.append(shape_i(x, i))
unreshaped_reduce = GpuReshape(len(out_shp))(reduce_reshaped_x,
tensor.stack(out_shp))
unreshaped_reduce = with_stack_trace(
outputs, GpuReshape(len(out_shp))(
reduce_reshaped_x,
tensor.stack(out_shp)))
else:
unreshaped_reduce = reduce_reshaped_x
return [unreshaped_reduce]
......@@ -2398,7 +2403,8 @@ def local_gpu_elemwise_careduce(node):
props = node.op._props_dict()
props["pre_scalar_op"] = scalar.basic.sqr
out = GpuCAReduceCuda(**props)(inp)
return [out]
return with_stack_trace(
node.outputs, out)
@local_optimizer(None)
......
......@@ -14,7 +14,7 @@ import theano.gpuarray
from .. import basic_ops
from ..type import GpuArrayType, gpuarray_shared_constructor, get_context
from ..basic_ops import (
GpuAlloc, GpuAllocEmpty, GpuReshape, GpuFromHost, host_from_gpu)
GpuAlloc, GpuAllocEmpty, GpuReshape, GpuFromHost, HostFromGpu, host_from_gpu)
from ..blas import GpuGemm
from ..elemwise import (
GpuCAReduceCuda, GpuCAReduceCPY, GpuElemwise, Elemwise, max_inputs_to_GpuElemwise)
......@@ -28,6 +28,16 @@ from theano.tensor.nnet import abstract_conv
from theano.gpuarray import dnn, blas
def _check_stack_trace(thing):
    """Run ``check_stack_trace`` on `thing`, skipping ops whose stack
    traces are deliberately not tracked by the GPU optimizations."""
    # Op classes exempt from the stack-trace check.
    exempt_op_types = (theano.compile.ops.Shape_i,
                       theano.ifelse.IfElse,
                       GpuFromHost, HostFromGpu,
                       GpuElemwise)

    def _ops_to_check(op):
        # `op` may be an apply node rather than an Op; unwrap it first.
        if not isinstance(op, theano.gof.Op):
            op = op.op
        return not isinstance(op, exempt_op_types)

    return check_stack_trace(thing, ops_to_check=_ops_to_check)
def test_local_assert():
x = theano.tensor.fmatrix()
a = theano.tensor.opt.assert_op(x, theano.tensor.eq(x, 0).any())
......@@ -71,8 +81,8 @@ def test_local_gpu_contiguous_gpu_contiguous():
if isinstance(node.op, basic_ops.GpuContiguous)])
assert 1 == len([node for node in f2.maker.fgraph.toposort()
if isinstance(node.op, basic_ops.GpuContiguous)])
assert check_stack_trace(f1, ops_to_check='all')
assert check_stack_trace(f2, ops_to_check='all')
assert _check_stack_trace(f1)
assert _check_stack_trace(f2)
def test_local_gpu_contiguous():
......@@ -82,7 +92,7 @@ def test_local_gpu_contiguous():
assert 1 == len([node for node in f.maker.fgraph.toposort()
if isinstance(node.op, basic_ops.GpuContiguous)])
f([[2.]])
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
def test_flatten():
......@@ -100,7 +110,7 @@ def test_flatten():
assert res.shape == val.flatten().shape
assert GpuReshape in [type(node.op)
for node in f.maker.fgraph.toposort()]
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
f = theano.function([m], m.flatten(ndim=2),
mode=mode_with_gpu.excluding("local_useless_reshape"))
......@@ -110,7 +120,7 @@ def test_flatten():
assert res.shape == val.shape
assert GpuReshape in [type(node.op)
for node in f.maker.fgraph.toposort()]
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
m = theano.tensor.tensor3()
f = theano.function([m], m.flatten(ndim=2), mode=mode_with_gpu)
......@@ -120,7 +130,7 @@ def test_flatten():
assert res.shape == val.reshape(10, -1).shape
assert GpuReshape in [type(node.op)
for node in f.maker.fgraph.toposort()]
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
def test_reduce():
......@@ -133,7 +143,7 @@ def test_reduce():
f = theano.function([m], getattr(m, method)(axis=0,
**param),
mode=mode_with_gpu)
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
val = np.random.rand(10, 11).astype("float32")
res = f(val)
utt.assert_allclose(res, getattr(val, method)(axis=0))
......@@ -165,7 +175,7 @@ def test_local_gpualloc_memset_0():
assert len(topo) == 1
assert isinstance(topo[0].op, theano.tensor.Alloc)
assert (np.asarray(f(6)) == 0).all()
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
# Test with 0 from CPU op.
# Should be transfered as it is used by another op.
......@@ -175,7 +185,7 @@ def test_local_gpualloc_memset_0():
assert len(topo) == 3
assert isinstance(topo[0].op, GpuAlloc)
assert (np.asarray(f(6)) == 0).all()
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
# Test with 0
a = GpuAlloc(test_ctx_name)(z, i)
......@@ -184,7 +194,7 @@ def test_local_gpualloc_memset_0():
assert len(topo) == 1
assert isinstance(topo[0].op, GpuAlloc) and topo[0].op.memset_0
assert (np.asarray(f(6)) == 0).all()
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
# Test with 1
a = GpuAlloc(test_ctx_name)(o, i)
......@@ -194,7 +204,7 @@ def test_local_gpualloc_memset_0():
assert isinstance(topo[0].op, GpuAlloc)
assert not topo[0].op.memset_0
assert (np.asarray(f(6)) == 1).all()
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
# Test with 1, 1
a = GpuAlloc(test_ctx_name)(ones, i)
......@@ -204,7 +214,7 @@ def test_local_gpualloc_memset_0():
assert isinstance(topo[0].op, GpuAlloc)
assert not topo[0].op.memset_0
assert (np.asarray(f(2)) == 1).all()
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
def test_local_gpualloc_empty():
......@@ -220,7 +230,7 @@ def test_local_gpualloc_empty():
assert isinstance(topo[0].op, theano.tensor.AllocEmpty)
# This return not initilized data, so we can only check the shape
assert f(3).shape == (3,)
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
# Test with vector
# Should be moved
......@@ -231,7 +241,7 @@ def test_local_gpualloc_empty():
assert isinstance(topo[0].op, GpuAllocEmpty)
# This return not initilized data, so we can only check the shape
assert f(3).shape == (3,)
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
# Test with matrix
a = tensor.AllocEmpty('float32')(i, ii)
......@@ -241,7 +251,7 @@ def test_local_gpualloc_empty():
assert isinstance(topo[0].op, GpuAllocEmpty)
# This return not initilized data, so we can only check the shape
assert f(3, 4).shape == (3, 4)
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
def test_rebroadcast():
......@@ -259,7 +269,7 @@ def test_rebroadcast():
assert isinstance(rebr.inputs[0].type, GpuArrayType)
assert isinstance(rebr.outputs[0].type, GpuArrayType)
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
class TestSpecifyShape(test_basic.TestSpecifyShape):
mode = mode_with_gpu
......@@ -284,7 +294,7 @@ class test_gpu_ifelse(test_ifelse.test_ifelse):
theano.ifelse.ifelse(cond, x.mean(), x.sum()),
mode=mode_with_gpu)
assert f(np.float32([1, 2, 3]), 0) == 6
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
x = tensor.vector()
cond = tensor.scalar()
......@@ -292,7 +302,7 @@ class test_gpu_ifelse(test_ifelse.test_ifelse):
theano.ifelse.ifelse(cond, x.mean(), x.sum()),
mode=mode_with_gpu)
assert f(np.float32([1, 2, 3]), 0) == 6
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
def test_lifter_with_shared_var(self):
x = tensor.lscalar('x')
......@@ -315,7 +325,7 @@ def test_print_op():
assert isinstance(topo[1].op, theano.printing.Print)
assert isinstance(topo[2].op, GpuElemwise)
assert topo[3].op == host_from_gpu
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
f(np.random.random((5, 5)).astype('float32'))
......@@ -336,7 +346,7 @@ def test_pdbbreakpoint_op():
topo = f.maker.fgraph.toposort()
assert isinstance(topo[-2].op, GpuElemwise)
assert topo[-1].op == host_from_gpu
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
def test_local_gpu_elemwise_careduce():
......@@ -346,7 +356,7 @@ def test_local_gpu_elemwise_careduce():
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert topo[1].op.pre_scalar_op == theano.scalar.sqr
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
data = np.random.rand(3, 4).astype(theano.config.floatX)
utt.assert_allclose(f(data), (data * data).sum())
......@@ -355,7 +365,7 @@ def test_local_gpu_elemwise_careduce():
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert topo[1].op.pre_scalar_op == theano.scalar.sqr
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
utt.assert_allclose(f(data), (data * data).sum(axis=1))
......@@ -374,7 +384,7 @@ def test_local_lift_dot22scalar():
y_val = np.random.random((3, 4)).astype(theano.config.floatX)
a_val = 0.5
utt.assert_allclose(f_cpu(x_val, y_val, a_val), f_gpu(x_val, y_val, a_val))
assert check_stack_trace(f_gpu, ops_to_check='all')
assert _check_stack_trace(f_gpu)
def test_local_gpu_subtensor():
......@@ -384,7 +394,7 @@ def test_local_gpu_subtensor():
topo = f.maker.fgraph.toposort()
assert any([type(node.op) is tensor.Subtensor for node in topo])
assert not any([isinstance(node.op, GpuSubtensor) for node in topo])
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
# Test graph input.
t = tensor.fmatrix()
......@@ -392,7 +402,7 @@ def test_local_gpu_subtensor():
topo = f.maker.fgraph.toposort()
assert any([type(node.op) is tensor.Subtensor for node in topo])
assert not any([isinstance(node.op, GpuSubtensor) for node in topo])
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
# Test multiple use of the input
# We want the subtensor to be on the GPU to prevent multiple transfer.
......@@ -401,7 +411,7 @@ def test_local_gpu_subtensor():
topo = f.maker.fgraph.toposort()
assert not any([type(node.op) is tensor.Subtensor for node in topo])
assert any([isinstance(node.op, GpuSubtensor) for node in topo])
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
# Test multiple use of the input + input as output
# We want the subtensor to be on the GPU to prevent multiple transfer.
......@@ -410,7 +420,7 @@ def test_local_gpu_subtensor():
topo = f.maker.fgraph.toposort()
assert not any([type(node.op) is tensor.Subtensor for node in topo])
assert any([isinstance(node.op, GpuSubtensor) for node in topo])
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
# Test shared forced on CPU end we do computation on the output of
# the subtensor.
......@@ -423,7 +433,7 @@ def test_local_gpu_subtensor():
# If it where just a little bit smarter, it could wrongly move it to the GPU.
# If it where super smart, it would know it should not move it to the GPU.
assert any([isinstance(node.op, tensor.Elemwise) for node in topo])
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
def test_local_gpu_elemwise():
......@@ -445,7 +455,7 @@ def test_local_gpu_elemwise():
assert sum(isinstance(node.op, GpuElemwise) for node in topo) == 1
assert sum(type(node.op) == tensor.Elemwise for node in topo) == 0
utt.assert_allclose(f(a_v, b_v, c_v), a_v + b_v + c_v)
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
# Now test with the composite already on the cpu before we move it
# to the gpu
......@@ -459,7 +469,7 @@ def test_local_gpu_elemwise():
assert sum(isinstance(node.op, GpuElemwise) for node in topo) == 1
assert sum(type(node.op) == tensor.Elemwise for node in topo) == 0
utt.assert_allclose(f(a_v, b_v, c_v), a_v + b_v + c_v)
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
return # Not yet implemeted
# Test multiple output
......@@ -477,7 +487,7 @@ def test_local_gpu_elemwise():
utt.assert_allclose(out[0], a_v)
utt.assert_allclose(out[1], c_v)
utt.assert_allclose(out[2], b_v)
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
# Test multiple output
out_s = theano.scalar.Composite([a_s, b_s, c_s], [a_s + b_s, a_s * b_s])
......@@ -489,7 +499,7 @@ def test_local_gpu_elemwise():
out = f(a_v, b_v, c_v)
utt.assert_allclose(out[0], a_v + b_v)
utt.assert_allclose(out[1], a_v * c_v)
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
# Test non-contiguous input
c = gpuarray_shared_constructor(np.asarray(c_v, dtype='float32'))
......@@ -498,7 +508,7 @@ def test_local_gpu_elemwise():
out = f(a_v, b_v)
utt.assert_allclose(out[0], a_v[::2] + b_v[::2])
utt.assert_allclose(out[1], a_v[::2] * c_v[::2])
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
def test_many_arg_elemwise():
......@@ -575,7 +585,7 @@ def test_local_lift_abstractconv_gpu_shape():
b = tensor.ftensor4()
c = tensor.nnet.abstract_conv.AbstractConv2d_gradWeights()(a, b, s)
f = theano.function([s, a, b], c, mode=mode_with_gpu)
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
finally:
theano.config.on_opt_error = prev
......@@ -606,7 +616,7 @@ def test_local_assert_no_cpu_op():
try:
theano.config.assert_no_cpu_op = 'ignore'
f = theano.function([], out, mode=mode_local_assert)
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
finally:
theano.config.assert_no_cpu_op = old
......@@ -618,7 +628,7 @@ def test_no_complex():
stft_out = tensor.exp(width_var * freq_var) * signal_var
f = theano.function([width_var, freq_var, signal_var], stft_out,
mode=mode_with_gpu)
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
@utt.assertFailure_fast
......@@ -637,7 +647,7 @@ def test_local_lift_solve():
A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
b_val = np.random.uniform(-0.4, 0.4, (5, 3)).astype("float32")
utt.assert_allclose(f_cpu(A_val, b_val), f_gpu(A_val, b_val))
assert check_stack_trace(f_gpu, ops_to_check='all')
assert _check_stack_trace(f_gpu)
def test_gpu_solve_not_inplace():
......@@ -703,7 +713,7 @@ def test_local_gpua_advanced_incsubtensor():
w = tensor.set_subtensor(w[tensor.eq(y, 1.0).nonzero()], 100)
w = tensor.set_subtensor(w[tensor.eq(y, -1.0).nonzero()], 0)
f = theano.function([target], w)
assert check_stack_trace(f, ops_to_check='all')
assert _check_stack_trace(f)
def test_batched_dot_lifter():
......
......@@ -43,6 +43,7 @@ from theano.tensor import DimShuffle, Subtensor
from theano.tensor.opt import register_uncanonicalize
from theano import scalar as scal
from theano.gof.opt import copy_stack_trace, with_stack_trace
_logger = logging.getLogger('theano.tensor.opt')
......@@ -57,10 +58,13 @@ def local_max_and_argmax(node):
axis = node.op.get_params(node)
if len(node.outputs[1].clients) == 0:
new = CAReduce(scal.maximum, axis)(node.inputs[0])
copy_stack_trace(node.outputs[0], new)
return [new, None]
if len(node.outputs[0].clients) == 0:
return [None, T.Argmax(axis)(node.inputs[0])]
new = T.Argmax(axis)(node.inputs[0])
copy_stack_trace(node.outputs[0], new)
return [None, new]
@register_uncanonicalize
......@@ -84,8 +88,8 @@ def local_max_to_min(node):
max.owner.op.scalar_op == scal.maximum):
neg = max.owner.inputs[0]
if neg.owner and neg.owner.op == T.neg:
return [CAReduce(scal.minimum,
max.owner.op.axis)(neg.owner.inputs[0])]
new = CAReduce(scal.minimum, max.owner.op.axis)(neg.owner.inputs[0])
return [with_stack_trace(node.outputs[0], new)]
return False
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论