提交 ff9e2b38 作者: Tim Cooijmans 提交者: Reyhane Askari

more stack trace copying

上级 39269c13
...@@ -89,13 +89,13 @@ def as_gpuarray_variable(x, context_name): ...@@ -89,13 +89,13 @@ def as_gpuarray_variable(x, context_name):
if x.context.ptr != ctx.ptr: if x.context.ptr != ctx.ptr:
x = x.transfer(ctx) x = x.transfer(ctx)
x = with_stack_trace(x, gpuarray.asarray(x, context=ctx)) x = gpuarray.asarray(x, context=ctx)
bcast = [(s == 1) for s in x.shape] bcast = [(s == 1) for s in x.shape]
return with_stack_trace(x, GpuArrayConstant(GpuArrayType(dtype=x.dtype, return GpuArrayConstant(GpuArrayType(dtype=x.dtype,
broadcastable=bcast, broadcastable=bcast,
context_name=context_name), context_name=context_name),
x)) x)
def infer_context_name(*vars): def infer_context_name(*vars):
......
...@@ -7,6 +7,7 @@ from theano import Apply, Op ...@@ -7,6 +7,7 @@ from theano import Apply, Op
from theano.compile import optdb from theano.compile import optdb
from theano.gof import LocalOptGroup, ParamsType from theano.gof import LocalOptGroup, ParamsType
from theano.scalar import bool as bool_t from theano.scalar import bool as bool_t
from theano.gof.opt import inherit_stack_trace
from theano.tensor.basic import as_tensor_variable from theano.tensor.basic import as_tensor_variable
from theano.tensor.opt import in2out from theano.tensor.opt import in2out
...@@ -1830,17 +1831,20 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM): ...@@ -1830,17 +1831,20 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
@inplace_allocempty(GpuGemv, 0) @inplace_allocempty(GpuGemv, 0)
def local_inplace_gpuagemv(node, inputs): def local_inplace_gpuagemv(node, inputs):
return [gpugemv_inplace(*inputs)] with inherit_stack_trace(node.outputs):
return [gpugemv_inplace(*inputs)]
@inplace_allocempty(GpuGemm, 0) @inplace_allocempty(GpuGemm, 0)
def local_inplace_gpuagemm(node, inputs): def local_inplace_gpuagemm(node, inputs):
return [gpugemm_inplace(*inputs)] with inherit_stack_trace(node.outputs):
return [gpugemm_inplace(*inputs)]
@inplace_allocempty(GpuGer, 0) @inplace_allocempty(GpuGer, 0)
def local_inplace_gpuager(node, inputs): def local_inplace_gpuager(node, inputs):
return [gpuger_inplace(*inputs)] with inherit_stack_trace(node.outputs):
return [gpuger_inplace(*inputs)]
@inplace_allocempty(GpuGemmBatch, 0) @inplace_allocempty(GpuGemmBatch, 0)
......
...@@ -18,6 +18,7 @@ from theano.gradient import DisconnectedType, grad_not_implemented ...@@ -18,6 +18,7 @@ from theano.gradient import DisconnectedType, grad_not_implemented
from theano.gof import Optimizer, local_optimizer, COp, ParamsType, EnumList from theano.gof import Optimizer, local_optimizer, COp, ParamsType, EnumList
from theano.gof.cmodule import GCC_compiler from theano.gof.cmodule import GCC_compiler
from theano.gof.type import CDataType, Generic from theano.gof.type import CDataType, Generic
from theano.gof.opt import inherit_stack_trace
from theano.compile import optdb from theano.compile import optdb
from theano.compile.ops import shape_i, shape_i_op from theano.compile.ops import shape_i, shape_i_op
from theano.tensor.nnet import LogSoftmax, SoftmaxGrad from theano.tensor.nnet import LogSoftmax, SoftmaxGrad
...@@ -3132,12 +3133,13 @@ def local_abstractconv_cudnn(node): ...@@ -3132,12 +3133,13 @@ def local_abstractconv_cudnn(node):
ctx = infer_context_name(*node.inputs) ctx = infer_context_name(*node.inputs)
if not isinstance(node.inputs[0].type, GpuArrayType): if not isinstance(node.inputs[0].type, GpuArrayType):
return return
if node.op.unshared: with inherit_stack_trace(node.outputs):
return None if node.op.unshared:
if isinstance(node.op, AbstractConv2d): return None
return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs) if isinstance(node.op, AbstractConv2d):
elif isinstance(node.op, AbstractConv3d): return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs, node.outputs) elif isinstance(node.op, AbstractConv3d):
return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights, AbstractConv2d_gradInputs]) @local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights, AbstractConv2d_gradInputs])
...@@ -3356,12 +3358,13 @@ def local_abstractconv_gw_cudnn(node): ...@@ -3356,12 +3358,13 @@ def local_abstractconv_gw_cudnn(node):
ctx = infer_context_name(*node.inputs) ctx = infer_context_name(*node.inputs)
if not isinstance(node.inputs[0].type, GpuArrayType): if not isinstance(node.inputs[0].type, GpuArrayType):
return return
if node.op.unshared: with inherit_stack_trace(node.outputs):
return None if node.op.unshared:
if isinstance(node.op, AbstractConv2d_gradWeights): return None
return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs) if isinstance(node.op, AbstractConv2d_gradWeights):
elif isinstance(node.op, AbstractConv3d_gradWeights): return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs, node.outputs) elif isinstance(node.op, AbstractConv3d_gradWeights):
return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
@local_optimizer([AbstractConv2d_gradInputs, AbstractConv3d_gradInputs]) @local_optimizer([AbstractConv2d_gradInputs, AbstractConv3d_gradInputs])
...@@ -3369,28 +3372,31 @@ def local_abstractconv_gi_cudnn(node): ...@@ -3369,28 +3372,31 @@ def local_abstractconv_gi_cudnn(node):
ctx = infer_context_name(*node.inputs) ctx = infer_context_name(*node.inputs)
if not isinstance(node.inputs[0].type, GpuArrayType): if not isinstance(node.inputs[0].type, GpuArrayType):
return return
if node.op.unshared: with inherit_stack_trace(node.outputs):
return None if node.op.unshared:
if isinstance(node.op, AbstractConv2d_gradInputs): return None
return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs) if isinstance(node.op, AbstractConv2d_gradInputs):
elif isinstance(node.op, AbstractConv3d_gradInputs): return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs, node.outputs) elif isinstance(node.op, AbstractConv3d_gradInputs):
return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
@inplace_allocempty(GpuDnnConv, 2) @inplace_allocempty(GpuDnnConv, 2)
def local_dnn_conv_inplace(node, inputs): def local_dnn_conv_inplace(node, inputs):
return [GpuDnnConv(algo=node.op.algo, inplace=True, num_groups=node.op.num_groups)(*inputs)] with inherit_stack_trace(node.outputs):
return [GpuDnnConv(algo=node.op.algo, inplace=True, num_groups=node.op.num_groups)(*inputs)]
@inplace_allocempty(GpuDnnConvGradW, 2) @inplace_allocempty(GpuDnnConvGradW, 2)
def local_dnn_convgw_inplace(node, inputs): def local_dnn_convgw_inplace(node, inputs):
return [GpuDnnConvGradW(algo=node.op.algo, inplace=True, num_groups=node.op.num_groups)(*inputs)] with inherit_stack_trace(node.outputs):
return [GpuDnnConvGradW(algo=node.op.algo, inplace=True, num_groups=node.op.num_groups)(*inputs)]
@inplace_allocempty(GpuDnnConvGradI, 2) @inplace_allocempty(GpuDnnConvGradI, 2)
def local_dnn_convgi_inplace(node, inputs): def local_dnn_convgi_inplace(node, inputs):
return [GpuDnnConvGradI(algo=node.op.algo, inplace=True, num_groups=node.op.num_groups)(*inputs)] with inherit_stack_trace(node.outputs):
return [GpuDnnConvGradI(algo=node.op.algo, inplace=True, num_groups=node.op.num_groups)(*inputs)]
optdb.register('local_dnna_conv_inplace', optdb.register('local_dnna_conv_inplace',
tensor.opt.in2out(local_dnn_conv_inplace, tensor.opt.in2out(local_dnn_conv_inplace,
...@@ -3403,40 +3409,43 @@ optdb.register('local_dnna_conv_inplace', ...@@ -3403,40 +3409,43 @@ optdb.register('local_dnna_conv_inplace',
@register_opt('cudnn') @register_opt('cudnn')
@alpha_merge(GpuDnnConv, alpha_in=4, beta_in=5) @alpha_merge(GpuDnnConv, alpha_in=4, beta_in=5)
def local_dnn_conv_alpha_merge(node, *inputs): def local_dnn_conv_alpha_merge(node, *inputs):
return [GpuDnnConv(algo=node.op.algo, num_groups=node.op.num_groups)(*inputs)] with inherit_stack_trace(node.outputs):
return [GpuDnnConv(algo=node.op.algo, num_groups=node.op.num_groups)(*inputs)]
@register_opt('cudnn') @register_opt('cudnn')
@alpha_merge(GpuDnnConvGradW, alpha_in=4, beta_in=5) @alpha_merge(GpuDnnConvGradW, alpha_in=4, beta_in=5)
def local_dnn_convw_alpha_merge(node, *inputs): def local_dnn_convw_alpha_merge(node, *inputs):
return [GpuDnnConvGradW(algo=node.op.algo, num_groups=node.op.num_groups)(*inputs)] with inherit_stack_trace(node.outputs):
return [GpuDnnConvGradW(algo=node.op.algo, num_groups=node.op.num_groups)(*inputs)]
@register_opt('cudnn') @register_opt('cudnn')
@alpha_merge(GpuDnnConvGradI, alpha_in=4, beta_in=5) @alpha_merge(GpuDnnConvGradI, alpha_in=4, beta_in=5)
def local_dnn_convi_alpha_merge(node, *inputs): def local_dnn_convi_alpha_merge(node, *inputs):
return [GpuDnnConvGradI(algo=node.op.algo, num_groups=node.op.num_groups)(*inputs)] with inherit_stack_trace(node.outputs):
return [GpuDnnConvGradI(algo=node.op.algo, num_groups=node.op.num_groups)(*inputs)]
@register_opt('cudnn') @register_opt('cudnn')
@output_merge(GpuDnnConv, alpha_in=4, beta_in=5, out_in=2) @output_merge(GpuDnnConv, alpha_in=4, beta_in=5, out_in=2)
def local_dnn_conv_output_merge(node, *inputs): def local_dnn_conv_output_merge(node, *inputs):
inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:] with inherit_stack_trace(node.outputs):
return [GpuDnnConv(algo=node.op.algo)(*inputs)] inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:]
return [GpuDnnConv(algo=node.op.algo)(*inputs)]
@register_opt('cudnn') @register_opt('cudnn')
@output_merge(GpuDnnConvGradW, alpha_in=4, beta_in=5, out_in=2) @output_merge(GpuDnnConvGradW, alpha_in=4, beta_in=5, out_in=2)
def local_dnn_convw_output_merge(node, *inputs): def local_dnn_convw_output_merge(node, *inputs):
inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:] with inherit_stack_trace(node.outputs):
return [GpuDnnConvGradW(algo=node.op.algo)(*inputs)] inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:]
return [GpuDnnConvGradW(algo=node.op.algo)(*inputs)]
@register_opt('cudnn') @register_opt('cudnn')
@output_merge(GpuDnnConvGradI, alpha_in=4, beta_in=5, out_in=2) @output_merge(GpuDnnConvGradI, alpha_in=4, beta_in=5, out_in=2)
def local_dnn_convi_output_merge(node, *inputs): def local_dnn_convi_output_merge(node, *inputs):
inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:] with inherit_stack_trace(node.outputs):
return [GpuDnnConvGradI(algo=node.op.algo)(*inputs)] inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:]
return [GpuDnnConvGradI(algo=node.op.algo)(*inputs)]
def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs): def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs):
......
...@@ -15,7 +15,8 @@ from theano.compile.ops import shape_i ...@@ -15,7 +15,8 @@ from theano.compile.ops import shape_i
from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer, from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer,
LocalGroupDB, LocalGroupDB,
SequenceDB, Optimizer, DB, toolbox, graph) SequenceDB, Optimizer, DB, toolbox, graph)
from theano.gof.opt import LocalMetaOptimizer, copy_stack_trace, with_stack_trace from theano.gof.opt import (LocalMetaOptimizer, copy_stack_trace,
with_stack_trace, inherit_stack_trace)
from theano.ifelse import IfElse from theano.ifelse import IfElse
from theano.misc.ordered_set import OrderedSet from theano.misc.ordered_set import OrderedSet
...@@ -421,8 +422,6 @@ class GraphToGPU(Optimizer): ...@@ -421,8 +422,6 @@ class GraphToGPU(Optimizer):
if isinstance(new_ops, theano.Op): if isinstance(new_ops, theano.Op):
outputs = new_ops(*[mapping[i] for i in node.inputs], return_list=True) outputs = new_ops(*[mapping[i] for i in node.inputs], return_list=True)
for old_output, new_output in zip(node.outputs, outputs):
copy_stack_trace(old_output, new_output)
elif not new_ops: elif not new_ops:
newnode = node.clone_with_new_inputs([mapping.get(i) for i in node.inputs]) newnode = node.clone_with_new_inputs([mapping.get(i) for i in node.inputs])
outputs = newnode.outputs outputs = newnode.outputs
...@@ -431,6 +430,10 @@ class GraphToGPU(Optimizer): ...@@ -431,6 +430,10 @@ class GraphToGPU(Optimizer):
elif isinstance(new_ops, theano.Variable): elif isinstance(new_ops, theano.Variable):
outputs = [new_ops] outputs = [new_ops]
for old_output, new_output in zip(node.outputs, outputs):
copy_stack_trace(old_output, new_output)
new_output.tag.tracefrom = old_output
if new_ops: if new_ops:
node_created[lopt] += len(graph.ops([mapping[i] for i in node.inputs], outputs)) node_created[lopt] += len(graph.ops([mapping[i] for i in node.inputs], outputs))
if any([getattr(old_o, 'dtype', None) != getattr(new_o, 'dtype', None) if any([getattr(old_o, 'dtype', None) != getattr(new_o, 'dtype', None)
...@@ -662,9 +665,8 @@ def local_gpualloc_memset_0(node): ...@@ -662,9 +665,8 @@ def local_gpualloc_memset_0(node):
inp.data.size == 1 and inp.data.size == 1 and
(np.asarray(inp.data) == 0).all()): (np.asarray(inp.data) == 0).all()):
new_op = GpuAlloc(node.op.context_name, memset_0=True) new_op = GpuAlloc(node.op.context_name, memset_0=True)
new_output = new_op(*node.inputs) with inherit_stack_trace(node.outputs):
copy_stack_trace(node.outputs[0], new_output) return new_op(*node.inputs, return_list=True)
return [new_output]
# Don't register by default. # Don't register by default.
...@@ -673,10 +675,9 @@ def local_gpua_alloc_empty_to_zeros(node): ...@@ -673,10 +675,9 @@ def local_gpua_alloc_empty_to_zeros(node):
if isinstance(node.op, GpuAllocEmpty): if isinstance(node.op, GpuAllocEmpty):
context_name = infer_context_name(*node.inputs) context_name = infer_context_name(*node.inputs)
z = np.asarray(0, dtype=node.outputs[0].dtype) z = np.asarray(0, dtype=node.outputs[0].dtype)
return [GpuAlloc(context_name)(as_gpuarray_variable(z, context_name), with inherit_stack_trace(node.outputs):
*node.inputs)] return [GpuAlloc(context_name)(as_gpuarray_variable(z, context_name),
*node.inputs)]
optdb.register('local_gpua_alloc_empty_to_zeros', optdb.register('local_gpua_alloc_empty_to_zeros',
theano.tensor.opt.in2out(local_gpua_alloc_empty_to_zeros), theano.tensor.opt.in2out(local_gpua_alloc_empty_to_zeros),
# After move to gpu and merge2, before inplace. # After move to gpu and merge2, before inplace.
...@@ -1220,7 +1221,8 @@ def local_gpua_careduce(op, context_name, inputs, outputs): ...@@ -1220,7 +1221,8 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
op.scalar_op, axis=op.axis, op.scalar_op, axis=op.axis,
dtype=odtype, dtype=odtype,
acc_dtype=adtype) acc_dtype=adtype)
gvar = with_stack_trace(outputs, greduce(x)) with inherit_stack_trace(outputs):
gvar = greduce(x)
# We need to have the make node called, otherwise the mask can # We need to have the make node called, otherwise the mask can
# be None # be None
if (op2 is GpuCAReduceCPY or if (op2 is GpuCAReduceCPY or
...@@ -1260,30 +1262,27 @@ def local_gpua_careduce(op, context_name, inputs, outputs): ...@@ -1260,30 +1262,27 @@ def local_gpua_careduce(op, context_name, inputs, outputs):
dtype=getattr(op, 'dtype', outputs[0].dtype), dtype=getattr(op, 'dtype', outputs[0].dtype),
acc_dtype=getattr(op, 'acc_dtype', None)) acc_dtype=getattr(op, 'acc_dtype', None))
reshaped_x = with_stack_trace( with inherit_stack_trace(outputs):
outputs, x.reshape(tensor.stack(new_in_shp))) reshaped_x = x.reshape(tensor.stack(new_in_shp))
gpu_reshaped_x = with_stack_trace( gpu_reshaped_x = as_gpuarray_variable(reshaped_x, context_name)
outputs, as_gpuarray_variable(reshaped_x, context_name)) # We need to have the make node called, otherwise the mask can
gvar = with_stack_trace(outputs, greduce(gpu_reshaped_x)) # be None
# We need to have the make node called, otherwise the mask can gvar = greduce(gpu_reshaped_x)
# be None reshaped_gpu_inputs = [gpu_reshaped_x]
reshaped_gpu_inputs = [gpu_reshaped_x] if greduce.supports_c_code(reshaped_gpu_inputs):
if greduce.supports_c_code(reshaped_gpu_inputs): reduce_reshaped_x = greduce(gpu_reshaped_x)
reduce_reshaped_x = with_stack_trace(
outputs, greduce(gpu_reshaped_x)) if reduce_reshaped_x.ndim != outputs[0].ndim:
out_shp = []
if reduce_reshaped_x.ndim != outputs[0].ndim: for i in range(x.ndim):
out_shp = [] if i not in op.axis:
for i in range(x.ndim): out_shp.append(shape_i(x, i))
if i not in op.axis: unreshaped_reduce = GpuReshape(len(out_shp))(
out_shp.append(shape_i(x, i))
unreshaped_reduce = with_stack_trace(
outputs, GpuReshape(len(out_shp))(
reduce_reshaped_x, reduce_reshaped_x,
tensor.stack(out_shp))) tensor.stack(out_shp))
else: else:
unreshaped_reduce = reduce_reshaped_x unreshaped_reduce = reduce_reshaped_x
return [unreshaped_reduce] return [unreshaped_reduce]
@register_opt('fast_compile') @register_opt('fast_compile')
...@@ -1356,25 +1355,29 @@ def local_gpua_gemmbatch(op, context_name, inputs, outputs): ...@@ -1356,25 +1355,29 @@ def local_gpua_gemmbatch(op, context_name, inputs, outputs):
@register_opt() @register_opt()
@alpha_merge(GpuGemm, alpha_in=1, beta_in=4) @alpha_merge(GpuGemm, alpha_in=1, beta_in=4)
def local_gpua_gemm_alpha_merge(node, *inputs): def local_gpua_gemm_alpha_merge(node, *inputs):
return [gpugemm_no_inplace(*inputs)] with inherit_stack_trace(node.outputs):
return [gpugemm_no_inplace(*inputs)]
@register_opt() @register_opt()
@output_merge(GpuGemm, alpha_in=1, beta_in=4, out_in=0) @output_merge(GpuGemm, alpha_in=1, beta_in=4, out_in=0)
def local_gpua_gemm_output_merge(node, *inputs): def local_gpua_gemm_output_merge(node, *inputs):
return [gpugemm_no_inplace(*inputs)] with inherit_stack_trace(node.outputs):
return [gpugemm_no_inplace(*inputs)]
@register_opt() @register_opt()
@alpha_merge(GpuGemmBatch, alpha_in=1, beta_in=4) @alpha_merge(GpuGemmBatch, alpha_in=1, beta_in=4)
def local_gpua_gemmbatch_alpha_merge(node, *inputs): def local_gpua_gemmbatch_alpha_merge(node, *inputs):
return [gpugemmbatch_no_inplace(*inputs)] with inherit_stack_trace(node.outputs):
return [gpugemmbatch_no_inplace(*inputs)]
@register_opt() @register_opt()
@output_merge(GpuGemmBatch, alpha_in=1, beta_in=4, out_in=0) @output_merge(GpuGemmBatch, alpha_in=1, beta_in=4, out_in=0)
def local_gpua_gemmbatch_output_merge(node, *inputs): def local_gpua_gemmbatch_output_merge(node, *inputs):
return [gpugemmbatch_no_inplace(*inputs)] with inherit_stack_trace(node.outputs):
return [gpugemmbatch_no_inplace(*inputs)]
@register_opt('fast_compile') @register_opt('fast_compile')
...@@ -2403,8 +2406,8 @@ def local_gpu_elemwise_careduce(node): ...@@ -2403,8 +2406,8 @@ def local_gpu_elemwise_careduce(node):
props = node.op._props_dict() props = node.op._props_dict()
props["pre_scalar_op"] = scalar.basic.sqr props["pre_scalar_op"] = scalar.basic.sqr
out = GpuCAReduceCuda(**props)(inp) out = GpuCAReduceCuda(**props)(inp)
return with_stack_trace( with inherit_stack_trace(node.outputs):
node.outputs, out) return out
@local_optimizer(None) @local_optimizer(None)
......
...@@ -33,10 +33,20 @@ def _check_stack_trace(thing): ...@@ -33,10 +33,20 @@ def _check_stack_trace(thing):
if not isinstance(op, theano.gof.Op): if not isinstance(op, theano.gof.Op):
op = op.op # assume node op = op.op # assume node
return not isinstance(op, (theano.compile.ops.Shape_i, return not isinstance(op, (theano.compile.ops.Shape_i,
theano.compile.ops.Shape,
theano.compile.ops.DeepCopyOp,
theano.tensor.opt.MakeVector,
theano.tensor.subtensor.Subtensor,
theano.tensor.elemwise.Elemwise,
theano.ifelse.IfElse, theano.ifelse.IfElse,
GpuFromHost, HostFromGpu, GpuFromHost, HostFromGpu,
GpuElemwise)) GpuCAReduceCuda,
return check_stack_trace(thing, ops_to_check=_ops_to_check) GpuElemwise,
theano.printing.Print,
PdbBreakpoint,
))
return check_stack_trace(thing, ops_to_check=_ops_to_check,
bug_print="ignore")
def test_local_assert(): def test_local_assert():
x = theano.tensor.fmatrix() x = theano.tensor.fmatrix()
......
...@@ -146,6 +146,7 @@ from theano.gof import (utils, Op, view_roots, ...@@ -146,6 +146,7 @@ from theano.gof import (utils, Op, view_roots,
EquilibriumOptimizer, Apply, EquilibriumOptimizer, Apply,
ReplacementDidntRemovedError) ReplacementDidntRemovedError)
from theano.gof.params_type import ParamsType from theano.gof.params_type import ParamsType
from theano.gof.opt import inherit_stack_trace
from theano.printing import pprint, FunctionPrinter, debugprint from theano.printing import pprint, FunctionPrinter, debugprint
from theano.compile.mode import optdb from theano.compile.mode import optdb
import theano.scalar import theano.scalar
...@@ -1625,19 +1626,20 @@ def local_dot_to_dot22(node): ...@@ -1625,19 +1626,20 @@ def local_dot_to_dot22(node):
return return
if y.type.dtype in ['float16', 'float32', 'float64', 'complex64', 'complex128']: if y.type.dtype in ['float16', 'float32', 'float64', 'complex64', 'complex128']:
if x.ndim == 2 and y.ndim == 2: with inherit_stack_trace(node.outputs):
# print "local_dot_to_dot22: MM" if x.ndim == 2 and y.ndim == 2:
return [_dot22(*node.inputs)] # print "local_dot_to_dot22: MM"
if x.ndim == 2 and y.ndim == 1: return [_dot22(*node.inputs)]
# print "local_dot_to_dot22: MV" if x.ndim == 2 and y.ndim == 1:
return [_dot22(x, y.dimshuffle(0, 'x')).dimshuffle(0)] # print "local_dot_to_dot22: MV"
if x.ndim == 1 and y.ndim == 2: return [_dot22(x, y.dimshuffle(0, 'x')).dimshuffle(0)]
# print "local_dot_to_dot22: VM" if x.ndim == 1 and y.ndim == 2:
return [_dot22(x.dimshuffle('x', 0), y).dimshuffle(1)] # print "local_dot_to_dot22: VM"
if x.ndim == 1 and y.ndim == 1: return [_dot22(x.dimshuffle('x', 0), y).dimshuffle(1)]
# print "local_dot_to_dot22: VV" if x.ndim == 1 and y.ndim == 1:
return [_dot22(x.dimshuffle('x', 0), # print "local_dot_to_dot22: VV"
y.dimshuffle(0, 'x')).dimshuffle()] return [_dot22(x.dimshuffle('x', 0),
y.dimshuffle(0, 'x')).dimshuffle()]
_logger.info('Not optimizing dot with inputs %s %s %s %s', _logger.info('Not optimizing dot with inputs %s %s %s %s',
x, y, x.type, y.type) x, y, x.type, y.type)
...@@ -1646,19 +1648,22 @@ def local_dot_to_dot22(node): ...@@ -1646,19 +1648,22 @@ def local_dot_to_dot22(node):
@local_optimizer([gemm_no_inplace], inplace=True) @local_optimizer([gemm_no_inplace], inplace=True)
def local_inplace_gemm(node): def local_inplace_gemm(node):
if node.op == gemm_no_inplace: if node.op == gemm_no_inplace:
return [gemm_inplace(*node.inputs)] with inherit_stack_trace(node.outputs):
return [gemm_inplace(*node.inputs)]
@local_optimizer([gemv_no_inplace], inplace=True) @local_optimizer([gemv_no_inplace], inplace=True)
def local_inplace_gemv(node): def local_inplace_gemv(node):
if node.op == gemv_no_inplace: if node.op == gemv_no_inplace:
return [gemv_inplace(*node.inputs)] with inherit_stack_trace(node.outputs):
return [gemv_inplace(*node.inputs)]
@local_optimizer([ger], inplace=True) @local_optimizer([ger], inplace=True)
def local_inplace_ger(node): def local_inplace_ger(node):
if node.op == ger: if node.op == ger:
return [ger_destructive(*node.inputs)] with inherit_stack_trace(node.outputs):
return [ger_destructive(*node.inputs)]
@local_optimizer([gemm_no_inplace]) @local_optimizer([gemm_no_inplace])
...@@ -1666,12 +1671,13 @@ def local_gemm_to_gemv(node): ...@@ -1666,12 +1671,13 @@ def local_gemm_to_gemv(node):
"""GEMM acting on row or column matrices -> GEMV.""" """GEMM acting on row or column matrices -> GEMV."""
if node.op == gemm_no_inplace: if node.op == gemm_no_inplace:
z, a, x, y, b = node.inputs z, a, x, y, b = node.inputs
if z.broadcastable == x.broadcastable == (True, False): with inherit_stack_trace(node.outputs):
r = gemv_no_inplace(z.dimshuffle(1), a, y.T, x.dimshuffle(1), b) if z.broadcastable == x.broadcastable == (True, False):
return [r.dimshuffle('x', 0)] r = gemv_no_inplace(z.dimshuffle(1), a, y.T, x.dimshuffle(1), b)
if z.broadcastable == y.broadcastable == (False, True): return [r.dimshuffle('x', 0)]
r = gemv_no_inplace(z.dimshuffle(0), a, x, y.dimshuffle(0), b) if z.broadcastable == y.broadcastable == (False, True):
return [r.dimshuffle(0, 'x')] r = gemv_no_inplace(z.dimshuffle(0), a, x, y.dimshuffle(0), b)
return [r.dimshuffle(0, 'x')]
@local_optimizer([gemm_no_inplace]) @local_optimizer([gemm_no_inplace])
...@@ -1680,26 +1686,27 @@ def local_gemm_to_ger(node): ...@@ -1680,26 +1686,27 @@ def local_gemm_to_ger(node):
if node.op == gemm_no_inplace: if node.op == gemm_no_inplace:
z, a, x, y, b = node.inputs z, a, x, y, b = node.inputs
if x.broadcastable[1] and y.broadcastable[0]: if x.broadcastable[1] and y.broadcastable[0]:
# x and y are both vectors so this might qualifies for a GER with inherit_stack_trace(node.outputs):
xv = x.dimshuffle(0) # x and y are both vectors so this might qualifies for a GER
yv = y.dimshuffle(1) xv = x.dimshuffle(0)
try: yv = y.dimshuffle(1)
bval = T.get_scalar_constant_value(b) try:
except T.NotScalarConstantError: bval = T.get_scalar_constant_value(b)
# b isn't a constant, GEMM is doing useful pre-scaling except T.NotScalarConstantError:
return # b isn't a constant, GEMM is doing useful pre-scaling
return
if bval == 1: # best case a natural GER
rval = ger(z, a, xv, yv) if bval == 1: # best case a natural GER
return [rval] rval = ger(z, a, xv, yv)
elif bval == 0: # GER on zeros_like should be faster than GEMM return [rval]
zeros = T.zeros([x.shape[0], y.shape[1]], x.dtype) elif bval == 0: # GER on zeros_like should be faster than GEMM
rval = ger(zeros, a, xv, yv) zeros = T.zeros([x.shape[0], y.shape[1]], x.dtype)
return [rval] rval = ger(zeros, a, xv, yv)
else: return [rval]
# if bval is another constant, then z is being usefully else:
# pre-scaled and GER isn't really the right tool for the job. # if bval is another constant, then z is being usefully
return # pre-scaled and GER isn't really the right tool for the job.
return
# TODO: delete this optimization when we have the proper dot->gemm->ger pipeline # TODO: delete this optimization when we have the proper dot->gemm->ger pipeline
...@@ -1708,37 +1715,38 @@ def local_gemm_to_ger(node): ...@@ -1708,37 +1715,38 @@ def local_gemm_to_ger(node):
def local_dot22_to_ger_or_gemv(node): def local_dot22_to_ger_or_gemv(node):
"""dot22 computing an outer-product -> GER.""" """dot22 computing an outer-product -> GER."""
if node.op == _dot22: if node.op == _dot22:
x, y = node.inputs with inherit_stack_trace(node.outputs):
xb = x.broadcastable x, y = node.inputs
yb = y.broadcastable xb = x.broadcastable
one = T.as_tensor_variable(np.asarray(1, dtype=x.dtype)) yb = y.broadcastable
zero = T.as_tensor_variable(np.asarray(0, dtype=x.dtype)) one = T.as_tensor_variable(np.asarray(1, dtype=x.dtype))
if xb[1] and yb[0]: zero = T.as_tensor_variable(np.asarray(0, dtype=x.dtype))
# x and y are both vectors so this might qualifies for a GER if xb[1] and yb[0]:
xv = x.dimshuffle(0) # x and y are both vectors so this might qualifies for a GER
yv = y.dimshuffle(1) xv = x.dimshuffle(0)
zeros = T.zeros([x.shape[0], y.shape[1]], dtype=x.dtype) yv = y.dimshuffle(1)
rval = ger(zeros, one, xv, yv) zeros = T.zeros([x.shape[0], y.shape[1]], dtype=x.dtype)
return [rval] rval = ger(zeros, one, xv, yv)
if xb[0] and yb[1]: return [rval]
# x and y are both vectors so this qualifies for a sdot / ddot if xb[0] and yb[1]:
# TODO: Theano doesn't have a sdot, but gemv is better than _dot22 # x and y are both vectors so this qualifies for a sdot / ddot
xv = x.dimshuffle(1) # TODO: Theano doesn't have a sdot, but gemv is better than _dot22
zeros = T.AllocEmpty(x.dtype)(1) xv = x.dimshuffle(1)
rval = gemv_no_inplace(zeros, one, y.T, xv, zero) zeros = T.AllocEmpty(x.dtype)(1)
return [rval.dimshuffle('x', 0)] rval = gemv_no_inplace(zeros, one, y.T, xv, zero)
if xb[0] and not yb[0] and not yb[1]: return [rval.dimshuffle('x', 0)]
# x is vector, y is matrix so try gemv if xb[0] and not yb[0] and not yb[1]:
xv = x.dimshuffle(1) # x is vector, y is matrix so try gemv
zeros = T.AllocEmpty(x.dtype)(y.shape[1]) xv = x.dimshuffle(1)
rval = gemv_no_inplace(zeros, one, y.T, xv, zero) zeros = T.AllocEmpty(x.dtype)(y.shape[1])
return [rval.dimshuffle('x', 0)] rval = gemv_no_inplace(zeros, one, y.T, xv, zero)
if not xb[0] and not xb[1] and yb[1]: return [rval.dimshuffle('x', 0)]
# x is matrix, y is vector, try gemv if not xb[0] and not xb[1] and yb[1]:
yv = y.dimshuffle(0) # x is matrix, y is vector, try gemv
zeros = T.AllocEmpty(x.dtype)(x.shape[0]) yv = y.dimshuffle(0)
rval = gemv_no_inplace(zeros, one, x, yv, zero) zeros = T.AllocEmpty(x.dtype)(x.shape[0])
return [rval.dimshuffle(0, 'x')] rval = gemv_no_inplace(zeros, one, x, yv, zero)
return [rval.dimshuffle(0, 'x')]
################################# #################################
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论