提交 96cbce45 authored 作者: Tim Cooijmans's avatar Tim Cooijmans 提交者: Reyhane Askari

more stack trace copying

上级 1b101ffc
...@@ -1323,33 +1323,34 @@ def local_gpua_gemm(op, context_name, inputs, outputs): ...@@ -1323,33 +1323,34 @@ def local_gpua_gemm(op, context_name, inputs, outputs):
def local_gpua_gemmbatch(op, context_name, inputs, outputs):
    """Lift a batched-gemm graph onto the GPU as a GpuGemmBatch apply.

    Parameters
    ----------
    op : Op
        The host op being replaced (unused directly here).
    context_name : str or None
        GPU context the new allocation should live in.
    inputs : list of Variable
        The two matmul operands ``a`` and ``b`` (2D or 3D).
    outputs : list of Variable
        Outputs of the original apply; used for dtype and for
        propagating the user-visible stack trace.

    Returns
    -------
    Variable or None
        The GPU result variable, or None (no replacement) for
        unsupported dtypes.
    """
    # GpuGemmBatch only handles real float dtypes; bail out otherwise.
    if inputs[0].dtype not in ['float16', 'float32', 'float64']:
        return
    # Copy the original outputs' stack traces onto every variable
    # created inside this block, so errors still point at user code.
    with inherit_stack_trace(outputs):
        a, b = inputs
        # Since GpuGemmBatch only supports 3D inputs and output,
        # we need to add broadcastable dims to the inputs, and drop
        # them from outputs
        output_dims = [0, 1, 2]
        if a.ndim == 2:
            a = GpuDimShuffle(a.broadcastable, (0, 'x', 1))(a)
            del output_dims[1]
        if b.ndim == 2:
            b = GpuDimShuffle(b.broadcastable, (0, 1, 'x'))(b)
            del output_dims[-1]
        # In case of mismatched dtypes, we also have to upcast
        out_dtype = outputs[0].dtype
        if a.dtype != out_dtype or b.dtype != out_dtype:
            gpu_cast_op = GpuElemwise(Cast(Scalar(out_dtype)))
            if a.dtype != out_dtype:
                a = gpu_cast_op(a)
            if b.dtype != out_dtype:
                b = gpu_cast_op(b)

        # Pre-allocate the (batch, rows, cols) output buffer, then run
        # out = 1.0 * a @ b + 0.0 * c via the non-inplace batched gemm.
        c = GpuAllocEmpty(out_dtype, context_name)(
            a.shape[0], a.shape[1], b.shape[2])
        out = gpugemmbatch_no_inplace(c, np.asarray(1.0, dtype=out_dtype),
                                      a, b, np.asarray(0.0, dtype=out_dtype))
        # Drop the broadcastable dims we inserted above so the result
        # matches the original output's ndim.
        if len(output_dims) != 3:
            out = GpuDimShuffle(out.broadcastable, output_dims)(out)
        return out
@register_opt() @register_opt()
...@@ -2599,8 +2600,9 @@ def local_gpu_solve(op, context_name, inputs, outputs): ...@@ -2599,8 +2600,9 @@ def local_gpu_solve(op, context_name, inputs, outputs):
@local_optimizer([GpuCusolverSolve], inplace=True)
def local_inplace_gpu_solve(node):
    """Swap a non-inplace GpuCusolverSolve for its inplace variant.

    Rebuilds the op with ``inplace=True`` (keeping ``A_structure`` and
    ``trans``) and re-applies it to the same inputs.  Returns the new
    output list, or None implicitly when the node does not match.
    """
    if isinstance(node.op, GpuCusolverSolve) and not node.op.inplace:
        # Propagate the original outputs' stack traces to the
        # replacement so debugging info survives the optimization.
        with inherit_stack_trace(node.outputs):
            return [GpuCusolverSolve(A_structure=node.op.A_structure,
                                     trans=node.op.trans,
                                     inplace=True)(*node.inputs)]
# Cholesky decomposition # Cholesky decomposition
...@@ -2638,7 +2640,8 @@ register_opt2([slinalg.Solve], 'fast_compile', name='matrix_ops_db2')(matrix_ops ...@@ -2638,7 +2640,8 @@ register_opt2([slinalg.Solve], 'fast_compile', name='matrix_ops_db2')(matrix_ops
@local_optimizer([GpuCholesky], inplace=True)
def local_inplace_gpu_cholesky(node):
    """Swap a non-inplace GpuCholesky for its inplace clone.

    Uses ``clone_inplace()`` so every other op property is preserved.
    Returns the replacement output list, or None implicitly when the
    node does not match.
    """
    if isinstance(node.op, GpuCholesky) and not node.op.inplace:
        # Keep the user-visible stack trace on the replacement output.
        with inherit_stack_trace(node.outputs):
            return [node.op.clone_inplace()(*node.inputs)]
def local_gpu_magma_cholesky(op, context_name, inputs, outputs): def local_gpu_magma_cholesky(op, context_name, inputs, outputs):
...@@ -2721,7 +2724,8 @@ def local_gpu_magma_matrix_inverse(op, context_name, inputs, outputs): ...@@ -2721,7 +2724,8 @@ def local_gpu_magma_matrix_inverse(op, context_name, inputs, outputs):
@local_optimizer([GpuMagmaMatrixInverse])
def local_inplace_gpu_magma_matrix_inverse(node):
    """Swap a non-inplace GpuMagmaMatrixInverse for its inplace clone.

    Mirrors ``local_inplace_gpu_cholesky``: clone the op with
    ``inplace=True`` and re-apply it to the same inputs.  Returns the
    replacement output list, or None implicitly when the node does not
    match.
    """
    if isinstance(node.op, GpuMagmaMatrixInverse) and not node.op.inplace:
        # Keep the user-visible stack trace on the replacement output.
        with inherit_stack_trace(node.outputs):
            return [node.op.clone_inplace()(*node.inputs)]
# Eigen decomposition of a symmetric matrix # Eigen decomposition of a symmetric matrix
......
...@@ -41,6 +41,7 @@ def _check_stack_trace(thing): ...@@ -41,6 +41,7 @@ def _check_stack_trace(thing):
theano.ifelse.IfElse, theano.ifelse.IfElse,
GpuFromHost, HostFromGpu, GpuFromHost, HostFromGpu,
GpuCAReduceCuda, GpuCAReduceCuda,
basic_ops.GpuContiguous,
GpuElemwise, GpuElemwise,
theano.printing.Print, theano.printing.Print,
PdbBreakpoint, PdbBreakpoint,
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论