提交 ef968c65 authored 作者: Frederic's avatar Frederic

In DebugMode, when checking preallocated output memory, transfer less data to the GPU.

上级 bf797056
...@@ -1026,13 +1026,11 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1026,13 +1026,11 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
for r in node.outputs: for r in node.outputs:
if isinstance(r.type, (TensorType, CudaNdarrayType)): if isinstance(r.type, (TensorType, CudaNdarrayType)):
# Build a C-contiguous buffer # Build a C-contiguous buffer
new_buf = numpy.zeros( new_buf = r.type.value_zeros(r_vals[r].shape)
shape=r_vals[r].shape, # CudaNdarray objects don't have a flags field
dtype=r_vals[r].dtype, # assert new_buf.flags["C_CONTIGUOUS"]
order='C') new_buf += numpy.asarray(def_val).astype(r.type.dtype)
new_buf += def_val
if isinstance(r.type, CudaNdarrayType):
new_buf = CudaNdarray(new_buf)
c_cont_outputs[r] = new_buf c_cont_outputs[r] = new_buf
if len(c_cont_outputs): if len(c_cont_outputs):
...@@ -1096,21 +1094,12 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1096,21 +1094,12 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
shapes.append(slice(None, size, None)) shapes.append(slice(None, size, None))
r_buf = init_strided[r] r_buf = init_strided[r]
if r_buf.ndim > 0: if r_buf.ndim > 0:
r_buf = r_buf[tuple(strides)][tuple(shapes)] r_buf = r_buf[tuple(strides)][tuple(shapes)]
assert r_buf.shape == r_vals[r].shape assert r_buf.shape == r_vals[r].shape
if isinstance(r.type, CudaNdarrayType): r_buf[...] = numpy.asarray(def_val).astype(r_buf.dtype)
# It seems stupid, but we need to allocate a
# new ndarray and copy it into the GPU one.
# TODO: When it is possible to simply do
# r_buff[...] = def_val, do so.
new_rbuf = numpy.zeros(r_vals[r].shape,
dtype=r.dtype)
new_rbuf += def_val
r_buf[...] = CudaNdarray(new_rbuf)
else:
r_buf[...] = def_val
strided[r] = r_buf strided[r] = r_buf
...@@ -1133,12 +1122,8 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1133,12 +1122,8 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
out_shape = [max((s + sd), 0) out_shape = [max((s + sd), 0)
for s, sd in zip(r_vals[r].shape, for s, sd in zip(r_vals[r].shape,
r_shape_diff)] r_shape_diff)]
new_buf = numpy.zeros( new_buf = r.type.value_zeros(r_vals[r].shape)
shape=out_shape, new_buf += numpy.asarray(def_val).astype(r.type.dtype)
dtype=r.dtype)
new_buf += def_val
if isinstance(r.type, CudaNdarrayType):
new_buf = CudaNdarray(new_buf)
wrong_size[r] = new_buf wrong_size[r] = new_buf
yield (name, wrong_size) yield (name, wrong_size)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论