Commit cfbe7c3c authored by abergeron

Merge pull request #2925 from carriepl/scan_gpuarray_memory_usage

Scan gpuarray memory usage
......@@ -318,7 +318,7 @@ def scan(fn,
:param strict:
If true, all the shared variables used in ``fn`` must be provided as a
part of ``non_sequences`` or ``sequences``.
part of ``non_sequences`` or ``sequences``.
:rtype: tuple
:return: tuple of the form (outputs, updates); ``outputs`` is either a
......@@ -962,21 +962,23 @@ def scan(fn,
shared_inner_outputs)
if condition is not None:
inner_outs.append(condition)
# Cuda is imported here, instead of being imported at the top of the file,
# because that would force on the user some dependencies that we might not
# want. Currently we are working on removing the dependencies on sandbox
# code completely.
from theano.sandbox import cuda
if cuda.cuda_available:
# Cuda and Gpuarray are imported here, instead of being imported at the top of
# the file, because that would force on the user some dependencies that we
# might not want. Currently we are working on removing the
# dependencies on sandbox code completely.
from theano.sandbox import cuda, gpuarray
if cuda.cuda_available or gpuarray.pygpu_activated:
# very often we end up in this situation when we want to
# replace w with w_copy, where w is CudaNdarray
# replace w with w_copy, where w is a GPU variable
# and w_copy is TensorType. This is caused because shared
# variables are put on GPU right aways >:| ,
new_givens = OrderedDict()
for w, w_copy in givens.iteritems():
if (isinstance(w.type, cuda.CudaNdarrayType)
and isinstance(w_copy.type, tensor.TensorType)):
if ((isinstance(w.type, cuda.CudaNdarrayType) or
isinstance(w.type, gpuarray.GpuArrayType)) and
isinstance(w_copy.type, tensor.TensorType)):
for o in inner_outs:
new_givens = traverse(o, w, w_copy, new_givens)
else:
......
......@@ -127,9 +127,13 @@ def traverse(out, x, x_copy, d, visited=None):
if out in visited:
return d
visited.add(out)
import theano.sandbox.cuda as cuda
from theano.sandbox import cuda, gpuarray
if out == x:
d[out] = cuda.gpu_from_host(x_copy)
if isinstance(x.type, cuda.CudaNdarrayType):
d[out] = cuda.gpu_from_host(x_copy)
else:
assert isinstance(x.type, gpuarray.GpuArrayType)
d[out] = gpuarray.gpu_from_host(x_copy)
return d
elif out.owner is None:
return d
......@@ -138,6 +142,11 @@ def traverse(out, x, x_copy, d, visited=None):
out.owner.inputs == [x]):
d[out] = tensor.as_tensor_variable(x_copy)
return d
elif (gpuarray.pygpu_activated and
out.owner.op == gpuarray.host_from_gpu and
out.owner.inputs == [x]):
d[out] = tensor.as_tensor_variable(x_copy)
return d
else:
for inp in out.owner.inputs:
d = traverse(inp, x, x_copy, d, visited)
......
Markdown formatting is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment