提交 cfbe7c3c 作者: abergeron

Merge pull request #2925 from carriepl/scan_gpuarray_memory_usage

Scan gpuarray memory usage
...@@ -318,7 +318,7 @@ def scan(fn, ...@@ -318,7 +318,7 @@ def scan(fn,
:param strict: :param strict:
If true, all the shared variables used in ``fn`` must be provided as a If true, all the shared variables used in ``fn`` must be provided as a
part of ``non_sequences`` or ``sequences``. part of ``non_sequences`` or ``sequences``.
:rtype: tuple :rtype: tuple
:return: tuple of the form (outputs, updates); ``outputs`` is either a :return: tuple of the form (outputs, updates); ``outputs`` is either a
...@@ -962,21 +962,23 @@ def scan(fn, ...@@ -962,21 +962,23 @@ def scan(fn,
shared_inner_outputs) shared_inner_outputs)
if condition is not None: if condition is not None:
inner_outs.append(condition) inner_outs.append(condition)
# Cuda is imported here, instead of being imported on top of the file # Cuda and Gpuarray are imported here, instead of being imported on top of
# because forces on the user some dependencies that we might do not want # the file because that would force on the user some dependencies that we
# to. Currently we are working on removing the dependencies on sandbox # might do not want to. Currently we are working on removing the
# code completeley. # dependencies on sandbox code completeley.
from theano.sandbox import cuda from theano.sandbox import cuda, gpuarray
if cuda.cuda_available: if cuda.cuda_available or gpuarray.pygpu_activated:
# very often we end up in this situation when we want to # very often we end up in this situation when we want to
# replace w with w_copy, where w is CudaNdarray # replace w with w_copy, where w is a GPU variable
# and w_copy is TensorType. This is caused because shared # and w_copy is TensorType. This is caused because shared
# variables are put on GPU right aways >:| , # variables are put on GPU right aways >:| ,
new_givens = OrderedDict() new_givens = OrderedDict()
for w, w_copy in givens.iteritems(): for w, w_copy in givens.iteritems():
if (isinstance(w.type, cuda.CudaNdarrayType) if ((isinstance(w.type, cuda.CudaNdarrayType) or
and isinstance(w_copy.type, tensor.TensorType)): isinstance(w.type, gpuarray.GpuArrayType)) and
isinstance(w_copy.type, tensor.TensorType)):
for o in inner_outs: for o in inner_outs:
new_givens = traverse(o, w, w_copy, new_givens) new_givens = traverse(o, w, w_copy, new_givens)
else: else:
......
...@@ -127,9 +127,13 @@ def traverse(out, x, x_copy, d, visited=None): ...@@ -127,9 +127,13 @@ def traverse(out, x, x_copy, d, visited=None):
if out in visited: if out in visited:
return d return d
visited.add(out) visited.add(out)
import theano.sandbox.cuda as cuda from theano.sandbox import cuda, gpuarray
if out == x: if out == x:
d[out] = cuda.gpu_from_host(x_copy) if isinstance(x.type, cuda.CudaNdarrayType):
d[out] = cuda.gpu_from_host(x_copy)
else:
assert isinstance(x.type, gpuarray.GpuArrayType)
d[out] = gpuarray.gpu_from_host(x_copy)
return d return d
elif out.owner is None: elif out.owner is None:
return d return d
...@@ -138,6 +142,11 @@ def traverse(out, x, x_copy, d, visited=None): ...@@ -138,6 +142,11 @@ def traverse(out, x, x_copy, d, visited=None):
out.owner.inputs == [x]): out.owner.inputs == [x]):
d[out] = tensor.as_tensor_variable(x_copy) d[out] = tensor.as_tensor_variable(x_copy)
return d return d
elif (gpuarray.pygpu_activated and
out.owner.op == gpuarray.host_from_gpu and
out.owner.inputs == [x]):
d[out] = tensor.as_tensor_variable(x_copy)
return d
else: else:
for inp in out.owner.inputs: for inp in out.owner.inputs:
d = traverse(inp, x, x_copy, d, visited) d = traverse(inp, x, x_copy, d, visited)
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论