提交 c1cdabc8 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Allow only modes None and 'c' in LoadFromDisk.

The other modes would trigger errors, segmentation faults, or wrong results, if the next Op was working inplace.
上级 21de50e6
......@@ -19,6 +19,9 @@ class LoadFromDisk(Op):
def __init__(self, dtype, broadcastable, mmap_mode=None):
self.dtype = numpy.dtype(dtype) # turn "float64" into numpy.float64
self.broadcastable = broadcastable
if mmap_mode not in (None, 'c'):
raise ValueError("The only supported values for mmap_mode "
"are None and 'c', got %s" % mmap_mode)
self.mmap_mode = mmap_mode
self._info = (dtype, broadcastable, mmap_mode)
......@@ -49,7 +52,19 @@ class LoadFromDisk(Op):
def load(path, dtype, broadcastable, mmap_mode=None):
"""
Load an array from an .npy file
Load an array from an .npy file.
:param path: A Generic symbolic variable, that will contain a string
:param dtype: The data type of the array to be read.
:param broadcastable: The broadcastable pattern of the loaded array,
for instance, (False,) for a vector, (False, True) for a column,
(False, False) for a matrix.
:param mmap_mode: How the file will be loaded. None means that the
data will be copied into an array in memory, 'c' means that the file
will be mapped into virtual memory, so only the parts that are
needed will be actually read from disk and put into memory.
Other modes supported by numpy.load ('r', 'r+', 'w+') cannot
be supported by Theano.
>>> from theano import *
>>> path = Variable(Generic())
......
......@@ -6,28 +6,49 @@ import os
class T_load_tensor(unittest.TestCase):
def test0(self):
data = numpy.arange(5, dtype=numpy.int32)
filename = os.path.join(
def setUp(self):
self.data = numpy.arange(5, dtype=numpy.int32)
self.filename = os.path.join(
theano.config.base_compiledir,
"_test.npy")
numpy.save(filename, data)
numpy.save(self.filename, self.data)
def test0(self):
path = Variable(Generic())
# Not specifying mmap_mode defaults to None, and the data is
# copied into main memory
x = tensor.load(path, 'int32', (False,))
y = x*2
y = x * 2
fn = function([path], y)
assert (fn(self.filename) == (self.data * 2)).all()
def test_invalid_modes(self):
# Modes 'r+', 'r', and 'w+' cannot work with Theano, because
# the output array may be modified inplace, and that should not
# modify the original file.
path = Variable(Generic())
for mmap_mode in ('r+', 'r', 'w+', 'toto'):
self.assertRaises(ValueError,
tensor.load, path, 'int32', (False,), mmap_mode)
def test1(self):
path = Variable(Generic())
# 'c' means "copy-on-write", which allows the array to be overwritten
# by an inplace Op in the graph, without modifying the underlying
# file.
x = tensor.load(path, 'int32', (False,), 'c')
# x ** 2 has been chosen because it will work inplace.
y = (x ** 2).sum()
fn = function([path], y)
assert (fn(filename) == data*2).all()
# Call fn() twice, to check that inplace ops do not cause trouble
assert (fn(self.filename) == (self.data ** 2).sum()).all()
assert (fn(self.filename) == (self.data ** 2).sum()).all()
def test_memmap(self):
data = numpy.arange(5, dtype=numpy.int32)
filename = os.path.join(
theano.config.base_compiledir,
"_test.npy")
numpy.save(filename, data)
path = Variable(Generic())
x = tensor.load(path, 'int32', (False,), mmap_mode='r+')
x = tensor.load(path, 'int32', (False,), mmap_mode='c')
fn = function([path], x)
assert type(fn(filename)) == numpy.core.memmap
assert type(fn(self.filename)) == numpy.core.memmap
def tearDown(self):
os.remove(os.path.join(
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论