提交 273eaf4c 作者: Matthew Rocklin

added memmap mode to load. changed from npz to npy

上级 512a28f3
...@@ -30,6 +30,7 @@ import sharedvar # adds shared-variable constructors ...@@ -30,6 +30,7 @@ import sharedvar # adds shared-variable constructors
# `theano.shared` and `tensor._shared`. # `theano.shared` and `tensor._shared`.
from sharedvar import tensor_constructor as _shared from sharedvar import tensor_constructor as _shared
from io import *
def shared(*args, **kw): def shared(*args, **kw):
""" """
......
...@@ -16,17 +16,17 @@ class LoadFromDisk(Op): ...@@ -16,17 +16,17 @@ class LoadFromDisk(Op):
@note: Non-differentiable. @note: Non-differentiable.
""" """
def __init__(self, dtype, broadcastable): def __init__(self, dtype, broadcastable, mmap_mode=None):
self.dtype = dtype self.dtype = dtype
self.broadcastable = broadcastable self.broadcastable = broadcastable
self.mmap_mode = mmap_mode
self._info = (dtype, broadcastable, mmap_mode)
def __eq__(self, other): def __eq__(self, other):
return (type(self) == type(other) and return (type(self) == type(other) and self._info == other._info)
self.broadcastable == other.broadcastable and
self.dtype == other.dtype)
def __hash__(self): def __hash__(self):
return hash((type(self), self.dtype, self.broadcastable)) return hash(self._info)
def make_node(self, path): def make_node(self, path):
if isinstance(path, str): if isinstance(path, str):
...@@ -36,24 +36,29 @@ class LoadFromDisk(Op): ...@@ -36,24 +36,29 @@ class LoadFromDisk(Op):
def perform(self, node, inp, out): def perform(self, node, inp, out):
path = inp[0] path = inp[0]
d = numpy.load(path) if (path.split('.')[-1] == 'npz'):
out[0][0] = d[d.keys()[0]].astype(self.dtype) raise ValueError("Expected a .npy file, got %s instead"%path)
result = numpy.load(path, mmap_mode=self.mmap_mode)
if result.dtype != self.dtype:
raise TypeError("Expected an array of type %s, got %s instead"%
(self.dtype, result.dtype))
out[0][0] = result
def __str__(self): def __str__(self):
return "Load: %s, %s"%(self.dtype, self.broadcastable) return "Load: dtype:%s, broadcastable:%s, mmep:%s"%self._info
def load(path, dtype, broadcastable): def load(path, dtype, broadcastable, mmap_mode=None):
""" """
Load an array from a .npz file Load an array from an .npy file
>>> from theano import * >>> from theano import *
>>> path = Variable(Generic()) >>> path = Variable(Generic())
>>> x = tensor.load(path, 'int64', (False,)) >>> x = tensor.load(path, 'int64', (False,))
>>> y = x*2 >>> y = x*2
>>> fn = function([path], y) >>> fn = function([path], y)
>>> fn("stored-array.npz") >>> fn("stored-array.npy")
array([0, 2, 4, 6, 8], dtype=int64) array([0, 2, 4, 6, 8], dtype=int64)
""" """
return LoadFromDisk(dtype, broadcastable)(path) return LoadFromDisk(dtype, broadcastable, mmap_mode)(path)
...@@ -4,12 +4,19 @@ import numpy ...@@ -4,12 +4,19 @@ import numpy
class T_load_tensor(unittest.TestCase): class T_load_tensor(unittest.TestCase):
def test0(self): def test0(self):
data = numpy.arange(5) data = numpy.arange(5, dtype=numpy.int32)
filename = "_load_tensor_test_1.npz" filename = "_load_tensor_test_1.npy"
numpy.savez(filename, data) numpy.save(filename, data)
path = Variable(Generic()) path = Variable(Generic())
x = tensor.load(path, 'int64', (False,)) x = tensor.load(path, 'int32', (False,))
y = x*2 y = x*2
fn = function([path], [y]) fn = function([path], y)
assert (fn(filename) == data*2).all() assert (fn(filename) == data*2).all()
def test_memmap(self):
data = numpy.arange(5, dtype=numpy.int32)
filename = "_load_tensor_test_1.npy"
numpy.save(filename, data)
path = Variable(Generic())
x = tensor.load(path, 'int32', (False,), mmap_mode='r+')
fn = function([path], x)
assert type(fn(filename)) == numpy.core.memmap
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论