提交 5e1935ba authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Mirror tensor and move the subtensor op in its own file.

上级 d4d7141a
...@@ -499,99 +499,3 @@ class GpuAlloc(Op): ...@@ -499,99 +499,3 @@ class GpuAlloc(Op):
        return True
gpu_alloc = GpuAlloc()
class GpuSubtensor(tensor.Subtensor):
    """Subtensor (indexing/slicing) op for variables of ``GpuArrayType``.

    The index bookkeeping (``self.idx_list``) is inherited from
    ``tensor.Subtensor``; only the output type and the C implementation
    (which calls ``GpuArray_index``) are GPU-specific.
    """

    def make_node(self, x, *inputs):
        assert isinstance(x.type, GpuArrayType)
        rval = tensor.Subtensor.make_node(self, x, *inputs)
        # Same dtype/broadcast pattern as the CPU op's output, but on GPU.
        otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
                             broadcastable=rval.outputs[0].type.broadcastable)
        return Apply(self, [x] + rval.inputs[1:], [otype()])

    def perform(self, node, inputs, out_):
        out, = out_
        x = inputs[0]
        # Reversed so that convert() can pop() the runtime scalar index
        # inputs in the same front-to-back order as self.idx_list.
        indices = list(reversed(inputs[1:]))

        def convert(entry):
            # Map an idx_list entry to a concrete Python index:
            # a Type placeholder consumes the next runtime input,
            # a slice is converted element-wise, anything else (a
            # compile-time constant) passes through unchanged.
            if isinstance(entry, Type):
                rval = indices.pop()
                if sys.version_info < (2, 5):
                    # Before Python 2.5, PySlice_GetIndicesEx requires
                    # Python int to be passed.
                    rval_ = int(rval)
                    if rval_ != rval:
                        raise IndexError((
                            "Invalid value for indexing: %s. "
                            "That value may be too big.") % rval)
                    return rval_
                return rval
            elif isinstance(entry, slice):
                return slice(convert(entry.start),
                             convert(entry.stop),
                             convert(entry.step))
            else:
                return entry

        cdata = tuple(map(convert, self.idx_list))
        if len(cdata) == 1:
            cdata = cdata[0]

        out[0] = x.__getitem__(cdata)

    def c_code(self, node, name, inputs, outputs, sub):
        # BUG FIX: pop the runtime index inputs in the same order as
        # perform() does (front-to-back w.r.t. self.idx_list); the old
        # code popped them from the wrong end, mispairing indices when
        # there is more than one runtime scalar index.
        indices = list(reversed(inputs[1:]))
        # BUG FIX: start from an empty buffer.  StringIO.StringIO(s)
        # leaves the file position at 0, so the subsequent prints were
        # overwriting the array declarations instead of appending.
        sio = StringIO.StringIO()
        print >>sio, """
        ssize_t %(name)s_starts[%(sz)s];
        ssize_t %(name)s_stops[%(sz)s];
        ssize_t %(name)s_steps[%(sz)s];
        """ % dict(name=name, sz=len(self.idx_list))
        for i, idx in enumerate(self.idx_list):
            if isinstance(idx, Type):
                # Index by a runtime scalar input.  step == 0 appears to
                # mark a scalar index (dimension removed) for
                # GpuArray_index, with stops[i] left unset in that case —
                # TODO confirm against the libgpuarray API.
                print >>sio, """
                %(name)s_starts[%(i)s] = %(start)s;
                %(name)s_steps[%(i)s] = %(step)s;
                """ % dict(name=name, i=i, start=indices.pop(), step=0)
            elif isinstance(idx, slice):
                # Index by a fixed (compile-time) slice.
                print >>sio, """
                %(name)s_starts[%(i)s] = %(start)s;
                %(name)s_stops[%(i)s] = %(stop)s;
                %(name)s_steps[%(i)s] = %(step)s;
                """ % dict(i=i, name=name, start=idx.start, stop=idx.stop,
                           step=idx.step)
            else:
                # Index by a fixed (compile-time) number.
                print >>sio, """
                %(name)s_starts[%(i)s] = %(start)s;
                %(name)s_steps[%(i)s] = %(step)s;
                """ % dict(name=name, i=i, start=idx, step=0)
        print >>sio, """
        if (%(out)s) {
            // Try to reuse the python object.
            GpuArray_clear(&%(out)s->ga);
        } else {
            %(out)s = new_GpuArray((PyObject *)&GpuArrayType, GpuArray_default_context);
        }
        if (!%(out)s) { %(fail)s }

        int %(name)s_err;
        %(name)s_err = GpuArray_index(&%(out)s->ga, &%(inp)s->ga,
                                      %(name)s_starts, %(name)s_steps,
                                      %(name)s_stops);
        if (%(name)s_err != GA_NO_ERROR) {
            Py_DECREF(%(out)s); %(out)s = NULL;
            PyErr_SetString(PyExc_RuntimeError, "Error during index");
            %(fail)s
        }
        """ % dict(name=name, fail=sub['fail'], inp=inputs[0], out=outputs[0])
        return sio.getvalue()

    def c_code_cache_version(self):
        # Bumped: the generated C changed (missing ';' after the
        # GpuArray_index call added, index-input ordering fixed).
        return (1,)
import numpy
import theano
from theano import tensor
from theano.tensor.subtensor import Subtensor, get_idx_list
from theano.gof.python25 import all, any
try:
import pygpu
from pygpu import gpuarray
except ImportError:
pass
from type import GpuArrayType
from basic_ops import as_gpuarray_variable, zeros_like
class GpuSubtensor(Subtensor):
    """Subtensor (indexing/slicing) op for variables of ``GpuArrayType``.

    Index bookkeeping (``self.idx_list``) comes from
    ``theano.tensor.subtensor.Subtensor``; this subclass only changes the
    output type and the C implementation (``GpuArray_index``).
    """

    def make_node(self, x, *inputs):
        # NOTE(review): the parent make_node is called on the raw x and
        # only then is x converted to a gpuarray variable; confirm the
        # parent accepts a GpuArrayType input here.
        rval = tensor.Subtensor.make_node(self, x, *inputs)
        otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
                             broadcastable=rval.outputs[0].type.broadcastable)
        x = as_gpuarray_variable(x)
        return Apply(self, [x] + rval.inputs[1:], [otype()])

    def perform(self, node, inputs, out_):
        out, = out_
        x = inputs[0]
        # NOTE(review): perform_cache_cdata is never initialized in this
        # class; it is presumably set to None by the parent Subtensor —
        # confirm.
        if self.perform_cache_cdata is not None:
            # All indices were compile-time constants: reuse the cached
            # index tuple.
            out[0] = x.__getitem__(self.perform_cache_cdata)
            return

        cdata = get_idx_list(inputs, self.idx_list)
        if len(cdata) == 1:
            cdata = cdata[0]
        if len(inputs) == 1:
            # No runtime index inputs, so cdata is constant; cache it for
            # subsequent calls.
            self.perform_cache_cdata = cdata

        out[0] = x.__getitem__(cdata)

    def c_code(self, node, name, inputs, outputs, sub):
        # BUG FIX: pop the runtime index inputs in the same front-to-back
        # order (w.r.t. self.idx_list) that get_idx_list uses in
        # perform(); the old code popped them from the wrong end.
        indices = list(reversed(inputs[1:]))
        # BUG FIX: start from an empty buffer.  StringIO.StringIO(s)
        # leaves the file position at 0, so the subsequent prints were
        # overwriting the array declarations instead of appending.
        sio = StringIO.StringIO()
        print >>sio, """
        ssize_t %(name)s_starts[%(sz)s];
        ssize_t %(name)s_stops[%(sz)s];
        ssize_t %(name)s_steps[%(sz)s];
        """ % dict(name=name, sz=len(self.idx_list))
        for i, idx in enumerate(self.idx_list):
            if isinstance(idx, Type):
                # Index by a runtime scalar input.  step == 0 appears to
                # mark a scalar index (dimension removed) for
                # GpuArray_index, with stops[i] left unset in that case —
                # TODO confirm against the libgpuarray API.
                print >>sio, """
                %(name)s_starts[%(i)s] = %(start)s;
                %(name)s_steps[%(i)s] = %(step)s;
                """ % dict(name=name, i=i, start=indices.pop(), step=0)
            elif isinstance(idx, slice):
                # Index by a fixed (compile-time) slice.
                print >>sio, """
                %(name)s_starts[%(i)s] = %(start)s;
                %(name)s_stops[%(i)s] = %(stop)s;
                %(name)s_steps[%(i)s] = %(step)s;
                """ % dict(i=i, name=name, start=idx.start, stop=idx.stop,
                           step=idx.step)
            else:
                # Index by a fixed (compile-time) number.
                print >>sio, """
                %(name)s_starts[%(i)s] = %(start)s;
                %(name)s_steps[%(i)s] = %(step)s;
                """ % dict(name=name, i=i, start=idx, step=0)
        print >>sio, """
        if (%(out)s) {
            // Try to reuse the python object.
            GpuArray_clear(&%(out)s->ga);
        } else {
            %(out)s = new_GpuArray((PyObject *)&GpuArrayType, GpuArray_default_context);
        }
        if (!%(out)s) { %(fail)s }

        int %(name)s_err;
        %(name)s_err = GpuArray_index(&%(out)s->ga, &%(inp)s->ga,
                                      %(name)s_starts, %(name)s_steps,
                                      %(name)s_stops);
        if (%(name)s_err != GA_NO_ERROR) {
            Py_DECREF(%(out)s); %(out)s = NULL;
            PyErr_SetString(PyExc_RuntimeError, "Error during index");
            %(fail)s
        }
        """ % dict(name=name, fail=sub['fail'], inp=inputs[0], out=outputs[0])
        return sio.getvalue()

    def c_code_cache_version(self):
        # Bumped: the generated C changed (missing ';' after the
        # GpuArray_index call added, index-input ordering fixed).
        return (1,)

    def grad(self, inputs, grads):
        gz, = grads
        x = inputs[0]
        rest = inputs[1:]
        output = self(*inputs)
        if output.dtype.find('int') != -1:
            # Integer outputs are not differentiable: return a zero
            # gradient in the configured float dtype.
            first = zeros_like(x, theano.config.floatX)
        else:
            # NOTE(review): GpuIncSubtensor and DisconnectedType are not
            # in this file's visible imports — confirm they are defined
            # or imported elsewhere in the module.
            first = GpuIncSubtensor(self.idx_list)(zeros_like(x), gz, *rest)
        # The index inputs themselves carry no gradient.
        return ([first] + [DisconnectedType()()] * len(rest))
...@@ -9,7 +9,6 @@ from theano.compile import DeepCopyOp ...@@ -9,7 +9,6 @@ from theano.compile import DeepCopyOp
from theano.tensor.tests.test_basic import safe_make_node from theano.tensor.tests.test_basic import safe_make_node
from theano.tests.unittest_tools import SkipTest from theano.tests.unittest_tools import SkipTest
from numpy.testing.noseclasses import KnownFailureTest from numpy.testing.noseclasses import KnownFailureTest
from theano.tensor.tests.test_subtensor import T_subtensor
import theano.sandbox.gpuarray import theano.sandbox.gpuarray
...@@ -35,7 +34,7 @@ from theano.sandbox.gpuarray.type import (GpuArrayType, ...@@ -35,7 +34,7 @@ from theano.sandbox.gpuarray.type import (GpuArrayType,
from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host, from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host,
gpu_alloc, gpu_from_cuda, gpu_alloc, gpu_from_cuda,
cuda_from_gpu, HostFromGpu, cuda_from_gpu, HostFromGpu,
GpuFromHost, GpuSubtensor) GpuFromHost)
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
utt.seed_rng() utt.seed_rng()
...@@ -330,18 +329,3 @@ def test_deep_copy(): ...@@ -330,18 +329,3 @@ def test_deep_copy():
    res = f(a)
    assert GpuArrayType.values_eq(res, a)
class G_subtensor(T_subtensor):
    """Run the generic Subtensor test suite against the gpuarray backend."""

    # Backend-specific knobs consumed by the shared T_subtensor suite.
    shared = staticmethod(gpuarray_shared_constructor)
    sub = GpuSubtensor
    ops = (GpuSubtensor,)
    mode = mode_with_gpu
    # avoid errors on gpus which do not support float64
    dtype = 'float32'
    ignore_topo = (HostFromGpu, GpuFromHost)
    fast_compile = False

    def shortDescription(self):
        # Keep the test runner from using docstrings as test names.
        return None

    def __init__(self, name):
        super(G_subtensor, self).__init__(name)
from theano.tensor.tests.test_subtensor import T_subtensor
from theano.sandbox.gpuarray.basic_ops import (HostFromGpu, GpuFromHost)
from theano.sandbox.gpuarray.subtensor import GpuSubtensor
from theano.sandbox.gpuarray.type import gpuarray_shared_constructor
from theano.sandbox.gpuarray.tests.test_basic_ops import mode_with_gpu
class G_subtensor(T_subtensor):
    """Parametrize the shared Subtensor tests for the gpuarray backend."""

    # Configuration read by the inherited T_subtensor test methods.
    shared = staticmethod(gpuarray_shared_constructor)
    sub = GpuSubtensor
    ops = (GpuSubtensor,)
    mode = mode_with_gpu
    # avoid errors on gpus which do not support float64
    dtype = 'float32'
    ignore_topo = (HostFromGpu, GpuFromHost)
    fast_compile = False

    def shortDescription(self):
        # Suppress docstring-based test descriptions.
        return None

    def __init__(self, name):
        super(G_subtensor, self).__init__(name)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论