Commit 481cf68a authored by Arnaud Bergeron

Initial files for the support of a new type based on compyte in theano

Parent 920d123d
import logging
import sys

import theano
from theano.configparser import config, AddConfigVar, StrParam, \
    BoolParam, IntParam
# Module-level logger for the sandbox gpuarray package.
_logger_name = 'theano.sandbox.gpuarray'
_logger = logging.getLogger(_logger_name)
_logger.setLevel(logging.WARNING)

# pygpu (the Python bindings for compyte/libgpuarray) is optional: when
# it is missing, keep the module importable and leave `pygpu` as None so
# the initialization guard at the bottom of this file is skipped.
try:
    import pygpu.gpuarray
except ImportError:
    pygpu = None

AddConfigVar('gpuarray.init_device',
             """
Device to initialize for gpuarray use without moving
computations automatically.
""",
             StrParam(''))

# This is for documentation not to depend on the availability of pygpu
from type import GpuArrayType
from var import (GpuArrayVariable, GpuArrayConstant, GpuArraySharedVariable,
                 gpuarray_shared_constructor)
def init_dev(dev):
    """Initialize the gpuarray backend for device `dev`.

    `dev` is a string such as ``cuda0`` or ``opencl0:1`` (the OpenCL
    form is ``opencl<platform>:<device>``).  An unrecognized string
    leaves ``globals.kind`` set to None and initializes nothing.
    """
    # Imported locally to avoid an import cycle at module load time.
    import globals
    if dev.startswith('cuda'):
        globals.kind = 'cuda'
        devnum = int(dev[4:])
    elif dev.startswith('opencl'):
        globals.kind = 'opencl'
        devspec = dev[6:]
        plat, dev = devspec.split(':')
        # Pack the platform id in the high 16 bits and the device id in
        # the low 16 bits.  The original used a right shift
        # (int(plat) >> 16), which is 0 for any small platform number
        # and silently ignored the platform.
        devnum = int(dev) | (int(plat) << 16)
    else:
        globals.kind = None
    if globals.kind:
        globals.context = pygpu.gpuarray.init(globals.kind, devnum)
# Only attempt device initialization when pygpu imported successfully.
if pygpu:
    try:
        if (config.device.startswith('cuda') or
            config.device.startswith('opencl')):
            # config.device selects the gpuarray backend outright:
            # initialize it and register the shared constructor so
            # shared variables land on the device.
            init_dev(config.device)
            # XXX add optimization tags here (when we will have some)
            import theano.compile
            theano.compile.shared_constructor(gpuarray_shared_constructor)
        elif config.gpuarray.init_device != '':
            # Initialize the device only; computations are not moved
            # automatically (see the config variable's docstring).
            init_dev(config.gpuarray.init_device)
    except Exception:
        # Best effort: any failure during initialization disables
        # gpuarray support instead of breaking the theano import.
        print >> sys.stderr, "Could not initialize pygpu, support disabled"
        pygpu = None
import numpy
import theano
from theano import Op, Type, Apply, Variable, Constant
from theano import tensor, scalar, config
from theano.scalar import Scalar
from theano.gof.python25 import all, any
from theano.sandbox.cuda.type import CudaNdArrayType
from pygpu import gpuarray, elemwise
from type import GpuArrayType
def as_gpuarray_variable(x):
    """Coerce `x` into a symbolic variable living on the GPU."""
    converter = getattr(x, '_as_GpuArrayVariable', None)
    if converter is not None:
        return converter()
    # XXX: we need to have the cuda -> gpu path taken care of.
    return gpu_from_host(tensor.as_tensor_variable(x))
def as_gpuarray(x):
    """Return `x` as a GpuArray value, avoiding a copy when possible."""
    return gpuarray.array(x, copy=False)
class HostFromGpu(Op):
    """Op transferring a GpuArrayType variable back to host memory,
    producing the equivalent TensorType variable."""

    def __eq__(self, other):
        # All instances are interchangeable: compare on the type only.
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def make_node(self, x):
        if not isinstance(x.type, GpuArrayType):
            raise TypeError(x)
        return Apply(self, [x],
                     [tensor.TensorType(dtype=x.dtype,
                                        broadcastable=x.broadcastable,)()])

    def perform(self, node, inp, out):
        x, = inp
        z, = out
        # numpy.asarray performs the device -> host copy.
        z[0] = numpy.asarray(x)

    def grad(self, inputs, grads):
        gz, = grads
        return [gpu_from_host(gz)]

    def R_op(self, inputs, eval_points):
        ev, = eval_points
        # The output is host-side, so a device-side eval point must be
        # transferred through this op.  The original compared the
        # *variable* against tensor.TensorType (never true for a
        # Variable) and so always returned the point unconverted.
        if isinstance(ev.type, GpuArrayType):
            return [host_from_gpu(ev)]
        else:
            return [ev]

    def infer_shape(self, node, xshp):
        # Transfers do not change the shape.
        return xshp
host_from_gpu = HostFromGpu()
class GpuFromHost(Op):
    """Op transferring a host TensorType variable to the GPU,
    producing the equivalent GpuArrayType variable."""

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def make_node(self, x):
        if not isinstance(x.type, tensor.TensorType):
            raise TypeError(x)
        # The original built "[GpuArrayType(...)]()" -- calling the
        # *list*, a guaranteed TypeError; the call belongs on the type
        # to instantiate the output variable.
        return Apply(self, [x], [GpuArrayType(broadcastable=x.broadcastable,
                                              dtype=x.dtype)()])

    def perform(self, node, inp, out):
        x, = inp
        z, = out
        z[0] = gpuarray.array(x)

    def grad(self, inputs, grads):
        gz, = grads
        return [host_from_gpu(gz)]

    def R_op(self, inputs, eval_points):
        ev, = eval_points
        # The output is device-side: a host-side eval point is moved to
        # the GPU.  (Fixes the 'isintance' typo, the check against the
        # variable instead of its type, and always returns a list.)
        if isinstance(ev.type, tensor.TensorType):
            return [gpu_from_host(ev)]
        else:
            return [ev]

    def infer_shape(self, node, xshp):
        return xshp
gpu_from_host = GpuFromHost()
class GpuFromCuda(Op):
    """Op converting a CudaNdarray variable (old cuda backend) to a
    GpuArrayType variable."""

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def make_node(self, x):
        if not isinstance(x.type, CudaNdArrayType):
            raise TypeError(x)
        # Same list-vs-call fix as GpuFromHost: instantiate the output
        # variable from the type.
        return Apply(self, [x], [GpuArrayType(broadcastable=x.broadcastable,
                                              dtype=x.dtype)()])

    def perform(self, node, inp, out):
        # Imported locally (as init_dev does) because this module never
        # imports globals at top level.
        import globals
        x, = inp
        z, = out
        if globals.kind == 'cuda':
            # Walk to the root base so the new GpuArray keeps the true
            # owner of the device memory alive.
            base = x
            while hasattr(base, 'base') and base.base is not None:
                base = base.base
            # TODO: we need a way to extract the raw gpudata pointer
            # from a CudaNdarray; once available the wrapping should be
            # roughly:
            #   z[0] = gpuarray.from_gpudata(ptr, 0, x.dtype, x.shape,
            #                                kind=globals.kind,
            #                                context=globals.context,
            #                                base=base, strides=x.strides)
            # (The original dead code after the raise had a positional
            # argument after keyword arguments -- a SyntaxError that made
            # the whole module unimportable -- wrote to x[0] instead of
            # z[0], and referenced an undefined name 'b'.)
            raise NotImplementedError("How are we going to get a gpudata pointer from here")
        else:
            # Cross-kind transfer: round-trip through host memory.
            z[0] = gpuarray.array(numpy.asarray(x), kind=globals.kind,
                                  context=globals.context)

    def grad(self, inputs, grads):
        gz, = grads
        # NOTE(review): this yields a *host* tensor as the gradient of a
        # CudaNdarray input; a gpu->cuda transfer op is presumably
        # intended once one exists -- confirm.
        return [host_from_gpu(gz)]

    def R_op(self, inputs, eval_points):
        ev, = eval_points
        # Fixes the 'isintance' typo (NameError) and always returns a
        # list, mirroring the original's branch structure.
        if isinstance(ev.type, GpuArrayType):
            return [host_from_gpu(ev)]
        else:
            return [ev]

    def infer_shape(self, node, xshp):
        return xshp
# This module serves to stuff global values (like kind and context)
import copy_reg
import numpy
import theano
from theano import Type, Variable, tensor, config, scalar
import globals
# Make sure this is importable even if pygpu is absent
# (it will not work though)
try:
from pygpu import gpuarray
from pygpu.elemwise import compare
except ImportError:
pass
class GpuArrayType(Type):
    """Theano Type for arrays stored on a GPU device, backed by
    pygpu/compyte GpuArray objects."""

    # Filled in by var.py after its classes exist (avoids a circular
    # import between type.py and var.py).
    Variable = None
    Constant = None
    SharedVariable = None

    @staticmethod
    def value_zeros(*args, **kwargs):
        # The original forgot the return, placed keyword arguments after
        # **kwargs (a SyntaxError) and used the unbound name 'pygpu'
        # (only 'from pygpu import gpuarray' is in scope here).
        return gpuarray.zeros(*args, kind=globals.kind,
                              context=globals.context, **kwargs)

    def __init__(self, dtype, broadcastable, name=None):
        self.dtype = str(dtype)
        # (the original had a stray comma inside the generator
        # expression, a SyntaxError)
        self.broadcastable = tuple(bool(b) for b in broadcastable)
        self.name = name
        try:
            self.typecode = gpuarray.dtype_to_typecode(self.dtype)
        except gpuarray.GpuArrayException:
            raise TypeError("Unsupported dtype for %s: %s" %
                            (self.__class__.__name__, self.dtype))

    @property
    def ndim(self):
        # filter() relied on self.ndim, which was never defined; follow
        # TensorType's convention of deriving it from broadcastable.
        return len(self.broadcastable)

    def filter(self, data, strict=False, allow_downcast=None):
        """Check (and possibly convert) `data` so it conforms to this
        type; raises TypeError otherwise."""
        if strict:
            # Strict mode: accept only an exactly matching GpuArray.
            if not isinstance(data, gpuarray.GpuArray):
                raise TypeError("%s expected a GpuArray object." % self,
                                data, type(data))
            if self.typecode != data.typecode:
                raise TypeError("%s expected typecode %d (dtype %s), "
                                "got %d (dtype %s)." %
                                (self, self.typecode, self.dtype,
                                 data.typecode, str(data.dtype)))
            # fallthrough to ndim check
        elif allow_downcast:
            data = gpuarray.array(data, dtype=self.typecode, copy=False,
                                  kind=globals.kind, context=globals.context,
                                  ndmin=len(self.broadcastable))
        else:
            if isinstance(data, gpuarray.GpuArray):
                # Only allow conversions that cannot lose precision.
                up_dtype = scalar.upcast(self.dtype, data.dtype)
                if up_dtype == self.dtype:
                    data = data.astype(self.dtype)
                else:
                    raise TypeError("%s cannot store a value of dtype %s "
                                    "without risking loss of precision." %
                                    (self, data.dtype))
        if self.ndim != data.ndim:
            raise TypeError("Wrong number of dimensions: expected %s, "
                            "got %s with shape %s." % (self.ndim, data.ndim,
                                                       data.shape), data)
        shp = data.shape
        # (the original loop used an index 'i' that was never
        # initialized -- a NameError on first use)
        for i, b in enumerate(self.broadcastable):
            if b and shp[i] != 1:
                raise TypeError("Non-unit value on shape on a broadcastable"
                                " dimension.", shp, self.broadcastable)
        return data

    def values_eq(self, a, b):
        if a.shape != b.shape:
            return False
        if a.typecode != b.typecode:
            return False
        # Element-wise comparison on the device, reduced on the host.
        return numpy.asarray(compare(a, '==', b)).all()

    def __eq__(self, other):
        return (type(self) == type(other) and
                self.typecode == other.typecode and
                self.broadcastable == other.broadcastable)

    def __hash__(self):
        return hash(self.typecode) ^ hash(self.broadcastable)

    def __str__(self):
        return "GpuArray<%s>" % self.dtype
import numpy

import theano
from theano import Variable, Constant, tensor
from theano.compile import SharedVariable

try:
    # Let this be importable for documentation purposes
    import pygpu.gpuarray
except ImportError:
    pass

# GpuArrayType is defined in type.py; this module *is* var.py, so the
# original "from var import GpuArrayType" was a self-import that could
# never provide the name.
from type import GpuArrayType
from basic_ops import host_from_gpu, gpu_from_host
class _operators(tensor.basic._tensor_py_operators):
    """Mixin giving GpuArray variables the standard tensor operators
    plus the host/device conversion hooks."""

    def _as_TensorVariable(self):
        # Insert an explicit device -> host transfer.
        return host_from_gpu(self)

    # XXX: don't forget to add _as_CudaNdarrayVariable() when we
    # figure out how to do it.
    def _as_GpuArrayVariable(self):
        # Already device-side: nothing to do.
        return self

    @property
    def dtype(self):
        return self.type.dtype

    @property
    def broadcastable(self):
        return self.type.broadcastable

    @property
    def ndim(self):
        return self.type.ndim
class GpuArrayVariable(_operators, Variable):
    # Plain symbolic variable of GpuArrayType; all behaviour comes from
    # the _operators mixin.
    pass

# Register on the type so Type.make_variable() builds this class.
GpuArrayType.Variable = GpuArrayVariable
class GpuArraySignature(tensor.TensorConstantSignature):
    """Signature for GpuArray constants.

    The original module used this name without defining it anywhere,
    so signature() raised NameError on first call.  Comparing through
    the host-side numpy value (as TensorConstantSignature does) is
    sufficient for constant merging/caching.
    """
    pass


class GpuArrayConstant(_operators, Constant):
    def signature(self):
        # The data is transferred to the host so the signature compares
        # by value.
        return GpuArraySignature((self.type, numpy.asarray(self.data)))

    def __str__(self):
        if self.name is not None:
            return self.name
        return "GpuArrayConstant{%s}" % numpy.asarray(self.data)

GpuArrayType.Constant = GpuArrayConstant
class GpuArraySharedVariable(_operators, SharedVariable):
    """Shared variable whose value is stored on the GPU."""

    def get_value(self, borrow=False, return_internal_type=False):
        """Return the value: the internal GpuArray when
        `return_internal_type` is True, otherwise a host-side ndarray."""
        if return_internal_type:
            if borrow:
                return self.container.value
            else:
                return self.container.value.copy()
        else:
            # numpy.asarray performs the device -> host transfer (and
            # already copies, so `borrow` is irrelevant here).
            return numpy.asarray(self.container.value)

    def set_value(self, value, borrow=False):
        # NOTE(review): no kind/context are passed here; presumably
        # pygpu falls back to the globally initialized ones -- confirm.
        self.container.value = pygpu.gpuarray.array(value, copy=(not borrow))

    def __getitem__(self, *args):
        # Delegate to the mixin's indexing.  (The original spelled this
        # 'retrurn', a SyntaxError that broke the whole module.)
        return _operators.__getitem__(self, *args)

GpuArrayType.SharedVariable = GpuArraySharedVariable
def gpuarray_shared_constructor(value, name=None, strict=False,
                                allow_downcast=None, borrow=False,
                                broadcastable=None):
    """SharedVariable constructor for GpuArrayType.

    `value` must be a numpy ndarray or a pygpu GpuArray; it is copied
    to the device unless `borrow` is True and it is already a GpuArray.
    Raises RuntimeError when no device was initialized.
    """
    # Imported locally (as init_dev does) because this module never
    # imports globals at top level -- the original raised NameError
    # before it could even report "pygpu is not initialized".
    import globals
    if globals.kind is None:
        raise RuntimeError("pygpu is not initialized")
    if not isinstance(value, (numpy.ndarray, pygpu.gpuarray.GpuArray)):
        raise TypeError('ndarray or GpuArray required')

    if broadcastable is None:
        # Default: no dimension is broadcastable.
        broadcastable = (False,) * value.ndim
    # Renamed from 'type' to avoid shadowing the builtin.
    gtype = GpuArrayType(value.dtype, broadcastable)
    deviceval = pygpu.gpuarray.array(value, copy=(not borrow))
    return GpuArraySharedVariable(type=gtype, value=deviceval, name=name,
                                  strict=strict)
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment