GpuAlloc now uses params

073b822d · Thomas George · d003afce · 073b822d
--- a/theano/gpuarray/basic_ops.py
+++ b/theano/gpuarray/basic_ops.py
@@ -8,6 +8,7 @@ import theano
 from theano import Op, Apply, Type, Variable
 from theano import tensor, config
 from theano.gradient import grad_undefined
+from theano.scalar import bool as bool_t
 from theano.tensor.basic import (
    Alloc, AllocEmpty, alloc_validate_shape, Join, Split)

@@ -808,14 +809,15 @@ class GpuAlloc(HideC, Alloc):

    __props__ = ('memset_0', 'context_name')
    _f16_ok = True
-    params_type = gpu_context_type
+    params_type = ParamsType(context=gpu_context_type, memset_0=bool_t)

    def __init__(self, context_name, memset_0=False):
        self.context_name = context_name
        self.memset_0 = memset_0

    def get_params(self, node):
-        return get_context(self.context_name)
+        return self.params_type.get_params(context=get_context(self.context_name),
+                                           memset_0=self.memset_0)

    def __str__(self):
        # Hide the memset parameter when not used to prevent confusion.
@@ -837,15 +839,15 @@ class GpuAlloc(HideC, Alloc):
    def c_headers(self):
        return ['<numpy_compat.h>']

-    def perform(self, node, inputs, outs, ctx):
+    def perform(self, node, inputs, outs, params):
        out, = outs
        v = inputs[0]
        sh = tuple(map(int, inputs[1:]))
        if out[0] is None or out[0].shape != sh:
            if self.memset_0:
-                out[0] = gpuarray.zeros(sh, dtype=v.dtype, context=ctx)
+                out[0] = gpuarray.zeros(sh, dtype=v.dtype, context=params.context)
            else:
-                out[0] = gpuarray.empty(sh, dtype=v.dtype, context=ctx)
+                out[0] = gpuarray.empty(sh, dtype=v.dtype, context=params.context)
                out[0][...] = v
        else:
            out[0][...] = v
@@ -857,7 +859,6 @@ class GpuAlloc(HideC, Alloc):
        ndim = len(inp[1:])
        zz, = out

-        memset_0 = int(self.memset_0)
        code = """
        int i;
        size_t %(name)s_shape[%(ndim)s];
@@ -875,12 +876,12 @@ class GpuAlloc(HideC, Alloc):
            for (i = 0; i < %(ndim)s; i++)
                need_new_out |= %(zz)s->ga.dimensions[i] != %(name)s_shape[i];

-        if (need_new_out && (%(memset_0)s)) {
+        if (need_new_out && (%(params)s->memset_0)) {
            //pygpu_zeros can be faster then empty followed by memset.
            Py_XDECREF(%(zz)s);
            %(zz)s = pygpu_zeros(%(ndim)s, %(name)s_shape,
                                 %(vv)s->ga.typecode, GA_C_ORDER,
-                                 %(ctx)s, Py_None);
+                                 %(params)s->context, Py_None);
            if (!%(zz)s) {
                %(fail)s
            }
@@ -889,12 +890,12 @@ class GpuAlloc(HideC, Alloc):
                Py_XDECREF(%(zz)s);
                %(zz)s = pygpu_empty(%(ndim)s, %(name)s_shape,
                                     %(vv)s->ga.typecode, GA_C_ORDER,
-                                     %(ctx)s, Py_None);
+                                     %(params)s->context, Py_None);
                if (!%(zz)s) {
                    %(fail)s
                }
            }
-            if (%(memset_0)s && GpuArray_ISONESEGMENT(&%(zz)s->ga))
+            if (%(params)s->memset_0 && GpuArray_ISONESEGMENT(&%(zz)s->ga))
            {
                int err = GpuArray_memset(&%(zz)s->ga, 0);
                if (err != GA_NO_ERROR)
@@ -912,8 +913,8 @@ class GpuAlloc(HideC, Alloc):
                %(fail)s
            }
        }
-        """ % dict(name=name, ndim=ndim, zz=zz, vv=vv, ctx=sub['params'],
-                   fail=sub['fail'], memset_0=memset_0)
+        """ % dict(name=name, ndim=ndim, zz=zz, vv=vv, params=sub['params'],
+                   fail=sub['fail'])

        if config.gpuarray.sync:
            code += "GpuArray_sync(&%(zz)s->ga);" % dict(zz=zz)
@@ -921,7 +922,7 @@ class GpuAlloc(HideC, Alloc):
        return code

    def c_code_cache_version(self):
-        return (3,)
+        return (4,)

    def do_constant_folding(self, node):
        from . import subtensor, blas