提交 de536bd5 authored 作者: sentient07's avatar sentient07

Reshape crashfix and added caching at other places

上级 e01db583
...@@ -956,6 +956,14 @@ def empty_like(var): ...@@ -956,6 +956,14 @@ def empty_like(var):
return GpuAllocEmpty(var.type.dtype, var.type.context_name)(*var.shape) return GpuAllocEmpty(var.type.dtype, var.type.context_name)(*var.shape)
def gpu_alloc_empty(dtype, context_name):
    """Return a cached ``GpuAllocEmpty`` Op for ``(dtype, context_name)``.

    Constructing a ``GpuAllocEmpty`` per call site creates many identical
    Op instances; this memoizes one instance per (dtype, context) pair and
    reuses it.

    Parameters
    ----------
    dtype : str
        The dtype of the Op to build (e.g. ``'float16'``).
    context_name : str or None
        The GPU context name, forwarded to ``GpuAllocEmpty``.

    Returns
    -------
    GpuAllocEmpty
        The cached Op instance for this key.
    """
    # NOTE: the second parameter must be named ``context_name`` so the
    # keyword call sites (e.g. ``gpu_alloc_empty(dtype='float16',
    # context_name=ctx_name)``) resolve; a positional second argument
    # keeps working as well.
    key = (dtype, context_name)
    if key not in gpu_alloc_empty.cache:
        gpu_alloc_empty.cache[key] = GpuAllocEmpty(dtype, context_name)
    return gpu_alloc_empty.cache[key]
# Module-level memo table; keyed by (dtype, context_name).
gpu_alloc_empty.cache = {}
class GpuContiguous(Op): class GpuContiguous(Op):
""" """
Return a C contiguous version of the input. Return a C contiguous version of the input.
...@@ -1031,6 +1039,7 @@ class GpuReshape(HideC, tensor.Reshape): ...@@ -1031,6 +1039,7 @@ class GpuReshape(HideC, tensor.Reshape):
def make_node(self, x, shp): def make_node(self, x, shp):
ctx_name = infer_context_name(x) ctx_name = infer_context_name(x)
x = as_gpuarray_variable(x, context_name=ctx_name) x = as_gpuarray_variable(x, context_name=ctx_name)
shp = tensor.as_tensor_variable(shp)
res = host_from_gpu(x).reshape(shp, ndim=self.ndim) res = host_from_gpu(x).reshape(shp, ndim=self.ndim)
otype = GpuArrayType(dtype=res.dtype, otype = GpuArrayType(dtype=res.dtype,
broadcastable=res.broadcastable, broadcastable=res.broadcastable,
......
...@@ -25,14 +25,15 @@ from theano.tensor.signal.pool import ( ...@@ -25,14 +25,15 @@ from theano.tensor.signal.pool import (
from . import pygpu from . import pygpu
from .type import get_context, gpu_context_type, list_contexts, GpuArrayType from .type import get_context, gpu_context_type, list_contexts, GpuArrayType
from .basic_ops import (as_gpuarray_variable, infer_context_name, from .basic_ops import (as_gpuarray_variable, infer_context_name,
gpu_contiguous, GpuAllocEmpty, empty_like) gpu_contiguous, GpuAllocEmpty, gpu_alloc_empty,
empty_like)
from .elemwise import GpuElemwise from .elemwise import GpuElemwise
# These don't exist in gpuarray # These don't exist in gpuarray
# GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad # GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
from .nnet import GpuSoftmax from .nnet import GpuSoftmax
from .opt import (gpu_seqopt, register_opt, conv_groupopt, from .opt import (gpu_seqopt, register_opt, conv_groupopt,
op_lifter, register_opt2, gpu_alloc_empty) op_lifter, register_opt2)
from .opt_util import alpha_merge, output_merge, inplace_allocempty from .opt_util import alpha_merge, output_merge, inplace_allocempty
......
...@@ -158,7 +158,7 @@ def local_dot_to_gemm16(op, ctx_name, inputs): ...@@ -158,7 +158,7 @@ def local_dot_to_gemm16(op, ctx_name, inputs):
if (A.ndim == 2 and B.ndim == 2 and if (A.ndim == 2 and B.ndim == 2 and
A.dtype == 'float16' and B.dtype == 'float16'): A.dtype == 'float16' and B.dtype == 'float16'):
fgraph = inputs[0].fgraph fgraph = inputs[0].fgraph
C = GpuAllocEmpty(dtype='float16', context_name=ctx_name)( C = gpu_alloc_empty(dtype='float16', context_name=ctx_name)(
shape_i(A, 0, fgraph), shape_i(B, 1, fgraph)) shape_i(A, 0, fgraph), shape_i(B, 1, fgraph))
return Gemm16()(C, 1.0, A, B, 0.0) return Gemm16()(C, 1.0, A, B, 0.0)
......
...@@ -32,7 +32,7 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name, ...@@ -32,7 +32,7 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name,
HostFromGpu, GpuFromHost, HostFromGpu, GpuFromHost,
GpuSplit, GpuContiguous, gpu_contiguous, GpuSplit, GpuContiguous, gpu_contiguous,
GpuAlloc, GpuAllocEmpty, GpuReshape, GpuAlloc, GpuAllocEmpty, GpuReshape,
GpuEye, gpu_join, GpuJoin) GpuEye, gpu_join, GpuJoin, gpu_alloc_empty)
from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch, from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch,
gpugemm_no_inplace, gpugemm_inplace, gpugemmbatch_no_inplace, gpugemm_no_inplace, gpugemm_inplace, gpugemmbatch_no_inplace,
gpugemv_no_inplace, gpugemv_inplace) gpugemv_no_inplace, gpugemv_inplace)
...@@ -61,14 +61,6 @@ gpu_optimizer2 = EquilibriumDB() ...@@ -61,14 +61,6 @@ gpu_optimizer2 = EquilibriumDB()
gpu_cut_copies = EquilibriumDB() gpu_cut_copies = EquilibriumDB()
def gpu_alloc_empty(dtype, context_name):
    """Return a cached ``GpuAllocEmpty`` Op for ``(dtype, context_name)``.

    Memoizes one ``GpuAllocEmpty`` instance per (dtype, context) pair so
    repeated rewrites reuse the same Op object instead of allocating a new
    one each time.

    Parameters
    ----------
    dtype : str
        The dtype of the Op to build.
    context_name : str or None
        The GPU context name, forwarded to ``GpuAllocEmpty``.

    Returns
    -------
    GpuAllocEmpty
        The cached Op instance for this key.
    """
    # ``context_name`` (not ``ctx``) so keyword callers such as
    # ``gpu_alloc_empty(context_name=context_name, **op._props_dict())``
    # do not raise TypeError.
    key = (dtype, context_name)
    op = gpu_alloc_empty.cache.get(key)
    if op is None:
        op = GpuAllocEmpty(dtype, context_name)
        gpu_alloc_empty.cache[key] = op
    return op
# Module-level memo table; keyed by (dtype, context_name).
gpu_alloc_empty.cache = {}
class GraphToGPUDB(DB): class GraphToGPUDB(DB):
""" """
Retrieves the list local optimizers based on the optimizer flag's value Retrieves the list local optimizers based on the optimizer flag's value
...@@ -456,7 +448,7 @@ def local_gpuaalloc(op, context_name, inputs): ...@@ -456,7 +448,7 @@ def local_gpuaalloc(op, context_name, inputs):
def local_gpuaallocempty(op, context_name, inputs): def local_gpuaallocempty(op, context_name, inputs):
# We use _props_dict() to make sure that the GPU op know all the # We use _props_dict() to make sure that the GPU op know all the
# CPU op props. # CPU op props.
return GpuAllocEmpty(context_name=context_name, return gpu_alloc_empty(context_name=context_name,
**op._props_dict())(*inputs) **op._props_dict())(*inputs)
...@@ -975,7 +967,7 @@ def local_gpua_hgemm(op, context_name, inputs): ...@@ -975,7 +967,7 @@ def local_gpua_hgemm(op, context_name, inputs):
if (A.ndim == 2 and B.ndim == 2 and if (A.ndim == 2 and B.ndim == 2 and
A.dtype == 'float16' and B.dtype == 'float16'): A.dtype == 'float16' and B.dtype == 'float16'):
fgraph = inputs[0].fgraph fgraph = inputs[0].fgraph
C = GpuAllocEmpty(dtype='float16', context_name=context_name)( C = gpu_alloc_empty(dtype='float16', context_name=context_name)(
shape_i(A, 0, fgraph), shape_i(A, 0, fgraph),
shape_i(B, 1, fgraph)) shape_i(B, 1, fgraph))
return gpugemm_no_inplace(C, 1.0, A, B, 0.0) return gpugemm_no_inplace(C, 1.0, A, B, 0.0)
...@@ -1024,7 +1016,7 @@ def local_gpua_dot22scalar(op, context_name, inputs): ...@@ -1024,7 +1016,7 @@ def local_gpua_dot22scalar(op, context_name, inputs):
x, y, a = inputs x, y, a = inputs
x = as_gpuarray_variable(x, context_name) x = as_gpuarray_variable(x, context_name)
y = as_gpuarray_variable(y, context_name) y = as_gpuarray_variable(y, context_name)
z = GpuAllocEmpty(x.dtype, context_name)(x.shape[0], y.shape[1]) z = gpu_alloc_empty(x.dtype, context_name)(x.shape[0], y.shape[1])
return [gpugemm_no_inplace(z, a, x, y, 0)] return [gpugemm_no_inplace(z, a, x, y, 0)]
......
...@@ -8,7 +8,7 @@ from theano.gof import local_optimizer ...@@ -8,7 +8,7 @@ from theano.gof import local_optimizer
from theano.tensor import (DimShuffle, get_scalar_constant_value, from theano.tensor import (DimShuffle, get_scalar_constant_value,
NotScalarConstantError) NotScalarConstantError)
from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty, gpu_alloc_empty
from .elemwise import GpuDimShuffle, GpuElemwise from .elemwise import GpuDimShuffle, GpuElemwise
_one = scal.constant(numpy.asarray(1.0, dtype='float32')) _one = scal.constant(numpy.asarray(1.0, dtype='float32'))
...@@ -324,7 +324,7 @@ def inplace_allocempty(op, idx): ...@@ -324,7 +324,7 @@ def inplace_allocempty(op, idx):
if (alloc.owner and if (alloc.owner and
isinstance(alloc.owner.op, GpuAllocEmpty) and isinstance(alloc.owner.op, GpuAllocEmpty) and
len(alloc.clients) > 1): len(alloc.clients) > 1):
alloc_op = GpuAllocEmpty(alloc.owner.op.dtype, alloc_op = gpu_alloc_empty(alloc.owner.op.dtype,
alloc.owner.op.context_name) alloc.owner.op.context_name)
inputs[idx] = alloc_op(*alloc.owner.inputs) inputs[idx] = alloc_op(*alloc.owner.inputs)
return maker(node, inputs) return maker(node, inputs)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论