提交 cc9b423a authored 作者: Reyhane Askari

removed context_name set to None and fixed GpuAllocEmpty input order

上级 1d7b9bdb
@@ -954,7 +954,7 @@ class GpuAllocEmpty(HideC, AllocEmpty):
     _f16_ok = True
     params_type = gpu_context_type
-    def __init__(self, dtype, context_name=None):
+    def __init__(self, dtype, context_name):
         self.dtype = dtype
         self.context_name = context_name
......
@@ -966,7 +966,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
                    shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1,
                    shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1)
         out_shp = assert_conv_shape(out_shp)
-        out = GpuAllocEmpty(ctx_name, dtype=img.dtype)(*out_shp)
+        out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
         desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
                               conv_mode='cross', precision=precision)(out.shape)
         conv = GpuDnnConvGradW()(img, kerns, out, desc)
@@ -985,7 +985,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
                    shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1,
                    shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1)
         out_shp = assert_conv_shape(out_shp)
-        out = GpuAllocEmpty(ctx_name, dtype=img.dtype)(*out_shp)
+        out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
         desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
                               conv_mode=conv_mode, precision=precision)(kerns.shape)
         return GpuDnnConvGradI()(kerns, img, out, desc)
@@ -1006,7 +1006,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
                                   desc_op.border_mode,
                                   desc_op.subsample)
     out_shp = assert_conv_shape(out_shp)
-    out = GpuAllocEmpty(ctx_name, dtype=img.dtype)(*out_shp)
+    out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
     return GpuDnnConv(algo=algo)(img, kerns, out, desc)
@@ -1078,7 +1078,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
                    shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1,
                    shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1)
         out_shp = assert_conv_shape(out_shp)
-        out = GpuAllocEmpty(ctx_name, dtype=img.dtype)(*out_shp)
+        out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
         desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
                               conv_mode='cross', precision=precision)(out.shape)
         conv = GpuDnnConvGradW()(img, kerns, out, desc)
@@ -1098,7 +1098,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
                    shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1,
                    shape_i(img, 4, fgraph) + shape_i(kerns, 4, fgraph) - 1)
         out_shp = assert_conv_shape(out_shp)
-        out = GpuAllocEmpty(ctx_name, dtype=img.dtype)(*out_shp)
+        out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
         desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
                               conv_mode=conv_mode, precision=precision)(kerns.shape)
         return GpuDnnConvGradI()(kerns, img, out, desc)
@@ -1119,7 +1119,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
                                   desc_op.border_mode,
                                   desc_op.subsample)
     out_shp = assert_conv_shape(out_shp)
-    out = GpuAllocEmpty(ctx_name, dtype=img.dtype)(*out_shp)
+    out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
     return GpuDnnConv(algo=algo)(img, kerns, out, desc)
@@ -1151,7 +1151,6 @@ def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid',
     return dnn_gradweight(img, topgrad, kerns_shp, border_mode,
                           subsample, conv_mode, precision)
-
 def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
                   subsample=(1, 1), conv_mode='conv', precision=None):
     """
@@ -1180,7 +1179,6 @@ def dnn_gradinput3d(kerns, topgrad, img_shp, border_mode='valid',
     return dnn_gradinput(kerns, topgrad, img_shp, border_mode, subsample,
                          conv_mode, precision)
-
 class GpuDnnPoolDesc(Op):
     """
......
@@ -8,7 +8,7 @@ try:
 except ImportError:
     pass
-from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, GpuReshape)
+from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, GpuReshape, infer_context_name)
 from .opt import register_opt, op_lifter, register_opt2
......
@@ -157,7 +157,7 @@ def local_gpua_dot_to_gemm16(op, ctx_name, inputs, outputs):
     B = inputs[1]
     if (A.ndim == 2 and B.ndim == 2 and
             A.dtype == 'float16' and B.dtype == 'float16'):
-        fgraph = inputs[0].fgraph
+        fgraph = getattr(outputs[0], 'fgraph', None)
         C = GpuAllocEmpty('float16', ctx_name)(
             shape_i(A, 0, fgraph), shape_i(B, 1, fgraph))
         return Gemm16()(C, 1.0, A, B, 0.0)
......
@@ -61,7 +61,7 @@ from .nnet import (gpu_crossentropy_softmax_1hot_with_bias_dx,
                    gpu_crossentropy_softmax_argmax_1hot_with_bias,
                    gpu_softmax_with_bias, gpu_softmax)
 from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda,
-                       GpuCAReduceCPY, gpu_ca_reduce_cuda, gpu_erfinv, gpu_erfcinv,
+                       GpuCAReduceCPY, gpu_erfinv, gpu_erfcinv,
                        max_inputs_to_GpuElemwise)
 from .subtensor import (GpuIncSubtensor, GpuSubtensor,
                         GpuAdvancedSubtensor,
@@ -614,7 +614,7 @@ def local_gpuaalloc(op, context_name, inputs, outputs):
 def local_gpua_alloc_empty(op, context_name, inputs, outputs):
     # We use _props_dict() to make sure that the GPU op know all the
     # CPU op props.
-    return GpuAllocEmpty(**op._props_dict())(*inputs)
+    return GpuAllocEmpty(context_name=context_name, **op._props_dict())(*inputs)

 @register_opt()
......
@@ -152,7 +152,7 @@ def traverse(out, x, x_copy, d, visited=None):
         return d
     visited.add(out)
     from theano.sandbox import cuda
-    from theano.gpuarray.basic_ops import gpu_from_host, host_from_gpu
+    from theano.gpuarray.basic_ops import GpuFromHost, host_from_gpu
     from theano.gpuarray import pygpu_activated
     from theano.gpuarray.type import GpuArrayType
     if out == x:
@@ -160,7 +160,7 @@ def traverse(out, x, x_copy, d, visited=None):
             d[out] = cuda.gpu_from_host(x_copy)
         else:
             assert isinstance(x.type, GpuArrayType)
-            d[out] = gpu_from_host(x.type.context_name)(x_copy)
+            d[out] = GpuFromHost(x.type.context_name)(x_copy)
         return d
     elif out.owner is None:
         return d
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论