Commit fff9c1f7 authored by Pierre Luc Carrier

Mostly adapted Op and tests to new backend. TODO: Remove faulty python implementation from _dev20 version of op

Parent: 6936dd28
@@ -10,6 +10,7 @@ from theano.gof import (local_optimizer, EquilibriumDB,
 from theano.gof.python25 import all, any
 from theano.tensor.nnet.conv import ConvOp
+from theano.sandbox.cuda.basic_ops import device_properties
 from theano.sandbox.gpuarray.type import GpuArrayType
 from theano.sandbox.gpuarray.basic_ops import (host_from_gpu,
                                                gpu_from_host,
@@ -25,7 +26,9 @@ from theano.sandbox.gpuarray.nnet import (GpuCrossentropySoftmaxArgmax1HotWithBi
                                           GpuSoftmax)
 from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar,
                                               GpuDimShuffle, GpuCAReduceCuda)
-from theano.sandbox.gpuarray.subtensor import GpuIncSubtensor, GpuSubtensor
+from theano.sandbox.gpuarray.subtensor import (GpuIncSubtensor, GpuSubtensor,
+                                               GpuAdvancedIncSubtensor1,
+                                               GpuAdvancedIncSubtensor1_dev20)
 from theano.sandbox.gpuarray.type import GpuArrayConstant

 gpu_optimizer = EquilibriumDB()
@@ -241,6 +244,23 @@ def local_gpua_incsubtensor(node):
     return GpuIncSubtensor(node.op.idx_list, node.op.inplace,
                            node.op.set_instead_of_inc,
                            node.op.destroyhandler_tolerate_aliased)
+
+
+@register_opt()
+@op_lifter([tensor.AdvancedIncSubtensor1])
+def local_gpua_advanced_incsubtensor(node):
+    x, y = node.inputs[0:2]
+    coords = node.inputs[2:]
+    set_instead_of_inc = node.op.set_instead_of_inc
+    active_device_no = theano.sandbox.cuda.active_device_number()
+    compute_capability = device_properties(active_device_no)['major']
+    if (compute_capability < 2 or x.ndim != 2 or y.ndim != 2):
+        return GpuAdvancedIncSubtensor1(
+            set_instead_of_inc=set_instead_of_inc)
+    else:
+        return GpuAdvancedIncSubtensor1_dev20(
+            set_instead_of_inc=set_instead_of_inc)
+
 @register_opt()
...
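
The lifter above chooses between two GPU implementations of the same op, so both must reproduce the host semantics of AdvancedIncSubtensor1. As a point of reference, a minimal NumPy sketch of those semantics; the helper name advanced_inc_subtensor1_ref is hypothetical, not part of Theano:

import numpy as np

def advanced_inc_subtensor1_ref(x, y, ilist, set_instead_of_inc=False):
    # Reference semantics: set or increment the rows of x selected by
    # ilist with the corresponding rows of y, without mutating x.
    out = x.copy()
    if set_instead_of_inc:
        out[ilist] = y            # plain fancy-index assignment
    else:
        np.add.at(out, ilist, y)  # accumulates over duplicate indices
    return out

# Mirrors the test below: rows 0 and 2 of a (3, 3) array get +10.
xval = np.arange(9, dtype='float32').reshape(3, 3) + 1
yval = np.empty((2, 3), dtype='float32')
yval[:] = 10
print(advanced_inc_subtensor1_ref(xval, yval, [0, 2]))
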
+import numpy
+import theano
 from theano.tensor.tests.test_subtensor import T_subtensor
 from theano.sandbox.gpuarray.basic_ops import (HostFromGpu, GpuFromHost)
-from theano.sandbox.gpuarray.subtensor import GpuIncSubtensor, GpuSubtensor
+from theano.sandbox.gpuarray.subtensor import (GpuIncSubtensor, GpuSubtensor,
+                                               GpuAdvancedIncSubtensor1)
 from theano.sandbox.gpuarray.type import gpuarray_shared_constructor
@@ -21,6 +25,7 @@ class G_subtensor(T_subtensor):
         shared=gpuarray_shared_constructor,
         sub=GpuSubtensor,
         inc_sub=GpuIncSubtensor,
+        adv_incsub1=GpuAdvancedIncSubtensor1,
         mode=mode_with_gpu,
         # avoid errors with limited devices
         dtype='float32',
@@ -34,17 +39,17 @@ class G_subtensor(T_subtensor):
 def test_advinc_subtensor1():
     """ Test the second case in the opt local_gpu_advanced_incsubtensor1 """
     for shp in [(3, 3), (3, 3, 3)]:
-        shared = cuda.shared_constructor
+        shared = gpuarray_shared_constructor
         xval = numpy.arange(numpy.prod(shp), dtype='float32').reshape(shp) + 1
         yval = numpy.empty((2,) + shp[1:], dtype='float32')
         yval[:] = 10
         x = shared(xval, name='x')
-        y = T.tensor(dtype='float32',
-                     broadcastable=(False,) * len(shp),
-                     name='y')
-        expr = T.advanced_inc_subtensor1(x, y, [0, 2])
+        y = tensor.tensor(dtype='float32',
+                          broadcastable=(False,) * len(shp),
+                          name='y')
+        expr = tensor.advanced_inc_subtensor1(x, y, [0, 2])
         f = theano.function([y], expr, mode=mode_with_gpu)
-        assert sum([isinstance(node.op, cuda.GpuAdvancedIncSubtensor1)
+        assert sum([isinstance(node.op, GpuAdvancedIncSubtensor1)
                     for node in f.maker.fgraph.toposort()]) == 1
         rval = f(yval)
         rep = xval.copy()
...
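
For orientation, a hedged end-to-end sketch of how the new lifter is exercised outside the test suite. The mode construction is an assumption (the test module builds its mode_with_gpu along these lines); the other names come from the diff above:

import numpy
import theano
import theano.tensor as tensor
from theano.sandbox.gpuarray.type import gpuarray_shared_constructor
from theano.sandbox.gpuarray.subtensor import (GpuAdvancedIncSubtensor1,
                                               GpuAdvancedIncSubtensor1_dev20)

# Assumed: enabling the gpuarray optimizations via their optdb tag,
# similar to the mode_with_gpu used by the tests.
mode_with_gpu = theano.compile.get_default_mode().including('gpuarray')

# x lives on the GPU via the shared constructor; y stays symbolic.
x = gpuarray_shared_constructor(
    numpy.arange(9, dtype='float32').reshape(3, 3) + 1, name='x')
y = tensor.fmatrix('y')

# Increment rows 0 and 2 of x by the rows of y.
expr = tensor.advanced_inc_subtensor1(x, y, [0, 2])
f = theano.function([y], expr, mode=mode_with_gpu)

# The lifter should pick the _dev20 op on compute capability >= 2 when
# both x and y are 2-d, and the generic op otherwise.
gpu_ops = [type(node.op) for node in f.maker.fgraph.toposort()]
assert (GpuAdvancedIncSubtensor1 in gpu_ops or
        GpuAdvancedIncSubtensor1_dev20 in gpu_ops)
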