提交 7f3ea07c authored 作者: Guillaume Desjardins's avatar Guillaume Desjardins

Make erfinv work on the GPU.

上级 487aa554
...@@ -9,6 +9,8 @@ import copy, logging, StringIO, sys ...@@ -9,6 +9,8 @@ import copy, logging, StringIO, sys
import numpy import numpy
from theano.scalar.basic import upgrade_to_float_no_complex, complex_types
from theano.scalar.basic_scipy import Erfinv
from theano import Apply, Constant, Op, Type, Variable from theano import Apply, Constant, Op, Type, Variable
from theano import gof, scalar, tensor from theano import gof, scalar, tensor
...@@ -1021,3 +1023,20 @@ nd_collapse_[i]=0; ...@@ -1021,3 +1023,20 @@ nd_collapse_[i]=0;
#print sio.getvalue() #print sio.getvalue()
return sio.getvalue() return sio.getvalue()
class ErfinvGPU(Erfinv):
    """GPU variant of the scalar inverse-error-function op.

    CUDA's ``math_functions.h`` provides a device-side ``erfinv()``, so the
    C code emitted here can run inside a GpuElemwise kernel instead of
    requiring scipy on the host.
    """

    def c_libraries(self):
        # No extra libraries are needed: 'math.h' (which this previously
        # returned) is a header, not a linkable library, and would have
        # produced a bogus `-lmath.h` link flag.
        return []

    def c_headers(self):
        # math_functions.h supplies the device erfinv(); cublas_v2.h is
        # kept for the CUDA toolchain environment this op compiles under.
        return ['math_functions.h', 'cublas_v2.h']

    def c_code(self, node, name, inp, out, sub):
        """Return the C statement computing erfinv of the single input.

        Raises NotImplementedError for complex inputs, which erfinv does
        not support.
        """
        x, = inp
        z, = out
        if node.inputs[0].type in complex_types:
            # Report the actual offending type (the original passed the
            # builtin `type`, which is meaningless in the message).
            raise NotImplementedError('type not supported',
                                      node.inputs[0].type)
        return "%(z)s = erfinv(%(x)s);" % locals()


# Pre-built instance used by the GPU optimizer when substituting Erfinv.
erfinv_gpu = ErfinvGPU(upgrade_to_float_no_complex, name='erfinv_gpu')
...@@ -33,6 +33,8 @@ from theano.sandbox.cuda.nnet import ( ...@@ -33,6 +33,8 @@ from theano.sandbox.cuda.nnet import (
GpuCrossentropySoftmax1HotWithBiasDx, GpuCrossentropySoftmax1HotWithBiasDx,
GpuSoftmax, GpuSoftmaxWithBias) GpuSoftmax, GpuSoftmaxWithBias)
from theano.sandbox.cuda.elemwise import SupportCodeError from theano.sandbox.cuda.elemwise import SupportCodeError
from theano.scalar.basic_scipy import Erfinv
from theano.sandbox.cuda.elemwise import ErfinvGPU, erfinv_gpu
from theano.sandbox.cuda.var import CudaNdarrayConstant from theano.sandbox.cuda.var import CudaNdarrayConstant
from theano.scan_module import scan_utils, scan_op from theano.scan_module import scan_utils, scan_op
from theano.tensor.blas import _is_real_vector, _is_real_matrix from theano.tensor.blas import _is_real_vector, _is_real_matrix
...@@ -177,11 +179,15 @@ def local_gpu_elemwise_0(node): ...@@ -177,11 +179,15 @@ def local_gpu_elemwise_0(node):
if numpy.all([o.type.dtype == 'float32' for o in node.outputs]): if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
# Don't set any inplace pattern. # Don't set any inplace pattern.
# gpu_inplace_elemwise_optimizer will do it later # gpu_inplace_elemwise_optimizer will do it later
try:
new_op = GpuElemwise(node.op.scalar_op) if isinstance(node.op.scalar_op, Erfinv):
except SupportCodeError: new_op = GpuElemwise(erfinv_gpu)
# This happens when scalar_op requires support code else:
return False try:
new_op = GpuElemwise(node.op.scalar_op)
except SupportCodeError:
# This happens when scalar_op requires support code
return False
# first establish that float32 can store all inputs # first establish that float32 can store all inputs
upcastable = set(['float32', 'int8', 'int16', 'uint8', upcastable = set(['float32', 'int8', 'int16', 'uint8',
...@@ -234,11 +240,16 @@ def local_gpu_elemwise_1(node): ...@@ -234,11 +240,16 @@ def local_gpu_elemwise_1(node):
elemwise_node = host_i.owner elemwise_node = host_i.owner
# Don't set any inplace pattern. # Don't set any inplace pattern.
# gpu_inplace_elemwise_optimizer will do it later # gpu_inplace_elemwise_optimizer will do it later
try:
new_op = GpuElemwise(elemwise_node.op.scalar_op) if isinstance(node.op.scalar_op, Erfinv):
except SupportCodeError: new_op = GpuElemwise(erfinv_gpu)
# This happens when scalar_op requires support code else:
return False try:
new_op = GpuElemwise(elemwise_node.op.scalar_op)
except SupportCodeError:
# This happens when scalar_op requires support code
return False
if all([i.dtype == 'float32' for i in elemwise_node.inputs]): if all([i.dtype == 'float32' for i in elemwise_node.inputs]):
gpu_elemwise = new_op(*[gpu_from_host(i) gpu_elemwise = new_op(*[gpu_from_host(i)
for i in elemwise_node.inputs]) for i in elemwise_node.inputs])
......
...@@ -17,6 +17,7 @@ if cuda.cuda_available == False: ...@@ -17,6 +17,7 @@ if cuda.cuda_available == False:
from theano.sandbox.cuda import basic_ops from theano.sandbox.cuda import basic_ops
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.scalar.basic_scipy import erfinv
if theano.config.mode=='FAST_COMPILE': if theano.config.mode=='FAST_COMPILE':
mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu') mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
...@@ -368,6 +369,18 @@ def test_incsubtensor_mixed(): ...@@ -368,6 +369,18 @@ def test_incsubtensor_mixed():
client, idx = packed client, idx = packed
assert isinstance(client.op, cuda.GpuFromHost) assert isinstance(client.op, cuda.GpuFromHost)
def test_erfinvgpu():
    """Check that the GPU optimizer swaps Erfinv for ErfinvGPU and that
    the GPU and CPU results agree numerically."""
    inp = tensor.fmatrix()
    expr = tensor.Elemwise(erfinv)(inp)
    f_gpu = theano.function([inp], expr, mode=mode_with_gpu)
    f_cpu = theano.function([inp], expr, mode=mode_without_gpu)
    gpu_node = f_gpu.maker.fgraph.toposort()[1]
    assert isinstance(gpu_node.op, cuda.GpuElemwise)
    assert isinstance(gpu_node.op.scalar_op, cuda.elemwise.ErfinvGPU)
    data = numpy.random.rand(7, 8).astype('float32')
    assert numpy.allclose(f_gpu(data), f_cpu(data))
if __name__ == '__main__': if __name__ == '__main__':
test_gpualloc() test_gpualloc()
test_opt_gpujoin_onlyajoin() test_opt_gpujoin_onlyajoin()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论