Merge pull request #2872 from TimSalimans/erfcx_op

Add new scalar and elemwise op erfcx

Merge pull request #2872 from TimSalimans/erfcx_op
7495a50a · Frédéric Bastien · 450365a9 · a35ff4eb · 7495a50a · 7495a50a
--- a/theano/sandbox/cuda/elemwise.py
+++ b/theano/sandbox/cuda/elemwise.py
@@ -1065,3 +1065,26 @@ class ErfinvGPU(Erfinv):
        return "%(z)s = erfinv(%(x)s);" % locals()

 erfinv_gpu = ErfinvGPU(upgrade_to_float_no_complex, name='erfinv_gpu')
+
+
+class ErfcxGPU(Erfinv):
+    """
+    Provides a c-code implementation of the scaled complementary error function for GPU.
+
+    Note: We do not add this c_code to theano.scalar.basic_scipy.Erfcx, as we
+    currently rely on Nvidia's cublas library to provide the erfcx
+    c-implementation (which requires different c_headers). As it stands,
+    theano.scalar.basic_scipy.Erfcx does not have c_code as scipy does not
+    export the required C function
+    """
+    def c_headers(self):
+        return ['math_functions.h', 'cublas_v2.h']
+
+    def c_code(self, node, name, inp, out, sub):
+        x, = inp
+        z, = out
+        if node.inputs[0].type in complex_types:
+            raise NotImplementedError('type not supported', type)
+        return "%(z)s = erfcx(%(x)s);" % locals()
+
+erfcx_gpu = ErfcxGPU(upgrade_to_float_no_complex, name='erfcx_gpu')
\ No newline at end of file
--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -50,7 +50,9 @@ from theano.sandbox.cuda.nnet import (

 from theano.sandbox.cuda.elemwise import SupportCodeError
 from theano.scalar.basic_scipy import Erfinv
+from theano.scalar.basic_scipy import Erfcx
 from theano.sandbox.cuda.elemwise import erfinv_gpu
+from theano.sandbox.cuda.elemwise import erfcx_gpu
 from theano.sandbox.cuda.var import CudaNdarrayConstant
 from theano.sandbox.cuda import gpu_optimizer, register_opt, gpu_seqopt, GpuOp
 from theano.scan_module import scan_utils, scan_op, scan_opt
@@ -239,6 +241,8 @@ def local_gpu_elemwise_0(node):

                if isinstance(node.op.scalar_op, Erfinv):
                    new_op = GpuElemwise(erfinv_gpu)
+                elif isinstance(node.op.scalar_op, Erfcx):
+                    new_op = GpuElemwise(erfcx_gpu)
                else:
                    try:
                        new_op = GpuElemwise(node.op.scalar_op)
@@ -300,6 +304,8 @@ def local_gpu_elemwise_1(node):

            if isinstance(elemwise_node.op.scalar_op, Erfinv):
                new_op = GpuElemwise(erfinv_gpu)
+            elif isinstance(elemwise_node.op.scalar_op, Erfcx):
+                new_op = GpuElemwise(erfcx_gpu)
            else:
                try:
                    new_op = GpuElemwise(elemwise_node.op.scalar_op)

--- a/theano/scalar/basic_scipy.py
+++ b/theano/scalar/basic_scipy.py
@@ -85,6 +85,41 @@ class Erfc(UnaryScalarOp):
 erfc = Erfc(upgrade_to_float_no_complex, name='erfc')


+class Erfcx(UnaryScalarOp):
+    """
+    Implements the scaled complementary error function exp(x**2)*erfc(x) in a numerically stable way for large x. This
+    is useful for calculating things like log(erfc(x)) = log(erfcx(x)) - x ** 2 without causing underflow. Should only
+    be used if x is known to be large and positive, as using erfcx(x) for large negative x may instead introduce
+    overflow problems.
+
+    Note: This op can still be executed on GPU, despite not having c_code.  When
+    running on GPU, sandbox.cuda.opt.local_gpu_elemwise_[0,1] replaces this op
+    with sandbox.cuda.elemwise.ErfcxGPU.
+    """
+    def impl(self, x):
+        if imported_scipy_special:
+            return scipy.special.erfcx(x)
+        else:
+            super(Erfcx, self).impl(x)
+
+    def grad(self, inp, grads):
+        x, = inp
+        gz, = grads
+        if x.type in complex_types:
+            raise NotImplementedError()
+        if self(x).type in discrete_types:
+            if x.type in discrete_types:
+                return [x.zeros_like(dtype=theano.config.floatX)]
+            else:
+                return [x.zeros_like()]
+
+        cst = numpy.asarray(2. / numpy.sqrt(numpy.pi),
+                            dtype=upcast(x.type.dtype, gz.type.dtype))
+        return gz * (-cst + (2. * x) * erfcx(x)),
+
+erfcx = Erfcx(upgrade_to_float_no_complex, name='erfcx')
+
+
 class Erfinv(UnaryScalarOp):
    """
    Implements the inverse error function.

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -1937,6 +1937,11 @@ def erfc(a):
    """complementary error function"""


+@_scal_elemwise
+def erfcx(a):
+    """scaled complementary error function"""
+
+
 @_scal_elemwise
 def erfinv(a):
    """inverse error function"""

--- a/theano/tensor/inplace.py
+++ b/theano/tensor/inplace.py
@@ -263,6 +263,11 @@ def erfc_inplace(a):
    """complementary error function"""


+@_scal_inplace
+def erfcx_inplace(a):
+    """scaled complementary error function"""
+
+
 @_scal_inplace
 def gamma_inplace(a):
    """gamma function"""

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -17,6 +17,7 @@ from nose.plugins.attrib import attr
 import numpy
 from numpy.testing import dec, assert_array_equal, assert_allclose
 from numpy.testing.noseclasses import KnownFailureTest
+from distutils.version import LooseVersion

 import theano
 from theano.compat import PY3, exc_message, operator_div
@@ -57,6 +58,7 @@ mode_no_scipy = get_default_mode()
 try:
    import scipy.special
    import scipy.stats
+    from scipy import __version__ as scipy_version
    imported_scipy_special = True
 except ImportError:
    if config.mode == "FAST_COMPILE":
@@ -1079,6 +1081,11 @@ _good_broadcast_unary_normal_float_no_empty_no_complex = copymod(
 _good_broadcast_unary_normal_float_no_complex = copymod(
        _good_broadcast_unary_normal_float,
        without=['complex'])
+        
+_good_broadcast_unary_normal_float_no_complex_small_neg_range = dict(
+        normal=[rand_ranged(-2, 5, (2, 3))],
+        corner_case=[corner_case],
+        empty=[numpy.asarray([], dtype=config.floatX)])

 _good_broadcast_unary_normal = dict(
        normal=[numpy.asarray(rand_ranged(-5, 5, (2, 3)),
@@ -1108,6 +1115,10 @@ _grad_broadcast_unary_normal = dict(
        corner_case=[corner_case_grad],
        # empty = [numpy.asarray([])] # XXX: should this be included?
        )
+        
+_grad_broadcast_unary_normal_small_neg_range = dict(
+        normal=[numpy.asarray(rand_ranged(-2, 5, (2, 3)), dtype=floatX)],
+        corner_case=[corner_case_grad])

 _grad_broadcast_unary_normal_no_complex_no_corner_case = copymod(
        _grad_broadcast_unary_normal_no_complex,
@@ -1650,9 +1661,16 @@ if imported_scipy_special:
    expected_psi = scipy.special.psi
    expected_chi2sf = lambda x, df: scipy.stats.chi2.sf(x, df).astype(x.dtype)
    skip_scipy = False
+    if LooseVersion(scipy_version) >= LooseVersion("0.12.0"):
+        expected_erfcx = scipy.special.erfcx
+        skip_scipy12 = False
+    else:
+        expected_erfcx = []
+        skip_scipy12 = "the erfcx op requires scipy version >= 0.12, installed version is " + scipy_version
 else:
    expected_erf = []
    expected_erfc = []
+    expected_erfcx = []
    expected_erfinv = []
    expected_erfcinv = []
    expected_gamma = []
@@ -1660,6 +1678,7 @@ else:
    expected_psi = []
    expected_chi2sf = []
    skip_scipy = "scipy is not present"
+    skip_scipy12 = "scipy is not present"

 ErfTester = makeBroadcastTester(
    op=tensor.erf,
@@ -1697,6 +1716,24 @@ ErfcInplaceTester = makeBroadcastTester(
    inplace=True,
    skip=skip_scipy)

+ErfcxTester = makeBroadcastTester(
+    op=tensor.erfcx,
+    expected=expected_erfcx,
+    good=_good_broadcast_unary_normal_float_no_complex_small_neg_range,
+    grad=_grad_broadcast_unary_normal_small_neg_range,
+    eps=2e-10,
+    mode=mode_no_scipy,
+    skip=skip_scipy12)
+ErfcxInplaceTester = makeBroadcastTester(
+    op=inplace.erfcx_inplace,
+    expected=expected_erfcx,
+    good=_good_broadcast_unary_normal_float_no_complex_small_neg_range,
+    grad=_grad_broadcast_unary_normal_small_neg_range,
+    eps=2e-10,
+    mode=mode_no_scipy,
+    inplace=True,
+    skip=skip_scipy12)
+
 ErfinvTester = makeBroadcastTester(
    op=tensor.erfinv,
    expected=expected_erfinv,