Add new scalar and elemwise op erfcx

op for calculating the scaled complementary error function exp(x**2)*erfc(x) in a numerically stable way. This op is useful for calculating various things related to Gaussian distributions, such as truncated moments and expressions involving the log(cdf(x)) for large negative values of x. The Theano code closely follows that of the Erfinv op.

Add new scalar and elemwise op erfcx
bdf60983 · TimSalimans · 10d68dd8 · bdf60983 · bdf60983 · bdf60983
--- a/theano/sandbox/cuda/elemwise.py
+++ b/theano/sandbox/cuda/elemwise.py
@@ -1064,3 +1064,26 @@ class ErfinvGPU(Erfinv):
        return "%(z)s = erfinv(%(x)s);" % locals()

 erfinv_gpu = ErfinvGPU(upgrade_to_float_no_complex, name='erfinv_gpu')
+
+
+class ErfcxGPU(Erfinv):
+    """
+    Provides a c-code implementation of the scaled complementary error function for GPU.
+
+    Note: We do not add this c_code to theano.scalar.basic_scipy.Erfcx, as we
+    currently rely on Nvidia's cublas library to provide the erfcx
+    c-implementation (which requires different c_headers). As it stands,
+    theano.scalar.basic_scipy.Erfcx does not have c_code as scipy does not
+    export the required C function
+    """
+    def c_headers(self):
+        return ['math_functions.h', 'cublas_v2.h']
+
+    def c_code(self, node, name, inp, out, sub):
+        x, = inp
+        z, = out
+        if node.inputs[0].type in complex_types:
+            raise NotImplementedError('type not supported', type)
+        return "%(z)s = erfcx(%(x)s);" % locals()
+
+erfcx_gpu = ErfcxGPU(upgrade_to_float_no_complex, name='erfcx_gpu')
\ No newline at end of file
--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -48,7 +48,9 @@ from theano.sandbox.cuda.nnet import (

 from theano.sandbox.cuda.elemwise import SupportCodeError
 from theano.scalar.basic_scipy import Erfinv
+from theano.scalar.basic_scipy import Erfcx
 from theano.sandbox.cuda.elemwise import erfinv_gpu
+from theano.sandbox.cuda.elemwise import erfcx_gpu
 from theano.sandbox.cuda.var import CudaNdarrayConstant
 from theano.sandbox.cuda import gpu_optimizer, register_opt, gpu_seqopt, GpuOp
 from theano.scan_module import scan_utils, scan_op, scan_opt
@@ -237,6 +239,8 @@ def local_gpu_elemwise_0(node):

                if isinstance(node.op.scalar_op, Erfinv):
                    new_op = GpuElemwise(erfinv_gpu)
+                elif isinstance(node.op.scalar_op, Erfcx):
+                    new_op = GpuElemwise(erfcx_gpu)
                else:
                    try:
                        new_op = GpuElemwise(node.op.scalar_op)
@@ -298,6 +302,8 @@ def local_gpu_elemwise_1(node):

            if isinstance(elemwise_node.op.scalar_op, Erfinv):
                new_op = GpuElemwise(erfinv_gpu)
+            elif isinstance(elemwise_node.op.scalar_op, Erfcx):
+                new_op = GpuElemwise(erfcx_gpu)
            else:
                try:
                    new_op = GpuElemwise(elemwise_node.op.scalar_op)

--- a/theano/scalar/basic_scipy.py
+++ b/theano/scalar/basic_scipy.py
@@ -85,6 +85,38 @@ class Erfc(UnaryScalarOp):
 erfc = Erfc(upgrade_to_float_no_complex, name='erfc')


+class Erfcx(UnaryScalarOp):
+    """
+    Implements the scaled complementary error function exp(x**2)*erfc(x) in a numerically stable way.
+
+    Note: This op can still be executed on GPU, despite not having c_code.  When
+    running on GPU, sandbox.cuda.opt.local_gpu_elemwise_[0,1] replaces this op
+    with sandbox.cuda.elemwise.ErfcxGPU. (Same as Erfinv below.)
+    """
+    def impl(self, x):
+        if imported_scipy_special:
+            return scipy.special.erfcx(x)
+        else:
+            super(Erfcx, self).impl(x)
+
+    def grad(self, inp, grads):
+        x, = inp
+        gz, = grads
+        if x.type in complex_types:
+            raise NotImplementedError()
+        if self(x).type in discrete_types:
+            if x.type in discrete_types:
+                return [x.zeros_like(dtype=theano.config.floatX)]
+            else:
+                return [x.zeros_like()]
+
+        cst = numpy.asarray(2. / numpy.sqrt(numpy.pi),
+                            dtype=upcast(x.type.dtype, gz.type.dtype))
+        return - gz * cst + (2. * x) * erfcx(x),
+
+erfcx = Erfcx(upgrade_to_float_no_complex, name='erfcx')
+
+
 class Erfinv(UnaryScalarOp):
    """
    Implements the inverse error function.

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -1929,6 +1929,11 @@ def erfc(a):
    """complementary error function"""


+@_scal_elemwise
+def erfcx(a):
+    """scaled complementary error function"""
+
+
 @_scal_elemwise
 def erfinv(a):
    """inverse error function"""

--- a/theano/tensor/inplace.py
+++ b/theano/tensor/inplace.py
@@ -263,6 +263,11 @@ def erfc_inplace(a):
    """complementary error function"""


+@_scal_inplace
+def erfcx_inplace(a):
+    """scaled complementary error function"""
+
+
 @_scal_inplace
 def gamma_inplace(a):
    """gamma function"""

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -1642,6 +1642,7 @@ ArctanhInplaceTester = makeBroadcastTester(
 if imported_scipy_special:
    expected_erf = scipy.special.erf
    expected_erfc = scipy.special.erfc
+    expected_erfcx = scipy.special.erfcx
    expected_erfinv = scipy.special.erfinv
    expected_erfcinv = scipy.special.erfcinv
    expected_gamma = scipy.special.gamma
@@ -1652,6 +1653,7 @@ if imported_scipy_special:
 else:
    expected_erf = []
    expected_erfc = []
+    expected_erfcx = []
    expected_erfinv = []
    expected_erfcinv = []
    expected_gamma = []
@@ -1696,6 +1698,24 @@ ErfcInplaceTester = makeBroadcastTester(
    inplace=True,
    skip=skip_scipy)

+ErfcxTester = makeBroadcastTester(
+    op=tensor.erfcx,
+    expected=expected_erfcx,
+    good=_good_broadcast_unary_normal_float_no_complex,
+    grad=_grad_broadcast_unary_normal,
+    eps=2e-10,
+    mode=mode_no_scipy,
+    skip=skip_scipy)
+ErfcxInplaceTester = makeBroadcastTester(
+    op=inplace.erfcx_inplace,
+    expected=expected_erfcx,
+    good=_good_broadcast_unary_normal_float_no_complex,
+    grad=_grad_broadcast_unary_normal,
+    eps=2e-10,
+    mode=mode_no_scipy,
+    inplace=True,
+    skip=skip_scipy)
+
 ErfinvTester = makeBroadcastTester(
    op=tensor.erfinv,
    expected=expected_erfinv,