提交 e34c0424 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #6030 from lamblin/fix_5036

Add lifter for CrossentropyCategorical1Hot and grad
...@@ -19,6 +19,7 @@ from theano.ifelse import IfElse ...@@ -19,6 +19,7 @@ from theano.ifelse import IfElse
from theano.misc.ordered_set import OrderedSet from theano.misc.ordered_set import OrderedSet
from theano.scalar.basic import Scalar, Pow, Cast from theano.scalar.basic import Scalar, Pow, Cast
from theano.scalar.basic import log, neg, true_div
from theano.scalar.basic_scipy import Erfinv, Erfcinv from theano.scalar.basic_scipy import Erfinv, Erfcinv
from theano.scan_module import scan_utils, scan_op, scan_opt from theano.scan_module import scan_utils, scan_op, scan_opt
...@@ -163,6 +164,8 @@ gpu_optimizer.register('local_remove_all_assert', ...@@ -163,6 +164,8 @@ gpu_optimizer.register('local_remove_all_assert',
'unsafe') 'unsafe')
# Define a few operations to use in optimizations,
# in order to avoid introducing new CPU Ops, or useless ones.
def safe_to_gpu(x, ctx_name): def safe_to_gpu(x, ctx_name):
if isinstance(x.type, tensor.TensorType): if isinstance(x.type, tensor.TensorType):
return GpuFromHost(ctx_name)(x) return GpuFromHost(ctx_name)(x)
...@@ -176,6 +179,10 @@ def safe_to_cpu(x): ...@@ -176,6 +179,10 @@ def safe_to_cpu(x):
else: else:
return x return x
# GPU elemwise versions of the scalar ops log, neg and true_div.
# They are instantiated once here and reused by the local optimizations
# below that rewrite CPU-only Ops as equivalent GPU expressions.
gpu_log = GpuElemwise(log)
gpu_neg = GpuElemwise(neg)
gpu_true_div = GpuElemwise(true_div)
def op_lifter(OP, cuda_only=False): def op_lifter(OP, cuda_only=False):
""" """
...@@ -1329,6 +1336,38 @@ def local_gpua_softmaxwithbias(op, context_name, inputs, outputs): ...@@ -1329,6 +1336,38 @@ def local_gpua_softmaxwithbias(op, context_name, inputs, outputs):
return gpu_softmax_with_bias return gpu_softmax_with_bias
@register_opt('fast_compile')
@op_lifter([tensor.nnet.CrossentropyCategorical1Hot])
@register_opt2([tensor.nnet.CrossentropyCategorical1Hot], 'fast_compile')
def local_gpu_crossentropycategorical1hot(op, context_name, inputs, outputs):
    """Lift CrossentropyCategorical1Hot to the GPU.

    There is no dedicated GPU Op for it, so the node is rewritten as
    the equivalent expression
    ``-log(coding[arange(coding.shape[0]), one_of_n])``
    built from GPU elemwise ops.
    """
    coding, one_of_n = inputs
    # One row index per example, so advanced indexing selects the
    # probability assigned to each example's target class.
    row_idx = theano.tensor.arange(shape_i(coding, 0))
    target_probs = coding[row_idx, one_of_n]
    return [gpu_neg(gpu_log(target_probs))]
@register_opt('fast_compile')
@op_lifter([tensor.nnet.CrossentropyCategorical1HotGrad])
@register_opt2([tensor.nnet.CrossentropyCategorical1HotGrad], 'fast_compile')
def local_gpu_crossentropycategorical1hotgrad(op, context_name, inputs,
                                              outputs):
    """Lift CrossentropyCategorical1HotGrad to the GPU.

    There is no dedicated GPU Op, so the gradient is rewritten as::

        gcoding = zeros_like(coding)
        gcoding[arange(coding.shape[0]), one_of_n] = -gy / (
            coding[arange(coding.shape[0]), one_of_n])
    """
    gy, coding, one_of_n = inputs
    row_idx = theano.tensor.arange(shape_i(coding, 0))
    # Zero-initialized (memset_0) GPU allocation with the same
    # symbolic shape and dtype as `coding`.
    out_shape = [shape_i(coding, dim) for dim in xrange(coding.ndim)]
    zeros = GpuAlloc(context_name, memset_0=True)(
        as_gpuarray_variable(np.zeros((), dtype=coding.dtype), context_name),
        *out_shape)
    target_probs = coding[row_idx, one_of_n]
    grad_coding = tensor.set_subtensor(
        zeros[row_idx, one_of_n],
        gpu_neg(gpu_true_div(gy, target_probs)))
    return [grad_coding.transfer(context_name)]
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([theano.tensor.opt.Assert]) @op_lifter([theano.tensor.opt.Assert])
def local_gpua_assert(op, context_name, inputs, outputs): def local_gpua_assert(op, context_name, inputs, outputs):
......
...@@ -680,3 +680,17 @@ def test_batched_dot_lifter(): ...@@ -680,3 +680,17 @@ def test_batched_dot_lifter():
z = tensor.batched_dot(x, y) z = tensor.batched_dot(x, y)
f = theano.function([x, y], z, mode=mode_with_gpu) f = theano.function([x, y], z, mode=mode_with_gpu)
f(x_val, y_val) f(x_val, y_val)
def test_crossentropycategorical1hot_lifter():
    """Check that the forward Op and its grad are both lifted to GPU graphs.

    After compilation with the GPU mode, no CrossentropyCategorical1Hot
    or CrossentropyCategorical1HotGrad node should remain in the graph,
    and the compiled function should still execute.
    """
    rng = np.random.RandomState(utt.fetch_seed())
    coding = tensor.matrix()
    one_of_n = tensor.lvector()
    cost = tensor.nnet.crossentropy_categorical_1hot(coding, one_of_n)
    grad_coding = theano.grad(cost.mean(), coding)
    f = theano.function([coding, one_of_n], [cost, grad_coding],
                        mode=mode_with_gpu)
    lifted_ops = (tensor.nnet.CrossentropyCategorical1Hot,
                  tensor.nnet.CrossentropyCategorical1HotGrad)
    for node in f.maker.fgraph.apply_nodes:
        assert not isinstance(node.op, lifted_ops)
    f(rng.uniform(0.1, 0.9, (13, 5)).astype(theano.config.floatX),
      rng.randint(5, size=(13,)))
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论