提交 e34c0424 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #6030 from lamblin/fix_5036

Add lifter for CrossentropyCategorical1Hot and grad
...@@ -19,6 +19,7 @@ from theano.ifelse import IfElse ...@@ -19,6 +19,7 @@ from theano.ifelse import IfElse
from theano.misc.ordered_set import OrderedSet from theano.misc.ordered_set import OrderedSet
from theano.scalar.basic import Scalar, Pow, Cast from theano.scalar.basic import Scalar, Pow, Cast
from theano.scalar.basic import log, neg, true_div
from theano.scalar.basic_scipy import Erfinv, Erfcinv from theano.scalar.basic_scipy import Erfinv, Erfcinv
from theano.scan_module import scan_utils, scan_op, scan_opt from theano.scan_module import scan_utils, scan_op, scan_opt
...@@ -163,6 +164,8 @@ gpu_optimizer.register('local_remove_all_assert', ...@@ -163,6 +164,8 @@ gpu_optimizer.register('local_remove_all_assert',
'unsafe') 'unsafe')
# Define a few operations to use in optimizations,
# in order to avoid introducing new CPU Ops, or useless ones.
def safe_to_gpu(x, ctx_name): def safe_to_gpu(x, ctx_name):
if isinstance(x.type, tensor.TensorType): if isinstance(x.type, tensor.TensorType):
return GpuFromHost(ctx_name)(x) return GpuFromHost(ctx_name)(x)
...@@ -176,6 +179,10 @@ def safe_to_cpu(x): ...@@ -176,6 +179,10 @@ def safe_to_cpu(x):
else: else:
return x return x
# GPU elemwise versions of the scalar ops log, neg and true_div.
# They are instantiated once here and reused by the local optimizations
# below that rewrite CPU-only Ops as equivalent GPU expressions.
gpu_log = GpuElemwise(log)
gpu_neg = GpuElemwise(neg)
gpu_true_div = GpuElemwise(true_div)
def op_lifter(OP, cuda_only=False): def op_lifter(OP, cuda_only=False):
""" """
...@@ -1329,6 +1336,38 @@ def local_gpua_softmaxwithbias(op, context_name, inputs, outputs): ...@@ -1329,6 +1336,38 @@ def local_gpua_softmaxwithbias(op, context_name, inputs, outputs):
return gpu_softmax_with_bias return gpu_softmax_with_bias
@register_opt('fast_compile')
@op_lifter([tensor.nnet.CrossentropyCategorical1Hot])
@register_opt2([tensor.nnet.CrossentropyCategorical1Hot], 'fast_compile')
def local_gpu_crossentropycategorical1hot(op, context_name, inputs, outputs):
    """Lift CrossentropyCategorical1Hot to the GPU.

    There is no dedicated GPU Op for it, so the node is rewritten as
    the equivalent expression
    ``-log(coding[arange(coding.shape[0]), one_of_n])``
    built from GPU elemwise ops.
    """
    coding, one_of_n = inputs
    # One row index per example, so advanced indexing selects the
    # probability assigned to each example's target class.
    row_idx = theano.tensor.arange(shape_i(coding, 0))
    target_probs = coding[row_idx, one_of_n]
    return [gpu_neg(gpu_log(target_probs))]
@register_opt('fast_compile')
@op_lifter([tensor.nnet.CrossentropyCategorical1HotGrad])
@register_opt2([tensor.nnet.CrossentropyCategorical1HotGrad], 'fast_compile')
def local_gpu_crossentropycategorical1hotgrad(op, context_name, inputs,
                                              outputs):
    """Lift CrossentropyCategorical1HotGrad to the GPU.

    There is no dedicated GPU Op, so the gradient is rewritten as::

        gcoding = zeros_like(coding)
        gcoding[arange(coding.shape[0]), one_of_n] = -gy / (
            coding[arange(coding.shape[0]), one_of_n])
    """
    gy, coding, one_of_n = inputs
    row_idx = theano.tensor.arange(shape_i(coding, 0))
    # Zero-initialized (memset_0) GPU allocation with the same
    # symbolic shape and dtype as `coding`.
    out_shape = [shape_i(coding, dim) for dim in xrange(coding.ndim)]
    zeros = GpuAlloc(context_name, memset_0=True)(
        as_gpuarray_variable(np.zeros((), dtype=coding.dtype), context_name),
        *out_shape)
    target_probs = coding[row_idx, one_of_n]
    grad_coding = tensor.set_subtensor(
        zeros[row_idx, one_of_n],
        gpu_neg(gpu_true_div(gy, target_probs)))
    return [grad_coding.transfer(context_name)]
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([theano.tensor.opt.Assert]) @op_lifter([theano.tensor.opt.Assert])
def local_gpua_assert(op, context_name, inputs, outputs): def local_gpua_assert(op, context_name, inputs, outputs):
......
...@@ -680,3 +680,17 @@ def test_batched_dot_lifter(): ...@@ -680,3 +680,17 @@ def test_batched_dot_lifter():
z = tensor.batched_dot(x, y) z = tensor.batched_dot(x, y)
f = theano.function([x, y], z, mode=mode_with_gpu) f = theano.function([x, y], z, mode=mode_with_gpu)
f(x_val, y_val) f(x_val, y_val)
def test_crossentropycategorical1hot_lifter():
    """Check that the forward Op and its grad are both lifted to GPU graphs.

    After compilation with the GPU mode, no CrossentropyCategorical1Hot
    or CrossentropyCategorical1HotGrad node should remain in the graph,
    and the compiled function should still execute.
    """
    rng = np.random.RandomState(utt.fetch_seed())
    coding = tensor.matrix()
    one_of_n = tensor.lvector()
    cost = tensor.nnet.crossentropy_categorical_1hot(coding, one_of_n)
    grad_coding = theano.grad(cost.mean(), coding)
    f = theano.function([coding, one_of_n], [cost, grad_coding],
                        mode=mode_with_gpu)
    lifted_ops = (tensor.nnet.CrossentropyCategorical1Hot,
                  tensor.nnet.CrossentropyCategorical1HotGrad)
    for node in f.maker.fgraph.apply_nodes:
        assert not isinstance(node.op, lifted_ops)
    f(rng.uniform(0.1, 0.9, (13, 5)).astype(theano.config.floatX),
      rng.randint(5, size=(13,)))
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论