提交 2a0dc002 authored 作者: Frederic Bastien's avatar Frederic Bastien

Move the multinomial op to the GPU when only its output is transferred to the GPU.

上级 5d551988
...@@ -297,6 +297,16 @@ def use_gpu_multinomial(node): ...@@ -297,6 +297,16 @@ def use_gpu_multinomial(node):
for i in node.inputs])): for i in node.inputs])):
gpu_op = GpuMultinomialFromUniform(node.op.odtype) gpu_op = GpuMultinomialFromUniform(node.op.odtype)
return [host_from_gpu(gpu_op(*[gpu_from_host(i) for i in node.inputs])).T] return [host_from_gpu(gpu_op(*[gpu_from_host(i) for i in node.inputs])).T]
if (isinstance(node.op, theano.sandbox.cuda.GpuFromHost) and
node.inputs[0].owner and type(node.inputs[0].owner.op) is MultinomialFromUniform):
multi = node.inputs[0].owner
p, u = multi.inputs
m, = multi.outputs
if (p.dtype == u.dtype == m.dtype == 'float32'):
gpu_op = GpuMultinomialFromUniform(multi.op.odtype)
ret = gpu_op(*[gpu_from_host(i) for i in multi.inputs]).T
# The dimshuffle is on the cpu, but will be moved to the gpu by an opt.
return [gpu_from_host(ret)]
if cuda_available: if cuda_available:
register_opt()(use_gpu_multinomial) register_opt()(use_gpu_multinomial)
......
import copy
import numpy import numpy
import theano import theano
from theano import tensor, shared, function from theano import tensor, function
import multinomial import multinomial
from theano.compile.mode import get_default_mode, predefined_linkers from theano.compile.mode import get_default_mode, predefined_linkers
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
def run_with_c(f, gpu=False): def get_mode(gpu):
mode = get_default_mode() mode = get_default_mode()
linker_orig = mode.linker mode = copy.copy(mode)
if linker_orig == predefined_linkers['py']:
mode.linker = predefined_linkers['c|py']
if gpu: if gpu:
mode = mode.including('gpu') mode = mode.including('gpu', 'gpu_local_optimizations', 'local_cut_gpu_host_gpu', 'use_gpu_multinomial')
try: if isinstance(mode.linker, theano.gof.PerformLinker):
mode.linker = predefined_linkers['c|py']
return mode
def run_with_c(f, gpu=False):
mode = get_mode(gpu)
f(mode, gpu) f(mode, gpu)
finally:
mode.linker = linker_orig
def test_multinomial_0(): def test_multinomial_0():
...@@ -99,3 +102,23 @@ def test_multinomial_dtypes(): ...@@ -99,3 +102,23 @@ def test_multinomial_dtypes():
u = tensor.fvector() u = tensor.fvector()
m = multinomial.MultinomialFromUniform('float64')(p,u) m = multinomial.MultinomialFromUniform('float64')(p,u)
assert m.dtype == 'float64', m.dtype assert m.dtype == 'float64', m.dtype
def test_gpu_opt():
    """Check that MultinomialFromUniform is lifted to the GPU when only
    its *output* is moved to the GPU (inputs stay as host tensors).

    This exercises the second branch of ``use_gpu_multinomial``: the graph
    is ``gpu_from_host(MultinomialFromUniform(p, u))`` and the optimizer
    must replace it with ``GpuMultinomialFromUniform``.
    """
    if not cuda.cuda_available:
        # Skip test if cuda_ndarray is not available.
        from nose.plugins.skip import SkipTest
        raise SkipTest('Optional package cuda not available')

    # We test the case where we put the op on the gpu when the output
    # is moved to the gpu.
    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform('auto')(p, u)
    # 'auto' must resolve to float32 given float32 inputs.
    assert m.dtype == 'float32', m.dtype

    m_gpu = cuda.gpu_from_host(m)
    f = function([p, u], m_gpu, allow_input_downcast=True,
                 mode=get_mode(True))
    # The optimization must have introduced the GPU version of the op.
    assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
                for node in f.maker.env.toposort()])

    pval = numpy.arange(10000 * 4,
                        dtype='float32').reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = numpy.ones_like(pval[:, 0]) * 0.5
    mval = f(pval, uval)

    # The original test discarded mval entirely; validate the draw.
    # One sample per row is drawn, so each row of the result is one-hot:
    # same shape as the probability matrix, rows summing to 1.
    mval = numpy.asarray(mval)
    assert mval.shape == pval.shape, mval.shape
    assert numpy.allclose(mval.sum(axis=1), 1.0)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论