提交 a300e1e2 authored 作者: Frederic Bastien's avatar Frederic Bastien

added new optimizer InputToGpuOptimizer that transfert the input of a graph on…

added new optimizer InputToGpuOptimizer that transfers the inputs of a graph to the gpu card when needed. This makes the gpu code work when not using shared variables
上级 000b5da8
......@@ -2,9 +2,10 @@ import sys
import theano
import numpy
from theano import tensor, scalar, compile
from theano.gof import local_optimizer, EquilibriumDB, SequenceDB
from theano.gof import local_optimizer, EquilibriumDB, SequenceDB, Optimizer, toolbox, DestroyHandler
from theano.sandbox.cuda.basic_ops import *
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.blas import gpu_dot22, gpu_gemm, GpuConv
from theano.sandbox.cuda.blas import GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
from theano.sandbox.cuda.nnet import (
......@@ -32,6 +33,33 @@ def register_opt(*tags, **kwargs):
return local_opt
return f
class InputToGpuOptimizer(Optimizer):
    """Transfer the inputs of a graph to the gpu when needed.

    Inserting the host->gpu->host round trip on each input lets the
    later local optimizers move Ops onto the gpu even when the graph
    uses no shared variables.  Doing it in one dedicated pass over the
    env should also be faster than relying on the other optimizers.
    """
    def __init__(self):
        Optimizer.__init__(self)

    def add_requirements(self, env):
        # replace_validate() below needs the ReplaceValidate feature;
        # DestroyHandler keeps destructive Ops consistent during replacement.
        env.extend(toolbox.ReplaceValidate())
        env.extend(DestroyHandler())

    def apply(self, env):
        for inp in env.inputs:
            # Inputs already on the gpu need no transfer.
            if isinstance(inp.type, CudaNdarrayType):
                continue
            try:
                # host_from_gpu(gpu_from_host(x)) is an identity on the
                # value, but gives the gpu optimizers a transfer to latch onto.
                new_input = host_from_gpu(gpu_from_host(inp))
                env.replace_validate(inp, new_input,
                        "To allow further optimisation to move Ops to gpu")
            except Exception:
                # Only float32 is supported on the gpu, so the transfer
                # can legitimately fail; best-effort, skip that input.
                pass
# Register before all other gpu optimizers so that the graph inputs are
# already on the gpu when those optimizers run.
gpu_seqopt.register('InputToGpuOptimizer', InputToGpuOptimizer(),
0, 'fast_run', 'fast_compile', 'merge')  # TODO: how to make it mandatory for gpu_seqopt?
@local_optimizer([])
def local_cut_gpu_host_gpu(node):
if tensor.opt.opt.check_chain(node, gpu_from_host, host_from_gpu):
......
import sys, time
from theano.compile.sandbox.sharedvalue import shared
from theano.compile.sandbox.pfunc import pfunc
from theano import tensor
import theano
import numpy
# Skip the whole module if the optional cuda_ndarray package is not
# available: every test here needs a working cuda backend.
from nose.plugins.skip import SkipTest
try:
    import cuda_ndarray
except ImportError:
    # nose treats SkipTest raised at import time as "module skipped".
    raise SkipTest('Optional package cuda_ndarray not available')
import theano.sandbox.cuda as cuda
def test_no_shared_var_graph():
    """A graph with no shared variables must still compile for the gpu:
    the InputToGpuOptimizer has to insert the host<->gpu transfers itself.
    """
    x = tensor.fmatrix()
    y = tensor.fmatrix()
    fn = theano.function([x, y], [x + y])
    nodes = fn.maker.env.toposort()
    # Expected: 2 transfers in, 1 gpu elemwise, 1 transfer out.
    assert len(nodes) == 4
    for op_class in (cuda.GpuElemwise, cuda.GpuFromHost, cuda.HostFromGpu):
        assert any(isinstance(node.op, op_class) for node in nodes)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论