提交 f0fee893 作者: James Bergstra

cuda.opt: 80-char limit

上级 6c365bf6
...@@ -1898,7 +1898,7 @@ CudaNdarray_gpu_init(PyObject* _unused, PyObject* args) ...@@ -1898,7 +1898,7 @@ CudaNdarray_gpu_init(PyObject* _unused, PyObject* args)
} }
if(card_number_provided && (card_nb < 0 || card_nb > (deviceCount - 1))) { if(card_number_provided && (card_nb < 0 || card_nb > (deviceCount - 1))) {
return PyErr_Format(PyExc_ValueError, return PyErr_Format(PyExc_ValueError,
"Bad device number %d. There is only %d device available.", "Bad device number %d. Only %d devices available.",
card_nb, card_nb,
deviceCount); deviceCount);
} }
......
...@@ -6,27 +6,32 @@ import theano ...@@ -6,27 +6,32 @@ import theano
import numpy import numpy
from theano import scalar as scal from theano import scalar as scal
from theano import tensor, compile, gof from theano import tensor, compile, gof
from theano.gof import local_optimizer, EquilibriumDB, SequenceDB, Optimizer, toolbox, DestroyHandler, EquilibriumOptimizer from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, Optimizer,
toolbox, DestroyHandler, EquilibriumOptimizer)
from theano.sandbox.cuda.basic_ops import * from theano.sandbox.cuda.basic_ops import *
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.blas import (gpu_dot22, gpu_dot22scalar, gpu_gemm_inplace, from theano.sandbox.cuda.blas import (gpu_dot22, gpu_dot22scalar,
gpu_gemm_no_inplace, GpuConv) gpu_gemm_inplace, gpu_gemm_no_inplace, GpuConv)
from theano.sandbox.cuda.blas import GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad from theano.sandbox.cuda.blas import (GpuDownsampleFactorMax,
GpuDownsampleFactorMaxGrad)
from theano.sandbox.cuda.nnet import ( from theano.sandbox.cuda.nnet import (
GpuCrossentropySoftmaxArgmax1HotWithBias, GpuCrossentropySoftmaxArgmax1HotWithBias,
GpuCrossentropySoftmax1HotWithBiasDx, GpuCrossentropySoftmax1HotWithBiasDx,
GpuSoftmax, GpuSoftmaxWithBias) GpuSoftmax, GpuSoftmaxWithBias)
from theano.compile import optdb from theano.compile import optdb
from theano.tensor.blas import _is_real_vector, _is_real_matrix from theano.tensor.blas import _is_real_vector, _is_real_matrix
#optdb.print_summary() # this shows what is currently registered (in a so-far crude way...) #optdb.print_summary() # shows what is currently registered
gpu_optimizer = EquilibriumDB() gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB() gpu_cut_copies = EquilibriumDB()
gpu_seqopt = SequenceDB() gpu_seqopt = SequenceDB()
gpu_seqopt.register('gpu_local_optimizations', gpu_optimizer, 1, 'fast_run', 'inplace') gpu_seqopt.register('gpu_local_optimizations', gpu_optimizer, 1,
gpu_seqopt.register('gpu_cut_transfers', gpu_cut_copies, 2, 'fast_run', 'inplace') 'fast_run', 'inplace')
optdb.register('gpu', gpu_seqopt, optdb.__position__.get('add_destroy_handler', 49.5) - 1) gpu_seqopt.register('gpu_cut_transfers', gpu_cut_copies, 2,
'fast_run', 'inplace')
optdb.register('gpu',
gpu_seqopt, optdb.__position__.get('add_destroy_handler', 49.5) - 1)
def register_opt(*tags, **kwargs): def register_opt(*tags, **kwargs):
def f(local_opt): def f(local_opt):
...@@ -35,12 +40,14 @@ def register_opt(*tags, **kwargs): ...@@ -35,12 +40,14 @@ def register_opt(*tags, **kwargs):
return local_opt return local_opt
return f return f
#register local_track_shape_i at this level too to make multi-level lift of shape work. #register local_track_shape_i at this level too
#to make multi-level lift of shape work.
register_opt()(theano.tensor.opt.local_track_shape_i) register_opt()(theano.tensor.opt.local_track_shape_i)
class InputToGpuOptimizer(Optimizer): class InputToGpuOptimizer(Optimizer):
"""Transfert the input of a graph to the gpu if needed """Transfert the input of a graph to the gpu if needed
It should make this part of the optimizer faster we will will need only 1 pass on the env. It should make this part of the optimizer faster we will will need only 1
pass on the env.
""" """
def __init__(self): def __init__(self):
Optimizer.__init__(self) Optimizer.__init__(self)
...@@ -741,7 +748,8 @@ def local_inplace_gemm(node): ...@@ -741,7 +748,8 @@ def local_inplace_gemm(node):
# After destroyhandler is in but before we try to make elemwise things inplace # After destroyhandler is in but before we try to make elemwise things inplace
# Try to make gpu gemm inplace # Try to make gpu gemm inplace
# Also, need to make the gemm optimisation(step 70) happen before the fusion of elemwise(step 71) # Also, need to make the gemm optimisation(step 70) happen before the fusion of
# elemwise(step 71)
optdb.register('InplaceGpuBlasOpt', optdb.register('InplaceGpuBlasOpt',
EquilibriumOptimizer([local_inplace_gemm], failure_callback=EquilibriumOptimizer.warn_inplace, EquilibriumOptimizer([local_inplace_gemm], failure_callback=EquilibriumOptimizer.warn_inplace,
max_use_ratio=5), max_use_ratio=5),
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论