提交 017b6e3e authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Make a proxy NVCC_compiler that does not create a second context on the card as a side effect.

The CUDA documentation suggests that having more than one context on a card may slow down operations, so this might give us a slight win.
上级 3810c977
......@@ -12,7 +12,6 @@ from theano.gof.python25 import any
from theano.gof.utils import MethodNotDefined
from theano.compat import PY3
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
try:
import pygpu
from pygpu import gpuarray, elemwise
......
import os
import numpy
import theano
from theano import config
# This is a big hack to avoid creating a second context on the card.
from theano.sandbox.cuda.nvcc_compiler import (NVCC_compiler as NVCC_base,
hash_from_file)
class NVCC_compiler(NVCC_base):
    """
    Proxy for the CUDA backend's NVCC compiler class.

    Only `compile_args` is overridden; everything else is inherited
    from the base class.  The override builds the nvcc flag list
    without creating an additional context on the GPU.
    """

    @staticmethod
    def compile_args():
        """
        Re-implementation of compile_args that does not create an
        additional context on the GPU.

        Returns the list of command-line flags to pass to nvcc, built
        from `config.nvcc.flags`, `config.nvcc.fastmath`, a hash of
        `cuda_ndarray.cuh`, numpy compatibility macros and (when the
        user did not provide one) an `-arch=sm_XY` flag derived from
        the device selected by `theano.sandbox.gpuarray.init_dev`.

        Raises `Exception` if no device has been initialised or if
        the active device is an OpenCL one.
        """
        # Drop the empty strings produced by consecutive spaces.
        flags = [flag for flag in config.nvcc.flags.split(' ') if flag]
        if config.nvcc.fastmath:
            flags.append('-use_fast_math')

        # Embed a hash of cuda_ndarray.cuh in the flags, so a change to
        # that header changes the command line.
        cuda_ndarray_cuh_hash = hash_from_file(
            os.path.join(os.path.split(theano.sandbox.cuda.__file__)[0],
                         'cuda_ndarray.cuh'))
        flags.append('-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash)

        # numpy 1.7 deprecated the following macros but they didn't
        # exist in the past
        numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
        if numpy_ver < [1, 7]:
            flags.extend([
                "-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY",
                "-D NPY_ARRAY_ALIGNED=NPY_ALIGNED",
                "-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE",
                "-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL",
                "-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS",
                "-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS",
            ])

        # If the user didn't specify architecture flags add them
        if not any('-arch=sm_' in f for f in flags):
            dev = theano.sandbox.gpuarray.init_dev.device
            # Call-style raise: the "raise Exception, msg" form is a
            # SyntaxError on Python 3.
            if dev is None:
                raise Exception(
                    "Trying to compile GPU code without a context")
            if dev.startswith("opencl"):
                raise Exception(
                    "Trying to call nvcc with an OpenCL context")
            assert dev.startswith('cuda')
            # The device name is "cuda" followed by the device number.
            n = int(dev[4:])
            p = theano.sandbox.cuda.device_properties(n)
            flags.append('-arch=sm_' + str(p['major']) + str(p['minor']))

        return flags
......@@ -3,7 +3,7 @@ import os
import theano
from theano import config, gof
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
from theano.sandbox.gpuarray.comp import NVCC_compiler
from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.basic_ops import as_gpuarray_variable
......
......@@ -8,9 +8,8 @@ import theano
from theano import Apply, scalar, config
from theano import scalar as scal
from theano.scalar import Scalar
from theano.tensor.elemwise import (Elemwise, DimShuffle,
CAReduceDtype)
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
from theano.tensor.elemwise import (Elemwise, DimShuffle, CAReduceDtype)
from theano.sandbox.gpuarray.comp import NVCC_compiler
try:
import pygpu
......
......@@ -2,7 +2,6 @@ import numpy
from theano import Op, Apply, config
from theano.gof import local_optimizer
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
from theano.sandbox.neighbours import Images2Neibs
import theano.tensor as T
......@@ -17,6 +16,7 @@ from theano.sandbox.gpuarray.basic_ops import (as_gpuarray_variable,
from theano.sandbox.gpuarray.opt import register_opt as register_gpu_opt
from theano.sandbox.gpuarray.opt import op_lifter as op_lifter
from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.comp import NVCC_compiler
class GpuImages2Neibs(Images2Neibs, Op):
......
......@@ -2,7 +2,7 @@ import numpy
from theano import Op, Apply, config
from theano.compat.six import StringIO
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
from theano.sandbox.gpuarray.comp import NVCC_compiler
try:
......
......@@ -7,7 +7,6 @@ from theano import tensor, gof, Op
from theano.gof.python25 import all, any
from theano.tensor.subtensor import IncSubtensor, Subtensor, get_idx_list
import theano.tensor.inplace
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
try:
import pygpu
......@@ -18,6 +17,8 @@ except ImportError:
from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.basic_ops import as_gpuarray_variable, HideC
from theano.sandbox.gpuarray.elemwise import GpuElemwise
from theano.sandbox.gpuarray.comp import NVCC_compiler
class GpuSubtensor(HideC, Subtensor):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论