提交 017b6e3e authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Make a proxy NVCC_compiler that does not create a second context on the card as a side effect.

The CUDA docs suggest that having more than one context on a card may slow down operations, so this might give us a slight win.
上级 3810c977
...@@ -12,7 +12,6 @@ from theano.gof.python25 import any ...@@ -12,7 +12,6 @@ from theano.gof.python25 import any
from theano.gof.utils import MethodNotDefined from theano.gof.utils import MethodNotDefined
from theano.compat import PY3 from theano.compat import PY3
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
try: try:
import pygpu import pygpu
from pygpu import gpuarray, elemwise from pygpu import gpuarray, elemwise
......
import os
import numpy
import theano
from theano import config
# This is a big hack to avoid creating a second context on the card.
from theano.sandbox.cuda.nvcc_compiler import (NVCC_compiler as NVCC_base,
hash_from_file)
class NVCC_compiler(NVCC_base):
    """
    Proxy around the CUDA backend's NVCC_compiler for the gpuarray backend.

    Only compile_args() is overridden; everything else is inherited
    from the base compiler.  The override exists so that computing the
    nvcc flags does not create an additional context on the GPU.
    """

    @staticmethod
    def compile_args():
        """
        Re-implementation of compile_args that does not create an
        additional context on the GPU.

        The flags are assembled from ``config.nvcc.flags``,
        ``config.nvcc.fastmath``, a hash of ``cuda_ndarray.cuh``, numpy
        compatibility macros (for numpy < 1.7) and, unless the user
        already supplied one, an ``-arch=sm_XY`` flag derived from the
        device the gpuarray backend was initialised with.

        :returns: list of command-line flags (strings) for nvcc.

        :raises Exception: if no gpuarray device has been initialised,
            or if the initialised device is an OpenCL one.
        """
        # Drop the empty strings produced by consecutive spaces.
        flags = [flag for flag in config.nvcc.flags.split(' ') if flag]
        if config.nvcc.fastmath:
            flags.append('-use_fast_math')

        # Pass a hash of the header shipped next to theano.sandbox.cuda
        # as a macro on the command line.
        cuda_ndarray_cuh_hash = hash_from_file(
            os.path.join(os.path.split(theano.sandbox.cuda.__file__)[0],
                         'cuda_ndarray.cuh'))
        flags.append('-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash)

        # numpy 1.7 deprecated the following macros but they didn't
        # exist in the past
        numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
        if numpy_ver < [1, 7]:
            flags.append("-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY")
            flags.append("-D NPY_ARRAY_ALIGNED=NPY_ALIGNED")
            flags.append("-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE")
            flags.append("-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL")
            flags.append("-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS")
            flags.append("-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS")

        # If the user didn't specify architecture flags add them
        if not any('-arch=sm_' in f for f in flags):
            # Use the device name recorded by the gpuarray backend.
            dev = theano.sandbox.gpuarray.init_dev.device
            if dev is None:
                raise Exception(
                    "Trying to compile GPU code without a context")
            if dev.startswith("opencl"):
                raise Exception(
                    "Trying to call nvcc with an OpenCL context")
            assert dev.startswith('cuda')
            # The device name is expected to look like 'cuda<N>'; the
            # numeric suffix is the device index.
            # NOTE(review): a bare 'cuda' (no index) would make int()
            # raise ValueError here -- confirm whether that can happen.
            n = int(dev[4:])
            p = theano.sandbox.cuda.device_properties(n)
            flags.append('-arch=sm_' + str(p['major']) + str(p['minor']))

        return flags
...@@ -3,7 +3,7 @@ import os ...@@ -3,7 +3,7 @@ import os
import theano import theano
from theano import config, gof from theano import config, gof
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler from theano.sandbox.gpuarray.comp import NVCC_compiler
from theano.sandbox.gpuarray.type import GpuArrayType from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.basic_ops import as_gpuarray_variable from theano.sandbox.gpuarray.basic_ops import as_gpuarray_variable
......
...@@ -8,9 +8,8 @@ import theano ...@@ -8,9 +8,8 @@ import theano
from theano import Apply, scalar, config from theano import Apply, scalar, config
from theano import scalar as scal from theano import scalar as scal
from theano.scalar import Scalar from theano.scalar import Scalar
from theano.tensor.elemwise import (Elemwise, DimShuffle, from theano.tensor.elemwise import (Elemwise, DimShuffle, CAReduceDtype)
CAReduceDtype) from theano.sandbox.gpuarray.comp import NVCC_compiler
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
try: try:
import pygpu import pygpu
......
...@@ -2,7 +2,6 @@ import numpy ...@@ -2,7 +2,6 @@ import numpy
from theano import Op, Apply, config from theano import Op, Apply, config
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
from theano.sandbox.neighbours import Images2Neibs from theano.sandbox.neighbours import Images2Neibs
import theano.tensor as T import theano.tensor as T
...@@ -17,6 +16,7 @@ from theano.sandbox.gpuarray.basic_ops import (as_gpuarray_variable, ...@@ -17,6 +16,7 @@ from theano.sandbox.gpuarray.basic_ops import (as_gpuarray_variable,
from theano.sandbox.gpuarray.opt import register_opt as register_gpu_opt from theano.sandbox.gpuarray.opt import register_opt as register_gpu_opt
from theano.sandbox.gpuarray.opt import op_lifter as op_lifter from theano.sandbox.gpuarray.opt import op_lifter as op_lifter
from theano.sandbox.gpuarray.type import GpuArrayType from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.comp import NVCC_compiler
class GpuImages2Neibs(Images2Neibs, Op): class GpuImages2Neibs(Images2Neibs, Op):
......
...@@ -2,7 +2,7 @@ import numpy ...@@ -2,7 +2,7 @@ import numpy
from theano import Op, Apply, config from theano import Op, Apply, config
from theano.compat.six import StringIO from theano.compat.six import StringIO
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler from theano.sandbox.gpuarray.comp import NVCC_compiler
try: try:
......
...@@ -7,7 +7,6 @@ from theano import tensor, gof, Op ...@@ -7,7 +7,6 @@ from theano import tensor, gof, Op
from theano.gof.python25 import all, any from theano.gof.python25 import all, any
from theano.tensor.subtensor import IncSubtensor, Subtensor, get_idx_list from theano.tensor.subtensor import IncSubtensor, Subtensor, get_idx_list
import theano.tensor.inplace import theano.tensor.inplace
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
try: try:
import pygpu import pygpu
...@@ -18,6 +17,8 @@ except ImportError: ...@@ -18,6 +17,8 @@ except ImportError:
from theano.sandbox.gpuarray.type import GpuArrayType from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.basic_ops import as_gpuarray_variable, HideC from theano.sandbox.gpuarray.basic_ops import as_gpuarray_variable, HideC
from theano.sandbox.gpuarray.elemwise import GpuElemwise from theano.sandbox.gpuarray.elemwise import GpuElemwise
from theano.sandbox.gpuarray.comp import NVCC_compiler
class GpuSubtensor(HideC, Subtensor): class GpuSubtensor(HideC, Subtensor):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论