提交 a68097aa authored 作者: Frederic Bastien's avatar Frederic Bastien

Included cuda_ndarray in Theano to have everything in one package.

上级 847c505b
...@@ -16,7 +16,7 @@ def debug(*msg): ...@@ -16,7 +16,7 @@ def debug(*msg):
_logger.debug(_logger_name+'DEBUG: '+' '.join(str(m) for m in msg)) _logger.debug(_logger_name+'DEBUG: '+' '.join(str(m) for m in msg))
# Compile type_support.cu # Compile cuda_ndarray.cu
# This requires that nvcc (part of CUDA) is installed. If it is not, a warning is # This requires that nvcc (part of CUDA) is installed. If it is not, a warning is
# printed and this module will not work properly (we set `enable_cuda` # printed and this module will not work properly (we set `enable_cuda`
# to False). # to False).
...@@ -45,54 +45,27 @@ def set_cuda_disabled(): ...@@ -45,54 +45,27 @@ def set_cuda_disabled():
warning('Cuda is disabled, cuda-based code will thus not be ' warning('Cuda is disabled, cuda-based code will thus not be '
'working properly') 'working properly')
old_file = os.path.join(os.path.split(__file__)[0],'type_support.so') #cuda_ndarray compile and import
if os.path.exists(old_file): sys.path.append(get_compiledir())
os.remove(old_file)
try: try:
sys.path.append(get_compiledir()) from cuda_ndarray.cuda_ndarray import *
from type_support.type_support import *
except ImportError: except ImportError:
import nvcc_compiler import nvcc_compiler
if not nvcc_compiler.is_nvcc_available(): if not nvcc_compiler.is_nvcc_available():
set_cuda_disabled() set_cuda_disabled()
if enable_cuda: if enable_cuda:
cuda_path = os.path.split(__file__)[0]
code = open(os.path.join(cuda_path, "cuda_ndarray.cu")).read()
cuda_path=os.path.split(old_file)[0] loc = os.path.join(get_compiledir(),'cuda_ndarray')
code = open(os.path.join(cuda_path, "type_support.cu")).read()
loc = os.path.join(get_compiledir(),'type_support')
if not os.path.exists(loc): if not os.path.exists(loc):
os.makedirs(loc) os.makedirs(loc)
CUDA_NDARRAY=os.getenv('CUDA_NDARRAY') nvcc_compiler.nvcc_module_compile_str('cuda_ndarray', code, location = loc, include_dirs=[cuda_path], libs=['cublas'],
include_dirs=[] preargs=['-DDONT_UNROLL', '-O3'])
lib_dirs=[]
if CUDA_NDARRAY:
include_dirs.append(CUDA_NDARRAY)
lib_dirs.append(CUDA_NDARRAY)
else:
import theano.sandbox
path = os.path.split(os.path.split(os.path.split(theano.sandbox.__file__)[0])[0])[0]
path2 = os.path.join(path,'cuda_ndarray')
if os.path.isdir(path2):
include_dirs.append(path2)
lib_dirs.append(path2)
else:
path = os.path.split(path)[0]
path2 = os.path.join(path,'cuda_ndarray')
include_dirs.append(path2)
lib_dirs.append(path2)
nvcc_compiler.nvcc_module_compile_str('type_support', code, location = loc, include_dirs=include_dirs, lib_dirs=lib_dirs, libs=['cuda_ndarray'])
from type_support.type_support import *
from cuda_ndarray.cuda_ndarray import *
if enable_cuda: if enable_cuda:
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
...@@ -118,7 +91,7 @@ def use(device=config.THEANO_GPU): ...@@ -118,7 +91,7 @@ def use(device=config.THEANO_GPU):
device=0 device=0
device=int(device) device=int(device)
try: try:
cuda_ndarray.gpu_init(device) gpu_init(device)
handle_shared_float32(True) handle_shared_float32(True)
use.device_number = device use.device_number = device
except RuntimeError, e: except RuntimeError, e:
......
...@@ -2,7 +2,7 @@ from theano import Op, Type, Apply, Variable, Constant ...@@ -2,7 +2,7 @@ from theano import Op, Type, Apply, Variable, Constant
from theano import tensor, scalar from theano import tensor, scalar
import StringIO import StringIO
import cuda_ndarray import cuda_ndarray.cuda_ndarray as cuda
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
class GpuDot22(Op): class GpuDot22(Op):
...@@ -187,7 +187,7 @@ class GpuConv(Op): ...@@ -187,7 +187,7 @@ class GpuConv(Op):
return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()]) return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()])
def perform(self, node, (img, kern), (out,)): def perform(self, node, (img, kern), (out,)):
out[0] = cuda_ndarray.conv(img, kern, out[0] = cuda.conv(img, kern,
mode=self.border_mode, mode=self.border_mode,
out=out[0], out=out[0],
subsample=self.subsample, subsample=self.subsample,
......
差异被折叠。
差异被折叠。
This source diff could not be displayed because it is too large. You can view the blob instead.
差异被折叠。
差异被折叠。
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论