Commit 0f1d6881 authored by James Bergstra

merge

...@@ -7,14 +7,28 @@ from .var import (CudaNdarrayVariable, ...@@ -7,14 +7,28 @@ from .var import (CudaNdarrayVariable,
import basic_ops import basic_ops
import opt import opt
import cuda_ndarray
import theano.compile.sandbox import theano.compile.sandbox
import logging, os
def use():
    """Enable GPU support: register CudaNdarray handling for shared float32 arrays.

    Convenience wrapper that simply calls handle_shared_float32(True).
    """
    handle_shared_float32(True)
def handle_shared_float32(tf):
    """Set the CudaNdarrayType as the default handler for shared float32 arrays.

    Use use() instead, as this is a bad name.

    :param tf: if True, initialise the GPU (device number taken from the
        THEANO_GPU environment variable, defaulting to device 0) and register
        the CudaNdarray shared constructor.  Passing False is not implemented.
    :raises NotImplementedError: if tf is False (removing the handler is
        not supported).
    """
    if tf:
        try:
            # Default must be the string "0", not the int 0, so int(v)
            # always receives the same type whether or not the variable
            # is set in the environment.
            v = os.getenv("THEANO_GPU", "0")
            cuda_ndarray.gpu_init(int(v))
            theano.compile.sandbox.shared_constructor(shared_constructor)
        except RuntimeError as e:
            # GPU initialisation is best-effort: warn and fall back to CPU
            # rather than aborting.  Lazy %-args avoid building the message
            # unless the record is actually emitted.
            logging.getLogger('theano_cuda_ndarray').warning(
                "WARNING: Won't use the GPU as the initialisation failed.%s",
                str(e))
    else:
        raise NotImplementedError('removing our handler')
...@@ -880,6 +880,8 @@ class NaiveAlgo(object): ...@@ -880,6 +880,8 @@ class NaiveAlgo(object):
print >> sio, """ print >> sio, """
int dims[%(nd)s] = {%(initial_dims)s}; int dims[%(nd)s] = {%(initial_dims)s};
""" %locals() """ %locals()
#check that all inputs have valid dimensions
for iname in inputs: for iname in inputs:
print >> sio, """ print >> sio, """
//std::cerr << "C_CODE %(opname)s checking input %(iname)s\\n"; //std::cerr << "C_CODE %(opname)s checking input %(iname)s\\n";
...@@ -904,6 +906,7 @@ class NaiveAlgo(object): ...@@ -904,6 +906,7 @@ class NaiveAlgo(object):
} }
""" %locals() """ %locals()
#check that all outputs have valid dimensions
for oname in outputs: for oname in outputs:
print >> sio, """ print >> sio, """
for (int i = 0; (i< %(nd)s) && (cnda_%(oname)s); ++i) { for (int i = 0; (i< %(nd)s) && (cnda_%(oname)s); ++i) {
......
...@@ -34,10 +34,12 @@ def nvcc_module_compile_str(module_name, src_code, location=None, include_dirs=[ ...@@ -34,10 +34,12 @@ def nvcc_module_compile_str(module_name, src_code, location=None, include_dirs=[
preargs= [] if preargs is None else list(preargs) preargs= [] if preargs is None else list(preargs)
preargs.append('-fPIC') preargs.append('-fPIC')
no_opt = False no_opt = False
cuda_root = os.getenv('CUDA_ROOT')
include_dirs = std_include_dirs() + include_dirs include_dirs = std_include_dirs() + include_dirs
libs = std_libs() + ['cudart'] + libs libs = std_libs() + ['cudart'] + libs
lib_dirs = std_lib_dirs() + [os.path.join(os.getenv('CUDA_ROOT'), 'lib')] + lib_dirs lib_dirs = std_lib_dirs() + lib_dirs
if cuda_root:
lib_dirs.append(os.path.join(cuda_root, 'lib'))
cppfilename = os.path.join(location, 'mod.cu') cppfilename = os.path.join(location, 'mod.cu')
cppfile = file(cppfilename, 'w') cppfile = file(cppfilename, 'w')
...@@ -83,7 +85,7 @@ def nvcc_module_compile_str(module_name, src_code, location=None, include_dirs=[ ...@@ -83,7 +85,7 @@ def nvcc_module_compile_str(module_name, src_code, location=None, include_dirs=[
print '===============================' print '==============================='
for i, l in enumerate(src_code.split('\n')): for i, l in enumerate(src_code.split('\n')):
print i+1, l print i+1, l
raise Exception('nvcc return status', p.returncode) raise Exception('nvcc return status', p.returncode, 'for file',cppfilename)
#touch the __init__ file #touch the __init__ file
file(os.path.join(location, "__init__.py"),'w').close() file(os.path.join(location, "__init__.py"),'w').close()
......
...@@ -175,7 +175,7 @@ def local_gpu_flatten(node): ...@@ -175,7 +175,7 @@ def local_gpu_flatten(node):
host_input = node.inputs[0] host_input = node.inputs[0]
if host_input.owner and isinstance(host_input.owner.op, tensor.Flatten): if host_input.owner and isinstance(host_input.owner.op, tensor.Flatten):
outdim = host_input.owner.op.outdim outdim = host_input.owner.op.outdim
return [GpuFlatten(outdim)(gpu_from_host(host_input.inputs[0]))] return [GpuFlatten(outdim)(gpu_from_host(host_input.owner.inputs[0]))]
if isinstance(node.op, tensor.Flatten): if isinstance(node.op, tensor.Flatten):
x, = node.inputs x, = node.inputs
outdim = node.op.outdim outdim = node.op.outdim
......
...@@ -52,8 +52,14 @@ class CudaNdarrayType(Type): ...@@ -52,8 +52,14 @@ class CudaNdarrayType(Type):
def filter(self, data, strict=False):
    """Validate/convert `data` to a value acceptable for this type.

    Delegates entirely to the C-level type_support_filter, passing this
    type's broadcastable pattern along.
    """
    bcast = self.broadcastable
    return type_support_filter(data, bcast, strict)
@staticmethod
def values_eq(a, b):
    """Return exact equality of a and b, compared as host-side numpy arrays."""
    # TODO: make the comparison without the device-to-host transfer.
    return tensor.TensorType.values_eq(numpy.asarray(a), numpy.asarray(b))
@staticmethod
def values_eq_approx(a, b):
    """Return approximate equality of a and b, compared as host-side numpy arrays."""
    # TODO: make the comparison without the device-to-host transfer.
    host_a = numpy.asarray(a)
    host_b = numpy.asarray(b)
    return tensor.TensorType.values_eq_approx(host_a, host_b)
def dtype_specs(self): def dtype_specs(self):
...@@ -229,12 +235,18 @@ class CudaNdarrayType(Type): ...@@ -229,12 +235,18 @@ class CudaNdarrayType(Type):
def c_header_dirs(self):
    """Override `CLinkerOp.c_headers` """
    # Always search next to the compiled cuda_ndarray extension; add the
    # CUDA toolkit's include dir only when CUDA_ROOT is set and non-empty.
    dirs = [os.path.dirname(cuda_ndarray.__file__)]
    root = os.getenv("CUDA_ROOT")
    if root:
        dirs.append(os.path.join(root, 'include'))
    return dirs
def c_lib_dirs(self):
    """Return library search dirs: the cuda_ndarray extension's directory,
    plus the CUDA toolkit's lib dir when CUDA_ROOT is set and non-empty."""
    dirs = [os.path.dirname(cuda_ndarray.__file__)]
    root = os.getenv("CUDA_ROOT")
    if root:
        dirs.append(os.path.join(root, 'lib'))
    return dirs
def c_libraries(self):
    """Libraries to link against: the cuda_ndarray extension and the
    CUDA runtime (cudart)."""
    return ['cuda_ndarray', 'cudart']
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论