提交 0f1d6881，作者：James Bergstra

merge

......@@ -7,14 +7,28 @@ from .var import (CudaNdarrayVariable,
import basic_ops
import opt
import cuda_ndarray
import theano.compile.sandbox
import logging, os
def use(tf=True):
    """Enable CudaNdarrayType as the default handler for shared float32 arrays.

    :param tf: forwarded to handle_shared_float32.  True (the default,
        preserving the old zero-argument behaviour) initialises the GPU and
        registers the shared-variable constructor; False is not implemented
        by the handler and will raise NotImplementedError there.

    This is the preferred entry point — handle_shared_float32's own
    docstring says "Use use(tf) instead as this is a bad name".
    """
    handle_shared_float32(tf)
def handle_shared_float32(tf):
    """Set the CudaNdarrayType as the default handler for shared float32 arrays
    Use use(tf) instead as this is a bad name.

    :param tf: if True, initialise the GPU device and register the
        CudaNdarray constructor for theano.shared on float32 data;
        if False, raise (unregistering the handler is not implemented).
    """
    if tf:
        try:
            # THEANO_GPU selects which device number to initialise
            # (defaults to 0 when the variable is unset).
            v=os.getenv("THEANO_GPU",0)
            cuda_ndarray.gpu_init(int(v))
            # Make theano.shared build CudaNdarray variables for float32 data.
            theano.compile.sandbox.shared_constructor(shared_constructor)
        except RuntimeError, e:
            # GPU initialisation can fail (no device, driver problem, ...);
            # warn and fall back to the CPU instead of aborting import.
            logging.getLogger('theano_cuda_ndarray').warning("WARNING: Won't use the GPU as the initialisation failed."+str(e))
    else:
        raise NotImplementedError('removing our handler')
......@@ -880,6 +880,8 @@ class NaiveAlgo(object):
print >> sio, """
int dims[%(nd)s] = {%(initial_dims)s};
""" %locals()
#check that all inputs have valid dimensions
for iname in inputs:
print >> sio, """
//std::cerr << "C_CODE %(opname)s checking input %(iname)s\\n";
......@@ -904,6 +906,7 @@ class NaiveAlgo(object):
}
""" %locals()
#check that all outputs have valid dimensions
for oname in outputs:
print >> sio, """
for (int i = 0; (i< %(nd)s) && (cnda_%(oname)s); ++i) {
......
......@@ -34,10 +34,12 @@ def nvcc_module_compile_str(module_name, src_code, location=None, include_dirs=[
preargs= [] if preargs is None else list(preargs)
preargs.append('-fPIC')
no_opt = False
cuda_root = os.getenv('CUDA_ROOT')
include_dirs = std_include_dirs() + include_dirs
libs = std_libs() + ['cudart'] + libs
lib_dirs = std_lib_dirs() + [os.path.join(os.getenv('CUDA_ROOT'), 'lib')] + lib_dirs
lib_dirs = std_lib_dirs() + lib_dirs
if cuda_root:
lib_dirs.append(os.path.join(cuda_root, 'lib'))
cppfilename = os.path.join(location, 'mod.cu')
cppfile = file(cppfilename, 'w')
......@@ -83,7 +85,7 @@ def nvcc_module_compile_str(module_name, src_code, location=None, include_dirs=[
print '==============================='
for i, l in enumerate(src_code.split('\n')):
print i+1, l
raise Exception('nvcc return status', p.returncode)
raise Exception('nvcc return status', p.returncode, 'for file',cppfilename)
#touch the __init__ file
file(os.path.join(location, "__init__.py"),'w').close()
......
......@@ -175,7 +175,7 @@ def local_gpu_flatten(node):
host_input = node.inputs[0]
if host_input.owner and isinstance(host_input.owner.op, tensor.Flatten):
outdim = host_input.owner.op.outdim
return [GpuFlatten(outdim)(gpu_from_host(host_input.inputs[0]))]
return [GpuFlatten(outdim)(gpu_from_host(host_input.owner.inputs[0]))]
if isinstance(node.op, tensor.Flatten):
x, = node.inputs
outdim = node.op.outdim
......
......@@ -52,8 +52,14 @@ class CudaNdarrayType(Type):
def filter(self, data, strict=False):
    """Coerce `data` into a value acceptable for this type.

    Delegates to type_support_filter, supplying this type's broadcastable
    pattern; `strict` is forwarded unchanged.
    """
    bcast = self.broadcastable
    return type_support_filter(data, bcast, strict)
@staticmethod
def values_eq(a, b):
    """Return whether `a` and `b` hold equal values."""
    # TODO: do the comparison on the device, without a host transfer.
    a_host = numpy.asarray(a)
    b_host = numpy.asarray(b)
    return tensor.TensorType.values_eq(a_host, b_host)
@staticmethod
def values_eq_approx(a, b):
    """Return whether `a` and `b` hold approximately equal values."""
    # TODO: do the comparison on the device, without a host transfer.
    a_host = numpy.asarray(a)
    b_host = numpy.asarray(b)
    return tensor.TensorType.values_eq_approx(a_host, b_host)
def dtype_specs(self):
......@@ -229,12 +235,18 @@ class CudaNdarrayType(Type):
def c_header_dirs(self):
    """Override `CLinkerOp.c_headers` """
    # Always expose the directory holding cuda_ndarray's own headers.
    ret = [os.path.dirname(cuda_ndarray.__file__)]
    # Only add the CUDA include directory when CUDA_ROOT is set:
    # os.path.join(None, 'include') would raise TypeError otherwise.
    cuda_root = os.getenv("CUDA_ROOT")
    if cuda_root:
        ret.append(os.path.join(cuda_root, 'include'))
    return ret
def c_lib_dirs(self):
    """Return the library search directories needed to link generated code."""
    # Always expose the directory holding the compiled cuda_ndarray module.
    ret = [os.path.dirname(cuda_ndarray.__file__)]
    # Only add the CUDA lib directory when CUDA_ROOT is set:
    # os.path.join(None, 'lib') would raise TypeError otherwise.
    cuda_root = os.getenv("CUDA_ROOT")
    if cuda_root:
        ret.append(os.path.join(cuda_root, 'lib'))
    return ret
def c_libraries(self):
    """Libraries generated code must link against: the cuda_ndarray
    extension itself and the CUDA runtime."""
    required = ['cuda_ndarray', 'cudart']
    return required
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论