提交 602e87d8 authored 作者: James Bergstra's avatar James Bergstra

added files that i forgot to add before

上级 fbbeb192
# Build the CUDA type-support Python extension module with nvcc.
# Requires CUDA_ROOT in the environment and links against the prebuilt
# cuda_ndarray.so from $(HOME)/cvs/lgcm/cuda_ndarray.
# NOTE(review): hard-codes python2.6 headers and the developer's home-dir
# checkout path -- not portable across machines.
type_support.so : type_support.cu
	nvcc -O3 -shared -I$(HOME)/cvs/lgcm/cuda_ndarray -I$(CUDA_ROOT)/include -I/usr/include/python2.6 -o type_support.so -Xcompiler -fPIC type_support.cu -L$(CUDA_ROOT)/lib $(HOME)/cvs/lgcm/cuda_ndarray/cuda_ndarray.so
# Remove the built shared library.
clean :
	rm type_support.so
import sys, os, subprocess, logging
from theano.gof.cmodule import (std_libs, std_lib_dirs, std_include_dirs, dlimport,
get_lib_extension)
# Module-level logger; default threshold WARN, so info()/debug() are silent
# unless a caller lowers the level.
_logger = logging.getLogger("theano_cuda_ndarray.nvcc_compiler")
_logger.setLevel(logging.WARN)


def _tagged(tag, args):
    """Return *tag* followed by each element of *args* rendered via str()."""
    return tag + ' '.join(str(a) for a in args)


def error(*args):
    """Log *args*, space-joined, through the module logger at ERROR level."""
    _logger.error(_tagged("ERROR: ", args))


def warning(*args):
    """Log *args*, space-joined, through the module logger at WARNING level."""
    _logger.warning(_tagged("WARNING: ", args))


def info(*args):
    """Log *args*, space-joined, through the module logger at INFO level."""
    _logger.info(_tagged("INFO: ", args))


def debug(*args):
    """Log *args*, space-joined, through the module logger at DEBUG level."""
    _logger.debug(_tagged("DEBUG: ", args))
def nvcc_module_compile_str(module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[],
        preargs=[]):
    """Compile CUDA module source with nvcc and import the resulting .so.

    :param module_name: string (this has been embedded in the src_code
    :param src_code: a complete c or c++ source listing for the module
    :param location: a pre-existing filesystem directory where the cpp file and .so will be written
    :param include_dirs: a list of include directory names (each gets prefixed with -I)
    :param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
    :param libs: a list of libraries to link with (each gets prefixed with -l)
    :param preargs: a list of extra compiler arguments

    :returns: dynamically-imported python module of the compiled code.

    :raises Exception: when nvcc exits with a nonzero status (after printing
        the filtered compiler output and the numbered source listing).
    """
    # NOTE(review): Python 2 code -- uses the `file()` builtin and `print`
    # statements; will not run unmodified under Python 3.
    # Copy preargs so the (mutable) default list is never modified in place.
    preargs= [] if preargs is None else list(preargs)
    preargs.append('-fPIC')
    no_opt = False  # NOTE(review): assigned but never read.
    # Prepend the theano-standard search paths; cudart is always linked.
    include_dirs = std_include_dirs() + include_dirs
    libs = std_libs() + ['cudart'] + libs
    # assumes CUDA_ROOT is set in the environment -- os.getenv returns None
    # otherwise and os.path.join would raise; TODO confirm callers guarantee it.
    lib_dirs = std_lib_dirs() + [os.path.join(os.getenv('CUDA_ROOT'), 'lib')] + lib_dirs
    # nvcc requires the .cu extension even though the listing is C/C++.
    cppfilename = os.path.join(location, 'mod.cu')
    cppfile = file(cppfilename, 'w')

    debug('Writing module C++ code to', cppfilename)
    ofiles = []  # NOTE(review): assigned but never read.
    rval = None  # NOTE(review): assigned but never read.

    cppfile.write(src_code)
    cppfile.close()

    lib_filename = os.path.join(location, '%s.%s' %
            (module_name, get_lib_extension()))

    debug('Generating shared lib', lib_filename)
    # -O flags must go to nvcc itself; every other prearg is forwarded to the
    # host compiler via a single comma-joined -Xcompiler argument.
    cmd = ['nvcc', '-shared', '-g'] + [pa for pa in preargs if pa.startswith('-O')]
    cmd.extend(['-Xcompiler', ','.join(pa for pa in preargs if not pa.startswith('-O'))])
    cmd.extend('-I%s'%idir for idir in include_dirs)
    cmd.extend(['-o',lib_filename])
    cmd.append(cppfilename)
    cmd.extend(['-L%s'%ldir for ldir in lib_dirs])
    cmd.extend(['-l%s'%l for l in libs])
    debug('Running cmd', ' '.join(cmd))
    # Only stderr is captured; nvcc's stdout passes through untouched.
    p = subprocess.Popen(cmd, stderr=subprocess.PIPE)
    stderr = p.communicate()[1]

    if p.returncode:
        # filter the output from the compiler
        for l in stderr.split('\n'):
            if not l:
                continue
            # filter out the annoying declaration warnings
            try:
                if l[l.index(':'):].startswith(': warning: variable'):
                    continue
                if l[l.index(':'):].startswith(': warning: label'):
                    continue
            except:
                # l.index(':') raises ValueError on lines without a colon;
                # such lines are printed as-is.
                pass
            print l
        print '==============================='
        # Echo the failing source with 1-based line numbers so compiler
        # messages can be matched against it.
        for i, l in enumerate(src_code.split('\n')):
            print i+1, l
        raise Exception('nvcc return status', p.returncode)

    #touch the __init__ file (makes `location` an importable package dir)
    file(os.path.join(location, "__init__.py"),'w').close()
    return dlimport(lib_filename)
from theano import tensor, gof
from theano import tensor, scalar
from .basic_ops import *
@gof.local_optimizer([GpuFromHost(), None])
def local_gpu_host_gpu(node):
    """Collapse GpuFromHost(HostFromGpu(x)) to x, dropping a GPU->host->GPU round trip."""
    if tensor.opt.opt.check_chain(node, GpuFromHost(), HostFromGpu()):
        # x (the HostFromGpu input) already lives on the GPU; reuse it directly.
        return [node.inputs[0].owner.inputs[0]]
    return False
tensor.opt.register_specialize(local_gpu_host_gpu, 'gpu')
@gof.local_optimizer([HostFromGpu(), None])
def local_host_gpu_host(node):
    """Collapse HostFromGpu(GpuFromHost(x)) to x, dropping a host->GPU->host round trip."""
    if tensor.opt.opt.check_chain(node, HostFromGpu(), GpuFromHost()):
        # x (the GpuFromHost input) is already a host variable; reuse it directly.
        return [node.inputs[0].owner.inputs[0]]
    return False
tensor.opt.register_specialize(local_host_gpu_host, 'gpu')
@gof.local_optimizer([])
def local_gpu_elemwise(node):
    """Move an Elemwise to the GPU when any of its inputs was copied off the GPU.

    Rewrites Elemwise(...) as HostFromGpu(GpuElemwise(GpuFromHost(...))) so the
    transfer-elimination optimizations can then cancel the redundant copies.
    """
    if not isinstance(node.op, tensor.Elemwise):
        return False

    def from_gpu(var):
        return hasattr(var.owner, 'op') and isinstance(var.owner.op, HostFromGpu)

    if not any(from_gpu(i) for i in node.inputs):
        return False
    gpu_op = GpuElemwise(node.op.scalar_op, node.op.inplace_pattern)
    gpu_inputs = [gpu_from_host(i) for i in node.inputs]
    return [host_from_gpu(gpu_op(*gpu_inputs))]
tensor.opt.register_specialize(local_gpu_elemwise, 'gpu')
@gof.local_optimizer([])
def local_gpu_dimshuffle(node):
    """Move a DimShuffle to the GPU when its input was copied off the GPU.

    Non-inplace shuffles copy the input first (tensor_copy) to preserve the
    original op's no-aliasing behaviour.
    """
    if not isinstance(node.op, tensor.DimShuffle):
        return False
    inp, = node.inputs
    if inp.owner is None or not isinstance(inp.owner.op, HostFromGpu):
        return False
    gpu_shuffle = GpuDimShuffle(node.op.input_broadcastable,
            node.op.new_order)
    if node.op.inplace:
        shuffled = gpu_shuffle(gpu_from_host(inp))
    else:
        shuffled = gpu_shuffle(gpu_from_host(tensor.tensor_copy(inp)))
    return [host_from_gpu(shuffled)]
tensor.opt.register_specialize(local_gpu_dimshuffle, 'gpu')
import numpy
from theano import Op, Type, Apply, Variable, Constant
from theano import tensor
from theano.compile.sandbox.sharedvalue import shared, SharedVariable, shared_constructor
from .type import CudaNdarrayType
from .type_support import filter as type_support_filter
from .basic_ops import HostFromGpu, GpuFromHost
class _operators(tensor.basic._tensor_py_operators):
    """Mixin giving CudaNdarray variables the standard tensor operator surface.

    Arithmetic on these variables builds graphs of TensorType variables; the
    specialization pass later swaps in pure GPU implementations.  This keeps
    the Cuda Ops free of input argument checking and gradients.
    """

    def _as_TensorVariable(self):
        # Interoperate with TensorType graphs by copying back to the host.
        return HostFromGpu()(self)

    def _as_CudaNdarrayVariable(self):
        return self

    @property
    def dtype(self):
        # CudaNdarray storage is always single precision.
        return 'float32'

    @property
    def broadcastable(self):
        return self.type.broadcastable

    @property
    def ndim(self):
        return self.type.ndim
class CudaNdarrayVariable(Variable, _operators):
    # Graph variable holding a CudaNdarrayType value; all operator behaviour
    # comes from the _operators mixin.
    pass
# Tell CudaNdarrayType to build variables of this class.
CudaNdarrayType.Variable = CudaNdarrayVariable
class CudaNdarrayConstant(Constant, _operators):
    # Graph constant holding a CudaNdarrayType value; all operator behaviour
    # comes from the _operators mixin.
    pass
# Tell CudaNdarrayType to build constants of this class.
CudaNdarrayType.Constant = CudaNdarrayConstant
class CudaNdarraySharedVariable(SharedVariable, _operators):
    """Shared variable whose storage is a CudaNdarray living on the GPU."""

    @property
    def value(self):
        # Reading copies the device array back to host memory as an ndarray.
        return numpy.asarray(self.container.value)

    @value.setter
    def value(self, new_value):
        # container does the filtering
        self.container.value = new_value

    def filter_update(self, other):
        """Coerce an update expression *other* to a CudaNdarray variable.

        Raises TypeError when *other* is neither GPU-convertible nor a
        matching float32 TensorType variable.
        """
        converter = getattr(other, '_as_CudaNdarrayVariable', None)
        if converter is not None:
            return converter()
        compatible = (isinstance(other.type, tensor.TensorType)
                and other.type.dtype == self.dtype
                and other.broadcastable == self.broadcastable)
        if compatible:
            return GpuFromHost()(other)
        raise TypeError((other, other.type))
CudaNdarrayType.SharedVariable = CudaNdarraySharedVariable
def shared_constructor(value, name, strict=False):
    """SharedVariable constructor for CudaNdarrayType (float32 only).

    :param value: initial value; coerced to a float32 ndarray unless strict
    :param name: name for the shared variable
    :param strict: when True, *value* must already be a float32 ndarray
    :raises TypeError: when the (coerced) value is not a float32 ndarray

    NOTE(review): this definition shadows the `shared_constructor` imported
    from theano.compile.sandbox.sharedvalue at module level.
    """
    if strict:
        _value = value
    else:
        _value = numpy.asarray(value, dtype='float32')
    if not isinstance(_value, numpy.ndarray):
        raise TypeError('ndarray required')
    if _value.dtype.num != CudaNdarrayType.typenum:
        raise TypeError('float32 ndarray required')
    # BUGFIX: use the coerced `_value`, not `value` -- when strict is False
    # the caller may pass a plain list/scalar that has no .shape attribute.
    bcast = [False for b in _value.shape]
    type = CudaNdarrayType(broadcastable=bcast)
    return CudaNdarraySharedVariable(type=type, value=_value, name=name, strict=strict)
def unset_shared_for_numpy():
    """Undo set_shared_for_numpy.  Not yet implemented."""
    raise NotImplementedError()
def set_shared_for_numpy():
    """
    Set the gpu_tensor_constructor as the handler for ndarray
    """
    # BUGFIX: the original called `shared_constructor(shared_constructor)`,
    # but the registration function imported from
    # theano.compile.sandbox.sharedvalue is shadowed by this module's own
    # `shared_constructor` definition -- so it invoked the constructor on
    # itself instead of registering it.  Import the registration function
    # under an alias and register this module's constructor with it.
    from theano.compile.sandbox.sharedvalue import shared_constructor as _register
    _register(shared_constructor)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论