提交 89afe76e authored 作者: notoraptor's avatar notoraptor

Move C files into c_code sub-folder in every module.

上级 8d2ea245
...@@ -271,7 +271,7 @@ Python File ...@@ -271,7 +271,7 @@ Python File
``tstgpueye.c`` ``tstgpueye.c``
~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~
.. literalinclude:: ../../theano/gpuarray/tests/tstgpueye.c .. literalinclude:: ../../theano/gpuarray/tests/c_code/tstgpueye.c
:language: C :language: C
Wrapping Existing Libraries Wrapping Existing Libraries
......
...@@ -1609,7 +1609,7 @@ def std_include_dirs(): ...@@ -1609,7 +1609,7 @@ def std_include_dirs():
py_plat_spec_inc = distutils.sysconfig.get_python_inc(plat_specific=True) py_plat_spec_inc = distutils.sysconfig.get_python_inc(plat_specific=True)
python_inc_dirs = ([py_inc] if py_inc == py_plat_spec_inc python_inc_dirs = ([py_inc] if py_inc == py_plat_spec_inc
else [py_inc, py_plat_spec_inc]) else [py_inc, py_plat_spec_inc])
gof_inc_dir = os.path.abspath(os.path.dirname(__file__)) gof_inc_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'c_code')
return numpy_inc_dirs + python_inc_dirs + [gof_inc_dir] return numpy_inc_dirs + python_inc_dirs + [gof_inc_dir]
......
...@@ -98,7 +98,7 @@ except ImportError: ...@@ -98,7 +98,7 @@ except ImportError:
raise raise
_logger.info("Compiling new CVM") _logger.info("Compiling new CVM")
dirname = 'lazylinker_ext' dirname = 'lazylinker_ext'
cfile = os.path.join(theano.__path__[0], 'gof', 'lazylinker_c.c') cfile = os.path.join(theano.__path__[0], 'gof', 'c_code', 'lazylinker_c.c')
if not os.path.exists(cfile): if not os.path.exists(cfile):
# This can happen in abnormal cases. We just # This can happen in abnormal cases. We just
# disable the c clinker. If we are here the user # disable the c clinker. If we are here the user
......
...@@ -98,7 +98,7 @@ class QuadraticCOpFunc(COp): ...@@ -98,7 +98,7 @@ class QuadraticCOpFunc(COp):
c=generic_type) c=generic_type)
def __init__(self, a, b, c): def __init__(self, a, b, c):
super(QuadraticCOpFunc, self).__init__('test_quadratic_function.c', super(QuadraticCOpFunc, self).__init__('c_code/test_quadratic_function.c',
'APPLY_SPECIFIC(compute_quadratic)') 'APPLY_SPECIFIC(compute_quadratic)')
self.a = a self.a = a
self.b = b self.b = b
......
...@@ -126,6 +126,10 @@ def infer_context_name(*vars): ...@@ -126,6 +126,10 @@ def infer_context_name(*vars):
raise ValueError("Could not infer context from inputs") raise ValueError("Could not infer context from inputs")
def gpuarray_helper_inc_dir():
    """
    Return the path of the ``c_code`` directory next to this module.

    The path is built from the directory part of ``__file__`` joined with
    ``'c_code'``; it is not normalised or made absolute here.

    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, 'c_code')
class Kernel(object): class Kernel(object):
""" """
This class groups together all the attributes of a gpu kernel. This class groups together all the attributes of a gpu kernel.
...@@ -688,7 +692,7 @@ class GpuFromHost(Op): ...@@ -688,7 +692,7 @@ class GpuFromHost(Op):
return ["gpuarray_helper.h"] return ["gpuarray_helper.h"]
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [gpuarray_helper_inc_dir()]
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
return """ return """
...@@ -999,7 +1003,7 @@ class GpuAllocEmpty(HideC, AllocEmpty): ...@@ -999,7 +1003,7 @@ class GpuAllocEmpty(HideC, AllocEmpty):
return ['<gpuarray_helper.h>'] return ['<gpuarray_helper.h>']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [gpuarray_helper_inc_dir()]
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
ndim = len(inp) ndim = len(inp)
...@@ -1068,7 +1072,7 @@ class GpuContiguous(Op): ...@@ -1068,7 +1072,7 @@ class GpuContiguous(Op):
return Apply(self, [input], [input.type()]) return Apply(self, [input], [input.type()])
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [gpuarray_helper_inc_dir()]
def c_headers(self): def c_headers(self):
return ['<gpuarray_helper.h>'] return ['<gpuarray_helper.h>']
...@@ -1390,7 +1394,7 @@ class GpuSplit(HideC, Split): ...@@ -1390,7 +1394,7 @@ class GpuSplit(HideC, Split):
return ['<numpy_compat.h>', '<gpuarray_helper.h>'] return ['<numpy_compat.h>', '<gpuarray_helper.h>']
def c_header_dirs(self): def c_header_dirs(self):
return [pygpu.get_include(), os.path.dirname(__file__)] return [pygpu.get_include(), gpuarray_helper_inc_dir()]
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
if self.len_splits == 0: if self.len_splits == 0:
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import os.path
from six import integer_types from six import integer_types
import theano import theano
...@@ -12,7 +11,7 @@ from theano.tensor.basic import as_tensor_variable ...@@ -12,7 +11,7 @@ from theano.tensor.basic import as_tensor_variable
from theano.tensor.opt import in2out from theano.tensor.opt import in2out
from .basic_ops import (GpuArrayType, CGpuKernelBase, from .basic_ops import (GpuArrayType, CGpuKernelBase,
as_gpuarray_variable, gpu_contiguous, infer_context_name) as_gpuarray_variable, gpu_contiguous, infer_context_name, gpuarray_helper_inc_dir)
from .opt_util import inplace_allocempty from .opt_util import inplace_allocempty
try: try:
...@@ -28,7 +27,7 @@ class BlasOp(Op): ...@@ -28,7 +27,7 @@ class BlasOp(Op):
return ['<blas_api.h>', '<numpy_compat.h>', '<gpuarray_helper.h>'] return ['<blas_api.h>', '<numpy_compat.h>', '<gpuarray_helper.h>']
def c_header_dirs(self): def c_header_dirs(self):
return [pygpu.get_include(), os.path.dirname(__file__)] return [pygpu.get_include(), gpuarray_helper_inc_dir()]
def c_init_code(self): def c_init_code(self):
return ['import_pygpu__blas();'] return ['import_pygpu__blas();']
...@@ -487,7 +486,7 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -487,7 +486,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
if num_groups < 1: if num_groups < 1:
raise ValueError("Number of groups should be greater than 0") raise ValueError("Number of groups should be greater than 0")
self.num_groups = num_groups self.num_groups = num_groups
CGpuKernelBase.__init__(self, ['corr_gemm.c']) CGpuKernelBase.__init__(self, ['c_code/corr_gemm.c'])
@property @property
def pad(self): def pad(self):
...@@ -530,7 +529,7 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -530,7 +529,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
return ["<gpuarray/array.h>", "<gpuarray/blas.h>", "gpuarray_helper.h"] return ["<gpuarray/array.h>", "<gpuarray/blas.h>", "gpuarray_helper.h"]
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [gpuarray_helper_inc_dir()]
def c_code_cache_version(self): def c_code_cache_version(self):
# Raise this whenever modifying the C code (including the file). # Raise this whenever modifying the C code (including the file).
...@@ -1094,7 +1093,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1094,7 +1093,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
raise ValueError("filter_dilation must have three elements") raise ValueError("filter_dilation must have three elements")
self.subsample = tuple(subsample) self.subsample = tuple(subsample)
self.filter_dilation = tuple(filter_dilation) self.filter_dilation = tuple(filter_dilation)
CGpuKernelBase.__init__(self, ['corr3d_gemm.c']) CGpuKernelBase.__init__(self, ['c_code/corr3d_gemm.c'])
@property @property
def pad(self): def pad(self):
...@@ -1131,7 +1130,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1131,7 +1130,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
return ["<gpuarray/array.h>", "<gpuarray/blas.h>", "gpuarray_helper.h"] return ["<gpuarray/array.h>", "<gpuarray/blas.h>", "gpuarray_helper.h"]
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [gpuarray_helper_inc_dir()]
def c_code_cache_version(self): def c_code_cache_version(self):
# raise this whenever modifying the code below. # raise this whenever modifying the code below.
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import logging import logging
import os
import numpy as np import numpy as np
from theano import Apply, tensor from theano import Apply, tensor
...@@ -11,7 +10,7 @@ from theano.scalar import bool as bool_t ...@@ -11,7 +10,7 @@ from theano.scalar import bool as bool_t
from theano.gradient import grad_undefined from theano.gradient import grad_undefined
from .type import gpu_context_type from .type import gpu_context_type
from .basic_ops import as_gpuarray_variable, infer_context_name from .basic_ops import as_gpuarray_variable, infer_context_name, gpuarray_helper_inc_dir
_logger = logging.getLogger('theano.gpuarray.blocksparse') _logger = logging.getLogger('theano.gpuarray.blocksparse')
...@@ -30,7 +29,7 @@ class GpuSparseBlockGemv(COp): ...@@ -30,7 +29,7 @@ class GpuSparseBlockGemv(COp):
# NB: DTYPE_INPUT_* is used in C code, so I think we should not set check_input to False. # NB: DTYPE_INPUT_* is used in C code, so I think we should not set check_input to False.
def __init__(self, inplace=False): def __init__(self, inplace=False):
COp.__init__(self, "blockgemv.c", "APPLY_SPECIFIC(blockgemv)") COp.__init__(self, "c_code/blockgemv.c", "APPLY_SPECIFIC(blockgemv)")
self.inplace = inplace self.inplace = inplace
if self.inplace: if self.inplace:
self.destroy_map = {0: [0]} self.destroy_map = {0: [0]}
...@@ -39,7 +38,7 @@ class GpuSparseBlockGemv(COp): ...@@ -39,7 +38,7 @@ class GpuSparseBlockGemv(COp):
return self.params_type.get_params(self, context=node.inputs[0].type.context) return self.params_type.get_params(self, context=node.inputs[0].type.context)
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [gpuarray_helper_inc_dir()]
def c_headers(self): def c_headers(self):
return ['<gpuarray/buffer_blas.h>', '<gpuarray/buffer.h>', return ['<gpuarray/buffer_blas.h>', '<gpuarray/buffer.h>',
...@@ -101,7 +100,7 @@ class GpuSparseBlockOuter(COp): ...@@ -101,7 +100,7 @@ class GpuSparseBlockOuter(COp):
params_type = ParamsType(inplace=bool_t, context=gpu_context_type) params_type = ParamsType(inplace=bool_t, context=gpu_context_type)
def __init__(self, inplace=False): def __init__(self, inplace=False):
COp.__init__(self, ["blockger.c"], "APPLY_SPECIFIC(blockger)") COp.__init__(self, ["c_code/blockger.c"], "APPLY_SPECIFIC(blockger)")
self.inplace = inplace self.inplace = inplace
if self.inplace: if self.inplace:
self.destroy_map = {0: [0]} self.destroy_map = {0: [0]}
...@@ -126,7 +125,7 @@ class GpuSparseBlockOuter(COp): ...@@ -126,7 +125,7 @@ class GpuSparseBlockOuter(COp):
return [input_shapes[0]] return [input_shapes[0]]
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [gpuarray_helper_inc_dir()]
def c_headers(self): def c_headers(self):
return ['<gpuarray/buffer_blas.h>', '<gpuarray/buffer.h>', return ['<gpuarray/buffer_blas.h>', '<gpuarray/buffer.h>',
......
...@@ -3,7 +3,7 @@ from __future__ import absolute_import, print_function, division ...@@ -3,7 +3,7 @@ from __future__ import absolute_import, print_function, division
import theano import theano
from theano import (config, gof) from theano import (config, gof)
import theano.tensor as T import theano.tensor as T
from .basic_ops import (gpu_contiguous, as_gpuarray_variable, infer_context_name) from .basic_ops import (gpu_contiguous, as_gpuarray_variable, infer_context_name, gpuarray_helper_inc_dir)
import theano.tensor.nnet.ctc import theano.tensor.nnet.ctc
from .type import (GpuArrayType, gpu_context_type) from .type import (GpuArrayType, gpu_context_type)
from .elemwise import GpuDimShuffle from .elemwise import GpuDimShuffle
...@@ -58,7 +58,7 @@ class GpuConnectionistTemporalClassification(gof.COp): ...@@ -58,7 +58,7 @@ class GpuConnectionistTemporalClassification(gof.COp):
return ["warpctc", "gpuarray"] return ["warpctc", "gpuarray"]
def c_header_dirs(self): def c_header_dirs(self):
dirs = [os.path.dirname(__file__), pygpu.get_include()] dirs = [gpuarray_helper_inc_dir(), pygpu.get_include()]
if config.ctc.root != '': if config.ctc.root != '':
dirs.append(os.path.join(config.ctc.root, "include")) dirs.append(os.path.join(config.ctc.root, "include"))
return dirs return dirs
......
...@@ -33,7 +33,7 @@ from theano.tensor.signal.pool import ( ...@@ -33,7 +33,7 @@ from theano.tensor.signal.pool import (
from . import pygpu, cudnn_defs from . import pygpu, cudnn_defs
from .type import (get_context, gpu_context_type, list_contexts, from .type import (get_context, gpu_context_type, list_contexts,
GpuArraySharedVariable) GpuArraySharedVariable)
from .basic_ops import (as_gpuarray_variable, infer_context_name, from .basic_ops import (as_gpuarray_variable, infer_context_name, gpuarray_helper_inc_dir,
gpu_contiguous, GpuAllocEmpty, gpu_contiguous, GpuAllocEmpty,
empty_like, GpuArrayType, HostFromGpu) empty_like, GpuArrayType, HostFromGpu)
from .elemwise import GpuElemwise, GpuCAReduceCuda from .elemwise import GpuElemwise, GpuCAReduceCuda
...@@ -137,7 +137,7 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) { ...@@ -137,7 +137,7 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
path_wrapper = "\"" if os.name == 'nt' else "" path_wrapper = "\"" if os.name == 'nt' else ""
params = ["-l", "cudnn"] params = ["-l", "cudnn"]
params.extend(['-I%s%s%s' % (path_wrapper, os.path.dirname(__file__), path_wrapper)]) params.extend(['-I%s%s%s' % (path_wrapper, gpuarray_helper_inc_dir(), path_wrapper)])
if config.dnn.include_path: if config.dnn.include_path:
params.extend(['-I%s%s%s' % (path_wrapper, config.dnn.include_path, path_wrapper)]) params.extend(['-I%s%s%s' % (path_wrapper, config.dnn.include_path, path_wrapper)])
if config.cuda.include_path: if config.cuda.include_path:
...@@ -371,7 +371,7 @@ class DnnBase(COp): ...@@ -371,7 +371,7 @@ class DnnBase(COp):
def __init__(self, files=None, c_func=None): def __init__(self, files=None, c_func=None):
if files is None: if files is None:
files = [] files = []
COp.__init__(self, ["dnn_base.c"] + files, c_func) COp.__init__(self, ["c_code/dnn_base.c"] + files, c_func)
def c_headers(self): def c_headers(self):
return ['gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/kernel.h', return ['gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/kernel.h',
...@@ -380,7 +380,7 @@ class DnnBase(COp): ...@@ -380,7 +380,7 @@ class DnnBase(COp):
'gpuarray_helper.h'] 'gpuarray_helper.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include(), return [gpuarray_helper_inc_dir(), pygpu.get_include(),
config.dnn.include_path, config.cuda.include_path] config.dnn.include_path, config.cuda.include_path]
def c_libraries(self): def c_libraries(self):
...@@ -426,7 +426,7 @@ class GpuDnnConvDesc(COp): ...@@ -426,7 +426,7 @@ class GpuDnnConvDesc(COp):
return ['cudnn.h', 'cudnn_helper.h'] return ['cudnn.h', 'cudnn_helper.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__), config.dnn.include_path, return [gpuarray_helper_inc_dir(), config.dnn.include_path,
config.cuda.include_path] config.cuda.include_path]
def c_libraries(self): def c_libraries(self):
...@@ -448,7 +448,7 @@ class GpuDnnConvDesc(COp): ...@@ -448,7 +448,7 @@ class GpuDnnConvDesc(COp):
def __init__(self, border_mode, subsample=(1, 1), dilation=(1, 1), conv_mode='conv', def __init__(self, border_mode, subsample=(1, 1), dilation=(1, 1), conv_mode='conv',
precision="float32"): precision="float32"):
COp.__init__(self, ["conv_desc.c"], "APPLY_SPECIFIC(conv_desc)") COp.__init__(self, ["c_code/conv_desc.c"], "APPLY_SPECIFIC(conv_desc)")
if version() < 6000 and any([d != 1 for d in dilation]): if version() < 6000 and any([d != 1 for d in dilation]):
raise RuntimeError("Dilation > 1 not supported for cuDNN version < 6.") raise RuntimeError("Dilation > 1 not supported for cuDNN version < 6.")
...@@ -566,7 +566,7 @@ class GpuDnnConv(DnnBase): ...@@ -566,7 +566,7 @@ class GpuDnnConv(DnnBase):
num_groups=int_t) num_groups=int_t)
def __init__(self, algo=None, inplace=False, num_groups=1): def __init__(self, algo=None, inplace=False, num_groups=1):
DnnBase.__init__(self, ["dnn_conv_base.c", "dnn_fwd.c"], DnnBase.__init__(self, ["c_code/dnn_conv_base.c", "c_code/dnn_fwd.c"],
"APPLY_SPECIFIC(conv_fwd)") "APPLY_SPECIFIC(conv_fwd)")
if algo is None: if algo is None:
...@@ -709,7 +709,7 @@ class GpuDnnConvGradW(DnnBase): ...@@ -709,7 +709,7 @@ class GpuDnnConvGradW(DnnBase):
num_groups=int_t) num_groups=int_t)
def __init__(self, inplace=False, algo=None, num_groups=1): def __init__(self, inplace=False, algo=None, num_groups=1):
DnnBase.__init__(self, ["dnn_conv_base.c", "dnn_gw.c"], DnnBase.__init__(self, ["c_code/dnn_conv_base.c", "c_code/dnn_gw.c"],
"APPLY_SPECIFIC(conv_gw)") "APPLY_SPECIFIC(conv_gw)")
self.inplace = bool(inplace) self.inplace = bool(inplace)
if self.inplace: if self.inplace:
...@@ -845,7 +845,7 @@ class GpuDnnConvGradI(DnnBase): ...@@ -845,7 +845,7 @@ class GpuDnnConvGradI(DnnBase):
num_groups=int_t) num_groups=int_t)
def __init__(self, inplace=False, algo=None, num_groups=1): def __init__(self, inplace=False, algo=None, num_groups=1):
DnnBase.__init__(self, ["dnn_conv_base.c", "dnn_gi.c"], DnnBase.__init__(self, ["c_code/dnn_conv_base.c", "c_code/dnn_gi.c"],
"APPLY_SPECIFIC(conv_gi)") "APPLY_SPECIFIC(conv_gi)")
self.inplace = bool(inplace) self.inplace = bool(inplace)
if self.inplace: if self.inplace:
...@@ -1261,7 +1261,7 @@ class GpuDnnPoolDesc(Op): ...@@ -1261,7 +1261,7 @@ class GpuDnnPoolDesc(Op):
return ['cudnn.h', 'cudnn_helper.h'] return ['cudnn.h', 'cudnn_helper.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__), config.dnn.include_path] return [gpuarray_helper_inc_dir(), config.dnn.include_path]
def c_libraries(self): def c_libraries(self):
return ['cudnn'] return ['cudnn']
...@@ -1390,7 +1390,7 @@ class GpuDnnPool(GpuDnnPoolBase): ...@@ -1390,7 +1390,7 @@ class GpuDnnPool(GpuDnnPoolBase):
(padX, padY) or (padX, padY, padZ) (padX, padY) or (padX, padY, padZ)
""" """
c_file = "dnn_pool.c" c_file = "c_code/dnn_pool.c"
c_function = "APPLY_SPECIFIC(dnn_pool)" c_function = "APPLY_SPECIFIC(dnn_pool)"
def make_node(self, img, ws, stride, pad): def make_node(self, img, ws, stride, pad):
...@@ -1458,7 +1458,7 @@ class GpuDnnPoolGrad(GpuDnnPoolBase): ...@@ -1458,7 +1458,7 @@ class GpuDnnPoolGrad(GpuDnnPoolBase):
(padX, padY) or (padX, padY, padZ) (padX, padY) or (padX, padY, padZ)
""" """
c_file = "dnn_pool_grad.c" c_file = "c_code/dnn_pool_grad.c"
c_function = "APPLY_SPECIFIC(dnn_pool_grad)" c_function = "APPLY_SPECIFIC(dnn_pool_grad)"
def make_node(self, inp, out, out_grad, ws, stride, pad): def make_node(self, inp, out, out_grad, ws, stride, pad):
...@@ -1587,7 +1587,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase): ...@@ -1587,7 +1587,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
""" """
_f16_ok = True _f16_ok = True
direction = "forward" direction = "forward"
file = "dnn_softmax.c" file = "c_code/dnn_softmax.c"
c_func = "APPLY_SPECIFIC(softmax)" c_func = "APPLY_SPECIFIC(softmax)"
def make_node(self, x): def make_node(self, x):
...@@ -1624,7 +1624,7 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase): ...@@ -1624,7 +1624,7 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
""" """
_f16_ok = True _f16_ok = True
direction = 'backward' direction = 'backward'
file = "dnn_softmax_grad.c" file = "c_code/dnn_softmax_grad.c"
c_func = "APPLY_SPECIFIC(softmax_grad)" c_func = "APPLY_SPECIFIC(softmax_grad)"
def make_node(self, dy, sm): def make_node(self, dy, sm):
...@@ -1649,7 +1649,7 @@ class GpuDnnReduction(DnnBase): ...@@ -1649,7 +1649,7 @@ class GpuDnnReduction(DnnBase):
handle=handle_type) handle=handle_type)
def __init__(self, red_op, axis, acc_dtype, dtype, return_indices): def __init__(self, red_op, axis, acc_dtype, dtype, return_indices):
DnnBase.__init__(self, ['dnn_redux.c'], 'APPLY_SPECIFIC(dnn_redux)') DnnBase.__init__(self, ['c_code/dnn_redux.c'], 'APPLY_SPECIFIC(dnn_redux)')
assert cudnn.cudnnReduceTensorOp_t.has_alias(red_op) assert cudnn.cudnnReduceTensorOp_t.has_alias(red_op)
self.red_op = red_op self.red_op = red_op
assert acc_dtype in ['float16', 'float32', 'float64'] assert acc_dtype in ['float16', 'float32', 'float64']
...@@ -1753,7 +1753,7 @@ class GpuDnnBatchNorm(DnnBase): ...@@ -1753,7 +1753,7 @@ class GpuDnnBatchNorm(DnnBase):
def __init__(self, mode='per-activation', running_averages=False, def __init__(self, mode='per-activation', running_averages=False,
inplace_running_mean=False, inplace_running_var=False, inplace_running_mean=False, inplace_running_var=False,
inplace_output=False): inplace_output=False):
DnnBase.__init__(self, ['dnn_batchnorm_base.c', 'dnn_batchnorm.c'], DnnBase.__init__(self, ['c_code/dnn_batchnorm_base.c', 'c_code/dnn_batchnorm.c'],
'dnn_batchnorm_op') 'dnn_batchnorm_op')
assert cudnn.cudnnBatchNormMode_t.has_alias(mode) assert cudnn.cudnnBatchNormMode_t.has_alias(mode)
...@@ -1864,7 +1864,7 @@ class GpuDnnBatchNormInference(DnnBase): ...@@ -1864,7 +1864,7 @@ class GpuDnnBatchNormInference(DnnBase):
handle=handle_type) handle=handle_type)
def __init__(self, mode='per-activation', inplace=False): def __init__(self, mode='per-activation', inplace=False):
DnnBase.__init__(self, ['dnn_batchnorm_base.c', 'dnn_batchnorm_inf.c'], DnnBase.__init__(self, ['c_code/dnn_batchnorm_base.c', 'c_code/dnn_batchnorm_inf.c'],
'dnn_batchnorm_op') 'dnn_batchnorm_op')
assert cudnn.cudnnBatchNormMode_t.has_alias(mode) assert cudnn.cudnnBatchNormMode_t.has_alias(mode)
...@@ -1931,7 +1931,7 @@ class GpuDnnBatchNormGrad(DnnBase): ...@@ -1931,7 +1931,7 @@ class GpuDnnBatchNormGrad(DnnBase):
handle=handle_type) handle=handle_type)
def __init__(self, mode='per-activation'): def __init__(self, mode='per-activation'):
DnnBase.__init__(self, ['dnn_batchnorm_base.c', 'dnn_batchnorm_grad.c'], DnnBase.__init__(self, ['c_code/dnn_batchnorm_base.c', 'c_code/dnn_batchnorm_grad.c'],
'dnn_batchnorm_grad') 'dnn_batchnorm_grad')
assert cudnn.cudnnBatchNormMode_t.has_alias(mode) assert cudnn.cudnnBatchNormMode_t.has_alias(mode)
...@@ -1961,7 +1961,7 @@ class GpuDnnDropoutOp(DnnBase): ...@@ -1961,7 +1961,7 @@ class GpuDnnDropoutOp(DnnBase):
__props__ = ('inplace',) __props__ = ('inplace',)
def __init__(self, inplace=False): def __init__(self, inplace=False):
DnnBase.__init__(self, ["dnn_dropout_fwd.c"], "dnn_dropout_fwd") DnnBase.__init__(self, ["c_code/dnn_dropout_fwd.c"], "dnn_dropout_fwd")
self.inplace = inplace self.inplace = inplace
if self.inplace: if self.inplace:
self.destroy_map = {1: [2]} self.destroy_map = {1: [2]}
...@@ -1980,7 +1980,7 @@ class _DropoutDescriptor(DnnBase): ...@@ -1980,7 +1980,7 @@ class _DropoutDescriptor(DnnBase):
__props__ = ('context_name',) __props__ = ('context_name',)
def __init__(self, context_name): def __init__(self, context_name):
DnnBase.__init__(self, ["dnn_dropout_desc.c"], "dnn_dropout_desc") DnnBase.__init__(self, ["c_code/dnn_dropout_desc.c"], "dnn_dropout_desc")
self.context_name = context_name self.context_name = context_name
def dnn_context(self, node): def dnn_context(self, node):
...@@ -2036,7 +2036,7 @@ class _RNNDescriptor(DnnBase): ...@@ -2036,7 +2036,7 @@ class _RNNDescriptor(DnnBase):
def __init__(self, context_name): def __init__(self, context_name):
if version() < 5005: if version() < 5005:
raise RuntimeError("cudnn RNN require cudnn v5 final or higher.") raise RuntimeError("cudnn RNN require cudnn v5 final or higher.")
DnnBase.__init__(self, ["dnn_rnn_desc.c"], "dnn_rnn_desc") DnnBase.__init__(self, ["c_code/dnn_rnn_desc.c"], "dnn_rnn_desc")
self.context_name = context_name self.context_name = context_name
def dnn_context(self, node): def dnn_context(self, node):
...@@ -2103,7 +2103,7 @@ class _RNNParamSize(DnnBase): ...@@ -2103,7 +2103,7 @@ class _RNNParamSize(DnnBase):
__props__ = ('context_name',) __props__ = ('context_name',)
def __init__(self, context_name): def __init__(self, context_name):
DnnBase.__init__(self, ["dnn_rnn_paramsize.c"], DnnBase.__init__(self, ["c_code/dnn_rnn_paramsize.c"],
"dnn_rnn_paramsize") "dnn_rnn_paramsize")
self.context_name = context_name self.context_name = context_name
...@@ -2348,7 +2348,7 @@ class GpuDnnRNNOp(DnnBase): ...@@ -2348,7 +2348,7 @@ class GpuDnnRNNOp(DnnBase):
_cop_num_outputs = 4 _cop_num_outputs = 4
def __init__(self, rnn_mode, direction_mode): def __init__(self, rnn_mode, direction_mode):
DnnBase.__init__(self, ["dnn_rnn_fwd.c"], 'dnn_rnn_fwd') DnnBase.__init__(self, ["c_code/dnn_rnn_fwd.c"], 'dnn_rnn_fwd')
self.rnn_mode = rnn_mode self.rnn_mode = rnn_mode
if direction_mode == 'bidirectional': if direction_mode == 'bidirectional':
self.num_dirs = 2 self.num_dirs = 2
...@@ -2439,7 +2439,7 @@ class GpuDnnRNNGradInputs(DnnBase): ...@@ -2439,7 +2439,7 @@ class GpuDnnRNNGradInputs(DnnBase):
_cop_num_outputs = 4 _cop_num_outputs = 4
def __init__(self, rnn_mode, grad_h, grad_c): def __init__(self, rnn_mode, grad_h, grad_c):
DnnBase.__init__(self, ['dnn_rnn_gi.c'], 'dnn_rnn_gi') DnnBase.__init__(self, ['c_code/dnn_rnn_gi.c'], 'dnn_rnn_gi')
self.rnn_mode = rnn_mode self.rnn_mode = rnn_mode
self.grad_h = grad_h self.grad_h = grad_h
self.grad_c = grad_c self.grad_c = grad_c
...@@ -2488,7 +2488,7 @@ class GpuDnnRNNGradWeights(DnnBase): ...@@ -2488,7 +2488,7 @@ class GpuDnnRNNGradWeights(DnnBase):
__props__ = () __props__ = ()
def __init__(self): def __init__(self):
DnnBase.__init__(self, ['dnn_rnn_gw.c'], 'dnn_rnn_gw') DnnBase.__init__(self, ['c_code/dnn_rnn_gw.c'], 'dnn_rnn_gw')
def make_node(self, desc, x, hx, y, reserve, w): def make_node(self, desc, x, hx, y, reserve, w):
# We trust the callers here # We trust the callers here
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import os
from theano import Apply, Op from theano import Apply, Op
from theano.tensor.extra_ops import CumOp from theano.tensor.extra_ops import CumOp
...@@ -8,7 +7,7 @@ try: ...@@ -8,7 +7,7 @@ try:
except ImportError: except ImportError:
pass pass
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, GpuReshape, infer_context_name) from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, GpuReshape, infer_context_name, gpuarray_helper_inc_dir)
from .opt import register_opt, op_lifter, register_opt2 from .opt import register_opt, op_lifter, register_opt2
from .type import gpu_context_type from .type import gpu_context_type
from theano.gof import ParamsType from theano.gof import ParamsType
...@@ -47,7 +46,7 @@ class GpuCumOp(GpuKernelBase, Op): ...@@ -47,7 +46,7 @@ class GpuCumOp(GpuKernelBase, Op):
return ['<numpy_compat.h>', '<gpuarray/types.h>', '<gpuarray_helper.h>'] return ['<numpy_compat.h>', '<gpuarray/types.h>', '<gpuarray_helper.h>']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [gpuarray_helper_inc_dir()]
def get_params(self, node): def get_params(self, node):
return self.params_type.get_params(self, context=node.inputs[0].type.context) return self.params_type.get_params(self, context=node.inputs[0].type.context)
......
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
import os
import warnings import warnings
import pkg_resources import pkg_resources
...@@ -13,7 +12,7 @@ from theano.scalar import bool as bool_t ...@@ -13,7 +12,7 @@ from theano.scalar import bool as bool_t
from theano.gof import COp, ParamsType from theano.gof import COp, ParamsType
from theano.gpuarray import GpuArrayType from theano.gpuarray import GpuArrayType
from .basic_ops import (CGpuKernelBase, as_gpuarray_variable, gpu_contiguous, from .basic_ops import (CGpuKernelBase, as_gpuarray_variable, gpu_contiguous, gpuarray_helper_inc_dir,
infer_context_name) infer_context_name)
from .type import gpu_context_type from .type import gpu_context_type
...@@ -499,7 +498,7 @@ class GpuMagmaBase(COp): ...@@ -499,7 +498,7 @@ class GpuMagmaBase(COp):
'gpuarray_helper.h', 'magma.h'] 'gpuarray_helper.h', 'magma.h']
def c_header_dirs(self): def c_header_dirs(self):
dirs = [os.path.dirname(__file__), pygpu.get_include()] dirs = [gpuarray_helper_inc_dir(), pygpu.get_include()]
if config.magma.include_path: if config.magma.include_path:
dirs.append(config.magma.include_path) dirs.append(config.magma.include_path)
return dirs return dirs
...@@ -540,7 +539,7 @@ class GpuMagmaSVD(GpuMagmaBase): ...@@ -540,7 +539,7 @@ class GpuMagmaSVD(GpuMagmaBase):
def __init__(self, full_matrices=True, compute_uv=True): def __init__(self, full_matrices=True, compute_uv=True):
self.full_matrices = full_matrices self.full_matrices = full_matrices
self.compute_uv = compute_uv self.compute_uv = compute_uv
COp.__init__(self, ['magma_svd.c'], 'APPLY_SPECIFIC(magma_svd)') COp.__init__(self, ['c_code/magma_svd.c'], 'APPLY_SPECIFIC(magma_svd)')
def make_node(self, A): def make_node(self, A):
ctx_name = infer_context_name(A) ctx_name = infer_context_name(A)
...@@ -624,7 +623,7 @@ class GpuMagmaMatrixInverse(GpuMagmaBase): ...@@ -624,7 +623,7 @@ class GpuMagmaMatrixInverse(GpuMagmaBase):
params_type = ParamsType(inplace=bool_t, context=gpu_context_type) params_type = ParamsType(inplace=bool_t, context=gpu_context_type)
def __init__(self, inplace=False): def __init__(self, inplace=False):
COp.__init__(self, ['magma_inv.c'], 'APPLY_SPECIFIC(magma_inv)') COp.__init__(self, ['c_code/magma_inv.c'], 'APPLY_SPECIFIC(magma_inv)')
self.inplace = inplace self.inplace = inplace
if self.inplace: if self.inplace:
self.destroy_map = {0: [0]} self.destroy_map = {0: [0]}
...@@ -672,7 +671,7 @@ class GpuMagmaCholesky(GpuMagmaBase, CGpuKernelBase): ...@@ -672,7 +671,7 @@ class GpuMagmaCholesky(GpuMagmaBase, CGpuKernelBase):
def __init__(self, lower=True, inplace=False): def __init__(self, lower=True, inplace=False):
self.lower = lower self.lower = lower
COp.__init__(self, ['magma_cholesky.c'], 'APPLY_SPECIFIC(magma_cholesky)') COp.__init__(self, ['c_code/magma_cholesky.c'], 'APPLY_SPECIFIC(magma_cholesky)')
self.inplace = inplace self.inplace = inplace
if self.inplace: if self.inplace:
self.destroy_map = {0: [0]} self.destroy_map = {0: [0]}
...@@ -719,7 +718,7 @@ class GpuMagmaQR(GpuMagmaBase, CGpuKernelBase): ...@@ -719,7 +718,7 @@ class GpuMagmaQR(GpuMagmaBase, CGpuKernelBase):
def __init__(self, complete=True): def __init__(self, complete=True):
self.complete = complete self.complete = complete
COp.__init__(self, ['magma_qr.c'], 'APPLY_SPECIFIC(magma_qr)') COp.__init__(self, ['c_code/magma_qr.c'], 'APPLY_SPECIFIC(magma_qr)')
def make_node(self, A): def make_node(self, A):
ctx_name = infer_context_name(A) ctx_name = infer_context_name(A)
...@@ -785,7 +784,7 @@ class GpuMagmaEigh(GpuMagmaBase): ...@@ -785,7 +784,7 @@ class GpuMagmaEigh(GpuMagmaBase):
assert UPLO in ['L', 'U'] assert UPLO in ['L', 'U']
self.lower = UPLO == 'L' self.lower = UPLO == 'L'
self.compute_v = compute_v self.compute_v = compute_v
COp.__init__(self, ['magma_eigh.c'], 'APPLY_SPECIFIC(magma_eigh)') COp.__init__(self, ['c_code/magma_eigh.c'], 'APPLY_SPECIFIC(magma_eigh)')
def make_node(self, A): def make_node(self, A):
ctx_name = infer_context_name(A) ctx_name = infer_context_name(A)
......
# TODO test dtype != float32 # TODO test dtype != float32
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import os
import warnings import warnings
try: try:
...@@ -14,7 +13,7 @@ from theano import Apply ...@@ -14,7 +13,7 @@ from theano import Apply
from theano.gof import Op from theano.gof import Op
from theano.tensor import NotScalarConstantError, get_scalar_constant_value from theano.tensor import NotScalarConstantError, get_scalar_constant_value
from .basic_ops import as_gpuarray_variable, infer_context_name, GpuKernelBase, Kernel from .basic_ops import as_gpuarray_variable, infer_context_name, GpuKernelBase, Kernel, gpuarray_helper_inc_dir
from .opt import register_opt, op_lifter, register_opt2 from .opt import register_opt, op_lifter, register_opt2
from .type import GpuArrayType from .type import GpuArrayType
from .elemwise import GpuDimShuffle from .elemwise import GpuDimShuffle
...@@ -37,7 +36,7 @@ class GPUAMultinomialFromUniform(GpuKernelBase, Op): ...@@ -37,7 +36,7 @@ class GPUAMultinomialFromUniform(GpuKernelBase, Op):
return ['<numpy_compat.h>', 'gpuarray_helper.h'] return ['<numpy_compat.h>', 'gpuarray_helper.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [gpuarray_helper_inc_dir()]
def make_node(self, pvals, unis): def make_node(self, pvals, unis):
assert unis.dtype == pvals.dtype assert unis.dtype == pvals.dtype
...@@ -249,7 +248,7 @@ class GPUAChoiceFromUniform(GpuKernelBase, Op): ...@@ -249,7 +248,7 @@ class GPUAChoiceFromUniform(GpuKernelBase, Op):
return ['<numpy_compat.h>', 'gpuarray_helper.h'] return ['<numpy_compat.h>', 'gpuarray_helper.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [gpuarray_helper_inc_dir()]
def make_node(self, pvals, unis, n): def make_node(self, pvals, unis, n):
assert pvals.dtype == 'float32' assert pvals.dtype == 'float32'
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import os
import numpy as np import numpy as np
from theano import Op, Apply from theano import Op, Apply
...@@ -11,7 +10,7 @@ try: ...@@ -11,7 +10,7 @@ try:
except ImportError: except ImportError:
pass pass
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, gpuarray_helper_inc_dir,
infer_context_name) infer_context_name)
from .type import GpuArrayType from .type import GpuArrayType
from .fp16_help import work_dtype, load_w, write_w from .fp16_help import work_dtype, load_w, write_w
...@@ -43,7 +42,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op): ...@@ -43,7 +42,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
return ['<numpy_compat.h>', '<gpuarray/types.h>', 'gpuarray_helper.h'] return ['<numpy_compat.h>', '<gpuarray/types.h>', 'gpuarray_helper.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [gpuarray_helper_inc_dir()]
def gpu_kernels(self, node, nodename): def gpu_kernels(self, node, nodename):
dtype_x = node.inputs[0].dtype dtype_x = node.inputs[0].dtype
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import os.path
import theano import theano
from theano import Apply from theano import Apply
...@@ -9,7 +8,7 @@ from theano.tensor.basic import as_tensor_variable ...@@ -9,7 +8,7 @@ from theano.tensor.basic import as_tensor_variable
from theano.tensor.signal.pool import Pool, PoolingMode_t from theano.tensor.signal.pool import Pool, PoolingMode_t
from .type import gpu_context_type from .type import gpu_context_type
from .basic_ops import (CGpuKernelBase, infer_context_name, from .basic_ops import (CGpuKernelBase, infer_context_name, gpuarray_helper_inc_dir,
as_gpuarray_variable, gpu_contiguous) as_gpuarray_variable, gpu_contiguous)
try: try:
...@@ -35,7 +34,7 @@ class GpuPool(CGpuKernelBase): ...@@ -35,7 +34,7 @@ class GpuPool(CGpuKernelBase):
if mode == 'average': if mode == 'average':
mode = 'average_inc_pad' mode = 'average_inc_pad'
self.mode = mode self.mode = mode
CGpuKernelBase.__init__(self, ['pool.c'], CGpuKernelBase.__init__(self, ['c_code/pool.c'],
'APPLY_SPECIFIC(pool)') 'APPLY_SPECIFIC(pool)')
assert PoolingMode_t.has_alias(self.mode) assert PoolingMode_t.has_alias(self.mode)
assert self.ndim in [2, 3] assert self.ndim in [2, 3]
...@@ -47,7 +46,7 @@ class GpuPool(CGpuKernelBase): ...@@ -47,7 +46,7 @@ class GpuPool(CGpuKernelBase):
return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h'] return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include()] return [gpuarray_helper_inc_dir(), pygpu.get_include()]
def make_node(self, inp, ws, stride=None, pad=None): def make_node(self, inp, ws, stride=None, pad=None):
ctx_name = infer_context_name(inp) ctx_name = infer_context_name(inp)
...@@ -144,7 +143,7 @@ class GpuMaxPoolGrad(CGpuKernelBase): ...@@ -144,7 +143,7 @@ class GpuMaxPoolGrad(CGpuKernelBase):
self.ndim = ndim self.ndim = ndim
self.ignore_border = ignore_border self.ignore_border = ignore_border
self.mode = mode self.mode = mode
CGpuKernelBase.__init__(self, ['pool_max_grad.c'], CGpuKernelBase.__init__(self, ['c_code/pool_max_grad.c'],
'APPLY_SPECIFIC(max_pool_grad)') 'APPLY_SPECIFIC(max_pool_grad)')
assert mode == 'max' assert mode == 'max'
assert ndim in [2, 3] assert ndim in [2, 3]
...@@ -153,7 +152,7 @@ class GpuMaxPoolGrad(CGpuKernelBase): ...@@ -153,7 +152,7 @@ class GpuMaxPoolGrad(CGpuKernelBase):
return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h'] return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include()] return [gpuarray_helper_inc_dir(), pygpu.get_include()]
def make_node(self, inp, out, out_grad, ws, stride=None, pad=None): def make_node(self, inp, out, out_grad, ws, stride=None, pad=None):
ctx_name = infer_context_name(inp, out, out_grad) ctx_name = infer_context_name(inp, out, out_grad)
...@@ -221,7 +220,7 @@ class GpuAveragePoolGrad(CGpuKernelBase): ...@@ -221,7 +220,7 @@ class GpuAveragePoolGrad(CGpuKernelBase):
if mode == 'average': if mode == 'average':
mode = 'average_inc_pad' mode = 'average_inc_pad'
self.mode = mode self.mode = mode
CGpuKernelBase.__init__(self, ['pool_ave_grad.c'], CGpuKernelBase.__init__(self, ['c_code/pool_ave_grad.c'],
'APPLY_SPECIFIC(ave_pool_grad)') 'APPLY_SPECIFIC(ave_pool_grad)')
assert mode in ('sum', 'average_inc_pad', 'average_exc_pad') assert mode in ('sum', 'average_inc_pad', 'average_exc_pad')
assert ndim in [2, 3] assert ndim in [2, 3]
...@@ -233,7 +232,7 @@ class GpuAveragePoolGrad(CGpuKernelBase): ...@@ -233,7 +232,7 @@ class GpuAveragePoolGrad(CGpuKernelBase):
return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h'] return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include()] return [gpuarray_helper_inc_dir(), pygpu.get_include()]
def make_node(self, inp, out_grad, ws, stride=None, pad=None): def make_node(self, inp, out_grad, ws, stride=None, pad=None):
ctx_name = infer_context_name(inp, out_grad) ctx_name = infer_context_name(inp, out_grad)
...@@ -297,7 +296,7 @@ class GpuDownsampleFactorMaxGradGrad(CGpuKernelBase): ...@@ -297,7 +296,7 @@ class GpuDownsampleFactorMaxGradGrad(CGpuKernelBase):
self.ndim = ndim self.ndim = ndim
self.ignore_border = ignore_border self.ignore_border = ignore_border
self.mode = mode self.mode = mode
CGpuKernelBase.__init__(self, ['pool_grad_grad.c'], CGpuKernelBase.__init__(self, ['c_code/pool_grad_grad.c'],
'APPLY_SPECIFIC(pool_grad_grad)') 'APPLY_SPECIFIC(pool_grad_grad)')
assert self.mode == 'max' assert self.mode == 'max'
assert self.ndim in [2, 3] assert self.ndim in [2, 3]
...@@ -306,7 +305,7 @@ class GpuDownsampleFactorMaxGradGrad(CGpuKernelBase): ...@@ -306,7 +305,7 @@ class GpuDownsampleFactorMaxGradGrad(CGpuKernelBase):
return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h'] return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include()] return [gpuarray_helper_inc_dir(), pygpu.get_include()]
def make_node(self, inp, out, out_grad, ws, stride=None, pad=None): def make_node(self, inp, out, out_grad, ws, stride=None, pad=None):
ctx_name = infer_context_name(inp, out, out_grad) ctx_name = infer_context_name(inp, out, out_grad)
...@@ -372,7 +371,7 @@ class GpuMaxPoolRop(CGpuKernelBase): ...@@ -372,7 +371,7 @@ class GpuMaxPoolRop(CGpuKernelBase):
self.ndim = ndim self.ndim = ndim
self.ignore_border = ignore_border self.ignore_border = ignore_border
self.mode = mode self.mode = mode
CGpuKernelBase.__init__(self, ['pool_max_rop.c'], CGpuKernelBase.__init__(self, ['c_code/pool_max_rop.c'],
'APPLY_SPECIFIC(max_pool_rop)') 'APPLY_SPECIFIC(max_pool_rop)')
assert mode == 'max' assert mode == 'max'
assert ndim in [2, 3] assert ndim in [2, 3]
...@@ -384,7 +383,7 @@ class GpuMaxPoolRop(CGpuKernelBase): ...@@ -384,7 +383,7 @@ class GpuMaxPoolRop(CGpuKernelBase):
return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h'] return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include()] return [gpuarray_helper_inc_dir(), pygpu.get_include()]
def make_node(self, inp, eval_point, ws, stride=None, pad=None): def make_node(self, inp, eval_point, ws, stride=None, pad=None):
ctx_name = infer_context_name(inp) ctx_name = infer_context_name(inp)
......
from __future__ import print_function, absolute_import, division from __future__ import print_function, absolute_import, division
import os
from theano.gof import Op, Apply from theano.gof import Op, Apply
from theano.gof.type import Generic from theano.gof.type import Generic
from .basic_ops import (infer_context_name, as_gpuarray_variable) from .basic_ops import (infer_context_name, as_gpuarray_variable, gpuarray_helper_inc_dir)
from .type import GpuArrayType from .type import GpuArrayType
try: try:
...@@ -44,7 +43,7 @@ class GpuMaxAndArgmax(Op): ...@@ -44,7 +43,7 @@ class GpuMaxAndArgmax(Op):
return ['<numpy_compat.h>', '<gpuarray_helper.h>'] return ['<numpy_compat.h>', '<gpuarray_helper.h>']
def c_header_dirs(self): def c_header_dirs(self):
return [pygpu.get_include(), os.path.dirname(__file__)] return [pygpu.get_include(), gpuarray_helper_inc_dir()]
def c_code(self, node, name, input_names, output_names, sub): def c_code(self, node, name, input_names, output_names, sub):
# Recall: X = input_names[0] # Recall: X = input_names[0]
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import os
import numpy as np import numpy as np
from six import integer_types from six import integer_types
from six.moves import StringIO from six.moves import StringIO
...@@ -20,7 +18,7 @@ except ImportError: ...@@ -20,7 +18,7 @@ except ImportError:
pass pass
from .type import GpuArrayType, gpu_context_type from .type import GpuArrayType, gpu_context_type
from .basic_ops import (as_gpuarray_variable, HideC, GpuKernelBase, Kernel, from .basic_ops import (as_gpuarray_variable, HideC, GpuKernelBase, Kernel, gpuarray_helper_inc_dir,
infer_context_name, gpu_contiguous) infer_context_name, gpu_contiguous)
iadd_reg = {} iadd_reg = {}
...@@ -843,7 +841,7 @@ class GpuAdvancedIncSubtensor1(Op): ...@@ -843,7 +841,7 @@ class GpuAdvancedIncSubtensor1(Op):
'<gpuarray/elemwise.h>', 'gpuarray_helper.h'] '<gpuarray/elemwise.h>', 'gpuarray_helper.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [gpuarray_helper_inc_dir()]
def c_support_code_struct(self, node, nodename): def c_support_code_struct(self, node, nodename):
return "\nGpuElemwise *iadd;\n" return "\nGpuElemwise *iadd;\n"
...@@ -1014,7 +1012,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC, ...@@ -1014,7 +1012,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
'<gpuarray/types.h>'] '<gpuarray/types.h>']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [gpuarray_helper_inc_dir()]
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
ctx = self.get_params(node).context ctx = self.get_params(node).context
......
...@@ -27,7 +27,7 @@ class GpuEye(CGpuKernelBase, Op): ...@@ -27,7 +27,7 @@ class GpuEye(CGpuKernelBase, Op):
dtype = config.floatX dtype = config.floatX
self.dtype = dtype self.dtype = dtype
self.context_name = context_name self.context_name = context_name
CGpuKernelBase.__init__(self, ['tstgpueye.c'], CGpuKernelBase.__init__(self, ['c_code/tstgpueye.c'],
'APPLY_SPECIFIC(tstgpueye)') 'APPLY_SPECIFIC(tstgpueye)')
def get_params(self, node): def get_params(self, node):
......
...@@ -66,7 +66,7 @@ except ImportError: ...@@ -66,7 +66,7 @@ except ImportError:
raise ImportError("no c compiler, can't compile cython code") raise ImportError("no c compiler, can't compile cython code")
_logger.info("Compiling C code for scan") _logger.info("Compiling C code for scan")
dirname = 'scan_perform' dirname = 'scan_perform'
cfile = os.path.join(theano.__path__[0], 'scan_module', cfile = os.path.join(theano.__path__[0], 'scan_module', 'c_code',
'scan_perform.c') 'scan_perform.c')
if not os.path.exists(cfile): if not os.path.exists(cfile):
# This can happen in not normal case. We just # This can happen in not normal case. We just
......
...@@ -9,7 +9,7 @@ import logging ...@@ -9,7 +9,7 @@ import logging
import textwrap import textwrap
import sys import sys
import os import os
from os.path import dirname, normpath from os.path import dirname
from theano import config from theano import config
from theano.gof.cmodule import GCC_compiler from theano.gof.cmodule import GCC_compiler
...@@ -736,8 +736,8 @@ def blas_header_text(): ...@@ -736,8 +736,8 @@ def blas_header_text():
if not config.blas.ldflags: if not config.blas.ldflags:
# Include the Numpy version implementation of [sd]gemm_. # Include the Numpy version implementation of [sd]gemm_.
current_filedir = dirname(__file__) current_filedir = dirname(__file__)
gemm_common_filepath = normpath(current_filedir + "/alt_gemm_common.c") gemm_common_filepath = os.path.join(current_filedir, 'c_code', 'alt_gemm_common.c')
gemm_template_filepath = normpath(current_filedir + "/alt_gemm_template.c") gemm_template_filepath = os.path.join(current_filedir, 'c_code', 'alt_gemm_template.c')
common_code = "" common_code = ""
sgemm_code = "" sgemm_code = ""
dgemm_code = "" dgemm_code = ""
......
...@@ -221,7 +221,7 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -221,7 +221,7 @@ class BaseCorrMM(gof.OpenMPOp):
sub['blas_set_num_threads'] = '' sub['blas_set_num_threads'] = ''
sub['blas_get_num_threads'] = '0' sub['blas_get_num_threads'] = '0'
files = ['corr_gemm.c'] files = [os.path.join('c_code', 'corr_gemm.c')]
codes = [open(os.path.join(os.path.split(__file__)[0], f)).read() codes = [open(os.path.join(os.path.split(__file__)[0], f)).read()
for f in files] for f in files]
final_code = '' final_code = ''
......
...@@ -212,7 +212,7 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -212,7 +212,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
sub['blas_set_num_threads'] = '' sub['blas_set_num_threads'] = ''
sub['blas_get_num_threads'] = '0' sub['blas_get_num_threads'] = '0'
files = ['corr3d_gemm.c'] files = [os.path.join('c_code', 'corr3d_gemm.c')]
codes = [open(os.path.join(os.path.split(__file__)[0], f)).read() codes = [open(os.path.join(os.path.split(__file__)[0], f)).read()
for f in files] for f in files]
final_code = '' final_code = ''
......
from __future__ import (division, absolute_import, print_function) from __future__ import (division, absolute_import, print_function)
import os import os
import os.path
import theano.tensor as T import theano.tensor as T
from theano import config from theano import config
from theano import gof from theano import gof
...@@ -105,7 +104,7 @@ class ConnectionistTemporalClassification(gof.COp, gof.OpenMPOp): ...@@ -105,7 +104,7 @@ class ConnectionistTemporalClassification(gof.COp, gof.OpenMPOp):
_cop_num_inputs = 3 _cop_num_inputs = 3
_cop_num_outputs = 2 _cop_num_outputs = 2
func_file = "./ctc_wrapper.c" func_file = os.path.join('c_code', 'ctc_wrapper.c')
func_name = "APPLY_SPECIFIC(ctc_cost_cpu)" func_name = "APPLY_SPECIFIC(ctc_cost_cpu)"
def __init__(self, compute_grad=True, openmp=None): def __init__(self, compute_grad=True, openmp=None):
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论