提交 89afe76e authored 作者: notoraptor's avatar notoraptor

Move C files into c_code sub-folder in every module.

上级 8d2ea245
......@@ -271,7 +271,7 @@ Python File
``tstgpueye.c``
~~~~~~~~~~~~~~~
.. literalinclude:: ../../theano/gpuarray/tests/tstgpueye.c
.. literalinclude:: ../../theano/gpuarray/tests/c_code/tstgpueye.c
:language: C
Wrapping Existing Libraries
......
......@@ -1609,7 +1609,7 @@ def std_include_dirs():
py_plat_spec_inc = distutils.sysconfig.get_python_inc(plat_specific=True)
python_inc_dirs = ([py_inc] if py_inc == py_plat_spec_inc
else [py_inc, py_plat_spec_inc])
gof_inc_dir = os.path.abspath(os.path.dirname(__file__))
gof_inc_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'c_code')
return numpy_inc_dirs + python_inc_dirs + [gof_inc_dir]
......
......@@ -98,7 +98,7 @@ except ImportError:
raise
_logger.info("Compiling new CVM")
dirname = 'lazylinker_ext'
cfile = os.path.join(theano.__path__[0], 'gof', 'lazylinker_c.c')
cfile = os.path.join(theano.__path__[0], 'gof', 'c_code', 'lazylinker_c.c')
if not os.path.exists(cfile):
# This can happen in an abnormal case. We just
# disable the c clinker. If we are here the user
......
......@@ -98,7 +98,7 @@ class QuadraticCOpFunc(COp):
c=generic_type)
def __init__(self, a, b, c):
super(QuadraticCOpFunc, self).__init__('test_quadratic_function.c',
super(QuadraticCOpFunc, self).__init__('c_code/test_quadratic_function.c',
'APPLY_SPECIFIC(compute_quadratic)')
self.a = a
self.b = b
......
......@@ -126,6 +126,10 @@ def infer_context_name(*vars):
raise ValueError("Could not infer context from inputs")
def gpuarray_helper_inc_dir():
return os.path.join(os.path.dirname(__file__), 'c_code')
class Kernel(object):
"""
This class groups together all the attributes of a gpu kernel.
......@@ -688,7 +692,7 @@ class GpuFromHost(Op):
return ["gpuarray_helper.h"]
def c_header_dirs(self):
return [os.path.dirname(__file__)]
return [gpuarray_helper_inc_dir()]
def c_code(self, node, name, inputs, outputs, sub):
return """
......@@ -999,7 +1003,7 @@ class GpuAllocEmpty(HideC, AllocEmpty):
return ['<gpuarray_helper.h>']
def c_header_dirs(self):
return [os.path.dirname(__file__)]
return [gpuarray_helper_inc_dir()]
def c_code(self, node, name, inp, out, sub):
ndim = len(inp)
......@@ -1068,7 +1072,7 @@ class GpuContiguous(Op):
return Apply(self, [input], [input.type()])
def c_header_dirs(self):
return [os.path.dirname(__file__)]
return [gpuarray_helper_inc_dir()]
def c_headers(self):
return ['<gpuarray_helper.h>']
......@@ -1390,7 +1394,7 @@ class GpuSplit(HideC, Split):
return ['<numpy_compat.h>', '<gpuarray_helper.h>']
def c_header_dirs(self):
return [pygpu.get_include(), os.path.dirname(__file__)]
return [pygpu.get_include(), gpuarray_helper_inc_dir()]
def c_code(self, node, name, inputs, outputs, sub):
if self.len_splits == 0:
......
from __future__ import absolute_import, print_function, division
import os.path
from six import integer_types
import theano
......@@ -12,7 +11,7 @@ from theano.tensor.basic import as_tensor_variable
from theano.tensor.opt import in2out
from .basic_ops import (GpuArrayType, CGpuKernelBase,
as_gpuarray_variable, gpu_contiguous, infer_context_name)
as_gpuarray_variable, gpu_contiguous, infer_context_name, gpuarray_helper_inc_dir)
from .opt_util import inplace_allocempty
try:
......@@ -28,7 +27,7 @@ class BlasOp(Op):
return ['<blas_api.h>', '<numpy_compat.h>', '<gpuarray_helper.h>']
def c_header_dirs(self):
return [pygpu.get_include(), os.path.dirname(__file__)]
return [pygpu.get_include(), gpuarray_helper_inc_dir()]
def c_init_code(self):
return ['import_pygpu__blas();']
......@@ -487,7 +486,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
if num_groups < 1:
raise ValueError("Number of groups should be greater than 0")
self.num_groups = num_groups
CGpuKernelBase.__init__(self, ['corr_gemm.c'])
CGpuKernelBase.__init__(self, ['c_code/corr_gemm.c'])
@property
def pad(self):
......@@ -530,7 +529,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
return ["<gpuarray/array.h>", "<gpuarray/blas.h>", "gpuarray_helper.h"]
def c_header_dirs(self):
return [os.path.dirname(__file__)]
return [gpuarray_helper_inc_dir()]
def c_code_cache_version(self):
# Raise this whenever modifying the C code (including the file).
......@@ -1094,7 +1093,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
raise ValueError("filter_dilation must have three elements")
self.subsample = tuple(subsample)
self.filter_dilation = tuple(filter_dilation)
CGpuKernelBase.__init__(self, ['corr3d_gemm.c'])
CGpuKernelBase.__init__(self, ['c_code/corr3d_gemm.c'])
@property
def pad(self):
......@@ -1131,7 +1130,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
return ["<gpuarray/array.h>", "<gpuarray/blas.h>", "gpuarray_helper.h"]
def c_header_dirs(self):
return [os.path.dirname(__file__)]
return [gpuarray_helper_inc_dir()]
def c_code_cache_version(self):
# raise this whenever modifying the code below.
......
from __future__ import absolute_import, print_function, division
import logging
import os
import numpy as np
from theano import Apply, tensor
......@@ -11,7 +10,7 @@ from theano.scalar import bool as bool_t
from theano.gradient import grad_undefined
from .type import gpu_context_type
from .basic_ops import as_gpuarray_variable, infer_context_name
from .basic_ops import as_gpuarray_variable, infer_context_name, gpuarray_helper_inc_dir
_logger = logging.getLogger('theano.gpuarray.blocksparse')
......@@ -30,7 +29,7 @@ class GpuSparseBlockGemv(COp):
# NB: DTYPE_INPUT_* is used in C code, so I think we should not set check_input to False.
def __init__(self, inplace=False):
COp.__init__(self, "blockgemv.c", "APPLY_SPECIFIC(blockgemv)")
COp.__init__(self, "c_code/blockgemv.c", "APPLY_SPECIFIC(blockgemv)")
self.inplace = inplace
if self.inplace:
self.destroy_map = {0: [0]}
......@@ -39,7 +38,7 @@ class GpuSparseBlockGemv(COp):
return self.params_type.get_params(self, context=node.inputs[0].type.context)
def c_header_dirs(self):
return [os.path.dirname(__file__)]
return [gpuarray_helper_inc_dir()]
def c_headers(self):
return ['<gpuarray/buffer_blas.h>', '<gpuarray/buffer.h>',
......@@ -101,7 +100,7 @@ class GpuSparseBlockOuter(COp):
params_type = ParamsType(inplace=bool_t, context=gpu_context_type)
def __init__(self, inplace=False):
COp.__init__(self, ["blockger.c"], "APPLY_SPECIFIC(blockger)")
COp.__init__(self, ["c_code/blockger.c"], "APPLY_SPECIFIC(blockger)")
self.inplace = inplace
if self.inplace:
self.destroy_map = {0: [0]}
......@@ -126,7 +125,7 @@ class GpuSparseBlockOuter(COp):
return [input_shapes[0]]
def c_header_dirs(self):
return [os.path.dirname(__file__)]
return [gpuarray_helper_inc_dir()]
def c_headers(self):
return ['<gpuarray/buffer_blas.h>', '<gpuarray/buffer.h>',
......
......@@ -3,7 +3,7 @@ from __future__ import absolute_import, print_function, division
import theano
from theano import (config, gof)
import theano.tensor as T
from .basic_ops import (gpu_contiguous, as_gpuarray_variable, infer_context_name)
from .basic_ops import (gpu_contiguous, as_gpuarray_variable, infer_context_name, gpuarray_helper_inc_dir)
import theano.tensor.nnet.ctc
from .type import (GpuArrayType, gpu_context_type)
from .elemwise import GpuDimShuffle
......@@ -58,7 +58,7 @@ class GpuConnectionistTemporalClassification(gof.COp):
return ["warpctc", "gpuarray"]
def c_header_dirs(self):
dirs = [os.path.dirname(__file__), pygpu.get_include()]
dirs = [gpuarray_helper_inc_dir(), pygpu.get_include()]
if config.ctc.root != '':
dirs.append(os.path.join(config.ctc.root, "include"))
return dirs
......
......@@ -33,7 +33,7 @@ from theano.tensor.signal.pool import (
from . import pygpu, cudnn_defs
from .type import (get_context, gpu_context_type, list_contexts,
GpuArraySharedVariable)
from .basic_ops import (as_gpuarray_variable, infer_context_name,
from .basic_ops import (as_gpuarray_variable, infer_context_name, gpuarray_helper_inc_dir,
gpu_contiguous, GpuAllocEmpty,
empty_like, GpuArrayType, HostFromGpu)
from .elemwise import GpuElemwise, GpuCAReduceCuda
......@@ -137,7 +137,7 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
path_wrapper = "\"" if os.name == 'nt' else ""
params = ["-l", "cudnn"]
params.extend(['-I%s%s%s' % (path_wrapper, os.path.dirname(__file__), path_wrapper)])
params.extend(['-I%s%s%s' % (path_wrapper, gpuarray_helper_inc_dir(), path_wrapper)])
if config.dnn.include_path:
params.extend(['-I%s%s%s' % (path_wrapper, config.dnn.include_path, path_wrapper)])
if config.cuda.include_path:
......@@ -371,7 +371,7 @@ class DnnBase(COp):
def __init__(self, files=None, c_func=None):
if files is None:
files = []
COp.__init__(self, ["dnn_base.c"] + files, c_func)
COp.__init__(self, ["c_code/dnn_base.c"] + files, c_func)
def c_headers(self):
return ['gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/kernel.h',
......@@ -380,7 +380,7 @@ class DnnBase(COp):
'gpuarray_helper.h']
def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include(),
return [gpuarray_helper_inc_dir(), pygpu.get_include(),
config.dnn.include_path, config.cuda.include_path]
def c_libraries(self):
......@@ -426,7 +426,7 @@ class GpuDnnConvDesc(COp):
return ['cudnn.h', 'cudnn_helper.h']
def c_header_dirs(self):
return [os.path.dirname(__file__), config.dnn.include_path,
return [gpuarray_helper_inc_dir(), config.dnn.include_path,
config.cuda.include_path]
def c_libraries(self):
......@@ -448,7 +448,7 @@ class GpuDnnConvDesc(COp):
def __init__(self, border_mode, subsample=(1, 1), dilation=(1, 1), conv_mode='conv',
precision="float32"):
COp.__init__(self, ["conv_desc.c"], "APPLY_SPECIFIC(conv_desc)")
COp.__init__(self, ["c_code/conv_desc.c"], "APPLY_SPECIFIC(conv_desc)")
if version() < 6000 and any([d != 1 for d in dilation]):
raise RuntimeError("Dilation > 1 not supported for cuDNN version < 6.")
......@@ -566,7 +566,7 @@ class GpuDnnConv(DnnBase):
num_groups=int_t)
def __init__(self, algo=None, inplace=False, num_groups=1):
DnnBase.__init__(self, ["dnn_conv_base.c", "dnn_fwd.c"],
DnnBase.__init__(self, ["c_code/dnn_conv_base.c", "c_code/dnn_fwd.c"],
"APPLY_SPECIFIC(conv_fwd)")
if algo is None:
......@@ -709,7 +709,7 @@ class GpuDnnConvGradW(DnnBase):
num_groups=int_t)
def __init__(self, inplace=False, algo=None, num_groups=1):
DnnBase.__init__(self, ["dnn_conv_base.c", "dnn_gw.c"],
DnnBase.__init__(self, ["c_code/dnn_conv_base.c", "c_code/dnn_gw.c"],
"APPLY_SPECIFIC(conv_gw)")
self.inplace = bool(inplace)
if self.inplace:
......@@ -845,7 +845,7 @@ class GpuDnnConvGradI(DnnBase):
num_groups=int_t)
def __init__(self, inplace=False, algo=None, num_groups=1):
DnnBase.__init__(self, ["dnn_conv_base.c", "dnn_gi.c"],
DnnBase.__init__(self, ["c_code/dnn_conv_base.c", "c_code/dnn_gi.c"],
"APPLY_SPECIFIC(conv_gi)")
self.inplace = bool(inplace)
if self.inplace:
......@@ -1261,7 +1261,7 @@ class GpuDnnPoolDesc(Op):
return ['cudnn.h', 'cudnn_helper.h']
def c_header_dirs(self):
return [os.path.dirname(__file__), config.dnn.include_path]
return [gpuarray_helper_inc_dir(), config.dnn.include_path]
def c_libraries(self):
return ['cudnn']
......@@ -1390,7 +1390,7 @@ class GpuDnnPool(GpuDnnPoolBase):
(padX, padY) or (padX, padY, padZ)
"""
c_file = "dnn_pool.c"
c_file = "c_code/dnn_pool.c"
c_function = "APPLY_SPECIFIC(dnn_pool)"
def make_node(self, img, ws, stride, pad):
......@@ -1458,7 +1458,7 @@ class GpuDnnPoolGrad(GpuDnnPoolBase):
(padX, padY) or (padX, padY, padZ)
"""
c_file = "dnn_pool_grad.c"
c_file = "c_code/dnn_pool_grad.c"
c_function = "APPLY_SPECIFIC(dnn_pool_grad)"
def make_node(self, inp, out, out_grad, ws, stride, pad):
......@@ -1587,7 +1587,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
"""
_f16_ok = True
direction = "forward"
file = "dnn_softmax.c"
file = "c_code/dnn_softmax.c"
c_func = "APPLY_SPECIFIC(softmax)"
def make_node(self, x):
......@@ -1624,7 +1624,7 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
"""
_f16_ok = True
direction = 'backward'
file = "dnn_softmax_grad.c"
file = "c_code/dnn_softmax_grad.c"
c_func = "APPLY_SPECIFIC(softmax_grad)"
def make_node(self, dy, sm):
......@@ -1649,7 +1649,7 @@ class GpuDnnReduction(DnnBase):
handle=handle_type)
def __init__(self, red_op, axis, acc_dtype, dtype, return_indices):
DnnBase.__init__(self, ['dnn_redux.c'], 'APPLY_SPECIFIC(dnn_redux)')
DnnBase.__init__(self, ['c_code/dnn_redux.c'], 'APPLY_SPECIFIC(dnn_redux)')
assert cudnn.cudnnReduceTensorOp_t.has_alias(red_op)
self.red_op = red_op
assert acc_dtype in ['float16', 'float32', 'float64']
......@@ -1753,7 +1753,7 @@ class GpuDnnBatchNorm(DnnBase):
def __init__(self, mode='per-activation', running_averages=False,
inplace_running_mean=False, inplace_running_var=False,
inplace_output=False):
DnnBase.__init__(self, ['dnn_batchnorm_base.c', 'dnn_batchnorm.c'],
DnnBase.__init__(self, ['c_code/dnn_batchnorm_base.c', 'c_code/dnn_batchnorm.c'],
'dnn_batchnorm_op')
assert cudnn.cudnnBatchNormMode_t.has_alias(mode)
......@@ -1864,7 +1864,7 @@ class GpuDnnBatchNormInference(DnnBase):
handle=handle_type)
def __init__(self, mode='per-activation', inplace=False):
DnnBase.__init__(self, ['dnn_batchnorm_base.c', 'dnn_batchnorm_inf.c'],
DnnBase.__init__(self, ['c_code/dnn_batchnorm_base.c', 'c_code/dnn_batchnorm_inf.c'],
'dnn_batchnorm_op')
assert cudnn.cudnnBatchNormMode_t.has_alias(mode)
......@@ -1931,7 +1931,7 @@ class GpuDnnBatchNormGrad(DnnBase):
handle=handle_type)
def __init__(self, mode='per-activation'):
DnnBase.__init__(self, ['dnn_batchnorm_base.c', 'dnn_batchnorm_grad.c'],
DnnBase.__init__(self, ['c_code/dnn_batchnorm_base.c', 'c_code/dnn_batchnorm_grad.c'],
'dnn_batchnorm_grad')
assert cudnn.cudnnBatchNormMode_t.has_alias(mode)
......@@ -1961,7 +1961,7 @@ class GpuDnnDropoutOp(DnnBase):
__props__ = ('inplace',)
def __init__(self, inplace=False):
DnnBase.__init__(self, ["dnn_dropout_fwd.c"], "dnn_dropout_fwd")
DnnBase.__init__(self, ["c_code/dnn_dropout_fwd.c"], "dnn_dropout_fwd")
self.inplace = inplace
if self.inplace:
self.destroy_map = {1: [2]}
......@@ -1980,7 +1980,7 @@ class _DropoutDescriptor(DnnBase):
__props__ = ('context_name',)
def __init__(self, context_name):
DnnBase.__init__(self, ["dnn_dropout_desc.c"], "dnn_dropout_desc")
DnnBase.__init__(self, ["c_code/dnn_dropout_desc.c"], "dnn_dropout_desc")
self.context_name = context_name
def dnn_context(self, node):
......@@ -2036,7 +2036,7 @@ class _RNNDescriptor(DnnBase):
def __init__(self, context_name):
if version() < 5005:
raise RuntimeError("cudnn RNN require cudnn v5 final or higher.")
DnnBase.__init__(self, ["dnn_rnn_desc.c"], "dnn_rnn_desc")
DnnBase.__init__(self, ["c_code/dnn_rnn_desc.c"], "dnn_rnn_desc")
self.context_name = context_name
def dnn_context(self, node):
......@@ -2103,7 +2103,7 @@ class _RNNParamSize(DnnBase):
__props__ = ('context_name',)
def __init__(self, context_name):
DnnBase.__init__(self, ["dnn_rnn_paramsize.c"],
DnnBase.__init__(self, ["c_code/dnn_rnn_paramsize.c"],
"dnn_rnn_paramsize")
self.context_name = context_name
......@@ -2348,7 +2348,7 @@ class GpuDnnRNNOp(DnnBase):
_cop_num_outputs = 4
def __init__(self, rnn_mode, direction_mode):
DnnBase.__init__(self, ["dnn_rnn_fwd.c"], 'dnn_rnn_fwd')
DnnBase.__init__(self, ["c_code/dnn_rnn_fwd.c"], 'dnn_rnn_fwd')
self.rnn_mode = rnn_mode
if direction_mode == 'bidirectional':
self.num_dirs = 2
......@@ -2439,7 +2439,7 @@ class GpuDnnRNNGradInputs(DnnBase):
_cop_num_outputs = 4
def __init__(self, rnn_mode, grad_h, grad_c):
DnnBase.__init__(self, ['dnn_rnn_gi.c'], 'dnn_rnn_gi')
DnnBase.__init__(self, ['c_code/dnn_rnn_gi.c'], 'dnn_rnn_gi')
self.rnn_mode = rnn_mode
self.grad_h = grad_h
self.grad_c = grad_c
......@@ -2488,7 +2488,7 @@ class GpuDnnRNNGradWeights(DnnBase):
__props__ = ()
def __init__(self):
DnnBase.__init__(self, ['dnn_rnn_gw.c'], 'dnn_rnn_gw')
DnnBase.__init__(self, ['c_code/dnn_rnn_gw.c'], 'dnn_rnn_gw')
def make_node(self, desc, x, hx, y, reserve, w):
# We trust the callers here
......
from __future__ import absolute_import, print_function, division
import os
from theano import Apply, Op
from theano.tensor.extra_ops import CumOp
......@@ -8,7 +7,7 @@ try:
except ImportError:
pass
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, GpuReshape, infer_context_name)
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, GpuReshape, infer_context_name, gpuarray_helper_inc_dir)
from .opt import register_opt, op_lifter, register_opt2
from .type import gpu_context_type
from theano.gof import ParamsType
......@@ -47,7 +46,7 @@ class GpuCumOp(GpuKernelBase, Op):
return ['<numpy_compat.h>', '<gpuarray/types.h>', '<gpuarray_helper.h>']
def c_header_dirs(self):
return [os.path.dirname(__file__)]
return [gpuarray_helper_inc_dir()]
def get_params(self, node):
return self.params_type.get_params(self, context=node.inputs[0].type.context)
......
from __future__ import absolute_import, division, print_function
import os
import warnings
import pkg_resources
......@@ -13,7 +12,7 @@ from theano.scalar import bool as bool_t
from theano.gof import COp, ParamsType
from theano.gpuarray import GpuArrayType
from .basic_ops import (CGpuKernelBase, as_gpuarray_variable, gpu_contiguous,
from .basic_ops import (CGpuKernelBase, as_gpuarray_variable, gpu_contiguous, gpuarray_helper_inc_dir,
infer_context_name)
from .type import gpu_context_type
......@@ -499,7 +498,7 @@ class GpuMagmaBase(COp):
'gpuarray_helper.h', 'magma.h']
def c_header_dirs(self):
dirs = [os.path.dirname(__file__), pygpu.get_include()]
dirs = [gpuarray_helper_inc_dir(), pygpu.get_include()]
if config.magma.include_path:
dirs.append(config.magma.include_path)
return dirs
......@@ -540,7 +539,7 @@ class GpuMagmaSVD(GpuMagmaBase):
def __init__(self, full_matrices=True, compute_uv=True):
self.full_matrices = full_matrices
self.compute_uv = compute_uv
COp.__init__(self, ['magma_svd.c'], 'APPLY_SPECIFIC(magma_svd)')
COp.__init__(self, ['c_code/magma_svd.c'], 'APPLY_SPECIFIC(magma_svd)')
def make_node(self, A):
ctx_name = infer_context_name(A)
......@@ -624,7 +623,7 @@ class GpuMagmaMatrixInverse(GpuMagmaBase):
params_type = ParamsType(inplace=bool_t, context=gpu_context_type)
def __init__(self, inplace=False):
COp.__init__(self, ['magma_inv.c'], 'APPLY_SPECIFIC(magma_inv)')
COp.__init__(self, ['c_code/magma_inv.c'], 'APPLY_SPECIFIC(magma_inv)')
self.inplace = inplace
if self.inplace:
self.destroy_map = {0: [0]}
......@@ -672,7 +671,7 @@ class GpuMagmaCholesky(GpuMagmaBase, CGpuKernelBase):
def __init__(self, lower=True, inplace=False):
self.lower = lower
COp.__init__(self, ['magma_cholesky.c'], 'APPLY_SPECIFIC(magma_cholesky)')
COp.__init__(self, ['c_code/magma_cholesky.c'], 'APPLY_SPECIFIC(magma_cholesky)')
self.inplace = inplace
if self.inplace:
self.destroy_map = {0: [0]}
......@@ -719,7 +718,7 @@ class GpuMagmaQR(GpuMagmaBase, CGpuKernelBase):
def __init__(self, complete=True):
self.complete = complete
COp.__init__(self, ['magma_qr.c'], 'APPLY_SPECIFIC(magma_qr)')
COp.__init__(self, ['c_code/magma_qr.c'], 'APPLY_SPECIFIC(magma_qr)')
def make_node(self, A):
ctx_name = infer_context_name(A)
......@@ -785,7 +784,7 @@ class GpuMagmaEigh(GpuMagmaBase):
assert UPLO in ['L', 'U']
self.lower = UPLO == 'L'
self.compute_v = compute_v
COp.__init__(self, ['magma_eigh.c'], 'APPLY_SPECIFIC(magma_eigh)')
COp.__init__(self, ['c_code/magma_eigh.c'], 'APPLY_SPECIFIC(magma_eigh)')
def make_node(self, A):
ctx_name = infer_context_name(A)
......
# TODO test dtype != float32
from __future__ import absolute_import, print_function, division
import os
import warnings
try:
......@@ -14,7 +13,7 @@ from theano import Apply
from theano.gof import Op
from theano.tensor import NotScalarConstantError, get_scalar_constant_value
from .basic_ops import as_gpuarray_variable, infer_context_name, GpuKernelBase, Kernel
from .basic_ops import as_gpuarray_variable, infer_context_name, GpuKernelBase, Kernel, gpuarray_helper_inc_dir
from .opt import register_opt, op_lifter, register_opt2
from .type import GpuArrayType
from .elemwise import GpuDimShuffle
......@@ -37,7 +36,7 @@ class GPUAMultinomialFromUniform(GpuKernelBase, Op):
return ['<numpy_compat.h>', 'gpuarray_helper.h']
def c_header_dirs(self):
return [os.path.dirname(__file__)]
return [gpuarray_helper_inc_dir()]
def make_node(self, pvals, unis):
assert unis.dtype == pvals.dtype
......@@ -249,7 +248,7 @@ class GPUAChoiceFromUniform(GpuKernelBase, Op):
return ['<numpy_compat.h>', 'gpuarray_helper.h']
def c_header_dirs(self):
return [os.path.dirname(__file__)]
return [gpuarray_helper_inc_dir()]
def make_node(self, pvals, unis, n):
assert pvals.dtype == 'float32'
......
from __future__ import absolute_import, print_function, division
import os
import numpy as np
from theano import Op, Apply
......@@ -11,7 +10,7 @@ try:
except ImportError:
pass
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel,
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, gpuarray_helper_inc_dir,
infer_context_name)
from .type import GpuArrayType
from .fp16_help import work_dtype, load_w, write_w
......@@ -43,7 +42,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
return ['<numpy_compat.h>', '<gpuarray/types.h>', 'gpuarray_helper.h']
def c_header_dirs(self):
return [os.path.dirname(__file__)]
return [gpuarray_helper_inc_dir()]
def gpu_kernels(self, node, nodename):
dtype_x = node.inputs[0].dtype
......
from __future__ import absolute_import, print_function, division
import os.path
import theano
from theano import Apply
......@@ -9,7 +8,7 @@ from theano.tensor.basic import as_tensor_variable
from theano.tensor.signal.pool import Pool, PoolingMode_t
from .type import gpu_context_type
from .basic_ops import (CGpuKernelBase, infer_context_name,
from .basic_ops import (CGpuKernelBase, infer_context_name, gpuarray_helper_inc_dir,
as_gpuarray_variable, gpu_contiguous)
try:
......@@ -35,7 +34,7 @@ class GpuPool(CGpuKernelBase):
if mode == 'average':
mode = 'average_inc_pad'
self.mode = mode
CGpuKernelBase.__init__(self, ['pool.c'],
CGpuKernelBase.__init__(self, ['c_code/pool.c'],
'APPLY_SPECIFIC(pool)')
assert PoolingMode_t.has_alias(self.mode)
assert self.ndim in [2, 3]
......@@ -47,7 +46,7 @@ class GpuPool(CGpuKernelBase):
return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include()]
return [gpuarray_helper_inc_dir(), pygpu.get_include()]
def make_node(self, inp, ws, stride=None, pad=None):
ctx_name = infer_context_name(inp)
......@@ -144,7 +143,7 @@ class GpuMaxPoolGrad(CGpuKernelBase):
self.ndim = ndim
self.ignore_border = ignore_border
self.mode = mode
CGpuKernelBase.__init__(self, ['pool_max_grad.c'],
CGpuKernelBase.__init__(self, ['c_code/pool_max_grad.c'],
'APPLY_SPECIFIC(max_pool_grad)')
assert mode == 'max'
assert ndim in [2, 3]
......@@ -153,7 +152,7 @@ class GpuMaxPoolGrad(CGpuKernelBase):
return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include()]
return [gpuarray_helper_inc_dir(), pygpu.get_include()]
def make_node(self, inp, out, out_grad, ws, stride=None, pad=None):
ctx_name = infer_context_name(inp, out, out_grad)
......@@ -221,7 +220,7 @@ class GpuAveragePoolGrad(CGpuKernelBase):
if mode == 'average':
mode = 'average_inc_pad'
self.mode = mode
CGpuKernelBase.__init__(self, ['pool_ave_grad.c'],
CGpuKernelBase.__init__(self, ['c_code/pool_ave_grad.c'],
'APPLY_SPECIFIC(ave_pool_grad)')
assert mode in ('sum', 'average_inc_pad', 'average_exc_pad')
assert ndim in [2, 3]
......@@ -233,7 +232,7 @@ class GpuAveragePoolGrad(CGpuKernelBase):
return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include()]
return [gpuarray_helper_inc_dir(), pygpu.get_include()]
def make_node(self, inp, out_grad, ws, stride=None, pad=None):
ctx_name = infer_context_name(inp, out_grad)
......@@ -297,7 +296,7 @@ class GpuDownsampleFactorMaxGradGrad(CGpuKernelBase):
self.ndim = ndim
self.ignore_border = ignore_border
self.mode = mode
CGpuKernelBase.__init__(self, ['pool_grad_grad.c'],
CGpuKernelBase.__init__(self, ['c_code/pool_grad_grad.c'],
'APPLY_SPECIFIC(pool_grad_grad)')
assert self.mode == 'max'
assert self.ndim in [2, 3]
......@@ -306,7 +305,7 @@ class GpuDownsampleFactorMaxGradGrad(CGpuKernelBase):
return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include()]
return [gpuarray_helper_inc_dir(), pygpu.get_include()]
def make_node(self, inp, out, out_grad, ws, stride=None, pad=None):
ctx_name = infer_context_name(inp, out, out_grad)
......@@ -372,7 +371,7 @@ class GpuMaxPoolRop(CGpuKernelBase):
self.ndim = ndim
self.ignore_border = ignore_border
self.mode = mode
CGpuKernelBase.__init__(self, ['pool_max_rop.c'],
CGpuKernelBase.__init__(self, ['c_code/pool_max_rop.c'],
'APPLY_SPECIFIC(max_pool_rop)')
assert mode == 'max'
assert ndim in [2, 3]
......@@ -384,7 +383,7 @@ class GpuMaxPoolRop(CGpuKernelBase):
return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include()]
return [gpuarray_helper_inc_dir(), pygpu.get_include()]
def make_node(self, inp, eval_point, ws, stride=None, pad=None):
ctx_name = infer_context_name(inp)
......
from __future__ import print_function, absolute_import, division
import os
from theano.gof import Op, Apply
from theano.gof.type import Generic
from .basic_ops import (infer_context_name, as_gpuarray_variable)
from .basic_ops import (infer_context_name, as_gpuarray_variable, gpuarray_helper_inc_dir)
from .type import GpuArrayType
try:
......@@ -44,7 +43,7 @@ class GpuMaxAndArgmax(Op):
return ['<numpy_compat.h>', '<gpuarray_helper.h>']
def c_header_dirs(self):
return [pygpu.get_include(), os.path.dirname(__file__)]
return [pygpu.get_include(), gpuarray_helper_inc_dir()]
def c_code(self, node, name, input_names, output_names, sub):
# Recall: X = input_names[0]
......
from __future__ import absolute_import, print_function, division
import os
import numpy as np
from six import integer_types
from six.moves import StringIO
......@@ -20,7 +18,7 @@ except ImportError:
pass
from .type import GpuArrayType, gpu_context_type
from .basic_ops import (as_gpuarray_variable, HideC, GpuKernelBase, Kernel,
from .basic_ops import (as_gpuarray_variable, HideC, GpuKernelBase, Kernel, gpuarray_helper_inc_dir,
infer_context_name, gpu_contiguous)
iadd_reg = {}
......@@ -843,7 +841,7 @@ class GpuAdvancedIncSubtensor1(Op):
'<gpuarray/elemwise.h>', 'gpuarray_helper.h']
def c_header_dirs(self):
return [os.path.dirname(__file__)]
return [gpuarray_helper_inc_dir()]
def c_support_code_struct(self, node, nodename):
return "\nGpuElemwise *iadd;\n"
......@@ -1014,7 +1012,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, HideC,
'<gpuarray/types.h>']
def c_header_dirs(self):
return [os.path.dirname(__file__)]
return [gpuarray_helper_inc_dir()]
def c_code(self, node, name, inputs, outputs, sub):
ctx = self.get_params(node).context
......
......@@ -27,7 +27,7 @@ class GpuEye(CGpuKernelBase, Op):
dtype = config.floatX
self.dtype = dtype
self.context_name = context_name
CGpuKernelBase.__init__(self, ['tstgpueye.c'],
CGpuKernelBase.__init__(self, ['c_code/tstgpueye.c'],
'APPLY_SPECIFIC(tstgpueye)')
def get_params(self, node):
......
......@@ -66,7 +66,7 @@ except ImportError:
raise ImportError("no c compiler, can't compile cython code")
_logger.info("Compiling C code for scan")
dirname = 'scan_perform'
cfile = os.path.join(theano.__path__[0], 'scan_module',
cfile = os.path.join(theano.__path__[0], 'scan_module', 'c_code',
'scan_perform.c')
if not os.path.exists(cfile):
# This can happen in not normal case. We just
......
......@@ -9,7 +9,7 @@ import logging
import textwrap
import sys
import os
from os.path import dirname, normpath
from os.path import dirname
from theano import config
from theano.gof.cmodule import GCC_compiler
......@@ -736,8 +736,8 @@ def blas_header_text():
if not config.blas.ldflags:
# Include the Numpy version implementation of [sd]gemm_.
current_filedir = dirname(__file__)
gemm_common_filepath = normpath(current_filedir + "/alt_gemm_common.c")
gemm_template_filepath = normpath(current_filedir + "/alt_gemm_template.c")
gemm_common_filepath = os.path.join(current_filedir, 'c_code', 'alt_gemm_common.c')
gemm_template_filepath = os.path.join(current_filedir, 'c_code', 'alt_gemm_template.c')
common_code = ""
sgemm_code = ""
dgemm_code = ""
......
......@@ -221,7 +221,7 @@ class BaseCorrMM(gof.OpenMPOp):
sub['blas_set_num_threads'] = ''
sub['blas_get_num_threads'] = '0'
files = ['corr_gemm.c']
files = [os.path.join('c_code', 'corr_gemm.c')]
codes = [open(os.path.join(os.path.split(__file__)[0], f)).read()
for f in files]
final_code = ''
......
......@@ -212,7 +212,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
sub['blas_set_num_threads'] = ''
sub['blas_get_num_threads'] = '0'
files = ['corr3d_gemm.c']
files = [os.path.join('c_code', 'corr3d_gemm.c')]
codes = [open(os.path.join(os.path.split(__file__)[0], f)).read()
for f in files]
final_code = ''
......
from __future__ import (division, absolute_import, print_function)
import os
import os.path
import theano.tensor as T
from theano import config
from theano import gof
......@@ -105,7 +104,7 @@ class ConnectionistTemporalClassification(gof.COp, gof.OpenMPOp):
_cop_num_inputs = 3
_cop_num_outputs = 2
func_file = "./ctc_wrapper.c"
func_file = os.path.join('c_code', 'ctc_wrapper.c')
func_name = "APPLY_SPECIFIC(ctc_cost_cpu)"
def __init__(self, compute_grad=True, openmp=None):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论