提交 cd1b4a21 authored 作者: Caglar's avatar Caglar 提交者: Tanjay94

Removed gpu svd.

上级 d7407bf9
import copy
import logging
import sys
import os
import numpy
......@@ -3406,203 +3405,6 @@ def profile_printer(fct_name, compile_time, fct_call_time, fct_call,
for i in node.outputs])
class GpuSVD(GpuOp):
"""
Singular Value Decomposition.
Factors the matrix a as u * np.diag(s) * v, where u and v are unitary
and s is a 1-d array of a's singular values.
"""
def __init__(self, full_matrices=True, compute_uv=True, dtype=None):
"""
inputs :
--------
full_matrices : bool, optional
If True (default), u and v have the shapes (M, M) and (N, N), respectively.
Otherwise, the shapes are (M, K) and (K, N), respectively, where K = min(M, N).
compute_uv : bool, optional
Whether or not to compute u and v in addition to s. True by default.
"""
if dtype is None:
dtype = config.floatX
assert dtype == 'float32'
self.dtype = dtype
self.full_matrices = full_matrices
self.compute_uv = compute_uv
def props(self):
return self.full_matrices, self.compute_uv,
def make_node(self, x):
x = as_cuda_ndarray_variable(x)
assert x.ndim == 2, "The input of svd function should be a matrix."
w = x.type()
u = vector(dtype=x.dtype)
v = x.type()
return Apply(self, [x], [w, u, v])
def grad(self, inp, grads):
return [grad_undefined(self, i, inp[i]) for i in xrange(3)]
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def c_headers(self):
return [ "stdio.h", "math.h", "cuda_runtime.h", "stdlib.h", "cula_lapack_device.h"]
def c_init_code(self):
return ["culaStatus status = culaInitialize();"]
def c_compile_args(self):
cula_inc_path = "CULA_INC_PATH"
cula_lib_path_64 = "CULA_LIB_PATH_64"
cula_lib_path_32 = "CULA_LIB_PATH_32"
cula_lib_path = None
assert os.environ[cula_inc_path] is not None
if os.environ[cula_lib_path_64] is not None:
cula_lib_path = cula_lib_path_64
elif os.environ[cula_lib_path_32] is not None:
cula_lib_path = cula_lib_path_32
else:
raise Exception("Could not find the cula library path to import.")
return ["-I${%s}" % cula_inc_path, "-L${%s}" % cula_lib_path, "-ggdb", "-Wall"]
def c_support_code(self):
return "culaStatus status;"
def c_libraries(self):
return ["m", "cula_lapack_basic", "cublas"]
def c_code(self, node, name, inp, out, sub):
x = inp[0]
w, u, v, = out
fail = sub['fail']
compute_uv = self.compute_uv
full_matrices = self.full_matrices
code = """
int compute_uv = %(compute_uv)d;
int full_matrices = %(full_matrices)d;
char jobu = 'N';
char jobvt = 'N';
int dims[] = {0, 0};
//dims[0] = ((dtype_%(x)s*)PyArray_DIMS(%(x)s))[0];
//dims[1] = ((dtype_%(x)s*)PyArray_DIMS(%(x)s))[1];
dims[0] = ((float *)PyArray_DIMS(%(x)s))[0];
dims[1] = ((float *)PyArray_DIMS(%(x)s))[1];
int ldvt = dims[0];
int ldu = dims[0];
int lda = dims[1];
int wdim = (dims[0] > dims[1]) ? dims[1] : dims[0];
if (compute_uv == 1) {
if (full_matrices == 1) {
jobu = 'A';
jobvt = 'A';
} else if (compute_uv == 1) {
jobu = 'S';
jobvt = 'S';
ldu = (int)(ldu / 2);
ldvt = (int)(ldu / 2);
}
}
int x_total_size = dims[0] * dims[1] * sizeof(float);
int w_total_size = wdim * sizeof(float);
int u_total_size = dims[0] * ldu * sizeof(float);
int v_total_size = dims[1] * ldvt * sizeof(float);
int w_dims[] = {wdim};
int u_dims[] = {dims[0], ldu};
int v_dims[] = {ldvt, dims[1]};
cudaError_t sts;
void * orig_u = %(u)s;
void * orig_w = %(w)s;
void * orig_v = %(v)s;
if (CudaNdarray_prep_output(& %(w)s, 1, w_dims, 1))
{
%(fail)s;
}
if (CudaNdarray_prep_output(& %(u)s, 2, u_dims, 1))
{
%(fail)s;
}
if (CudaNdarray_prep_output(& %(v)s, 2, v_dims, 1))
{
%(fail)s;
}
sts = cudaMemset(CudaNdarray_DEV_DATA(%(w)s), 0, w_total_size);
if (cudaSuccess != sts)
{
PyErr_Format(PyExc_MemoryError,
"GpuSVD: Error in memset %%d bytes of device memory.",
w_total_size);
if(orig_w == NULL)
Py_XDECREF(%(w)s);
%(fail)s;
}
sts = cudaMemset(CudaNdarray_DEV_DATA(%(u)s), 0, u_total_size);
if (cudaSuccess != sts)
{
PyErr_Format(PyExc_MemoryError,
"GpuSVD: Error in memset %%d bytes of device memory.",
u_total_size);
if(orig_u == NULL)
Py_XDECREF(%(u)s);
%(fail)s;
}
sts = cudaMemset(CudaNdarray_DEV_DATA(%(v)s), 0, v_total_size);
if (cudaSuccess != sts)
{
PyErr_Format(PyExc_MemoryError,
"GpuSVD: Error in memset %%d bytes of device memory.",
v_total_size);
if(orig_v == NULL)
Py_XDECREF(%(v)s);
%(fail)s;
}
status = culaDeviceSgesvd(jobu, jobvt, dims[0], dims[1], CudaNdarray_DEV_DATA(%(x)s), lda,
CudaNdarray_DEV_DATA(%(w)s), CudaNdarray_DEV_DATA(%(u)s), ldu,
CudaNdarray_DEV_DATA(%(v)s), ldvt);
CNDA_THREAD_SYNC;
sts = cudaGetLastError();
if (cudaSuccess != sts)
{
PyErr_Format(PyExc_RuntimeError,
"Cuda error: culaDeviceSgesvd: %%s. n=%%d, m=%%d.",
cudaGetErrorString(sts),
dims[0], dims[1]);
%(fail)s;
}
""" % locals()
return code
def c_code_cache_version(self):
return (3,)
def gpu_svd(a, full_matrices=1, compute_uv=1, dtype="float32"):
return GpuSVD(full_matrices, compute_uv, dtype=dtype)(a)
class GpuEye(GpuOp):
def __init__(self, dtype=None):
if dtype is None:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论