testgroup / pytensor · Commits · f517e1a0

Commit f517e1a0
Authored Jun 06, 2011 by Olivier Delalleau

    Merged

Parents: 18f73bc6, 5ca02715

Showing 3 changed files with 85 additions and 78 deletions (whitespace-only changes hidden, so some hunks below show only context lines):

    cuda_ndarray.cu    theano/sandbox/cuda/cuda_ndarray.cu    +9  -4
    cuda_ndarray.cuh   theano/sandbox/cuda/cuda_ndarray.cuh   +75 -57
    opt.py             theano/tensor/opt.py                   +1  -17
theano/sandbox/cuda/cuda_ndarray.cu
@@ -2188,7 +2188,7 @@ CudaNdarray_Dot(PyObject* _unused, PyObject* args)
 }
 static PyObject *
-filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, strict)
+filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, strict, storage)
 {
     /*
      * TODO: DOC what this function should do in the various cases of
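The only visible change in this hunk is the comment documenting filter()'s argument tuple, which gains a fourth member, storage. For readers less familiar with the CPython side, a minimal sketch of unpacking such a tuple follows; the format string and every name in it are illustrative assumptions, not the commit's actual parsing code.

    #include <Python.h>

    /* Hypothetical sketch: unpack (data, broadcastable, strict, storage).
     * "OOiO" reads two objects, an int, and the new trailing object. */
    static PyObject *
    filter_sketch(PyObject *unused_self, PyObject *args)
    {
        PyObject *data = NULL, *broadcastable = NULL, *storage = NULL;
        int strict = 0;
        if (!PyArg_ParseTuple(args, "OOiO", &data, &broadcastable, &strict, &storage))
            return NULL;  /* PyArg_ParseTuple has already set an exception */
        /* ... validation of data against broadcastable would go here ... */
        Py_RETURN_NONE;
    }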
@@ -2282,10 +2282,10 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s
             Py_DECREF(rval);
             rval = NULL;
         }
-        Py_DECREF(data);
-        Py_DECREF(py_data);
-        Py_DECREF(broadcastable);
     }
+    Py_DECREF(data);
+    Py_DECREF(py_data);
+    Py_DECREF(broadcastable);
     return (PyObject*)rval;
 }
 }
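This hunk moves the three Py_DECREF calls out of the inner block, so the references held on data, py_data, and broadcastable are released on every exit path rather than only when rval was successfully built. The single-exit pattern it converges on looks roughly like the sketch below (hypothetical names, not the commit's code):

    #include <Python.h>

    /* Sketch of release-on-every-path: once the new references exist,
     * exactly one decref runs for each, whether or not rval is NULL. */
    static PyObject *
    build_result_sketch(PyObject *py_data, PyObject *broadcastable)
    {
        PyObject *data = PyNumber_Float(py_data);  /* new reference or NULL */
        PyObject *rval = NULL;
        if (data != NULL)
            rval = PyTuple_Pack(2, data, broadcastable);  /* new ref or NULL */
        Py_XDECREF(data);  /* released on success and failure alike */
        return rval;       /* NULL propagates the pending exception */
    }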
@@ -2490,6 +2490,11 @@ CudaNdarray_new_nd(int nd)
     return (PyObject*)rval;
 }
+/**
+ * Initialize 'self' as a view of 'base', with memory storage 'data'
+ */
 int
 CudaNdarray_set_device_data(CudaNdarray * self, float * data, PyObject * base)
 {
     if (self->data_allocated)
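The doc comment added by this hunk pins down the contract of CudaNdarray_set_device_data: after the call, self is a view whose storage is data and whose lifetime is tied to base. Under that contract the ownership bookkeeping has to look roughly like the sketch below; the body is illustrative, and only the field names come from the struct and hunks in cuda_ndarray.cuh.

    /* Illustrative only: a view keeps its base alive and must never
     * free device memory it does not own. */
    int
    set_device_data_sketch(CudaNdarray *self, float *data, PyObject *base)
    {
        Py_XINCREF(base);          /* the view holds a reference to its base */
        Py_XDECREF(self->base);    /* drop the previous base, if any */
        self->base = base;
        self->devdata = data;      /* borrowed device pointer */
        self->data_allocated = 0;  /* owns 0 bytes: marks self as a view */
        return 0;
    }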
theano/sandbox/cuda/cuda_ndarray.cuh
@@ -26,7 +26,7 @@ typedef float real;
 #endif
 #ifndef SHARED_SIZE
 #define SHARED_SIZE (16*1024)
 #endif

@@ -48,10 +48,10 @@ static T ceil_intdiv(T a, T b)
 /**
  * struct CudaNdarray
  *
  * This is a Python type.
  *
  */
 struct CudaNdarray
 {
     PyObject_HEAD

@@ -65,40 +65,46 @@ struct CudaNdarray
     /* Type-specific fields go here. */
     //GpuTensorType::VoidTensor * vt;
     int nd; //the number of dimensions of the tensor
     // Client should acces host_structure via CudaNdarray_HOST_DIMS / CudaNdarray_HOST_STRIDES macros
     int * host_structure; //dim0, dim1, ... stride0, stride1, ...
     int data_allocated; //the number of bytes allocated for devdata
     //device pointers (allocated by cudaMalloc)
     int dev_structure_fresh;
     //dev_structure should be accessed via macros, otherwise may not be synchronized
     int * dev_structure; //dim0, dim1, ..., stride0, stride1, ...
     real* devdata; //pointer to data element [0,..,0].
 };
 /*
  * Return a CudaNdarray whose 'nd' dimensions are all 0.
  */
 PyObject *
 CudaNdarray_New(int nd=-1);
 /**
  * Return 1 for a CudaNdarray otw 0
  */
 int
 CudaNdarray_Check(const PyObject * ob);
 /**
  * Return 1 for a CudaNdarray otw 0
  */
 int
 CudaNdarray_CheckExact(const PyObject * ob);
+/**
+ * Return true for a C-contiguous CudaNdarray, else false
+ */
+bool
+CudaNdarray_is_c_contiguous(const CudaNdarray * self);
 /****
  * Returns the number of elements necessary in host_structure and dev_structure for a given number of dimensions.
  */
 int
 cnda_structure_size(int nd)
 {
     // dim0, dim1, ...

@@ -107,23 +113,23 @@ cnda_structure_size(int nd)
     return nd + nd + nd;
 }
 const int *
 CudaNdarray_HOST_DIMS(const CudaNdarray * self)
 {
     return self->host_structure;
 }
 const int *
 CudaNdarray_HOST_STRIDES(const CudaNdarray * self)
 {
     return self->host_structure + self->nd;
 }
 const int *
 CudaNdarray_HOST_LOG2DIMS(const CudaNdarray * self)
 {
     return self->host_structure + 2*self->nd;
 }
 void
 cnda_mark_dev_structure_dirty(CudaNdarray * self)
 {
     self->dev_structure_fresh = 0;

@@ -190,7 +196,7 @@ CudaNdarray_Equal(CudaNdarray *cnda1, CudaNdarray *cnda2)
  *
  * Does not sync structure to host.
  */
 void
 CudaNdarray_set_dim(CudaNdarray * self, int idx, int d)
 {
     if ((idx >= self->nd) || (idx < 0) || (d < 0))

@@ -206,7 +212,7 @@ CudaNdarray_set_dim(CudaNdarray * self, int idx, int d)
         cnda_mark_dev_structure_dirty(self);
     }
 }
 void
 CudaNdarray_set_stride(CudaNdarray * self, int idx, int s)
 {
     if ((idx >= self->nd) || (idx < 0))

@@ -225,7 +231,7 @@ CudaNdarray_set_stride(CudaNdarray * self, int idx, int s)
  *
  * This means: recalculate the log2dims and transfer structure to the card
  */
 int
 cnda_copy_structure_to_device(CudaNdarray * self)
 {
     cublasSetVector(cnda_structure_size(self->nd), sizeof(int), self->host_structure, 1, self->dev_structure, 1);

@@ -239,7 +245,7 @@ cnda_copy_structure_to_device(CudaNdarray * self)
     return 0;
 }
 const int *
 CudaNdarray_DEV_DIMS(CudaNdarray * self)
 {
     if (!self->dev_structure_fresh)

@@ -249,7 +255,7 @@ CudaNdarray_DEV_DIMS(CudaNdarray * self)
     }
     return self->dev_structure;
 }
 const int *
 CudaNdarray_DEV_STRIDES(CudaNdarray * self)
 {
     if (!self->dev_structure_fresh)

@@ -259,7 +265,7 @@ CudaNdarray_DEV_STRIDES(CudaNdarray * self)
     }
     return self->dev_structure + self->nd;
 }
 const int *
 CudaNdarray_DEV_LOG2DIMS(CudaNdarray * self)
 {
     if (!self->dev_structure_fresh)

@@ -269,7 +275,7 @@ CudaNdarray_DEV_LOG2DIMS(CudaNdarray * self)
     }
     return self->dev_structure + 2*self->nd;
 }
 float *
 CudaNdarray_DEV_DATA(const CudaNdarray * self)
 {
     return self->devdata;

@@ -278,7 +284,7 @@ CudaNdarray_DEV_DATA(const CudaNdarray * self)
 /**
  * Return the number of elements in the ndarray (product of the dimensions)
  */
 int
 CudaNdarray_SIZE(const CudaNdarray *self)
 {
     if (self->nd == -1) return 0;

@@ -289,7 +295,7 @@ CudaNdarray_SIZE(const CudaNdarray *self)
     }
     return size;
 }
 static PyObject *
 CudaNdarray_SIZE_Object(const CudaNdarray *self, void *closure)
 {
     return PyInt_FromLong(CudaNdarray_SIZE(self));

@@ -320,7 +326,7 @@ int CudaNdarray_set_nd(CudaNdarray * self, const int nd)
         }
         self->dev_structure = NULL;
     }
     if (self->host_structure)
     {
         free(self->host_structure);
         self->host_structure = NULL;

@@ -386,29 +392,41 @@ int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const inttype
         size = size * dim[i];
     }
-    if (self->data_allocated != size)
+    if (CudaNdarray_is_c_contiguous(self) && (self->data_allocated == size))
     {
-        if (device_free(self->devdata))
-        {
-            // Does this ever happen?? Do we need to set data_allocated or devdata to 0?
-            return -1;
-        }
-        assert(size>0);
-        self->devdata = (float*)device_malloc(size*sizeof(real));
-        if (!self->devdata)
-        {
-            CudaNdarray_set_nd(self,-1);
-            self->data_allocated = 0;
-            self->devdata = 0;
-            return -1;
-        }
-        if (0)
-            fprintf(stderr,
-                    "Allocated devdata %p (self=%p)\n",
-                    self->devdata,
-                    self);
-        self->data_allocated = size;
+        return 0;
     }
+    // The structure of self will be reused with newly allocated memory.
+    // If self was a view, we should remove the reference to its base.
+    // (If base was already NULL, the following has no effect.)
+    Py_XDECREF(self->base);
+    self->base = NULL;
+    // If self is a view, do not try to free its memory
+    if (self->data_allocated && device_free(self->devdata))
+    {
+        self->devdata = NULL;
+        self->data_allocated = 0;
+        return -1;
+    }
+    assert(size>0);
+    self->devdata = (float*)device_malloc(size*sizeof(real));
+    if (!self->devdata)
+    {
+        CudaNdarray_set_nd(self,-1);
+        self->data_allocated = 0;
+        self->devdata = 0;
+        return -1;
+    }
+    if (0)
+        fprintf(stderr,
+                "Allocated devdata %p (self=%p)\n",
+                self->devdata,
+                self);
+    self->data_allocated = size;
     return 0;
 }

@@ -416,7 +434,7 @@ int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const inttype
  * Return a CudaNdarray whose 'nd' dimensions are set to dims, and allocated.
  */
 template<typename inttype>
 PyObject *
 CudaNdarray_NewDims(int nd, const inttype * dims)
 {
     CudaNdarray * rval = (CudaNdarray*)CudaNdarray_New();

@@ -440,7 +458,7 @@ CudaNdarray_NewDims(int nd, const inttype * dims)
 int CudaNdarray_set_device_data(CudaNdarray * self, float * data, PyObject * base);
 int CudaNdarray_set_device_data(CudaNdarray * self, float * data, CudaNdarray * base)
 {
     return CudaNdarray_set_device_data(self, data, (PyObject *) base);
 }
 /**

@@ -475,10 +493,10 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other, boo
 /**
  * Transfer the contents of CudaNdarray `self` to a new numpy ndarray.
  */
 PyObject *
 CudaNdarray_CreateArrayObj(CudaNdarray * self);
 PyObject *
 CudaNdarray_ZEROS(int n, int * dims);
 /**

@@ -499,7 +517,7 @@ int CudaNdarray_dimshuffle(CudaNdarray * self, unsigned int len, const int * pat
 void fprint_CudaNdarray(FILE * fd, const CudaNdarray *self)
 {
     fprintf(fd, "CudaNdarray <%p, %p> nd=%i dev_structure_fresh=%d data_allocated=%d\n",
             self, self->devdata, self->nd, self->dev_structure_fresh, self->data_allocated);
     fprintf(fd, "\tHOST_DIMS: ");
     for (int i = 0; i < self->nd; ++i)
     {

@@ -510,23 +528,23 @@ void fprint_CudaNdarray(FILE * fd, const CudaNdarray *self)
     {
         fprintf(fd, "%i\t", CudaNdarray_HOST_STRIDES(self)[i]);
     }
     int data=0;
     fprintf(fd, "\n\tDEV_DIMS: ");
     for (int i = 0; i < self->nd; ++i)
     {
         cublasGetVector(1, sizeof(int),
                         self->dev_structure+i, 1,
                         &data, 1);
         fprintf(fd, "%i\t", data);
     }
     fprintf(fd, "\n\tDEV_STRIDES: ");
     for (int i = 0; i < self->nd; ++i)
     {
         cublasGetVector(1, sizeof(int),
                         self->dev_structure + self->nd+i, 1,
                         &data, 1);
         fprintf(fd, "%i \t", data);
     }
     fprintf(fd, "\n");
 }
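The most substantive change in this header is the new CudaNdarray_is_c_contiguous declaration and the rewritten CudaNdarray_alloc_contiguous, which now returns early and reuses the existing buffer when the array is already C-contiguous and its existing allocation matches the requested size, instead of unconditionally freeing and reallocating. The commit does not show the contiguity function's body in this diff, but a standard check over the dims/strides layout of host_structure (strides counted in elements) is sketched below as an assumption:

    /* Illustrative contiguity test, not the commit's body: an array is
     * C-contiguous when each axis's stride equals the product of the
     * extents of all inner axes; axes of extent 1 impose no constraint. */
    int
    is_c_contiguous_sketch(const int *dims, const int *strides, int nd)
    {
        int expected = 1;  /* required stride of the innermost remaining axis */
        for (int i = nd - 1; i >= 0; --i)
        {
            if (dims[i] != 1 && strides[i] != expected)
                return 0;
            expected *= dims[i];
        }
        return 1;
    }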
theano/tensor/opt.py
@@ -6,7 +6,6 @@
 import logging
 _logger = logging.getLogger('theano.tensor.opt')
-import copy
 import operator
 import itertools
 import sys

@@ -574,14 +573,6 @@ class ShapeFeature(object):
         if hasattr(r.type, "broadcastable") and r.type.broadcastable[i]:
             return self.lscalar_one
-        # NOTE: This may cause problems bacause the shape is not asserted
-        # there is an equivalent mechanism to do this, namely
-        # specify_shape that one should use
-        # If user provided size
-        #elif ( hasattr(r.tag,'shape') and
-        #       r.tag.shape is not None and
-        #       r.tag.shape[i] is not None):
-        #    return T.constant(copy.copy(r.tag.shape[i]),dtype='int64')
         else:
             return Shape_i(i).make_node(r).outputs[0]

@@ -1101,7 +1092,6 @@ def local_alloc_elemwise(node):
     return [node.op(*new)]
 #TODO, global optimizer that lift the assert to the beginning of the graph.
-#TODO, var.tag.shape to propagate the shape and lower the overhead of this op
 #TODO, when all inputs can be optimized do all except one
 theano.configparser.AddConfigVar('experimental.local_alloc_elemwise',

@@ -2749,14 +2739,8 @@ register_specialize(local_mul_specialize)
 @gof.local_optimizer([T.add])
 def local_add_specialize(node):
     def fill_chain(v):
-        # Not sure why this happens .. but I did not had the time to look
-        # into it, it probably has something to do with the dtype I'm
-        # providing the tag.shape of my variable
         out = _fill_chain(v, node.inputs)
-        if out[0].dtype != node.outputs[0].dtype:
-            return [T.cast(out[0], dtype=node.outputs[0].dtype)]
-        else:
-            return out
+        return out
     #here, we are past the point of canonicalization, so we don't want to put in un-necessary fills.
     if node.op == T.add: