Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
f517e1a0
提交
f517e1a0
authored
6月 06, 2011
作者:
Olivier Delalleau
浏览文件
操作
浏览文件
下载
差异文件
Merged
上级
18f73bc6
5ca02715
显示空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
29 行增加
和
22 行删除
+29
-22
cuda_ndarray.cu
theano/sandbox/cuda/cuda_ndarray.cu
+7
-2
cuda_ndarray.cuh
theano/sandbox/cuda/cuda_ndarray.cuh
+22
-4
opt.py
theano/tensor/opt.py
+0
-16
没有找到文件。
theano/sandbox/cuda/cuda_ndarray.cu
浏览文件 @
f517e1a0
...
@@ -2188,7 +2188,7 @@ CudaNdarray_Dot(PyObject* _unused, PyObject* args)
...
@@ -2188,7 +2188,7 @@ CudaNdarray_Dot(PyObject* _unused, PyObject* args)
}
}
static
PyObject
*
static
PyObject
*
filter
(
PyObject
*
__unsed_self
,
PyObject
*
args
)
// args = (data, broadcastable, strict)
filter
(
PyObject
*
__unsed_self
,
PyObject
*
args
)
// args = (data, broadcastable, strict
, storage
)
{
{
/*
/*
* TODO: DOC what this function should do in the various cases of
* TODO: DOC what this function should do in the various cases of
...
@@ -2282,10 +2282,10 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s
...
@@ -2282,10 +2282,10 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s
Py_DECREF
(
rval
);
Py_DECREF
(
rval
);
rval
=
NULL
;
rval
=
NULL
;
}
}
}
Py_DECREF
(
data
);
Py_DECREF
(
data
);
Py_DECREF
(
py_data
);
Py_DECREF
(
py_data
);
Py_DECREF
(
broadcastable
);
Py_DECREF
(
broadcastable
);
}
return
(
PyObject
*
)
rval
;
return
(
PyObject
*
)
rval
;
}
}
}
}
...
@@ -2490,6 +2490,11 @@ CudaNdarray_new_nd(int nd)
...
@@ -2490,6 +2490,11 @@ CudaNdarray_new_nd(int nd)
return
(
PyObject
*
)
rval
;
return
(
PyObject
*
)
rval
;
}
}
/**
* Initialize 'self' as a view of 'base', with memory storage 'data'
*/
int
CudaNdarray_set_device_data
(
CudaNdarray
*
self
,
float
*
data
,
PyObject
*
base
)
int
CudaNdarray_set_device_data
(
CudaNdarray
*
self
,
float
*
data
,
PyObject
*
base
)
{
{
if
(
self
->
data_allocated
)
if
(
self
->
data_allocated
)
...
...
theano/sandbox/cuda/cuda_ndarray.cuh
浏览文件 @
f517e1a0
...
@@ -95,6 +95,12 @@ CudaNdarray_Check(const PyObject * ob);
...
@@ -95,6 +95,12 @@ CudaNdarray_Check(const PyObject * ob);
int
int
CudaNdarray_CheckExact(const PyObject * ob);
CudaNdarray_CheckExact(const PyObject * ob);
/**
* Return true for a C-contiguous CudaNdarray, else false
*/
bool
CudaNdarray_is_c_contiguous(const CudaNdarray * self);
/****
/****
* Returns the number of elements necessary in host_structure and dev_structure for a given number of dimensions.
* Returns the number of elements necessary in host_structure and dev_structure for a given number of dimensions.
*/
*/
...
@@ -386,13 +392,25 @@ int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const inttype
...
@@ -386,13 +392,25 @@ int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const inttype
size = size * dim[i];
size = size * dim[i];
}
}
if (
self->data_allocated != size
)
if (
CudaNdarray_is_c_contiguous(self) && (self->data_allocated == size)
)
{
{
if (device_free(self->devdata))
return 0;
}
// The structure of self will be reused with newly allocated memory.
// If self was a view, we should remove the reference to its base.
// (If base was already NULL, the following has no effect.)
Py_XDECREF(self->base);
self->base = NULL;
// If self is a view, do not try to free its memory
if (self->data_allocated && device_free(self->devdata))
{
{
// Does this ever happen?? Do we need to set data_allocated or devdata to 0?
self->devdata = NULL;
self->data_allocated = 0;
return -1;
return -1;
}
}
assert(size>0);
assert(size>0);
self->devdata = (float*)device_malloc(size*sizeof(real));
self->devdata = (float*)device_malloc(size*sizeof(real));
if (!self->devdata)
if (!self->devdata)
...
@@ -408,7 +426,7 @@ int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const inttype
...
@@ -408,7 +426,7 @@ int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const inttype
self->devdata,
self->devdata,
self);
self);
self->data_allocated = size;
self->data_allocated = size;
}
return 0;
return 0;
}
}
...
...
theano/tensor/opt.py
浏览文件 @
f517e1a0
...
@@ -6,7 +6,6 @@
...
@@ -6,7 +6,6 @@
import
logging
import
logging
_logger
=
logging
.
getLogger
(
'theano.tensor.opt'
)
_logger
=
logging
.
getLogger
(
'theano.tensor.opt'
)
import
copy
import
operator
import
operator
import
itertools
import
itertools
import
sys
import
sys
...
@@ -574,14 +573,6 @@ class ShapeFeature(object):
...
@@ -574,14 +573,6 @@ class ShapeFeature(object):
if
hasattr
(
r
.
type
,
"broadcastable"
)
and
r
.
type
.
broadcastable
[
i
]:
if
hasattr
(
r
.
type
,
"broadcastable"
)
and
r
.
type
.
broadcastable
[
i
]:
return
self
.
lscalar_one
return
self
.
lscalar_one
# NOTE: This may cause problems because the shape is not asserted;
# there is an equivalent mechanism to do this, namely
# specify_shape, which one should use instead
# If user provided size
#elif ( hasattr(r.tag,'shape') and
# r.tag.shape is not None and
# r.tag.shape[i] is not None):
# return T.constant(copy.copy(r.tag.shape[i]),dtype='int64')
else
:
else
:
return
Shape_i
(
i
)
.
make_node
(
r
)
.
outputs
[
0
]
return
Shape_i
(
i
)
.
make_node
(
r
)
.
outputs
[
0
]
...
@@ -1101,7 +1092,6 @@ def local_alloc_elemwise(node):
...
@@ -1101,7 +1092,6 @@ def local_alloc_elemwise(node):
return
[
node
.
op
(
*
new
)]
return
[
node
.
op
(
*
new
)]
#TODO, global optimizer that lifts the assert to the beginning of the graph.
#TODO, global optimizer that lifts the assert to the beginning of the graph.
#TODO, var.tag.shape to propagate the shape and lower the overhead of this op
#TODO, when all inputs can be optimized do all except one
#TODO, when all inputs can be optimized do all except one
theano
.
configparser
.
AddConfigVar
(
'experimental.local_alloc_elemwise'
,
theano
.
configparser
.
AddConfigVar
(
'experimental.local_alloc_elemwise'
,
...
@@ -2749,13 +2739,7 @@ register_specialize(local_mul_specialize)
...
@@ -2749,13 +2739,7 @@ register_specialize(local_mul_specialize)
@gof.local_optimizer
([
T
.
add
])
@gof.local_optimizer
([
T
.
add
])
def
local_add_specialize
(
node
):
def
local_add_specialize
(
node
):
def
fill_chain
(
v
):
def
fill_chain
(
v
):
# Not sure why this happens .. but I did not have the time to look
# into it, it probably has something to do with the dtype I'm
# providing the tag.shape of my variable
out
=
_fill_chain
(
v
,
node
.
inputs
)
out
=
_fill_chain
(
v
,
node
.
inputs
)
if
out
[
0
]
.
dtype
!=
node
.
outputs
[
0
]
.
dtype
:
return
[
T
.
cast
(
out
[
0
],
dtype
=
node
.
outputs
[
0
]
.
dtype
)]
else
:
return
out
return
out
#here, we are past the point of canonicalization, so we don't want to put in un-necessary fills.
#here, we are past the point of canonicalization, so we don't want to put in un-necessary fills.
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论