testgroup / pytensor / Commits

Commit 61f85841
Authored May 20, 2015 by Arnaud Bergeron

Flake8 fixes.

Parent: 63f8d7c2
Showing 3 changed files with 14 additions and 13 deletions (+14 -13).
theano/sandbox/gpuarray/nerv.py           +6 -6
theano/sandbox/gpuarray/opt_util.py       +7 -7
theano/sandbox/gpuarray/pycuda_helper.py  +1 -0
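Flake8 bundles the pyflakes and pycodestyle checkers, and each hunk below maps to one of its warning codes (unused imports, unused locals, comparisons to False, trailing semicolons, missing blank lines). As a point of reference, a run like the following would reproduce this class of warnings; this is a hedged sketch that assumes flake8 is installed and is executed from the repository root:

import subprocess

# Run flake8 over the directory this commit touches; each finding is
# printed as "path:line:col: CODE message", e.g.
# "nerv.py:3:1: F401 'numpy' imported but unused".
result = subprocess.run(
    ["python", "-m", "flake8", "theano/sandbox/gpuarray/"],
    capture_output=True, text=True)
print(result.stdout)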
theano/sandbox/gpuarray/nerv.py
 import os.path
-import numpy
 import theano
-from theano import Op, Apply, Variable, tensor
+from theano import Apply, Variable, tensor
 from theano.compile import optdb
 from theano.compile.ops import shape_i
@@ -9,7 +8,7 @@ from theano.gof import local_optimizer, COp
 from theano.scalar import as_scalar, constant
 from . import opt
-from .basic_ops import (as_gpuarray_variable, gpu_alloc, gpu_from_host,
+from .basic_ops import (as_gpuarray_variable, gpu_from_host,
                         host_from_gpu, GpuAllocEmpty)
 from .opt_util import alpha_merge, output_merge
 from .pycuda_helper import ensure_pycuda_context
@@ -54,7 +53,7 @@ class Gemm16(COp):
         COp.__init__(self, ["gemm16.c"], "gemm16")
         self.relu = relu
         # relu = True will require more work in optimizations.
-        assert self.relu == False
+        assert self.relu is False
         self.inplace = inplace
         if self.inplace:
             self.destroy_map = {0: [0]}
@@ -75,7 +74,7 @@ class Gemm16(COp):
         return Apply(self, [C, alpha, A, B, beta], [C.type()])
 
     def perform(self, node, inputs, outputs):
-        ctx = ensure_pycuda_context()
+        ensure_pycuda_context()
         C, alpha, A, B, beta = inputs
         # The nervana code does not support the case where both inputs
         # are trans, so we need to copy one if them if that is the
@@ -143,7 +142,7 @@ if (GpuKernel_init(&k_%(name)s, c->ops, c->ctx, 1, &bcode, &sz,
     def c_init_code_struct(self, node, nodename, sub):
         codel = [super(Gemm16, self).c_init_code_struct(node, nodename, sub)]
         for name in self.KERN_NAMES:
-            codel.append("memset(&k_{0}, 0, sizeof(GpuKernel));".format(name));
+            codel.append("memset(&k_{0}, 0, sizeof(GpuKernel));".format(name))
         codel.append("const char *bcode;")
         codel.append("size_t sz;")
         codel.append("PyGpuContextObject *c = pygpu_default_context();")
@@ -175,6 +174,7 @@ def local_dot_to_gemm16(node):
                          shape_i(A, 0, fgraph), shape_i(B, 1, fgraph))
         return [host_from_gpu(Gemm16()(C, 1.0, A, B, 0.0))]
 
+
 @opt.register_opt()
 @alpha_merge(Gemm16, alpha_in=1, beta_in=4, nd=2)
 def local_gemm16_alpha_merge(node, *inputs):
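The nerv.py hunks map to four flake8 codes: F401 (the unused numpy, Op, and gpu_alloc imports are dropped), E712 (assert self.relu == False becomes assert self.relu is False), F841 (the unread binding ctx = is removed while the ensure_pycuda_context() call is kept for its side effect), and E703 (a Python-level trailing semicolon after .format(name) is deleted). A small self-contained illustration of the E712 and E703 fixes; the variables here are stand-ins, not Theano code:

# E712: comparison to False; flake8 prefers an identity check (or "not x").
relu = False
# assert relu == False     # old form, reported as E712
assert relu is False       # form used in the commit

# E703: statement ends with a semicolon. The semicolon inside the
# *string* below is C source and must stay; only the Python-level
# trailing semicolon is removed.
codel = []
# codel.append("memset(&k, 0, sizeof(GpuKernel));");   # old form, E703
codel.append("memset(&k, 0, sizeof(GpuKernel));")      # fixed form

F841 is analogous: keep the call for its effect, drop the variable that is never read.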
theano/sandbox/gpuarray/opt_util.py
@@ -2,7 +2,6 @@ from functools import wraps
 import numpy
 
-import theano
 from theano import scalar as scal, Constant
 from theano.gof import local_optimizer
 from theano.tensor import (DimShuffle, get_scalar_constant_value,
@@ -13,11 +12,12 @@ from .elemwise import GpuDimShuffle, GpuElemwise
 _one = scal.constant(numpy.asarray(1.0, dtype='float32'))
 
+
 def grab_cpu_scalar(v, nd):
     if v.owner is not None:
         n = v.owner
         if (isinstance(n.op, GpuDimShuffle) and
                 n.op.new_order == ('x',)*nd):
             return host_from_gpu(n.inputs[0])
         elif (isinstance(n.op, DimShuffle) and
                 n.op.new_order == ('x',)*nd):
@@ -28,7 +28,7 @@ def grab_cpu_scalar(v, nd):
             return None
     else:
         if (isinstance(v, Constant) and
                 v.broadcastable == (True,)*nd):
             return v.dimshuffle(())
@@ -64,8 +64,8 @@ def alpha_merge(cls, alpha_in, beta_in, nd):
         @wraps(maker)
         def opt(node):
             if (isinstance(node.op, GpuElemwise) and
                     node.op.scalar_op == scal.mul and
                     node.nin == 2):
                 targ = find_node(node.inputs[0], cls)
                 if targ is None:
                     targ = find_node(node.inputs[1], cls)
@@ -88,8 +88,8 @@ def output_merge(cls, alpha_in, beta_in, out_in, nd):
         @wraps(maker)
         def opt(node):
             if (isinstance(node.op, GpuElemwise) and
                     node.op.scalar_op == scal.add and
                     node.nin == 2):
                 targ = find_node(node.inputs[0], cls)
                 W = node.inputs[1]
                 if targ is None:
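In opt_util.py the only token-level changes are the removal of the unused import theano (F401) and an added blank line before grab_cpu_scalar (E302, two blank lines before a top-level definition). The remaining additions and deletions implied by the hunk headers (+7/-7 overall, with identical tokens on both sides) are consistent with whitespace-only re-indentation of the wrapped if (...) conditions, the kind of continuation-line fix pycodestyle reports as E127/E128, which this rendering cannot display. A generic, runnable illustration of that rule:

values = [1, 2, 3]

# Under-indented continuation, reported as E128:
# total = sum(x * x
#     for x in values)

# Aligned with the opening bracket (accepted):
total = sum(x * x
            for x in values)

# Hanging indent, one level deeper than the body (also accepted):
total = sum(
    x * x
    for x in values)

assert total == 14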
theano/sandbox/gpuarray/pycuda_helper.py
@@ -8,6 +8,7 @@ except ImportError:
 pycuda_initialized = False
 pycuda_context = None
 
+
 def ensure_pycuda_context():
     global pycuda_context, pycuda_initialized
     if not pycuda_initialized:
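The context lines above show the module's guard-flag lazy initializer: a global boolean ensures the PyCUDA context is created at most once, on first use. The single added line in this hunk contains no visible tokens, which is consistent with the blank line E302 requires before the def. A minimal generic sketch of the same pattern; _make_context is a hypothetical stand-in for the real PyCUDA context acquisition:

resource_initialized = False
resource = None


def _make_context():
    # Placeholder for an expensive, create-once handle.
    return object()


def ensure_context():
    global resource, resource_initialized
    if not resource_initialized:
        # Only the first caller pays the initialization cost;
        # every later call returns the cached handle.
        resource = _make_context()
        resource_initialized = True
    return resource


assert ensure_context() is ensure_context()   # same handle every time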