Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
6701568f
提交
6701568f
authored
7月 03, 2016
作者:
sentient07
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Changed interface of gpu_alloc_empty and few cleanups
上级
8f602f6f
隐藏空白字符变更
内嵌
并排
正在显示
8 个修改的文件
包含
52 行增加
和
49 行删除
+52
-49
basic_ops.py
theano/gpuarray/basic_ops.py
+16
-1
dnn.py
theano/gpuarray/dnn.py
+10
-10
extra_ops.py
theano/gpuarray/extra_ops.py
+5
-1
multinomial.py
theano/gpuarray/multinomial.py
+0
-3
nerv.py
theano/gpuarray/nerv.py
+1
-1
opt.py
theano/gpuarray/opt.py
+16
-23
opt_util.py
theano/gpuarray/opt_util.py
+1
-2
rng_mrg.py
theano/sandbox/rng_mrg.py
+3
-8
没有找到文件。
theano/gpuarray/basic_ops.py
浏览文件 @
6701568f
...
@@ -973,7 +973,22 @@ def empty_like(var):
...
@@ -973,7 +973,22 @@ def empty_like(var):
return
GpuAllocEmpty
(
var
.
type
.
dtype
,
var
.
type
.
context_name
)(
*
var
.
shape
)
return
GpuAllocEmpty
(
var
.
type
.
dtype
,
var
.
type
.
context_name
)(
*
var
.
shape
)
def
gpu_alloc_empty
(
dtype
,
ctx
):
def
gpu_alloc_empty
(
ctx
,
**
kwargs
):
'''
This is the cache method of GpuAllocEmpty class.
This takes the parameters of context name and props_dict
and retrieves the dtype key from the dictionary
Parameters
----------
ctx : String
The context name.
kwargs : Dict
The props_dict of the Op
'''
dtype
=
kwargs
.
get
(
'dtype'
)
key
=
(
dtype
,
ctx
)
key
=
(
dtype
,
ctx
)
if
key
not
in
gpu_alloc_empty
.
cache
:
if
key
not
in
gpu_alloc_empty
.
cache
:
gpu_alloc_empty
.
cache
[
key
]
=
GpuAllocEmpty
(
dtype
,
ctx
)
gpu_alloc_empty
.
cache
[
key
]
=
GpuAllocEmpty
(
dtype
,
ctx
)
...
...
theano/gpuarray/dnn.py
浏览文件 @
6701568f
...
@@ -937,7 +937,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -937,7 +937,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
))
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
))
shape2
=
shape_i
(
img
,
2
,
fgraph
)
-
shape_i
(
kerns
,
2
,
fgraph
)
+
1
shape2
=
shape_i
(
img
,
2
,
fgraph
)
-
shape_i
(
kerns
,
2
,
fgraph
)
+
1
shape3
=
shape_i
(
img
,
3
,
fgraph
)
-
shape_i
(
kerns
,
3
,
fgraph
)
+
1
shape3
=
shape_i
(
img
,
3
,
fgraph
)
-
shape_i
(
kerns
,
3
,
fgraph
)
+
1
out
=
gpu_alloc_empty
(
img
.
dtype
,
ctx_nam
e
)(
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtyp
e
)(
shape_i
(
kerns
,
1
,
fgraph
),
shape_i
(
kerns
,
1
,
fgraph
),
shape_i
(
img
,
1
,
fgraph
),
shape2
,
shape3
)
shape_i
(
img
,
1
,
fgraph
),
shape2
,
shape3
)
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
...
@@ -955,7 +955,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -955,7 +955,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
conv_mode
=
'cross'
if
conv_mode
==
'conv'
else
'conv'
conv_mode
=
'cross'
if
conv_mode
==
'conv'
else
'conv'
shape2
=
shape_i
(
img
,
2
,
fgraph
)
+
shape_i
(
kerns
,
2
,
fgraph
)
-
1
shape2
=
shape_i
(
img
,
2
,
fgraph
)
+
shape_i
(
kerns
,
2
,
fgraph
)
-
1
shape3
=
shape_i
(
img
,
3
,
fgraph
)
+
shape_i
(
kerns
,
3
,
fgraph
)
-
1
shape3
=
shape_i
(
img
,
3
,
fgraph
)
+
shape_i
(
kerns
,
3
,
fgraph
)
-
1
out
=
gpu_alloc_empty
(
img
.
dtype
,
ctx_nam
e
)(
shape_i
(
img
,
0
,
fgraph
),
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtyp
e
)(
shape_i
(
img
,
0
,
fgraph
),
shape_i
(
kerns
,
1
,
fgraph
),
shape_i
(
kerns
,
1
,
fgraph
),
shape2
,
shape3
)
shape2
,
shape3
)
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
...
@@ -977,7 +977,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -977,7 +977,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
out_shp
=
get_conv_output_shape
(
ishape
,
kshape
,
out_shp
=
get_conv_output_shape
(
ishape
,
kshape
,
desc_op
.
border_mode
,
desc_op
.
border_mode
,
desc_op
.
subsample
)
desc_op
.
subsample
)
out
=
gpu_alloc_empty
(
img
.
dtype
,
ctx_nam
e
)(
*
out_shp
)
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtyp
e
)(
*
out_shp
)
return
gpu_dnn_conv
(
algo
=
algo
)(
img
,
kerns
,
out
,
desc
)
return
gpu_dnn_conv
(
algo
=
algo
)(
img
,
kerns
,
out
,
desc
)
...
@@ -991,7 +991,7 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
...
@@ -991,7 +991,7 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
kerns_shp
=
as_tensor_variable
(
kerns_shp
)
kerns_shp
=
as_tensor_variable
(
kerns_shp
)
desc
=
gpu_dnn_conv_desc
(
border_mode
=
border_mode
,
subsample
=
subsample
,
desc
=
gpu_dnn_conv_desc
(
border_mode
=
border_mode
,
subsample
=
subsample
,
conv_mode
=
conv_mode
)(
kerns_shp
)
conv_mode
=
conv_mode
)(
kerns_shp
)
out
=
gpu_alloc_empty
(
img
.
dtype
,
ctx_nam
e
)(
*
kerns_shp
)
out
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
img
.
dtyp
e
)(
*
kerns_shp
)
return
gpu_dnn_conv_gradW
()(
img
,
topgrad
,
out
,
desc
)
return
gpu_dnn_conv_gradW
()(
img
,
topgrad
,
out
,
desc
)
...
@@ -1005,7 +1005,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
...
@@ -1005,7 +1005,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
img_shp
=
as_tensor_variable
(
img_shp
)
img_shp
=
as_tensor_variable
(
img_shp
)
desc
=
gpu_dnn_conv_desc
(
border_mode
=
border_mode
,
subsample
=
subsample
,
desc
=
gpu_dnn_conv_desc
(
border_mode
=
border_mode
,
subsample
=
subsample
,
conv_mode
=
conv_mode
)(
kerns
.
shape
)
conv_mode
=
conv_mode
)(
kerns
.
shape
)
out
=
gpu_alloc_empty
(
kerns
.
dtype
,
ctx_nam
e
)(
*
img_shp
)
out
=
gpu_alloc_empty
(
ctx_name
,
kerns
.
dtyp
e
)(
*
img_shp
)
return
gpu_dnn_conv_gradI
()(
kerns
,
topgrad
,
out
,
desc
)
return
gpu_dnn_conv_gradI
()(
kerns
,
topgrad
,
out
,
desc
)
...
@@ -1480,7 +1480,7 @@ def local_abstractconv_cudnn(node):
...
@@ -1480,7 +1480,7 @@ def local_abstractconv_cudnn(node):
return
return
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
conv_groupopt
.
register
(
'local_abstractconv_cudnn
_graph
'
,
conv_groupopt
.
register
(
'local_abstractconv_cudnn'
,
local_abstractconv_cudnn
,
20
,
local_abstractconv_cudnn
,
20
,
'fast_compile'
,
'fast_run'
,
'fast_compile'
,
'fast_run'
,
'gpuarray'
,
'conv_dnn'
,
'cudnn'
)
'gpuarray'
,
'conv_dnn'
,
'cudnn'
)
...
@@ -1549,7 +1549,7 @@ def local_dnn_convi_output_merge(node, *inputs):
...
@@ -1549,7 +1549,7 @@ def local_dnn_convi_output_merge(node, *inputs):
@register_opt
(
'cudnn'
,
'fast_compile'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
Pool
])
@op_lifter
([
Pool
])
@register_opt2
([
Pool
],
'fast_compile'
)
@register_opt2
([
Pool
],
'fast_compile'
,
'cudnn'
)
def
local_pool_dnn_alternative
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_pool_dnn_alternative
(
op
,
ctx_name
,
inputs
,
outputs
):
if
not
dnn_available
(
ctx_name
):
if
not
dnn_available
(
ctx_name
):
raise_no_cudnn
()
raise_no_cudnn
()
...
@@ -1566,7 +1566,7 @@ def local_pool_dnn_alternative(op, ctx_name, inputs, outputs):
...
@@ -1566,7 +1566,7 @@ def local_pool_dnn_alternative(op, ctx_name, inputs, outputs):
@register_opt
(
'cudnn'
,
'fast_compile'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
MaxPoolGrad
])
@op_lifter
([
MaxPoolGrad
])
@register_opt2
([
MaxPoolGrad
],
'fast_compile'
)
@register_opt2
([
MaxPoolGrad
],
'fast_compile'
,
'cudnn'
)
def
local_pool_dnn_grad_stride
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_pool_dnn_grad_stride
(
op
,
ctx_name
,
inputs
,
outputs
):
if
not
dnn_available
(
ctx_name
):
if
not
dnn_available
(
ctx_name
):
raise_no_cudnn
()
raise_no_cudnn
()
...
@@ -1591,7 +1591,7 @@ def local_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
...
@@ -1591,7 +1591,7 @@ def local_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
@register_opt
(
'cudnn'
,
'fast_compile'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
AveragePoolGrad
])
@op_lifter
([
AveragePoolGrad
])
@register_opt2
([
AveragePoolGrad
],
'fast_compile'
)
@register_opt2
([
AveragePoolGrad
],
'fast_compile'
,
'cudnn'
)
def
local_avg_pool_dnn_grad_stride
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_avg_pool_dnn_grad_stride
(
op
,
ctx_name
,
inputs
,
outputs
):
if
not
dnn_available
(
ctx_name
):
if
not
dnn_available
(
ctx_name
):
raise_no_cudnn
()
raise_no_cudnn
()
...
@@ -1645,7 +1645,7 @@ def local_log_softmax_dnn(node):
...
@@ -1645,7 +1645,7 @@ def local_log_softmax_dnn(node):
@register_opt
(
'cudnn'
,
'fast_compile'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
LogSoftmax
])
@op_lifter
([
LogSoftmax
])
@register_opt2
([
LogSoftmax
],
'fast_compile'
)
@register_opt2
([
LogSoftmax
],
'fast_compile'
,
'cudnn'
)
def
local_logsoftmax_to_dnn
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_logsoftmax_to_dnn
(
op
,
ctx_name
,
inputs
,
outputs
):
# Transform the input in the format expected by GpuDnnSoftmax
# Transform the input in the format expected by GpuDnnSoftmax
inp
=
inputs
[
0
]
inp
=
inputs
[
0
]
...
...
theano/gpuarray/extra_ops.py
浏览文件 @
6701568f
...
@@ -3,6 +3,7 @@ import os
...
@@ -3,6 +3,7 @@ import os
from
theano
import
Apply
,
Op
from
theano
import
Apply
,
Op
from
theano.tensor.extra_ops
import
CumsumOp
from
theano.tensor.extra_ops
import
CumsumOp
from
.type
import
GpuArrayType
from
.type
import
GpuArrayType
from
.basic_ops
import
infer_context_name
try
:
try
:
from
pygpu
import
gpuarray
from
pygpu
import
gpuarray
except
ImportError
:
except
ImportError
:
...
@@ -40,6 +41,9 @@ class GpuCumsum(GpuKernelBase, Op):
...
@@ -40,6 +41,9 @@ class GpuCumsum(GpuKernelBase, Op):
def
make_node
(
self
,
x
):
def
make_node
(
self
,
x
):
assert
x
.
type
.
dtype
==
'float32'
,
"Only float32 supported for GpuCumSum"
assert
x
.
type
.
dtype
==
'float32'
,
"Only float32 supported for GpuCumSum"
context_name
=
infer_context_name
(
x
)
x
=
as_gpuarray_variable
(
x
,
context_name
)
if
x
.
ndim
>
GpuCumsum
.
SUPPORTED_NDIMS
:
if
x
.
ndim
>
GpuCumsum
.
SUPPORTED_NDIMS
:
raise
NotImplementedError
(
'Only cumsum on 1D, 2D and
\
raise
NotImplementedError
(
'Only cumsum on 1D, 2D and
\
3D arrays are supported right now!'
)
3D arrays are supported right now!'
)
...
@@ -467,5 +471,5 @@ def use_gpu_cumsumop(op, ctx_name, inputs, outputs):
...
@@ -467,5 +471,5 @@ def use_gpu_cumsumop(op, ctx_name, inputs, outputs):
# ``gpu_cumsum`` assume array has been flattened if needed.
# ``gpu_cumsum`` assume array has been flattened if needed.
if
axis
is
None
:
if
axis
is
None
:
axis
=
0
axis
=
0
assert
isinstance
(
x
.
type
,
GpuArrayType
)
return
GpuCumsum
(
axis
)(
x
)
return
GpuCumsum
(
axis
)(
x
)
theano/gpuarray/multinomial.py
浏览文件 @
6701568f
...
@@ -243,9 +243,6 @@ def local_gpua_multinomial(op, context_name, inputs, outputs):
...
@@ -243,9 +243,6 @@ def local_gpua_multinomial(op, context_name, inputs, outputs):
return
None
return
None
except
NotScalarConstantError
:
except
NotScalarConstantError
:
return
None
return
None
node
=
op
.
make_node
(
*
inputs
)
outputs
=
node
.
outputs
m
,
=
outputs
m
,
=
outputs
if
(
p
.
dtype
==
u
.
dtype
==
m
.
dtype
==
'float32'
):
if
(
p
.
dtype
==
u
.
dtype
==
m
.
dtype
==
'float32'
):
gpu_op
=
GPUAMultinomialFromUniform
(
op
.
odtype
)
gpu_op
=
GPUAMultinomialFromUniform
(
op
.
odtype
)
...
...
theano/gpuarray/nerv.py
浏览文件 @
6701568f
...
@@ -158,7 +158,7 @@ def local_dot_to_gemm16(op, ctx_name, inputs, outputs):
...
@@ -158,7 +158,7 @@ def local_dot_to_gemm16(op, ctx_name, inputs, outputs):
if
(
A
.
ndim
==
2
and
B
.
ndim
==
2
and
if
(
A
.
ndim
==
2
and
B
.
ndim
==
2
and
A
.
dtype
==
'float16'
and
B
.
dtype
==
'float16'
):
A
.
dtype
==
'float16'
and
B
.
dtype
==
'float16'
):
fgraph
=
inputs
[
0
]
.
fgraph
fgraph
=
inputs
[
0
]
.
fgraph
C
=
gpu_alloc_empty
(
dtype
=
'float16'
,
context_name
=
ctx_name
)(
C
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
'float16'
)(
shape_i
(
A
,
0
,
fgraph
),
shape_i
(
B
,
1
,
fgraph
))
shape_i
(
A
,
0
,
fgraph
),
shape_i
(
B
,
1
,
fgraph
))
return
Gemm16
()(
C
,
1.0
,
A
,
B
,
0.0
)
return
Gemm16
()(
C
,
1.0
,
A
,
B
,
0.0
)
...
...
theano/gpuarray/opt.py
浏览文件 @
6701568f
...
@@ -59,11 +59,10 @@ _logger = logging.getLogger("theano.gpuarray.opt")
...
@@ -59,11 +59,10 @@ _logger = logging.getLogger("theano.gpuarray.opt")
gpu_optimizer
=
EquilibriumDB
()
gpu_optimizer
=
EquilibriumDB
()
gpu_optimizer2
=
EquilibriumDB
()
gpu_cut_copies
=
EquilibriumDB
()
gpu_cut_copies
=
EquilibriumDB
()
old_not_transferred
=
[]
# Not used for an EquilibriumOptimizer. It has the "tracks" that we need for GraphToGPUDB.
new_not_transferred
=
[]
gpu_optimizer2
=
EquilibriumDB
()
class
GraphToGPUDB
(
DB
):
class
GraphToGPUDB
(
DB
):
...
@@ -207,8 +206,7 @@ def op_lifter(OP, cuda_only=False):
...
@@ -207,8 +206,7 @@ def op_lifter(OP, cuda_only=False):
i
.
tag
.
context_name
=
context_name
i
.
tag
.
context_name
=
context_name
new_op
=
maker
(
node
.
op
,
context_name
,
node
.
inputs
,
node
.
outputs
)
new_op
=
maker
(
node
.
op
,
context_name
,
node
.
inputs
,
node
.
outputs
)
if
not
new_op
:
old_not_transferred
.
append
(
node
)
# This is needed as sometimes new_op inherits from OP.
# This is needed as sometimes new_op inherits from OP.
if
new_op
and
new_op
!=
node
.
op
:
if
new_op
and
new_op
!=
node
.
op
:
if
isinstance
(
new_op
,
theano
.
Op
):
if
isinstance
(
new_op
,
theano
.
Op
):
...
@@ -375,8 +373,6 @@ class GraphToGPU(NavigatorOptimizer):
...
@@ -375,8 +373,6 @@ class GraphToGPU(NavigatorOptimizer):
if
not
new_ops
:
if
not
new_ops
:
newnode
=
node
.
clone_with_new_inputs
([
mapping
.
get
(
i
)
newnode
=
node
.
clone_with_new_inputs
([
mapping
.
get
(
i
)
for
i
in
node
.
inputs
])
for
i
in
node
.
inputs
])
new_not_transferred
.
append
(
newnode
)
outputs
=
newnode
.
outputs
outputs
=
newnode
.
outputs
elif
isinstance
(
new_ops
,
(
tuple
,
list
)):
elif
isinstance
(
new_ops
,
(
tuple
,
list
)):
outputs
=
[]
outputs
=
[]
...
@@ -596,7 +592,7 @@ def local_gpuaallocempty(op, context_name, inputs, outputs):
...
@@ -596,7 +592,7 @@ def local_gpuaallocempty(op, context_name, inputs, outputs):
# We use _props_dict() to make sure that the GPU op know all the
# We use _props_dict() to make sure that the GPU op know all the
# CPU op props.
# CPU op props.
dtype
=
op
.
_props_dict
()
.
get
(
'dtype'
)
dtype
=
op
.
_props_dict
()
.
get
(
'dtype'
)
return
gpu_alloc_empty
(
dtype
,
context_nam
e
)(
*
inputs
)
return
gpu_alloc_empty
(
context_name
,
dtype
=
dtyp
e
)(
*
inputs
)
@register_opt
()
@register_opt
()
...
@@ -921,17 +917,14 @@ def local_gpua_subtensor(op, context_name, inputs, outputs):
...
@@ -921,17 +917,14 @@ def local_gpua_subtensor(op, context_name, inputs, outputs):
isinstance
(
gpu_x
.
owner
.
op
,
GpuFromHost
)
and
isinstance
(
gpu_x
.
owner
.
op
,
GpuFromHost
)
and
# And it is a shared var or an input of the graph.
# And it is a shared var or an input of the graph.
not
gpu_x
.
owner
.
inputs
[
0
]
.
owner
):
not
gpu_x
.
owner
.
inputs
[
0
]
.
owner
):
if
len
(
x
.
clients
)
==
1
and
len
(
outputs
[
0
]
.
clients
)
==
1
:
if
len
(
x
.
clients
)
==
1
:
return
if
any
([
n
==
'output'
or
any
([
isinstance
(
v
.
type
,
GpuArrayType
)
# Here is the condition for the GraphToGPU opt. inputs is the
for
v
in
n
.
inputs
+
n
.
outputs
])
# inputs we want to use for the new node
for
n
,
_
in
outputs
[
0
]
.
clients
]):
if
(
x
.
owner
and
isinstance
(
x
.
owner
.
op
,
GpuFromHost
)):
return
cpu_x
=
x
.
owner
.
inputs
[
0
]
else
:
# And it is a shared var or an input of the graph.
return
[
host_from_gpu
(
gpu_x
.
owner
.
op
(
outputs
[
0
]))]
# and is used by only 1 node.
# x is in the new graph, so we can't tests its number of clients.
if
not
cpu_x
.
owner
and
len
(
cpu_x
.
clients
)
==
1
:
return
return
GpuSubtensor
(
op
.
idx_list
)
return
GpuSubtensor
(
op
.
idx_list
)
...
@@ -1146,8 +1139,8 @@ def local_gpua_hgemm(op, context_name, inputs, outputs):
...
@@ -1146,8 +1139,8 @@ def local_gpua_hgemm(op, context_name, inputs, outputs):
B
=
inputs
[
1
]
B
=
inputs
[
1
]
if
(
A
.
ndim
==
2
and
B
.
ndim
==
2
and
if
(
A
.
ndim
==
2
and
B
.
ndim
==
2
and
A
.
dtype
==
'float16'
and
B
.
dtype
==
'float16'
):
A
.
dtype
==
'float16'
and
B
.
dtype
==
'float16'
):
fgraph
=
in
puts
[
0
]
.
fgraph
fgraph
=
out
puts
[
0
]
.
fgraph
C
=
gpu_alloc_empty
(
'float16'
,
context_name
)(
C
=
gpu_alloc_empty
(
context_name
,
dtype
=
'float16'
)(
shape_i
(
A
,
0
,
fgraph
),
shape_i
(
A
,
0
,
fgraph
),
shape_i
(
B
,
1
,
fgraph
))
shape_i
(
B
,
1
,
fgraph
))
return
gpugemm_no_inplace
(
C
,
1.0
,
A
,
B
,
0.0
)
return
gpugemm_no_inplace
(
C
,
1.0
,
A
,
B
,
0.0
)
...
@@ -1198,7 +1191,7 @@ def local_gpua_dot22scalar(op, context_name, inputs, outputs):
...
@@ -1198,7 +1191,7 @@ def local_gpua_dot22scalar(op, context_name, inputs, outputs):
x
,
y
,
a
=
inputs
x
,
y
,
a
=
inputs
x
=
as_gpuarray_variable
(
x
,
context_name
)
x
=
as_gpuarray_variable
(
x
,
context_name
)
y
=
as_gpuarray_variable
(
y
,
context_name
)
y
=
as_gpuarray_variable
(
y
,
context_name
)
z
=
gpu_alloc_empty
(
x
.
dtype
,
context_nam
e
)(
x
.
shape
[
0
],
y
.
shape
[
1
])
z
=
gpu_alloc_empty
(
context_name
,
dtype
=
x
.
dtyp
e
)(
x
.
shape
[
0
],
y
.
shape
[
1
])
return
[
gpugemm_no_inplace
(
z
,
a
,
x
,
y
,
0
)]
return
[
gpugemm_no_inplace
(
z
,
a
,
x
,
y
,
0
)]
...
@@ -1298,7 +1291,7 @@ def local_inplace_sparseblockouter(node):
...
@@ -1298,7 +1291,7 @@ def local_inplace_sparseblockouter(node):
# This deals with any abstract convs that have a transfer somewhere
# This deals with any abstract convs that have a transfer somewhere
@register_opt
(
'fast_compile'
,
'conv_dnn'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
AbstractConv2d
,
@op_lifter
([
AbstractConv2d
,
AbstractConv2d_gradWeights
,
AbstractConv2d_gradWeights
,
AbstractConv2d_gradInputs
])
AbstractConv2d_gradInputs
])
...
...
theano/gpuarray/opt_util.py
浏览文件 @
6701568f
...
@@ -324,8 +324,7 @@ def inplace_allocempty(op, idx):
...
@@ -324,8 +324,7 @@ def inplace_allocempty(op, idx):
if
(
alloc
.
owner
and
if
(
alloc
.
owner
and
isinstance
(
alloc
.
owner
.
op
,
GpuAllocEmpty
)
and
isinstance
(
alloc
.
owner
.
op
,
GpuAllocEmpty
)
and
len
(
alloc
.
clients
)
>
1
):
len
(
alloc
.
clients
)
>
1
):
alloc_op
=
gpu_alloc_empty
(
alloc
.
owner
.
op
.
dtype
,
alloc_op
=
gpu_alloc_empty
(
alloc
.
owner
.
op
.
context_name
,
dtype
=
alloc
.
owner
.
op
.
dtype
)
alloc
.
owner
.
op
.
context_name
)
inputs
[
idx
]
=
alloc_op
(
*
alloc
.
owner
.
inputs
)
inputs
[
idx
]
=
alloc_op
(
*
alloc
.
owner
.
inputs
)
return
maker
(
node
,
inputs
)
return
maker
(
node
,
inputs
)
return
opt
return
opt
...
...
theano/sandbox/rng_mrg.py
浏览文件 @
6701568f
...
@@ -24,7 +24,7 @@ from . import multinomial
...
@@ -24,7 +24,7 @@ from . import multinomial
import
theano.sandbox.cuda
import
theano.sandbox.cuda
from
theano.sandbox.cuda
import
GpuOp
from
theano.sandbox.cuda
import
GpuOp
from
theano.gpuarray.basic_ops
import
GpuKernelBase
,
Kernel
from
theano.gpuarray.basic_ops
import
GpuKernelBase
,
Kernel
,
infer_context_name
from
theano.gpuarray.type
import
GpuArrayType
from
theano.gpuarray.type
import
GpuArrayType
from
theano.gpuarray.fp16_help
import
write_w
from
theano.gpuarray.fp16_help
import
write_w
from
theano.gpuarray.opt
import
(
register_opt
as
register_gpua
,
from
theano.gpuarray.opt
import
(
register_opt
as
register_gpua
,
...
@@ -1567,13 +1567,8 @@ def local_gpua_mrg1(op, context_name, inputs, outputs):
...
@@ -1567,13 +1567,8 @@ def local_gpua_mrg1(op, context_name, inputs, outputs):
@local_optimizer
([
mrg_uniform
])
@local_optimizer
([
mrg_uniform
])
def
local_gpua_mrg
(
node
):
def
local_gpua_mrg
(
node
):
# TODO : need description for function
# TODO : need description for function
if
(
type
(
node
.
op
)
==
mrg_uniform
and
context_name
=
infer_context_name
(
*
node
.
inputs
)
isinstance
(
node
.
inputs
[
0
]
.
type
,
GpuArrayType
)):
return
local_gpua_mrg1
(
node
.
op
,
context_name
,
node
.
inputs
,
node
.
outputs
)
outs
=
GPUA_mrg_uniform
.
new
(
node
.
inputs
[
0
],
node
.
op
.
output_type
.
ndim
,
node
.
op
.
output_type
.
dtype
,
node
.
inputs
[
1
])
return
[
outs
[
0
],
host_from_gpua
(
outs
[
1
])]
MRG_RNGs
=
(
mrg_uniform
,
GPU_mrg_uniform
,
GPUA_mrg_uniform
)
MRG_RNGs
=
(
mrg_uniform
,
GPU_mrg_uniform
,
GPUA_mrg_uniform
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论