Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
a24fd9bb
提交
a24fd9bb
authored
7月 12, 2016
作者:
Frédéric Bastien
提交者:
GitHub
7月 12, 2016
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4570 from Sentient07/new_graph2gpu
New graph2gpu
上级
2b371c6f
195f9b1d
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
22 个修改的文件
包含
193 行增加
和
78 行删除
+193
-78
ops.py
theano/compile/ops.py
+16
-0
profiling.py
theano/compile/profiling.py
+1
-1
opt.py
theano/gof/opt.py
+1
-1
basic_ops.py
theano/gpuarray/basic_ops.py
+28
-2
dnn.py
theano/gpuarray/dnn.py
+0
-0
elemwise.py
theano/gpuarray/elemwise.py
+12
-0
extra_ops.py
theano/gpuarray/extra_ops.py
+16
-15
fft.py
theano/gpuarray/fft.py
+5
-3
multinomial.py
theano/gpuarray/multinomial.py
+9
-8
neighbours.py
theano/gpuarray/neighbours.py
+6
-6
nerv.py
theano/gpuarray/nerv.py
+8
-7
opt.py
theano/gpuarray/opt.py
+0
-0
opt_util.py
theano/gpuarray/opt_util.py
+2
-3
test_abstractconv.py
theano/gpuarray/tests/test_abstractconv.py
+2
-0
test_basic_ops.py
theano/gpuarray/tests/test_basic_ops.py
+1
-1
test_opt.py
theano/gpuarray/tests/test_opt.py
+31
-7
type.py
theano/gpuarray/type.py
+2
-2
rng_mrg.py
theano/sandbox/rng_mrg.py
+15
-9
scan_utils.py
theano/scan_module/scan_utils.py
+7
-5
basic.py
theano/tensor/basic.py
+16
-7
abstract_conv.py
theano/tensor/nnet/abstract_conv.py
+12
-1
test_basic.py
theano/tensor/tests/test_basic.py
+3
-0
没有找到文件。
theano/compile/ops.py
浏览文件 @
a24fd9bb
...
@@ -402,6 +402,14 @@ class Shape_i(gof.Op):
...
@@ -402,6 +402,14 @@ class Shape_i(gof.Op):
def
infer_shape
(
self
,
node
,
input_shapes
):
def
infer_shape
(
self
,
node
,
input_shapes
):
return
[()]
return
[()]
def connection_pattern(self, node):
    """
    Report the single input as disconnected from the single output.

    The grad returns the gradient with respect to the elements of a
    tensor variable. Those elements do not participate in the
    computation of the shape, so they are not really part of the graph.

    """
    disconnected_input = [False]
    return [disconnected_input]
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
return
[
theano
.
gradient
.
grad_not_implemented
(
return
[
theano
.
gradient
.
grad_not_implemented
(
op
=
self
,
x_pos
=
0
,
x
=
inp
[
0
],
op
=
self
,
x_pos
=
0
,
x
=
inp
[
0
],
...
@@ -455,6 +463,14 @@ def shape_i(var, i, fgraph=None):
...
@@ -455,6 +463,14 @@ def shape_i(var, i, fgraph=None):
return
var
.
shape
[
i
]
return
var
.
shape
[
i
]
def shape_i_op(i):
    """
    Return the `Shape_i` op for index `i`, building it only once.

    Ops are memoized in the ``shape_i_op.cache`` dict, keyed by `i`, so
    repeated requests for the same index give back the same op instance.

    """
    memo = shape_i_op.cache
    if i in memo:
        return memo[i]
    op = Shape_i(i)
    memo[i] = op
    return op
shape_i_op.cache = {}
def
register_shape_i_c_code
(
typ
,
code
,
check_input
,
version
=
()):
def
register_shape_i_c_code
(
typ
,
code
,
check_input
,
version
=
()):
"""
"""
Tell Shape_i how to generate C code for a Theano Type.
Tell Shape_i how to generate C code for a Theano Type.
...
...
theano/compile/profiling.py
浏览文件 @
a24fd9bb
...
@@ -54,7 +54,7 @@ def _atexit_print_fn():
...
@@ -54,7 +54,7 @@ def _atexit_print_fn():
destination_file
=
open
(
config
.
profiling
.
destination
,
'w'
)
destination_file
=
open
(
config
.
profiling
.
destination
,
'w'
)
for
ps
in
_atexit_print_list
:
for
ps
in
_atexit_print_list
:
if
ps
.
fct_callcount
or
ps
.
compile_time
>
0
:
if
ps
.
fct_callcount
>=
1
or
ps
.
compile_time
>
1
:
ps
.
summary
(
file
=
destination_file
,
ps
.
summary
(
file
=
destination_file
,
n_ops_to_print
=
config
.
profiling
.
n_ops
,
n_ops_to_print
=
config
.
profiling
.
n_ops
,
n_apply_to_print
=
config
.
profiling
.
n_apply
)
n_apply_to_print
=
config
.
profiling
.
n_apply
)
...
...
theano/gof/opt.py
浏览文件 @
a24fd9bb
...
@@ -2413,7 +2413,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2413,7 +2413,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
for
(
t
,
count
,
n_created
,
o
)
in
count_opt
[::
-
1
]:
for
(
t
,
count
,
n_created
,
o
)
in
count_opt
[::
-
1
]:
print
(
blanc
,
'
%.3
fs -
%
d -
%
d -
%
s'
%
(
print
(
blanc
,
'
%.3
fs -
%
d -
%
d -
%
s'
%
(
t
,
count
,
n_created
,
o
),
file
=
stream
)
t
,
count
,
n_created
,
o
),
file
=
stream
)
print
(
blanc
,
'
%.3
fs - in
%
d optimization that w
h
ere not used (display only those with a runtime > 0)'
%
(
print
(
blanc
,
'
%.3
fs - in
%
d optimization that were not used (display only those with a runtime > 0)'
%
(
not_used_time
,
len
(
not_used
)),
file
=
stream
)
not_used_time
,
len
(
not_used
)),
file
=
stream
)
not_used
.
sort
(
key
=
lambda
nu
:
(
nu
[
0
],
str
(
nu
[
1
])))
not_used
.
sort
(
key
=
lambda
nu
:
(
nu
[
0
],
str
(
nu
[
1
])))
for
(
t
,
o
)
in
not_used
[::
-
1
]:
for
(
t
,
o
)
in
not_used
[::
-
1
]:
...
...
theano/gpuarray/basic_ops.py
浏览文件 @
a24fd9bb
...
@@ -70,7 +70,7 @@ def as_gpuarray_variable(x, context_name):
...
@@ -70,7 +70,7 @@ def as_gpuarray_variable(x, context_name):
# If we couldn't deal with transfers, then maybe it's a tensor
# If we couldn't deal with transfers, then maybe it's a tensor
if
isinstance
(
x
.
type
,
tensor
.
TensorType
):
if
isinstance
(
x
.
type
,
tensor
.
TensorType
):
return
GpuFromH
ost
(
context_name
)(
x
)
return
gpu_from_h
ost
(
context_name
)(
x
)
# Try _as_GpuArrayVariable if possible
# Try _as_GpuArrayVariable if possible
if
hasattr
(
x
,
'_as_GpuArrayVariable'
):
if
hasattr
(
x
,
'_as_GpuArrayVariable'
):
...
@@ -544,7 +544,7 @@ class HostFromGpu(Op):
...
@@ -544,7 +544,7 @@ class HostFromGpu(Op):
def
grad
(
self
,
inputs
,
grads
):
def
grad
(
self
,
inputs
,
grads
):
gz
,
=
grads
gz
,
=
grads
return
[
GpuFromH
ost
(
inputs
[
0
]
.
type
.
context_name
)(
gz
)]
return
[
gpu_from_h
ost
(
inputs
[
0
]
.
type
.
context_name
)(
gz
)]
def
R_op
(
self
,
inputs
,
eval_points
):
def
R_op
(
self
,
inputs
,
eval_points
):
ev
,
=
eval_points
ev
,
=
eval_points
...
@@ -647,6 +647,14 @@ class GpuFromHost(Op):
...
@@ -647,6 +647,14 @@ class GpuFromHost(Op):
return
(
9
,)
return
(
9
,)
# Caching GpuFromHost
def gpu_from_host(ctx):
    """
    Return the `GpuFromHost` op for context `ctx`, building it only once.

    Ops are memoized in the ``gpu_from_host.cache`` dict, keyed by the
    context name, so repeated requests for the same context give back
    the same op instance.

    """
    # The lookup must use this function's own cache. Testing membership
    # in ``gpu_alloc.cache`` (keyed by ``(ctx, memset_0)`` tuples) never
    # matches a plain context name, so a new op was built on every call.
    if ctx not in gpu_from_host.cache:
        gpu_from_host.cache[ctx] = GpuFromHost(ctx)
    return gpu_from_host.cache[ctx]
gpu_from_host.cache = {}
class
GpuToGpu
(
Op
):
class
GpuToGpu
(
Op
):
"""
"""
Transfer data between GPUs.
Transfer data between GPUs.
...
@@ -870,6 +878,15 @@ class GpuAlloc(HideC, Alloc):
...
@@ -870,6 +878,15 @@ class GpuAlloc(HideC, Alloc):
return
True
return
True
# Caching GpuAlloc
def gpu_alloc(ctx, memset_0=False):
    """
    Return the `GpuAlloc` op for ``(ctx, memset_0)``, building it only once.

    Ops are memoized in the ``gpu_alloc.cache`` dict, keyed by the
    ``(ctx, memset_0)`` pair.

    """
    memo = gpu_alloc.cache
    signature = (ctx, memset_0)
    if signature in memo:
        return memo[signature]
    op = GpuAlloc(ctx, memset_0)
    memo[signature] = op
    return op
gpu_alloc.cache = {}
class
GpuAllocEmpty
(
HideC
,
Alloc
):
class
GpuAllocEmpty
(
HideC
,
Alloc
):
"""
"""
Allocate uninitialized memory on the GPU.
Allocate uninitialized memory on the GPU.
...
@@ -956,6 +973,14 @@ def empty_like(var):
...
@@ -956,6 +973,14 @@ def empty_like(var):
return
GpuAllocEmpty
(
var
.
type
.
dtype
,
var
.
type
.
context_name
)(
*
var
.
shape
)
return
GpuAllocEmpty
(
var
.
type
.
dtype
,
var
.
type
.
context_name
)(
*
var
.
shape
)
def gpu_alloc_empty(ctx, dtype):
    """
    Return the `GpuAllocEmpty` op for ``(dtype, ctx)``, building it only once.

    Note that the arguments here are ``(ctx, dtype)`` while the
    `GpuAllocEmpty` constructor is called with ``(dtype, ctx)``. Ops are
    memoized in the ``gpu_alloc_empty.cache`` dict, keyed by ``(dtype, ctx)``.

    """
    memo = gpu_alloc_empty.cache
    signature = (dtype, ctx)
    if signature in memo:
        return memo[signature]
    op = GpuAllocEmpty(dtype, ctx)
    memo[signature] = op
    return op
gpu_alloc_empty.cache = {}
class
GpuContiguous
(
Op
):
class
GpuContiguous
(
Op
):
"""
"""
Return a C contiguous version of the input.
Return a C contiguous version of the input.
...
@@ -1031,6 +1056,7 @@ class GpuReshape(HideC, tensor.Reshape):
...
@@ -1031,6 +1056,7 @@ class GpuReshape(HideC, tensor.Reshape):
def
make_node
(
self
,
x
,
shp
):
def
make_node
(
self
,
x
,
shp
):
ctx_name
=
infer_context_name
(
x
)
ctx_name
=
infer_context_name
(
x
)
x
=
as_gpuarray_variable
(
x
,
context_name
=
ctx_name
)
x
=
as_gpuarray_variable
(
x
,
context_name
=
ctx_name
)
shp
=
tensor
.
as_tensor_variable
(
shp
)
res
=
host_from_gpu
(
x
)
.
reshape
(
shp
,
ndim
=
self
.
ndim
)
res
=
host_from_gpu
(
x
)
.
reshape
(
shp
,
ndim
=
self
.
ndim
)
otype
=
GpuArrayType
(
dtype
=
res
.
dtype
,
otype
=
GpuArrayType
(
dtype
=
res
.
dtype
,
broadcastable
=
res
.
broadcastable
,
broadcastable
=
res
.
broadcastable
,
...
...
theano/gpuarray/dnn.py
浏览文件 @
a24fd9bb
差异被折叠。
点击展开。
theano/gpuarray/elemwise.py
浏览文件 @
a24fd9bb
...
@@ -2587,6 +2587,18 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
...
@@ -2587,6 +2587,18 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
return
kernels
return
kernels
# Caching GpuCAReduceCuda
def gpu_ca_reduce_cuda(scalar_op, axis=None, reduce_mask=None, dtype=None,
                       acc_dtype=None, pre_scalar_op=None):
    """
    Return the `GpuCAReduceCuda` op for these arguments, building it only once.

    Ops are memoized in the ``gpu_ca_reduce_cuda.cache`` dict, keyed by
    the tuple of all six arguments, so every argument must be hashable.

    """
    params = (scalar_op, axis, reduce_mask, dtype, acc_dtype, pre_scalar_op)
    memo = gpu_ca_reduce_cuda.cache
    if params in memo:
        return memo[params]
    op = GpuCAReduceCuda(scalar_op, axis, reduce_mask, dtype,
                         acc_dtype, pre_scalar_op)
    memo[params] = op
    return op
gpu_ca_reduce_cuda.cache = {}
class
GpuCAReduceCPY
(
GpuKernelBase
,
HideC
,
CAReduceDtype
):
class
GpuCAReduceCPY
(
GpuKernelBase
,
HideC
,
CAReduceDtype
):
"""
"""
CAReduce that reuse the python code from gpuarray.
CAReduce that reuse the python code from gpuarray.
...
...
theano/gpuarray/extra_ops.py
浏览文件 @
a24fd9bb
...
@@ -2,15 +2,14 @@ from __future__ import absolute_import, print_function, division
...
@@ -2,15 +2,14 @@ from __future__ import absolute_import, print_function, division
import
os
import
os
from
theano
import
Apply
,
Op
from
theano
import
Apply
,
Op
from
theano.tensor.extra_ops
import
CumsumOp
from
theano.tensor.extra_ops
import
CumsumOp
from
.basic_ops
import
infer_context_name
try
:
try
:
from
pygpu
import
gpuarray
from
pygpu
import
gpuarray
except
ImportError
:
except
ImportError
:
pass
pass
from
.basic_ops
import
(
as_gpuarray_variable
,
GpuKernelBase
,
Kernel
,
from
.basic_ops
import
(
as_gpuarray_variable
,
GpuKernelBase
,
Kernel
,
GpuReshape
)
infer_context_name
,
GpuFromHost
)
from
.opt
import
register_opt
,
op_lifter
,
register_opt2
from
.opt
import
register_opt
as
register_gpu_opt
,
op_lifter
class
GpuCumsum
(
GpuKernelBase
,
Op
):
class
GpuCumsum
(
GpuKernelBase
,
Op
):
...
@@ -40,7 +39,10 @@ class GpuCumsum(GpuKernelBase, Op):
...
@@ -40,7 +39,10 @@ class GpuCumsum(GpuKernelBase, Op):
def
make_node
(
self
,
x
):
def
make_node
(
self
,
x
):
assert
x
.
type
.
dtype
==
'float32'
,
"Only float32 supported for GpuCumSum"
assert
x
.
type
.
dtype
==
'float32'
,
"Only float32 supported for GpuCumSum"
x
=
as_gpuarray_variable
(
x
,
infer_context_name
(
x
))
context_name
=
infer_context_name
(
x
)
x
=
as_gpuarray_variable
(
x
,
context_name
)
if
x
.
ndim
>
GpuCumsum
.
SUPPORTED_NDIMS
:
if
x
.
ndim
>
GpuCumsum
.
SUPPORTED_NDIMS
:
raise
NotImplementedError
(
'Only cumsum on 1D, 2D and
\
raise
NotImplementedError
(
'Only cumsum on 1D, 2D and
\
...
@@ -451,24 +453,23 @@ class GpuCumsum(GpuKernelBase, Op):
...
@@ -451,24 +453,23 @@ class GpuCumsum(GpuKernelBase, Op):
return
super
(
GpuCumsum
,
self
)
.
c_support_code_struct
(
node
,
nodename
)
+
code
return
super
(
GpuCumsum
,
self
)
.
c_support_code_struct
(
node
,
nodename
)
+
code
@register_opt
(
'fast_compile'
)
@op_lifter
([
CumsumOp
])
@op_lifter
([
CumsumOp
])
def
use_gpu_cumsumop
(
node
,
ctx_name
):
@register_opt2
([
CumsumOp
],
'fast_compile'
)
if
node
.
inputs
[
0
]
.
dtype
==
'float32'
:
def
local_gpua_cumsumop
(
op
,
ctx_name
,
inputs
,
outputs
)
:
axis
=
node
.
op
.
axis
if
inputs
[
0
]
.
dtype
==
'float32'
:
x
=
node
.
inputs
[
0
]
axis
=
op
.
axis
x
=
inputs
[
0
]
if
axis
is
not
None
and
x
.
ndim
>
GpuCumsum
.
SUPPORTED_NDIMS
:
if
axis
is
not
None
and
x
.
ndim
>
GpuCumsum
.
SUPPORTED_NDIMS
:
return
None
return
None
if
axis
is
None
and
x
.
ndim
>
1
:
x
=
as_gpuarray_variable
(
x
,
ctx_name
)
x
=
x
.
flatten
()
x
=
GpuFromHost
(
ctx_name
)(
x
)
if
axis
is
None
and
x
.
ndim
>
1
:
x
=
GpuReshape
(
1
)(
x
,
(
-
1
,))
# ``gpu_cumsum`` assume array has been flattened if needed.
# ``gpu_cumsum`` assume array has been flattened if needed.
if
axis
is
None
:
if
axis
is
None
:
axis
=
0
axis
=
0
return
GpuCumsum
(
axis
)(
x
)
return
GpuCumsum
(
axis
)(
x
)
register_gpu_opt
()(
use_gpu_cumsumop
)
theano/gpuarray/fft.py
浏览文件 @
a24fd9bb
...
@@ -9,7 +9,7 @@ from theano.gradient import DisconnectedType
...
@@ -9,7 +9,7 @@ from theano.gradient import DisconnectedType
from
theano.gpuarray
import
(
basic_ops
,
GpuArrayType
)
from
theano.gpuarray
import
(
basic_ops
,
GpuArrayType
)
import
theano.tensor.fft
import
theano.tensor.fft
from
.opt
import
register_opt
,
op_lifter
from
.opt
import
register_opt
,
op_lifter
,
register_opt2
try
:
try
:
import
pygpu
import
pygpu
...
@@ -373,10 +373,12 @@ def _unitary(norm):
...
@@ -373,10 +373,12 @@ def _unitary(norm):
if
scikits_cuda_available
:
if
scikits_cuda_available
:
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
theano
.
tensor
.
fft
.
RFFTOp
])
@op_lifter
([
theano
.
tensor
.
fft
.
RFFTOp
])
def
local_curfft_op
(
node
,
context_name
):
@register_opt2
([
theano
.
tensor
.
fft
.
RFFTOp
],
'fast_compile'
)
def
local_gpua_curfft_op
(
op
,
ctx_name
,
inputs
,
outputs
):
return
curfft_op
return
curfft_op
@register_opt
(
'fast_compile'
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
theano
.
tensor
.
fft
.
IRFFTOp
])
@op_lifter
([
theano
.
tensor
.
fft
.
IRFFTOp
])
def
local_cuirfft_op
(
node
,
context_name
):
@register_opt2
([
theano
.
tensor
.
fft
.
IRFFTOp
],
'fast_compile'
)
def
local_gpua_cuirfft_op
(
op
,
ctx_name
,
inputs
,
outputs
):
return
cuirfft_op
return
cuirfft_op
theano/gpuarray/multinomial.py
浏览文件 @
a24fd9bb
...
@@ -14,7 +14,7 @@ from theano.gof import Op
...
@@ -14,7 +14,7 @@ from theano.gof import Op
from
theano.tensor
import
NotScalarConstantError
,
get_scalar_constant_value
from
theano.tensor
import
NotScalarConstantError
,
get_scalar_constant_value
from
theano
import
gpuarray
from
theano
import
gpuarray
from
.basic_ops
import
as_gpuarray_variable
,
infer_context_name
from
.basic_ops
import
as_gpuarray_variable
,
infer_context_name
from
.opt
import
register_opt
,
op_lifter
from
.opt
import
register_opt
,
op_lifter
,
register_opt2
from
.type
import
GpuArrayType
from
.type
import
GpuArrayType
...
@@ -227,23 +227,24 @@ KERNEL void k_multi_warp_multinomial(
...
@@ -227,23 +227,24 @@ KERNEL void k_multi_warp_multinomial(
return
(
1
,)
return
(
1
,)
@register_opt
()
@register_opt
(
'fast_compile'
)
@op_lifter
([
theano
.
sandbox
.
multinomial
.
MultinomialFromUniform
])
@op_lifter
([
theano
.
sandbox
.
multinomial
.
MultinomialFromUniform
])
def
local_gpua_multinomial
(
node
,
context_name
):
@register_opt2
([
theano
.
sandbox
.
multinomial
.
MultinomialFromUniform
],
'fast_compile'
)
def
local_gpua_multinomial
(
op
,
context_name
,
inputs
,
outputs
):
# TODO : need description for function
# TODO : need description for function
if
len
(
node
.
inputs
)
==
2
:
if
len
(
inputs
)
==
2
:
p
,
u
=
node
.
inputs
p
,
u
=
inputs
n_samples
=
1
n_samples
=
1
else
:
else
:
p
,
u
,
n_samples
=
node
.
inputs
p
,
u
,
n_samples
=
inputs
try
:
try
:
if
get_scalar_constant_value
(
n_samples
)
!=
1
:
if
get_scalar_constant_value
(
n_samples
)
!=
1
:
return
None
return
None
except
NotScalarConstantError
:
except
NotScalarConstantError
:
return
None
return
None
m
,
=
node
.
outputs
m
,
=
outputs
if
(
p
.
dtype
==
u
.
dtype
==
m
.
dtype
==
'float32'
):
if
(
p
.
dtype
==
u
.
dtype
==
m
.
dtype
==
'float32'
):
gpu_op
=
GPUAMultinomialFromUniform
(
node
.
op
.
odtype
)
gpu_op
=
GPUAMultinomialFromUniform
(
op
.
odtype
)
return
gpuarray
.
elemwise
.
GpuDimShuffle
([
False
,
False
],
[
1
,
0
])(
return
gpuarray
.
elemwise
.
GpuDimShuffle
([
False
,
False
],
[
1
,
0
])(
gpu_op
(
p
,
u
))
gpu_op
(
p
,
u
))
theano/gpuarray/neighbours.py
浏览文件 @
a24fd9bb
...
@@ -13,7 +13,7 @@ except ImportError:
...
@@ -13,7 +13,7 @@ except ImportError:
from
.basic_ops
import
(
as_gpuarray_variable
,
GpuKernelBase
,
Kernel
,
from
.basic_ops
import
(
as_gpuarray_variable
,
GpuKernelBase
,
Kernel
,
infer_context_name
)
infer_context_name
)
from
.opt
import
register_opt
as
register_gpu_opt
,
op_lifter
from
.opt
import
register_opt
2
,
op_lifter
,
register_opt
from
.type
import
GpuArrayType
from
.type
import
GpuArrayType
...
@@ -468,9 +468,9 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
...
@@ -468,9 +468,9 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
Op
.
perform
(
self
,
node
,
inp
,
out
,
ctx
)
Op
.
perform
(
self
,
node
,
inp
,
out
,
ctx
)
@register_opt
(
'fast_compile'
)
@op_lifter
([
Images2Neibs
])
@op_lifter
([
Images2Neibs
])
def
use_gpu_images2neibs
(
node
,
context_name
):
@register_opt2
([
Images2Neibs
],
'fast_compile'
)
if
node
.
op
.
mode
in
[
'valid'
,
'ignore_borders'
,
'wrap_centered'
]:
def
local_gpua_images2neibs
(
op
,
context_name
,
inputs
,
outputs
):
return
GpuImages2Neibs
(
node
.
op
.
mode
)
if
op
.
mode
in
[
'valid'
,
'ignore_borders'
,
'wrap_centered'
]:
return
GpuImages2Neibs
(
op
.
mode
)
register_gpu_opt
()(
use_gpu_images2neibs
)
theano/gpuarray/nerv.py
浏览文件 @
a24fd9bb
...
@@ -10,7 +10,7 @@ from theano.scalar import as_scalar, constant
...
@@ -10,7 +10,7 @@ from theano.scalar import as_scalar, constant
from
.
import
opt
from
.
import
opt
from
.basic_ops
import
(
as_gpuarray_variable
,
GpuAllocEmpty
,
from
.basic_ops
import
(
as_gpuarray_variable
,
GpuAllocEmpty
,
infer_context_name
)
infer_context_name
,
gpu_alloc_empty
)
from
.type
import
gpu_context_type
from
.type
import
gpu_context_type
from
.opt_util
import
alpha_merge
,
output_merge
from
.opt_util
import
alpha_merge
,
output_merge
...
@@ -147,17 +147,18 @@ if (GpuKernel_init(&k_%(name)s, c->ctx, 1, &bcode, &sz,
...
@@ -147,17 +147,18 @@ if (GpuKernel_init(&k_%(name)s, c->ctx, 1, &bcode, &sz,
return
'
\n
'
.
join
(
codel
)
return
'
\n
'
.
join
(
codel
)
@opt.register_opt
()
@opt.register_opt
(
'fast_compile'
)
@opt.op_lifter
([
tensor
.
Dot
])
@opt.op_lifter
([
tensor
.
Dot
])
def
local_dot_to_gemm16
(
node
,
ctx_name
):
@opt.register_opt2
([
tensor
.
Dot
],
'fast_compile'
)
def
local_gpua_dot_to_gemm16
(
op
,
ctx_name
,
inputs
,
outputs
):
if
nerv
is
None
:
if
nerv
is
None
:
return
return
A
=
node
.
inputs
[
0
]
A
=
inputs
[
0
]
B
=
node
.
inputs
[
1
]
B
=
inputs
[
1
]
if
(
A
.
ndim
==
2
and
B
.
ndim
==
2
and
if
(
A
.
ndim
==
2
and
B
.
ndim
==
2
and
A
.
dtype
==
'float16'
and
B
.
dtype
==
'float16'
):
A
.
dtype
==
'float16'
and
B
.
dtype
==
'float16'
):
fgraph
=
node
.
inputs
[
0
]
.
fgraph
fgraph
=
getattr
(
outputs
[
0
],
'fgraph'
,
None
)
C
=
GpuAllocEmpty
(
dtype
=
'float16'
,
context_name
=
ctx_name
)(
C
=
gpu_alloc_empty
(
ctx_name
,
dtype
=
'float16'
)(
shape_i
(
A
,
0
,
fgraph
),
shape_i
(
B
,
1
,
fgraph
))
shape_i
(
A
,
0
,
fgraph
),
shape_i
(
B
,
1
,
fgraph
))
return
Gemm16
()(
C
,
1.0
,
A
,
B
,
0.0
)
return
Gemm16
()(
C
,
1.0
,
A
,
B
,
0.0
)
...
...
theano/gpuarray/opt.py
浏览文件 @
a24fd9bb
差异被折叠。
点击展开。
theano/gpuarray/opt_util.py
浏览文件 @
a24fd9bb
...
@@ -8,7 +8,7 @@ from theano.gof import local_optimizer
...
@@ -8,7 +8,7 @@ from theano.gof import local_optimizer
from
theano.tensor
import
(
DimShuffle
,
get_scalar_constant_value
,
from
theano.tensor
import
(
DimShuffle
,
get_scalar_constant_value
,
NotScalarConstantError
)
NotScalarConstantError
)
from
.basic_ops
import
GpuFromHost
,
HostFromGpu
,
GpuAllocEmpty
from
.basic_ops
import
GpuFromHost
,
HostFromGpu
,
GpuAllocEmpty
,
gpu_alloc_empty
from
.elemwise
import
GpuDimShuffle
,
GpuElemwise
from
.elemwise
import
GpuDimShuffle
,
GpuElemwise
_one
=
scal
.
constant
(
numpy
.
asarray
(
1.0
,
dtype
=
'float32'
))
_one
=
scal
.
constant
(
numpy
.
asarray
(
1.0
,
dtype
=
'float32'
))
...
@@ -324,8 +324,7 @@ def inplace_allocempty(op, idx):
...
@@ -324,8 +324,7 @@ def inplace_allocempty(op, idx):
if
(
alloc
.
owner
and
if
(
alloc
.
owner
and
isinstance
(
alloc
.
owner
.
op
,
GpuAllocEmpty
)
and
isinstance
(
alloc
.
owner
.
op
,
GpuAllocEmpty
)
and
len
(
alloc
.
clients
)
>
1
):
len
(
alloc
.
clients
)
>
1
):
alloc_op
=
GpuAllocEmpty
(
alloc
.
owner
.
op
.
dtype
,
alloc_op
=
gpu_alloc_empty
(
alloc
.
owner
.
op
.
context_name
,
dtype
=
alloc
.
owner
.
op
.
dtype
)
alloc
.
owner
.
op
.
context_name
)
inputs
[
idx
]
=
alloc_op
(
*
alloc
.
owner
.
inputs
)
inputs
[
idx
]
=
alloc_op
(
*
alloc
.
owner
.
inputs
)
return
maker
(
node
,
inputs
)
return
maker
(
node
,
inputs
)
return
opt
return
opt
...
...
theano/gpuarray/tests/test_abstractconv.py
浏览文件 @
a24fd9bb
...
@@ -26,9 +26,11 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
...
@@ -26,9 +26,11 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
if
not
dnn_available
(
test_ctx_name
):
if
not
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn_available
.
msg
)
raise
SkipTest
(
dnn_available
.
msg
)
mode
=
mode_with_gpu
mode
=
mode_with_gpu
if
fd
!=
(
1
,
1
):
if
fd
!=
(
1
,
1
):
raise
SkipTest
(
"Doesn't have CUDNN implementation"
)
raise
SkipTest
(
"Doesn't have CUDNN implementation"
)
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
,
fd
)
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
,
fd
)
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
subsample
=
s
,
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
...
...
theano/gpuarray/tests/test_basic_ops.py
浏览文件 @
a24fd9bb
...
@@ -396,7 +396,7 @@ def test_gpueye():
...
@@ -396,7 +396,7 @@ def test_gpueye():
k_symb
=
numpy
.
asarray
(
0
)
k_symb
=
numpy
.
asarray
(
0
)
out
=
T
.
eye
(
N_symb
,
M_symb
,
k_symb
,
dtype
=
dtype
)
out
=
T
.
eye
(
N_symb
,
M_symb
,
k_symb
,
dtype
=
dtype
)
f
=
theano
.
function
([
N_symb
,
M_symb
],
f
=
theano
.
function
([
N_symb
,
M_symb
],
out
,
T
.
stack
(
out
)
,
mode
=
mode_with_gpu
)
mode
=
mode_with_gpu
)
result
=
numpy
.
asarray
(
f
(
N
,
M
))
result
=
numpy
.
asarray
(
f
(
N
,
M
))
assert
numpy
.
allclose
(
result
,
numpy
.
eye
(
N
,
M_
,
dtype
=
dtype
))
assert
numpy
.
allclose
(
result
,
numpy
.
eye
(
N
,
M_
,
dtype
=
dtype
))
...
...
theano/gpuarray/tests/test_opt.py
浏览文件 @
a24fd9bb
...
@@ -138,11 +138,21 @@ def test_local_gpualloc_memset_0():
...
@@ -138,11 +138,21 @@ def test_local_gpualloc_memset_0():
ones
=
numpy
.
ones
((
2
,),
dtype
=
'float32'
)
ones
=
numpy
.
ones
((
2
,),
dtype
=
'float32'
)
# Test with 0 from CPU op.
# Test with 0 from CPU op.
# Should not be transferred as the only client is the output
a
=
tensor
.
alloc
(
z
,
i
)
a
=
tensor
.
alloc
(
z
,
i
)
f
=
theano
.
function
([
i
],
a
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
i
],
a
,
mode
=
mode_with_gpu
)
topo
=
f
.
maker
.
fgraph
.
toposort
()
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
len
(
topo
)
==
2
assert
len
(
topo
)
==
1
assert
isinstance
(
topo
[
0
]
.
op
,
GpuAlloc
)
and
topo
[
0
]
.
op
.
memset_0
assert
isinstance
(
topo
[
0
]
.
op
,
theano
.
tensor
.
Alloc
)
assert
(
numpy
.
asarray
(
f
(
6
))
==
0
)
.
all
()
# Test with 0 from CPU op.
# Should be transferred as it is used by another op.
a
=
tensor
.
alloc
(
z
,
i
)
f
=
theano
.
function
([
i
],
a
.
cumsum
(),
mode
=
mode_with_gpu
)
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
len
(
topo
)
==
3
assert
isinstance
(
topo
[
0
]
.
op
,
GpuAlloc
)
assert
(
numpy
.
asarray
(
f
(
6
))
==
0
)
.
all
()
assert
(
numpy
.
asarray
(
f
(
6
))
==
0
)
.
all
()
# Test with 0
# Test with 0
...
@@ -177,19 +187,30 @@ def test_local_gpualloc_empty():
...
@@ -177,19 +187,30 @@ def test_local_gpualloc_empty():
ii
=
theano
.
tensor
.
iscalar
()
ii
=
theano
.
tensor
.
iscalar
()
# Test with vector
# Test with vector
# Should not be moved as the only client is the output
a
=
tensor
.
AllocEmpty
(
'float32'
)(
i
)
a
=
tensor
.
AllocEmpty
(
'float32'
)(
i
)
f
=
theano
.
function
([
i
],
a
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
i
],
a
,
mode
=
mode_with_gpu
)
topo
=
f
.
maker
.
fgraph
.
toposort
()
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
len
(
topo
)
==
2
assert
len
(
topo
)
==
1
assert
isinstance
(
topo
[
0
]
.
op
,
theano
.
tensor
.
AllocEmpty
)
# This returns uninitialized data, so we can only check the shape
assert
f
(
3
)
.
shape
==
(
3
,)
# Test with vector
# Should be moved
a
=
tensor
.
AllocEmpty
(
'float32'
)(
i
)
f
=
theano
.
function
([
i
],
a
.
cumsum
(),
mode
=
mode_with_gpu
)
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
len
(
topo
)
==
3
assert
isinstance
(
topo
[
0
]
.
op
,
GpuAllocEmpty
)
assert
isinstance
(
topo
[
0
]
.
op
,
GpuAllocEmpty
)
# This returns uninitialized data, so we can only check the shape
# This returns uninitialized data, so we can only check the shape
assert
f
(
3
)
.
shape
==
(
3
,)
assert
f
(
3
)
.
shape
==
(
3
,)
# Test with matrix
# Test with matrix
a
=
tensor
.
AllocEmpty
(
'float32'
)(
i
,
ii
)
a
=
tensor
.
AllocEmpty
(
'float32'
)(
i
,
ii
)
f
=
theano
.
function
([
i
,
ii
],
a
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
i
,
ii
],
a
.
cumsum
(
axis
=
0
)
,
mode
=
mode_with_gpu
)
topo
=
f
.
maker
.
fgraph
.
toposort
()
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
len
(
topo
)
==
2
assert
len
(
topo
)
==
3
assert
isinstance
(
topo
[
0
]
.
op
,
GpuAllocEmpty
)
assert
isinstance
(
topo
[
0
]
.
op
,
GpuAllocEmpty
)
# This returns uninitialized data, so we can only check the shape
# This returns uninitialized data, so we can only check the shape
assert
f
(
3
,
4
)
.
shape
==
(
3
,
4
)
assert
f
(
3
,
4
)
.
shape
==
(
3
,
4
)
...
@@ -334,7 +355,10 @@ def test_local_gpu_subtensor():
...
@@ -334,7 +355,10 @@ def test_local_gpu_subtensor():
topo
=
f
.
maker
.
fgraph
.
toposort
()
topo
=
f
.
maker
.
fgraph
.
toposort
()
assert
any
([
type
(
node
.
op
)
is
tensor
.
Subtensor
for
node
in
topo
])
assert
any
([
type
(
node
.
op
)
is
tensor
.
Subtensor
for
node
in
topo
])
assert
not
any
([
isinstance
(
node
.
op
,
GpuSubtensor
)
for
node
in
topo
])
assert
not
any
([
isinstance
(
node
.
op
,
GpuSubtensor
)
for
node
in
topo
])
assert
any
([
isinstance
(
node
.
op
,
GpuElemwise
)
for
node
in
topo
])
# Our optimizer isn't smart enough to move the Elemwise to the GPU.
# If it were just a little bit smarter, it could wrongly move it to the GPU.
# If it were super smart, it would know it should not move it to the GPU.
assert
any
([
isinstance
(
node
.
op
,
tensor
.
Elemwise
)
for
node
in
topo
])
def
test_local_gpu_elemwise
():
def
test_local_gpu_elemwise
():
...
@@ -427,7 +451,7 @@ def test_local_assert_no_cpu_op():
...
@@ -427,7 +451,7 @@ def test_local_assert_no_cpu_op():
out
=
theano
.
tensor
.
tanh
(
ms
)
.
dot
(
ms
.
T
)
out
=
theano
.
tensor
.
tanh
(
ms
)
.
dot
(
ms
.
T
)
mode_local_assert
=
mode_with_gpu
.
including
(
"assert_no_cpu_op"
)
mode_local_assert
=
mode_with_gpu
.
including
(
"assert_no_cpu_op"
)
mode_local_assert
=
mode_local_assert
.
excluding
(
"local_gpu_elemwise"
)
mode_local_assert
=
mode_local_assert
.
excluding
(
"local_gpu
a
_elemwise"
)
old
=
theano
.
config
.
assert_no_cpu_op
old
=
theano
.
config
.
assert_no_cpu_op
old2
=
theano
.
config
.
on_opt_error
old2
=
theano
.
config
.
on_opt_error
...
...
theano/gpuarray/type.py
浏览文件 @
a24fd9bb
...
@@ -233,7 +233,7 @@ class GpuArrayType(Type):
...
@@ -233,7 +233,7 @@ class GpuArrayType(Type):
return
data
return
data
def
filter_variable
(
self
,
other
,
allow_convert
=
True
):
def
filter_variable
(
self
,
other
,
allow_convert
=
True
):
from
theano.gpuarray
import
GpuFromH
ost
from
theano.gpuarray
.basic_ops
import
gpu_from_h
ost
if
hasattr
(
other
,
'_as_GpuArrayVariable'
):
if
hasattr
(
other
,
'_as_GpuArrayVariable'
):
other
=
other
.
_as_GpuArrayVariable
(
self
.
context_name
)
other
=
other
.
_as_GpuArrayVariable
(
self
.
context_name
)
...
@@ -265,7 +265,7 @@ class GpuArrayType(Type):
...
@@ -265,7 +265,7 @@ class GpuArrayType(Type):
str
(
self
.
broadcastable
)))
str
(
self
.
broadcastable
)))
other
=
other2
other
=
other2
return
GpuFromH
ost
(
self
.
context_name
)(
other
)
return
gpu_from_h
ost
(
self
.
context_name
)(
other
)
@staticmethod
@staticmethod
def
values_eq
(
a
,
b
,
force_same_dtype
=
True
):
def
values_eq
(
a
,
b
,
force_same_dtype
=
True
):
...
...
theano/sandbox/rng_mrg.py
浏览文件 @
a24fd9bb
...
@@ -24,10 +24,11 @@ from . import multinomial
...
@@ -24,10 +24,11 @@ from . import multinomial
import
theano.sandbox.cuda
import
theano.sandbox.cuda
from
theano.sandbox.cuda
import
GpuOp
from
theano.sandbox.cuda
import
GpuOp
from
theano.gpuarray.basic_ops
import
GpuKernelBase
,
Kernel
from
theano.gpuarray.basic_ops
import
GpuKernelBase
,
Kernel
,
infer_context_name
from
theano.gpuarray.type
import
GpuArrayType
from
theano.gpuarray.type
import
GpuArrayType
from
theano.gpuarray.fp16_help
import
write_w
from
theano.gpuarray.fp16_help
import
write_w
from
theano.gpuarray.opt
import
(
register_opt
as
register_gpua
,
from
theano.gpuarray.opt
import
(
register_opt
as
register_gpua
,
register_opt2
,
host_from_gpu
as
host_from_gpua
)
host_from_gpu
as
host_from_gpua
)
if
theano
.
sandbox
.
cuda
.
cuda_available
:
if
theano
.
sandbox
.
cuda
.
cuda_available
:
from
theano.sandbox.cuda
import
(
CudaNdarrayType
,
from
theano.sandbox.cuda
import
(
CudaNdarrayType
,
...
@@ -1551,17 +1552,22 @@ class MRG_RandomStreams(object):
...
@@ -1551,17 +1552,22 @@ class MRG_RandomStreams(object):
return
final_samples
return
final_samples
@register_opt2([mrg_uniform], 'fast_compile')
def local_gpua_mrg_graph(op, context_name, inputs, outputs):
    """
    Replace a `mrg_uniform` op whose first input is a GPU array.

    Applies only when `op` is exactly of type `mrg_uniform` and
    ``inputs[0]`` has a `GpuArrayType` type; otherwise returns None.
    On a match, builds the replacement with `GPUA_mrg_uniform.new` and
    returns a two-element list: the first new output unchanged and the
    second passed through `host_from_gpua`.

    `context_name` and `outputs` are not used here.

    """
    # Keep the exact-type test first so ``inputs[0]`` is only touched
    # for a real mrg_uniform op.
    applies = (type(op) == mrg_uniform and
               isinstance(inputs[0].type, GpuArrayType))
    if not applies:
        return None
    out_type = op.output_type
    outs = GPUA_mrg_uniform.new(inputs[0],
                                out_type.ndim,
                                out_type.dtype,
                                inputs[1])
    return [outs[0], host_from_gpua(outs[1])]
@register_gpua
(
'fast_compile'
)
@register_gpua
(
'fast_compile'
)
@local_optimizer
([
mrg_uniform
])
@local_optimizer
([
mrg_uniform
])
def
local_gpua_mrg
(
node
):
def
local_gpua_mrg
(
node
):
# TODO : need description for function
context_name
=
infer_context_name
(
*
node
.
inputs
)
if
(
type
(
node
.
op
)
==
mrg_uniform
and
return
local_gpua_mrg_graph
(
node
.
op
,
context_name
,
node
.
inputs
,
node
.
outputs
)
isinstance
(
node
.
inputs
[
0
]
.
type
,
GpuArrayType
)):
outs
=
GPUA_mrg_uniform
.
new
(
node
.
inputs
[
0
],
node
.
op
.
output_type
.
ndim
,
node
.
op
.
output_type
.
dtype
,
node
.
inputs
[
1
])
return
[
outs
[
0
],
host_from_gpua
(
outs
[
1
])]
MRG_RNGs
=
(
mrg_uniform
,
GPU_mrg_uniform
,
GPUA_mrg_uniform
)
MRG_RNGs
=
(
mrg_uniform
,
GPU_mrg_uniform
,
GPUA_mrg_uniform
)
...
...
theano/scan_module/scan_utils.py
浏览文件 @
a24fd9bb
...
@@ -152,13 +152,15 @@ def traverse(out, x, x_copy, d, visited=None):
...
@@ -152,13 +152,15 @@ def traverse(out, x, x_copy, d, visited=None):
return
d
return
d
visited
.
add
(
out
)
visited
.
add
(
out
)
from
theano.sandbox
import
cuda
from
theano.sandbox
import
cuda
from
theano
import
gpuarray
from
theano.gpuarray.basic_ops
import
gpu_from_host
,
host_from_gpu
from
theano.gpuarray
import
pygpu_activated
from
theano.gpuarray.type
import
GpuArrayType
if
out
==
x
:
if
out
==
x
:
if
isinstance
(
x
.
type
,
cuda
.
CudaNdarrayType
):
if
isinstance
(
x
.
type
,
cuda
.
CudaNdarrayType
):
d
[
out
]
=
cuda
.
gpu_from_host
(
x_copy
)
d
[
out
]
=
cuda
.
gpu_from_host
(
x_copy
)
else
:
else
:
assert
isinstance
(
x
.
type
,
gpuarray
.
GpuArrayType
)
assert
isinstance
(
x
.
type
,
GpuArrayType
)
d
[
out
]
=
gpu
array
.
GpuFromH
ost
(
x
.
type
.
context_name
)(
x_copy
)
d
[
out
]
=
gpu
_from_h
ost
(
x
.
type
.
context_name
)(
x_copy
)
return
d
return
d
elif
out
.
owner
is
None
:
elif
out
.
owner
is
None
:
return
d
return
d
...
@@ -167,8 +169,8 @@ def traverse(out, x, x_copy, d, visited=None):
...
@@ -167,8 +169,8 @@ def traverse(out, x, x_copy, d, visited=None):
out
.
owner
.
inputs
==
[
x
]):
out
.
owner
.
inputs
==
[
x
]):
d
[
out
]
=
tensor
.
as_tensor_variable
(
x_copy
)
d
[
out
]
=
tensor
.
as_tensor_variable
(
x_copy
)
return
d
return
d
elif
(
gpuarray
.
pygpu_activated
and
elif
(
pygpu_activated
and
out
.
owner
.
op
==
gpuarray
.
host_from_gpu
and
out
.
owner
.
op
==
host_from_gpu
and
out
.
owner
.
inputs
==
[
x
]):
out
.
owner
.
inputs
==
[
x
]):
d
[
out
]
=
tensor
.
as_tensor_variable
(
x_copy
)
d
[
out
]
=
tensor
.
as_tensor_variable
(
x_copy
)
return
d
return
d
...
...
theano/tensor/basic.py
浏览文件 @
a24fd9bb
...
@@ -630,9 +630,15 @@ def get_scalar_constant_value(orig_v, elemwise=True,
...
@@ -630,9 +630,15 @@ def get_scalar_constant_value(orig_v, elemwise=True,
v
=
v
.
owner
.
inputs
[
0
]
v
=
v
.
owner
.
inputs
[
0
]
continue
continue
elif
isinstance
(
v
.
owner
.
op
,
theano
.
compile
.
ops
.
Shape_i
):
elif
isinstance
(
v
.
owner
.
op
,
theano
.
compile
.
ops
.
Shape_i
):
if
isinstance
(
v
.
owner
.
inputs
[
0
],
Constant
):
i
=
v
.
owner
.
op
.
i
return
numpy
.
asarray
(
inp
=
v
.
owner
.
inputs
[
0
]
v
.
owner
.
inputs
[
0
]
.
data
.
shape
[
v
.
owner
.
op
.
i
])
if
isinstance
(
inp
,
Constant
):
return
numpy
.
asarray
(
inp
.
data
.
shape
[
i
])
# The shape of a broadcastable dimension is 1
if
(
hasattr
(
inp
.
type
,
'broadcastable'
)
and
inp
.
type
.
broadcastable
[
i
]):
return
numpy
.
asarray
(
1
)
# Don't act as the constant_folding optimization here as this
# Don't act as the constant_folding optimization here as this
# fct is used too early in the optimization phase. This would
# fct is used too early in the optimization phase. This would
# mess with the stabilization optimization and be too slow.
# mess with the stabilization optimization and be too slow.
...
@@ -2690,15 +2696,18 @@ class Alloc(gof.Op):
...
@@ -2690,15 +2696,18 @@ class Alloc(gof.Op):
sh
=
[
as_tensor_variable
(
s
)
for
s
in
shape
]
sh
=
[
as_tensor_variable
(
s
)
for
s
in
shape
]
bcast
=
[]
bcast
=
[]
for
i
,
s
in
enumerate
(
sh
):
for
i
,
s
in
enumerate
(
sh
):
if
config
.
exception_verbosity
==
'high'
:
def
err_str
():
s_as_str
=
'
\n
'
+
min_informative_str
(
s
)
if
config
.
exception_verbosity
==
'high'
:
else
:
return
'
\n
'
+
min_informative_str
(
s
)
s_as_str
=
str
(
s
)
else
:
return
str
(
s
)
if
s
.
type
.
dtype
[:
3
]
not
in
(
'int'
,
'uin'
):
if
s
.
type
.
dtype
[:
3
]
not
in
(
'int'
,
'uin'
):
s_as_str
=
err_str
()
raise
TypeError
(
'Shape arguments to Alloc must be integers, '
raise
TypeError
(
'Shape arguments to Alloc must be integers, '
'but argument
%
s is not for apply node:
%
s'
%
'but argument
%
s is not for apply node:
%
s'
%
(
i
,
s_as_str
))
(
i
,
s_as_str
))
if
s
.
ndim
!=
0
:
if
s
.
ndim
!=
0
:
s_as_str
=
err_str
()
raise
TypeError
(
raise
TypeError
(
"Each shape dimension to Alloc must be a scalar, "
,
"Each shape dimension to Alloc must be a scalar, "
,
'but dimension
%
s have
%
d dimensions for apply node:
%
s'
%
'but dimension
%
s have
%
d dimensions for apply node:
%
s'
%
...
...
theano/tensor/nnet/abstract_conv.py
浏览文件 @
a24fd9bb
...
@@ -66,8 +66,10 @@ def get_conv_output_shape(image_shape, kernel_shape,
...
@@ -66,8 +66,10 @@ def get_conv_output_shape(image_shape, kernel_shape,
"""
"""
bsize
,
imshp
=
image_shape
[
0
],
image_shape
[
2
:]
bsize
,
imshp
=
image_shape
[
0
],
image_shape
[
2
:]
nkern
,
kshp
=
kernel_shape
[
0
],
kernel_shape
[
2
:]
nkern
,
kshp
=
kernel_shape
[
0
],
kernel_shape
[
2
:]
if
filter_dilation
is
None
:
if
filter_dilation
is
None
:
filter_dilation
=
numpy
.
ones
(
len
(
subsample
),
dtype
=
'int'
)
filter_dilation
=
numpy
.
ones
(
len
(
subsample
),
dtype
=
'int'
)
if
isinstance
(
border_mode
,
tuple
):
if
isinstance
(
border_mode
,
tuple
):
out_shp
=
tuple
(
get_conv_shape_1axis
(
out_shp
=
tuple
(
get_conv_shape_1axis
(
imshp
[
i
],
kshp
[
i
],
border_mode
[
i
],
imshp
[
i
],
kshp
[
i
],
border_mode
[
i
],
...
@@ -121,7 +123,16 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
...
@@ -121,7 +123,16 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
pad
=
border_mode
pad
=
border_mode
if
pad
<
0
:
if
pad
<
0
:
raise
ValueError
(
"border_mode must be >= 0"
)
raise
ValueError
(
"border_mode must be >= 0"
)
out_shp
=
(
image_shape
+
2
*
pad
-
dil_kernel_shape
)
//
subsample
+
1
# In case of symbolic shape, we want to build the smallest graph
# (image_shape + 2 * pad - dil_kernel_shape) // subsample + 1
if
pad
==
0
:
out_shp
=
(
image_shape
-
dil_kernel_shape
)
else
:
out_shp
=
(
image_shape
+
2
*
pad
-
dil_kernel_shape
)
if
subsample
!=
1
:
out_shp
=
out_shp
//
subsample
out_shp
=
out_shp
+
1
return
out_shp
return
out_shp
...
...
theano/tensor/tests/test_basic.py
浏览文件 @
a24fd9bb
...
@@ -7003,6 +7003,9 @@ class T_get_scalar_constant_value(unittest.TestCase):
...
@@ -7003,6 +7003,9 @@ class T_get_scalar_constant_value(unittest.TestCase):
assert
get_scalar_constant_value
(
s
)
==
3
assert
get_scalar_constant_value
(
s
)
==
3
s
=
opt
.
Shape_i
(
1
)(
c
)
s
=
opt
.
Shape_i
(
1
)(
c
)
assert
get_scalar_constant_value
(
s
)
==
4
assert
get_scalar_constant_value
(
s
)
==
4
d
=
theano
.
shared
(
numpy
.
random
.
randn
(
1
,
1
),
broadcastable
=
(
True
,
True
))
f
=
theano
.
tensor
.
basic
.
ScalarFromTensor
()(
opt
.
Shape_i
(
0
)(
d
))
assert
get_scalar_constant_value
(
f
)
==
1
def
test_elemwise
(
self
):
def
test_elemwise
(
self
):
# We test only for a few elemwise, the list of all supported
# We test only for a few elemwise, the list of all supported
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论