testgroup / pytensor · Commits

Commit 2d1e254e
Authored May 14, 2014 by Arnaud Bergeron
Parent: 48a87d25

Fix import loop involving fftconv.

Showing 3 changed files with 24 additions and 25 deletions
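The loop in question: theano/misc/pycuda_utils.py imports theano.sandbox.cuda at module level, while theano/sandbox/cuda/fftconv.py imported theano.misc.pycuda_utils at module level, so whichever side was imported second re-entered a module that was still executing. A minimal sketch of that failure mode follows; a.py and b.py are hypothetical stand-ins for pycuda_utils and fftconv, not code from this repository.

    # a.py -- hypothetical stand-in for theano/misc/pycuda_utils.py
    import b                    # a needs b at import time

    def to_gpuarray(x):
        return b.wrap(x)

    # b.py -- hypothetical stand-in for theano/sandbox/cuda/fftconv.py
    from a import to_gpuarray   # BROKEN: if a is imported first, its body
                                # pauses at "import b"; a is already in
                                # sys.modules but to_gpuarray is not yet
                                # defined, so this raises ImportError.

    def wrap(x):
        return x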
Files changed:

theano/misc/pycuda_utils.py      +1  -1
theano/sandbox/cuda/fftconv.py   +19 -18
theano/sandbox/cuda/opt.py       +4  -6
theano/misc/pycuda_utils.py

 import numpy
 import pycuda.gpuarray
-import theano.sandbox.cuda as cuda
+from theano.sandbox import cuda

 if cuda.cuda_available == False:
     raise ImportError('Optional theano package cuda disabled')
...
theano/sandbox/cuda/fftconv.py

+import string
+
 import numpy as np

 import theano
 import theano.tensor as T
-import theano.sandbox.cuda as cuda
-from theano.misc.pycuda_utils import to_gpuarray
+from theano.sandbox.cuda import (GpuOp, basic_ops, CudaNdarrayType,
+                                 CudaNdarray)

 import scikits.cuda
 from scikits.cuda import fft, linalg, cublas
...
@@ -12,8 +14,6 @@ import pycuda.gpuarray
 import theano.misc.pycuda_init
-import string
-
 linalg.init()
...
@@ -25,7 +25,7 @@ linalg.init()
 # base class for shared code between scikits.cuda-based ops
-class ScikitsCudaOp(cuda.GpuOp):
+class ScikitsCudaOp(GpuOp):
     def __eq__(self, other):
         return type(self) == type(other)
...
@@ -39,8 +39,8 @@ class ScikitsCudaOp(cuda.GpuOp):
         raise NotImplementedError

     def make_node(self, inp):
-        inp = cuda.basic_ops.gpu_contiguous(
-            cuda.basic_ops.as_cuda_ndarray_variable(inp))
+        inp = basic_ops.gpu_contiguous(
+            basic_ops.as_cuda_ndarray_variable(inp))
         assert inp.dtype == "float32"
...
@@ -50,10 +50,11 @@ class ScikitsCudaOp(cuda.GpuOp):
 class CuFFTOp(ScikitsCudaOp):
     def output_type(self, inp):
         # add one extra dim for real/imag
-        return cuda.CudaNdarrayType(
+        return CudaNdarrayType(
             broadcastable=[False] * (inp.type.ndim + 1))

     def make_thunk(self, node, storage_map, _, _2):
+        from theano.misc.pycuda_utils import to_gpuarray
         inputs = [storage_map[v] for v in node.inputs]
         outputs = [storage_map[v] for v in node.outputs]
...
@@ -77,7 +78,7 @@ class CuFFTOp(ScikitsCudaOp):
         # only allocate if there is no previous allocation of the
         # right size.
         if z[0] is None or z[0].shape != output_shape:
-            z[0] = cuda.CudaNdarray.zeros(output_shape)
+            z[0] = CudaNdarray.zeros(output_shape)
         input_pycuda = to_gpuarray(inputs[0][0])
         # I thought we'd need to change the type on output_pycuda
...
@@ -104,7 +105,7 @@ class CuFFTOp(ScikitsCudaOp):
 class CuIFFTOp(ScikitsCudaOp):
     def output_type(self, inp):
         # remove extra real/imag dim
-        return cuda.CudaNdarrayType(
+        return CudaNdarrayType(
             broadcastable=[False] * (inp.type.ndim - 1))

     def make_thunk(self, node, storage_map, _, _2):
...
@@ -129,7 +130,7 @@ class CuIFFTOp(ScikitsCudaOp):
         # only allocate if there is no previous allocation of the
         # right size.
         if z[0] is None or z[0].shape != output_shape:
-            z[0] = cuda.CudaNdarray.zeros(output_shape)
+            z[0] = CudaNdarray.zeros(output_shape)
         input_pycuda = to_gpuarray(inputs[0][0])
         # input_pycuda is a float32 array with an extra dimension,
...
@@ -162,7 +163,7 @@ def to_complex_gpuarray(x, copyif=False):
     real/imaginary parts, and turns it into a complex64 PyCUDA
     GPUArray.
     """
-    if not isinstance(x, cuda.CudaNdarray):
+    if not isinstance(x, CudaNdarray):
         raise ValueError("We can transfer only CudaNdarray "
                          "to pycuda.gpuarray.GPUArray")
     else:
...
@@ -280,10 +281,10 @@ class BatchedComplexDotOp(ScikitsCudaOp):
     doing multiple cublasCgemm calls.
     """
     def make_node(self, inp1, inp2):
-        inp1 = cuda.basic_ops.gpu_contiguous(
-            cuda.basic_ops.as_cuda_ndarray_variable(inp1))
-        inp2 = cuda.basic_ops.gpu_contiguous(
-            cuda.basic_ops.as_cuda_ndarray_variable(inp2))
+        inp1 = basic_ops.gpu_contiguous(
+            basic_ops.as_cuda_ndarray_variable(inp1))
+        inp2 = basic_ops.gpu_contiguous(
+            basic_ops.as_cuda_ndarray_variable(inp2))
         assert inp1.dtype == "float32"
         assert inp2.dtype == "float32"
...
@@ -293,7 +294,7 @@ class BatchedComplexDotOp(ScikitsCudaOp):
         return theano.Apply(self, [inp1, inp2], [self.output_type(inp1)()])

     def output_type(self, inp):
-        return cuda.CudaNdarrayType(broadcastable=[False] * inp.type.ndim)
+        return CudaNdarrayType(broadcastable=[False] * inp.type.ndim)

     def make_thunk(self, node, storage_map, _, _2):
         inputs = [storage_map[v] for v in node.inputs]
...
@@ -314,7 +315,7 @@ class BatchedComplexDotOp(ScikitsCudaOp):
         # only allocate if there is no previous allocation of the
         # right size.
         if bz[0] is None or bz[0].shape != output_shape:
-            bz[0] = cuda.CudaNdarray.zeros(output_shape)
+            bz[0] = CudaNdarray.zeros(output_shape)
         input_bx_pycuda = to_complex_gpuarray(bx[0])
         input_by_pycuda = to_complex_gpuarray(by[0])
...
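The substantive fix above is moving `from theano.misc.pycuda_utils import to_gpuarray` from module level into `make_thunk`; the remaining edits replace the `cuda.` prefix with names imported directly from `theano.sandbox.cuda`, so fftconv no longer keeps the package alias around. Deferring the import means it is only resolved when the first thunk is built, by which point both modules have finished initializing. The same two hypothetical modules from the sketch under the commit header, fixed the same way:

    # b.py, fixed -- mirrors the make_thunk change above (hypothetical)
    def wrap(x):
        return x

    def make_thunk():
        # Deferred import: resolved on the first call, long after both
        # modules have finished executing. Later calls hit the
        # sys.modules cache, so the per-call cost is negligible.
        from a import to_gpuarray
        return to_gpuarray("dummy input")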
theano/sandbox/cuda/opt.py

...
@@ -32,7 +32,6 @@ from theano.sandbox.cuda.blas import gpu_ger_inplace
 from theano.sandbox.cuda.blas import gpu_ger_no_inplace
 from theano.sandbox.cuda.blas import (GpuDownsampleFactorMax,
                                       GpuDownsampleFactorMaxGrad)
-from theano.sandbox.cuda.fftconv import conv2d_fft
 from theano.sandbox.cuda.nnet import (
     GpuCrossentropySoftmaxArgmax1HotWithBias,
     GpuCrossentropySoftmax1HotWithBiasDx,
...
@@ -41,6 +40,7 @@ from theano.sandbox.cuda.elemwise import SupportCodeError
 from theano.scalar.basic_scipy import Erfinv
 from theano.sandbox.cuda.elemwise import erfinv_gpu
 from theano.sandbox.cuda.var import CudaNdarrayConstant
+from theano.sandbox.cuda.fftconv import conv2d_fft
 from theano.scan_module import scan_utils, scan_op, scan_opt
 from theano.tensor.blas import _is_real_vector, _is_real_matrix
 linalg = None
...
@@ -1124,11 +1124,9 @@ def local_gpu_conv(node):
 @local_optimizer([GpuConv])
 def local_conv_fft(node):
     if (isinstance(node.op, GpuConv) and
-            node.op.border_mode == 'valid'):
-        return [conv2d_fft(node.inputs[0], node.inputs[1],
-                           image_shape=node.op.imgshp,
-                           filter_shape=node.op.kshp)]
+            node.op.border_mode == 'valid' and
+            node.op.subsample == (1, 1)):
+        return [conv2d_fft(node.inputs[0], node.inputs[1])]

 import theano.tensor.signal.downsample as downsample
...
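Two independent changes here: the `conv2d_fft` import moves to the end of the `theano.sandbox.cuda` import block, so fftconv is imported only after the other cuda submodules, and `local_conv_fft` gains a `subsample == (1, 1)` guard while dropping the `image_shape`/`filter_shape` keyword arguments, presumably because the FFT path computes an unstrided valid convolution and must leave strided graphs untouched. A rough usage sketch of the rewrite target, assuming a CUDA-enabled Theano of this era with pycuda and scikits.cuda installed (run with device=gpu,floatX=float32; the variable names are illustrative, not from the commit):

    import theano
    import theano.tensor as T
    from theano.sandbox.cuda.fftconv import conv2d_fft

    x = T.ftensor4('x')   # images:  (batch, channels, rows, cols)
    w = T.ftensor4('w')   # filters: (nfilters, channels, rows, cols)
    y = conv2d_fft(x, w)  # valid-mode, unstrided -- exactly the case
                          # the optimizer above now matches
    f = theano.function([x, w], y)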