Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
1547ecc6
提交
1547ecc6
authored
5月 07, 2015
作者:
Frédéric Bastien
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2800 from abergeron/fp16_merge
Fp16 merge
上级
03d0e784
3226ffd2
全部展开
显示空白字符变更
内嵌
并排
正在显示
24 个修改的文件
包含
254 行增加
和
234 行删除
+254
-234
cop.txt
doc/extending/cop.txt
+15
-0
ops.py
theano/compile/ops.py
+4
-0
configdefaults.py
theano/configdefaults.py
+8
-3
op.py
theano/gof/op.py
+12
-1
basic_ops.py
theano/sandbox/gpuarray/basic_ops.py
+20
-34
elemwise.py
theano/sandbox/gpuarray/elemwise.py
+0
-0
fp16_help.py
theano/sandbox/gpuarray/fp16_help.py
+19
-0
nnet.py
theano/sandbox/gpuarray/nnet.py
+42
-60
opt.py
theano/sandbox/gpuarray/opt.py
+6
-10
subtensor.py
theano/sandbox/gpuarray/subtensor.py
+2
-0
test_basic_ops.py
theano/sandbox/gpuarray/tests/test_basic_ops.py
+1
-1
type.py
theano/sandbox/gpuarray/type.py
+6
-1
test_rng_mrg.py
theano/sandbox/test_rng_mrg.py
+7
-19
basic.py
theano/scalar/basic.py
+29
-11
test_basic.py
theano/sparse/tests/test_basic.py
+3
-0
basic.py
theano/tensor/basic.py
+26
-11
blas.py
theano/tensor/blas.py
+3
-2
elemwise.py
theano/tensor/elemwise.py
+9
-1
nnet.py
theano/tensor/nnet/nnet.py
+9
-8
opt.py
theano/tensor/opt.py
+4
-9
test_basic.py
theano/tensor/tests/test_basic.py
+24
-62
test_elemwise.py
theano/tensor/tests/test_elemwise.py
+1
-0
type.py
theano/tensor/type.py
+1
-0
test_flake8.py
theano/tests/test_flake8.py
+3
-1
没有找到文件。
doc/extending/cop.txt
浏览文件 @
1547ecc6
...
@@ -212,6 +212,21 @@ There are less methods to define for an Op than for a Type:
...
@@ -212,6 +212,21 @@ There are less methods to define for an Op than for a Type:
Op *must* have a `context_type` property with the Type to use
Op *must* have a `context_type` property with the Type to use
for the context variable.
for the context variable.
.. attribute:: _f16_ok
(optional) If this attribute is absent or evaluates to `False`,
C code will be disabled for the op if any of its inputs or
outputs contains float16 data. This is added as a check to make
sure we don't compute wrong results since there is no hardware
float16 type so special care must be taken to make sure
operations are done correctly.
If you don't intend to deal with float16 data you can leave
this undefined.
This attribute is internal and may go away at any point during
development if a better solution is found.
The ``name`` argument is currently given an invalid value, so steer
The ``name`` argument is currently given an invalid value, so steer
away from it. As was the case with Type, ``sub['fail']`` provides
away from it. As was the case with Type, ``sub['fail']`` provides
failure code that you *must* use if you want to raise an exception,
failure code that you *must* use if you want to raise an exception,
...
...
theano/compile/ops.py
浏览文件 @
1547ecc6
...
@@ -215,6 +215,8 @@ class Shape(gof.Op):
...
@@ -215,6 +215,8 @@ class Shape(gof.Op):
@note: Non-differentiable.
@note: Non-differentiable.
"""
"""
_f16_ok
=
True
# Mapping from Type to C code (and version) to use.
# Mapping from Type to C code (and version) to use.
# In the C code, the name of the input variable is %(iname)s,
# In the C code, the name of the input variable is %(iname)s,
# the output variable is %(oname)s.
# the output variable is %(oname)s.
...
@@ -308,6 +310,8 @@ class Shape_i(gof.Op):
...
@@ -308,6 +310,8 @@ class Shape_i(gof.Op):
@note: Non-differentiable.
@note: Non-differentiable.
"""
"""
_f16_ok
=
True
# Mapping from Type to C code (and version) to use.
# Mapping from Type to C code (and version) to use.
# In the C code, the name of the input variable is %(iname)s,
# In the C code, the name of the input variable is %(iname)s,
# the output variable is %(oname)s.
# the output variable is %(oname)s.
...
...
theano/configdefaults.py
浏览文件 @
1547ecc6
...
@@ -18,12 +18,17 @@ def floatX_convert(s):
...
@@ -18,12 +18,17 @@ def floatX_convert(s):
return
"float32"
return
"float32"
elif
s
==
"64"
:
elif
s
==
"64"
:
return
"float64"
return
"float64"
elif
s
==
"16"
:
return
"float16"
else
:
else
:
return
s
return
s
AddConfigVar
(
'floatX'
,
AddConfigVar
(
'floatX'
,
"Default floating-point precision for python casts"
,
"Default floating-point precision for python casts.
\n
"
EnumStr
(
'float64'
,
'float32'
,
convert
=
floatX_convert
,),
"
\n
"
"Note: float16 support is experimental, use at your own risk."
,
EnumStr
(
'float64'
,
'float32'
,
'float16'
,
convert
=
floatX_convert
,),
)
)
AddConfigVar
(
'warn_float64'
,
AddConfigVar
(
'warn_float64'
,
...
@@ -39,7 +44,7 @@ AddConfigVar('cast_policy',
...
@@ -39,7 +44,7 @@ AddConfigVar('cast_policy',
EnumStr
(
'custom'
,
'numpy+floatX'
,
EnumStr
(
'custom'
,
'numpy+floatX'
,
# The 'numpy' policy was originally planned to provide a
# The 'numpy' policy was originally planned to provide a
# smooth transition from numpy. It was meant to behave the
# smooth transition from numpy. It was meant to behave the
# same asnumpy+floatX, but keeping float64 when numpy
# same as
numpy+floatX, but keeping float64 when numpy
# would. However the current implementation of some cast
# would. However the current implementation of some cast
# mechanisms makes it a bit more complex to add than what
# mechanisms makes it a bit more complex to add than what
# was expected, so it is currently not available.
# was expected, so it is currently not available.
...
...
theano/gof/op.py
浏览文件 @
1547ecc6
...
@@ -726,9 +726,20 @@ class Op(utils.object2, PureOp, CLinkerOp):
...
@@ -726,9 +726,20 @@ class Op(utils.object2, PureOp, CLinkerOp):
node_output_storage
=
[
storage_map
[
r
]
for
r
in
node
.
outputs
]
node_output_storage
=
[
storage_map
[
r
]
for
r
in
node
.
outputs
]
node_input_compute
=
[
compute_map
[
r
]
for
r
in
node
.
inputs
]
node_input_compute
=
[
compute_map
[
r
]
for
r
in
node
.
inputs
]
node_output_compute
=
[
compute_map
[
r
]
for
r
in
node
.
outputs
]
node_output_compute
=
[
compute_map
[
r
]
for
r
in
node
.
outputs
]
#logger.debug('Compiling node %i of graph' % node_idx)
if
self
.
_op_use_c_code
:
if
self
.
_op_use_c_code
:
try
:
try
:
# float16 get special treatment since running
# unprepared C code will get bad results.
if
not
getattr
(
self
,
'_f16_ok'
,
False
):
def
is_f16
(
t
):
return
getattr
(
t
,
'dtype'
,
''
)
==
'float16'
if
(
any
(
is_f16
(
i
.
type
)
for
i
in
node
.
inputs
)
or
any
(
is_f16
(
o
.
type
)
for
o
in
node
.
outputs
)):
print
(
"Disabling C code for
%
s due to unsupported "
"float16"
%
(
self
,))
raise
NotImplementedError
(
"float16"
)
e
=
FunctionGraph
(
node
.
inputs
,
node
.
outputs
)
e
=
FunctionGraph
(
node
.
inputs
,
node
.
outputs
)
e_no_recycling
=
[
new_o
e_no_recycling
=
[
new_o
...
...
theano/sandbox/gpuarray/basic_ops.py
浏览文件 @
1547ecc6
...
@@ -20,6 +20,7 @@ except ImportError:
...
@@ -20,6 +20,7 @@ except ImportError:
pass
pass
from
.type
import
GpuArrayType
from
.type
import
GpuArrayType
from
.fp16_help
import
write_w
def
as_gpuarray_variable
(
x
):
def
as_gpuarray_variable
(
x
):
...
@@ -186,11 +187,8 @@ class GpuKernelBase(object):
...
@@ -186,11 +187,8 @@ class GpuKernelBase(object):
class
HostFromGpu
(
Op
):
class
HostFromGpu
(
Op
):
def
__eq__
(
self
,
other
):
__props__
=
()
return
type
(
self
)
==
type
(
other
)
_f16_ok
=
True
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
__str__
(
self
):
def
__str__
(
self
):
return
'HostFromGpu(gpuarray)'
return
'HostFromGpu(gpuarray)'
...
@@ -269,11 +267,8 @@ host_from_gpu = HostFromGpu()
...
@@ -269,11 +267,8 @@ host_from_gpu = HostFromGpu()
class
GpuFromHost
(
Op
):
class
GpuFromHost
(
Op
):
def
__eq__
(
self
,
other
):
__props__
=
()
return
type
(
self
)
==
type
(
other
)
_f16_ok
=
True
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
__str__
(
self
):
def
__str__
(
self
):
return
'GpuFromHost(gpuarray)'
return
'GpuFromHost(gpuarray)'
...
@@ -573,18 +568,15 @@ cuda_from_gpu = CudaFromGpu()
...
@@ -573,18 +568,15 @@ cuda_from_gpu = CudaFromGpu()
class
GpuAlloc
(
HideC
,
Alloc
):
class
GpuAlloc
(
HideC
,
Alloc
):
__props__
=
(
'memset_0'
,)
_f16_ok
=
True
def
__init__
(
self
,
memset_0
=
False
):
def
__init__
(
self
,
memset_0
=
False
):
"""memset_0 is only an optimized version. True, it mean the
"""memset_0 is only an optimized version. True, it mean the
value is always 0, so the c code call memset as it is faster.
value is always 0, so the c code call memset as it is faster.
"""
"""
self
.
memset_0
=
memset_0
self
.
memset_0
=
memset_0
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
and
self
.
memset_0
==
other
.
memset_0
def
__hash__
(
self
):
return
hash
(
type
(
self
))
^
hash
(
self
.
memset_0
)
def
__str__
(
self
):
def
__str__
(
self
):
# Hide the memset parameter when not used to prevent confusion.
# Hide the memset parameter when not used to prevent confusion.
if
self
.
memset_0
:
if
self
.
memset_0
:
...
@@ -728,25 +720,17 @@ class GpuContiguous(Op):
...
@@ -728,25 +720,17 @@ class GpuContiguous(Op):
Always return a c contiguous output. Copy the input only if it is
Always return a c contiguous output. Copy the input only if it is
not already c contiguous.
not already c contiguous.
"""
"""
__props__
=
()
view_map
=
{
0
:
[
0
]}
view_map
=
{
0
:
[
0
]}
_f16_ok
=
True
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
grad
(
self
,
inputs
,
dout
):
def
grad
(
self
,
inputs
,
dout
):
x
,
=
inputs
x
,
=
inputs
dout
,
=
dout
dout
,
=
dout
dout
=
as_gpuarray_variable
(
dout
)
dout
=
as_gpuarray_variable
(
dout
)
return
[
dout
]
return
[
dout
]
def
__str__
(
self
):
return
self
.
__class__
.
__name__
def
make_node
(
self
,
input
):
def
make_node
(
self
,
input
):
input
=
as_gpuarray_variable
(
input
)
input
=
as_gpuarray_variable
(
input
)
return
Apply
(
self
,
[
input
],
[
input
.
type
()])
return
Apply
(
self
,
[
input
],
[
input
.
type
()])
...
@@ -794,6 +778,8 @@ class GpuReshape(HideC, tensor.Reshape):
...
@@ -794,6 +778,8 @@ class GpuReshape(HideC, tensor.Reshape):
"""
"""
Implement Reshape on the gpu.
Implement Reshape on the gpu.
"""
"""
_f16_ok
=
True
# __hash__, __eq__, __str__ come from tensor.Reshape
# __hash__, __eq__, __str__ come from tensor.Reshape
def
make_node
(
self
,
x
,
shp
):
def
make_node
(
self
,
x
,
shp
):
x
=
as_gpuarray_variable
(
x
)
x
=
as_gpuarray_variable
(
x
)
...
@@ -831,6 +817,8 @@ class GpuReshape(HideC, tensor.Reshape):
...
@@ -831,6 +817,8 @@ class GpuReshape(HideC, tensor.Reshape):
class
GpuJoin
(
HideC
,
Join
):
class
GpuJoin
(
HideC
,
Join
):
_f16_ok
=
True
def
make_node
(
self
,
axis
,
*
tensors
):
def
make_node
(
self
,
axis
,
*
tensors
):
node
=
Join
.
make_node
(
self
,
axis
,
*
tensors
)
node
=
Join
.
make_node
(
self
,
axis
,
*
tensors
)
...
@@ -888,6 +876,9 @@ class GpuSplit(HideC, Split):
...
@@ -888,6 +876,9 @@ class GpuSplit(HideC, Split):
class
GpuEye
(
GpuKernelBase
,
Op
):
class
GpuEye
(
GpuKernelBase
,
Op
):
__props__
=
(
'dtype'
,)
_f16_ok
=
True
def
__init__
(
self
,
dtype
=
None
):
def
__init__
(
self
,
dtype
=
None
):
if
dtype
is
None
:
if
dtype
is
None
:
dtype
=
config
.
floatX
dtype
=
config
.
floatX
...
@@ -915,20 +906,15 @@ class GpuEye(GpuKernelBase, Op):
...
@@ -915,20 +906,15 @@ class GpuEye(GpuKernelBase, Op):
return
[
grad_undefined
(
self
,
i
,
inp
[
i
])
return
[
grad_undefined
(
self
,
i
,
inp
[
i
])
for
i
in
xrange
(
3
)]
for
i
in
xrange
(
3
)]
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
and
self
.
dtype
==
other
.
dtype
def
__hash__
(
self
):
return
hash
(
self
.
dtype
)
^
hash
(
type
(
self
))
def
gpu_kernels
(
self
,
node
,
name
):
def
gpu_kernels
(
self
,
node
,
name
):
code
=
"""
code
=
"""
KERNEL void k(GLOBAL_MEM
%(ctype)
s *a, ga_size n, ga_size m) {
KERNEL void k(GLOBAL_MEM
%(ctype)
s *a, ga_size n, ga_size m) {
ga_size nb = n < m ? n : m;
ga_size nb = n < m ? n : m;
for (ga_size i = LID_0; i < nb; i += LDIM_0) {
for (ga_size i = LID_0; i < nb; i += LDIM_0) {
a[i*m + i] =
1
;
a[i*m + i] =
%(write_a)
s(1)
;
}
}
}"""
%
dict
(
ctype
=
pygpu
.
gpuarray
.
dtype_to_ctype
(
self
.
dtype
),
name
=
name
)
}"""
%
dict
(
ctype
=
pygpu
.
gpuarray
.
dtype_to_ctype
(
self
.
dtype
),
name
=
name
,
write_a
=
write_w
(
self
.
dtype
))
return
[
Kernel
(
return
[
Kernel
(
code
=
code
,
name
=
"k"
,
code
=
code
,
name
=
"k"
,
params
=
[
gpuarray
.
GpuArray
,
gpuarray
.
SIZE
,
gpuarray
.
SIZE
],
params
=
[
gpuarray
.
GpuArray
,
gpuarray
.
SIZE
,
gpuarray
.
SIZE
],
...
...
theano/sandbox/gpuarray/elemwise.py
浏览文件 @
1547ecc6
差异被折叠。
点击展开。
theano/sandbox/gpuarray/fp16_help.py
0 → 100644
浏览文件 @
1547ecc6
def work_dtype(dtype):
    """Return the dtype in which computations on `dtype` data are done.

    'float16' is promoted to 'float32'; every other dtype is returned
    unchanged.
    """
    return 'float32' if dtype == 'float16' else dtype
def load_w(dtype):
    """Return the name of the function used to load a value of `dtype`.

    For 'float16' this is '__half2float'; for any other dtype it is the
    empty string, so that `name(expr)` in generated code reduces to a
    plain parenthesised expression.
    """
    return '__half2float' if dtype == 'float16' else ''
def write_w(dtype):
    """Return the name of the function used to store a value as `dtype`.

    For 'float16' this is '__float2half_rn'; for any other dtype it is
    the empty string, so that `name(expr)` in generated code reduces to
    a plain parenthesised expression.
    """
    return '__float2half_rn' if dtype == 'float16' else ''
theano/sandbox/gpuarray/nnet.py
浏览文件 @
1547ecc6
...
@@ -16,6 +16,7 @@ from .type import GpuArrayType
...
@@ -16,6 +16,7 @@ from .type import GpuArrayType
from
.kernel_codegen
import
(
nvcc_kernel
,
from
.kernel_codegen
import
(
nvcc_kernel
,
inline_softmax
,
inline_softmax
,
inline_softmax_fixed_shared
)
inline_softmax_fixed_shared
)
from
.fp16_help
import
work_dtype
,
load_w
,
write_w
class
GpuCrossentropySoftmaxArgmax1HotWithBias
(
Op
):
class
GpuCrossentropySoftmaxArgmax1HotWithBias
(
Op
):
...
@@ -24,15 +25,8 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
...
@@ -24,15 +25,8 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
"""
"""
nin
=
3
nin
=
3
nout
=
3
nout
=
3
__props__
=
()
def
__eq__
(
self
,
other
):
_f16_ok
=
True
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
__str__
(
self
):
return
self
.
__class__
.
__name__
def
make_node
(
self
,
x
,
b
,
y_idx
):
def
make_node
(
self
,
x
,
b
,
y_idx
):
# N.B. won't work when we don't cast y_idx to float anymore
# N.B. won't work when we don't cast y_idx to float anymore
...
@@ -52,6 +46,12 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
...
@@ -52,6 +46,12 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
dtype_x
=
node
.
inputs
[
0
]
.
dtype
dtype_x
=
node
.
inputs
[
0
]
.
dtype
dtype_b
=
node
.
inputs
[
1
]
.
dtype
dtype_b
=
node
.
inputs
[
1
]
.
dtype
dtype_y_idx
=
node
.
inputs
[
2
]
.
dtype
dtype_y_idx
=
node
.
inputs
[
2
]
.
dtype
work_x
=
work_dtype
(
dtype_x
)
work_b
=
work_dtype
(
dtype_b
)
load_x
=
load_w
(
dtype_x
)
load_b
=
load_w
(
dtype_b
)
write_x
=
write_w
(
dtype_x
)
write_b
=
write_w
(
dtype_b
)
return
"""
return
"""
__global__ void k_xent_sm_1hot_bias_
%(nodename)
s(int M, int N,
__global__ void k_xent_sm_1hot_bias_
%(nodename)
s(int M, int N,
const npy_
%(dtype_x)
s* x_data, int xs0, int xs1,
const npy_
%(dtype_x)
s* x_data, int xs0, int xs1,
...
@@ -67,12 +67,13 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
...
@@ -67,12 +67,13 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
const npy_
%(dtype_y_idx)
s y_idx = y_idx_data[row * y_idxs0];
const npy_
%(dtype_y_idx)
s y_idx = y_idx_data[row * y_idxs0];
npy_
%(dtype_x)
s* sm = sm_data + sms0 * row;
npy_
%(dtype_x)
s* sm = sm_data + sms0 * row;
npy_
%(
dtype
_x)
s sum = 0.0;
npy_
%(
work
_x)
s sum = 0.0;
int row_max_j = 0;
int row_max_j = 0;
npy_
%(
dtype_x)
s row_max = x[0] + b[0]
;
npy_
%(
work_x)
s row_max =
%(load_x)
s(x[0]) +
%(load_b)
s(b[0])
;
for (int j = 1; j < N; ++j)
for (int j = 1; j < N; ++j)
{
{
npy_
%(dtype_x)
s row_ij = x[j*xs1] + b[j*bs0];
npy_
%(work_x)
s row_ij =
%(load_x)
s(x[j*xs1]) +
%(load_b)
s(b[j*bs0]);
//todo: store to shared memory
//todo: store to shared memory
row_max_j = (row_ij > row_max) ? j : row_max_j;
row_max_j = (row_ij > row_max) ? j : row_max_j;
row_max = (row_ij > row_max) ? row_ij : row_max;
row_max = (row_ij > row_max) ? row_ij : row_max;
...
@@ -80,27 +81,30 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
...
@@ -80,27 +81,30 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
//compute the exp
//compute the exp
for (int j = 0; j < N; ++j)
for (int j = 0; j < N; ++j)
{
{
npy_
%(dtype_x)
s row_ij = x[j*xs1] + b[j*bs0];
npy_
%(work_x)
s row_ij =
%(load_x)
s(x[j*xs1]) +
npy_
%(dtype_x)
s sm_ij = exp(row_ij - row_max);
%(load_b)
s(b[j*bs0]);
npy_
%(work_x)
s sm_ij = exp(row_ij - row_max);
sum += sm_ij;
sum += sm_ij;
sm[j * sms1] =
sm_ij
;
sm[j * sms1] =
%(write_x)
s(sm_ij)
;
}
}
npy_
%(
dtype
_x)
s sum_inv = 1.0 / sum;
npy_
%(
work
_x)
s sum_inv = 1.0 / sum;
for (int j = 0; j < N; ++j)
for (int j = 0; j < N; ++j)
{
{
sm[j * sms1] *= sum_inv;
npy_
%(work_x)
s __tmp =
%(load_x)
s(sm[j * sms1]);
__tmp *= sum_inv;
sm[j * sms1] =
%(write_x)
s(__tmp);
}
}
if ((y_idx >= N) || (y_idx < 0))
if ((y_idx >= N) || (y_idx < 0))
{
{
//TODO: set raise an error bit in a global var?
//TODO: set raise an error bit in a global var?
nll_data[row*nlls0] =
0.0
; // raise some suspicion at least...
nll_data[row*nlls0] =
%(write_x)
s(0.0)
; // raise some suspicion at least...
}
}
else
else
{
{
nll_data[row*nlls0] =
- x[y_idx*xs1]
nll_data[row*nlls0] =
%(write_x)
s(-
%(load_x)
s(x[y_idx*xs1])
-
b[y_idx*bs0]
-
%(load_b)
s(b[y_idx*bs0])
+ row_max
+ row_max
+ log(sum);
+ log(sum)
)
;
}
}
am_data[row*ams0] = row_max_j;
am_data[row*ams0] = row_max_j;
}
}
...
@@ -259,8 +263,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
...
@@ -259,8 +263,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
return
sio
.
getvalue
()
return
sio
.
getvalue
()
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# return ()
return
(
6
,)
return
(
5
,)
def
c_compiler
(
self
):
def
c_compiler
(
self
):
return
NVCC_compiler
return
NVCC_compiler
...
@@ -272,21 +275,13 @@ gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1Ho
...
@@ -272,21 +275,13 @@ gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1Ho
class
GpuCrossentropySoftmax1HotWithBiasDx
(
Op
):
class
GpuCrossentropySoftmax1HotWithBiasDx
(
Op
):
"""
"""
Implement CrossentropySoftmax1HotWithBiasDx on the gpu.
Implement CrossentropySoftmax1HotWithBiasDx on the gpu.
Gradient wrt x of the CrossentropySoftmax1Hot Op
"""
"""
nin
=
3
nin
=
3
nout
=
1
nout
=
1
"""Gradient wrt x of the CrossentropySoftmax1Hot Op"""
__props__
=
()
def
__init__
(
self
,
**
kwargs
):
_f16_ok
=
True
Op
.
__init__
(
self
,
**
kwargs
)
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
__str__
(
self
):
return
self
.
__class__
.
__name__
def
make_node
(
self
,
dnll
,
sm
,
y_idx
):
def
make_node
(
self
,
dnll
,
sm
,
y_idx
):
dnll
=
as_gpuarray_variable
(
dnll
)
dnll
=
as_gpuarray_variable
(
dnll
)
...
@@ -295,8 +290,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
...
@@ -295,8 +290,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
return
Apply
(
self
,
[
dnll
,
sm
,
y_idx
],
[
sm
.
type
()])
return
Apply
(
self
,
[
dnll
,
sm
,
y_idx
],
[
sm
.
type
()])
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# return ()
return
(
9
,)
return
(
8
,)
def
c_headers
(
self
):
def
c_headers
(
self
):
return
[
'cuda.h'
,
'<gpuarray/extension.h>'
,
'<numpy_compat.h>'
]
return
[
'cuda.h'
,
'<gpuarray/extension.h>'
,
'<numpy_compat.h>'
]
...
@@ -421,6 +415,10 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
...
@@ -421,6 +415,10 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
dtype_sm
=
node
.
inputs
[
1
]
.
dtype
dtype_sm
=
node
.
inputs
[
1
]
.
dtype
dtype_y_idx
=
node
.
inputs
[
2
]
.
dtype
dtype_y_idx
=
node
.
inputs
[
2
]
.
dtype
dtype_dx
=
node
.
outputs
[
0
]
.
dtype
dtype_dx
=
node
.
outputs
[
0
]
.
dtype
work_dnll
=
work_dtype
(
dtype_dnll
)
load_dnll
=
load_w
(
dtype_dnll
)
load_sm
=
load_w
(
dtype_sm
)
write_dx
=
write_w
(
dtype_dx
)
return
"""
return
"""
__global__ void kCrossEntropySoftmax1HotWithBiasDx_
%(nodename)
s(
__global__ void kCrossEntropySoftmax1HotWithBiasDx_
%(nodename)
s(
int N, int K,
int N, int K,
...
@@ -431,7 +429,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
...
@@ -431,7 +429,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
{
{
for (int i = blockIdx.x; i < N; i += gridDim.x)
for (int i = blockIdx.x; i < N; i += gridDim.x)
{
{
npy_
%(
dtype_dnll)
s dnll_i = dnll[i * dnll_s0]
;
npy_
%(
work_dnll)
s dnll_i =
%(load_dnll)
s(dnll[i * dnll_s0])
;
npy_
%(dtype_y_idx)
s y_i = y_idx[i * y_idx_s0];
npy_
%(dtype_y_idx)
s y_i = y_idx[i * y_idx_s0];
for (int j = threadIdx.x; j < K; j += blockDim.x)
for (int j = threadIdx.x; j < K; j += blockDim.x)
...
@@ -439,16 +437,15 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
...
@@ -439,16 +437,15 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
if (y_i == j)
if (y_i == j)
{
{
dx[i * dx_s0 + j * dx_s1] =
dx[i * dx_s0 + j * dx_s1] =
dnll_i * (sm[i * sm_s0 + j * sm_s1]-1.0);
%(write_dx)
s(dnll_i *
(
%(load_sm)
s(sm[i * sm_s0 + j * sm_s1]) - 1.0));
}
}
else
else
{
{
dx[i * dx_s0 + j * dx_s1] =
dx[i * dx_s0 + j * dx_s1] =
dnll_i * sm[i * sm_s0 + j * sm_s1];
%(write_dx)
s(dnll_i *
%(load_sm)
s(sm[i * sm_s0 + j * sm_s1]));
}
}
//dx[i * dx_s0 + j * dx_s1] =
// dnll_i * sm[i * sm_s0 + j * sm_s1];
//dx[i*dx_s0+j*dx_s1] = 0;
}
}
}
}
}
}
...
@@ -466,14 +463,7 @@ class GpuSoftmax (Op):
...
@@ -466,14 +463,7 @@ class GpuSoftmax (Op):
"""
"""
Implement Softmax on the gpu.
Implement Softmax on the gpu.
"""
"""
def
__eq__
(
self
,
other
):
__props__
=
()
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
__str__
(
self
):
return
self
.
__class__
.
__name__
def
make_node
(
self
,
x
):
def
make_node
(
self
,
x
):
x
=
as_gpuarray_variable
(
x
)
x
=
as_gpuarray_variable
(
x
)
...
@@ -658,15 +648,7 @@ class GpuSoftmaxWithBias (Op):
...
@@ -658,15 +648,7 @@ class GpuSoftmaxWithBias (Op):
"""
"""
nin
=
2
nin
=
2
nout
=
1
nout
=
1
__props__
=
()
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
__str__
(
self
):
return
self
.
__class__
.
__name__
def
make_node
(
self
,
x
,
b
):
def
make_node
(
self
,
x
,
b
):
x
=
as_gpuarray_variable
(
x
)
x
=
as_gpuarray_variable
(
x
)
...
...
theano/sandbox/gpuarray/opt.py
浏览文件 @
1547ecc6
...
@@ -10,9 +10,7 @@ except ImportError:
...
@@ -10,9 +10,7 @@ except ImportError:
from
theano
import
tensor
,
scalar
,
gof
from
theano
import
tensor
,
scalar
,
gof
from
theano.compile
import
optdb
from
theano.compile
import
optdb
from
theano.gof
import
(
local_optimizer
,
EquilibriumDB
,
from
theano.gof
import
(
local_optimizer
,
EquilibriumDB
,
SequenceDB
,
ProxyDB
,
SequenceDB
,
Optimizer
,
toolbox
)
Optimizer
,
toolbox
,
InconsistencyError
,
EquilibriumOptimizer
)
from
theano.scan_module
import
scan_utils
,
scan_op
,
scan_opt
from
theano.scan_module
import
scan_utils
,
scan_op
,
scan_opt
...
@@ -28,8 +26,7 @@ from .conv import GpuConv
...
@@ -28,8 +26,7 @@ from .conv import GpuConv
from
.nnet
import
(
GpuCrossentropySoftmaxArgmax1HotWithBias
,
from
.nnet
import
(
GpuCrossentropySoftmaxArgmax1HotWithBias
,
GpuCrossentropySoftmax1HotWithBiasDx
,
GpuCrossentropySoftmax1HotWithBiasDx
,
GpuSoftmaxWithBias
,
GpuSoftmax
)
GpuSoftmaxWithBias
,
GpuSoftmax
)
from
.elemwise
import
(
GpuElemwise
,
_is_scalar
,
from
.elemwise
import
(
GpuElemwise
,
GpuDimShuffle
,
GpuCAReduceCuda
,
GpuDimShuffle
,
GpuCAReduceCuda
,
GpuCAReduceCPY
)
GpuCAReduceCPY
)
from
.subtensor
import
(
GpuIncSubtensor
,
GpuSubtensor
,
from
.subtensor
import
(
GpuIncSubtensor
,
GpuSubtensor
,
GpuAdvancedIncSubtensor1
,
GpuAdvancedIncSubtensor1
,
...
@@ -134,7 +131,7 @@ class InputToGpuOptimizer(Optimizer):
...
@@ -134,7 +131,7 @@ class InputToGpuOptimizer(Optimizer):
new_input
=
host_from_gpu
(
gpu_from_host
(
input
))
new_input
=
host_from_gpu
(
gpu_from_host
(
input
))
fgraph
.
replace_validate
(
input
,
new_input
,
fgraph
.
replace_validate
(
input
,
new_input
,
"InputToGpuOptimizer"
)
"InputToGpuOptimizer"
)
except
TypeError
as
e
:
except
TypeError
:
# This could fail if the inputs are not TensorTypes
# This could fail if the inputs are not TensorTypes
pass
pass
...
@@ -253,10 +250,11 @@ def local_gpuflatten(node):
...
@@ -253,10 +250,11 @@ def local_gpuflatten(node):
@op_lifter
([
tensor
.
Elemwise
])
@op_lifter
([
tensor
.
Elemwise
])
def
local_gpu_elemwise
(
node
):
def
local_gpu_elemwise
(
node
):
op
=
node
.
op
op
=
node
.
op
scal_op
=
op
.
scalar_op
name
=
op
.
name
name
=
op
.
name
if
name
:
if
name
:
name
=
'Gpu'
+
name
name
=
'Gpu'
+
name
res
=
GpuElemwise
(
op
.
scalar
_op
,
name
=
name
,
res
=
GpuElemwise
(
scal
_op
,
name
=
name
,
inplace_pattern
=
copy
.
copy
(
op
.
inplace_pattern
),
inplace_pattern
=
copy
.
copy
(
op
.
inplace_pattern
),
nfunc_spec
=
op
.
nfunc_spec
)
nfunc_spec
=
op
.
nfunc_spec
)
return
res
return
res
...
@@ -391,7 +389,6 @@ def local_gpua_advanced_incsubtensor(node):
...
@@ -391,7 +389,6 @@ def local_gpua_advanced_incsubtensor(node):
return
None
return
None
x
,
y
=
node
.
inputs
[
0
:
2
]
x
,
y
=
node
.
inputs
[
0
:
2
]
coords
=
node
.
inputs
[
2
:]
set_instead_of_inc
=
node
.
op
.
set_instead_of_inc
set_instead_of_inc
=
node
.
op
.
set_instead_of_inc
active_device_no
=
theano
.
sandbox
.
cuda
.
active_device_number
()
active_device_no
=
theano
.
sandbox
.
cuda
.
active_device_number
()
device_properties
=
theano
.
sandbox
.
cuda
.
device_properties
device_properties
=
theano
.
sandbox
.
cuda
.
device_properties
...
@@ -640,8 +637,7 @@ def local_gpu_elemwise_careduce(node):
...
@@ -640,8 +637,7 @@ def local_gpu_elemwise_careduce(node):
# automatically add more case, as some like trigonometic
# automatically add more case, as some like trigonometic
# operation with some reduction pattern will probably result
# operation with some reduction pattern will probably result
# to slow down.
# to slow down.
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
.
scalar_op
,
scalar
.
basic
.
Sqr
)
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
.
scalar_op
,
scalar
.
basic
.
Sqr
)):
):
op
=
node
.
op
op
=
node
.
op
inp
=
node
.
inputs
[
0
]
.
owner
.
inputs
[
0
]
inp
=
node
.
inputs
[
0
]
.
owner
.
inputs
[
0
]
return
[
GpuCAReduceCuda
(
scalar_op
=
op
.
scalar_op
,
return
[
GpuCAReduceCuda
(
scalar_op
=
op
.
scalar_op
,
...
...
theano/sandbox/gpuarray/subtensor.py
浏览文件 @
1547ecc6
...
@@ -21,6 +21,8 @@ from .comp import NVCC_compiler
...
@@ -21,6 +21,8 @@ from .comp import NVCC_compiler
class
GpuSubtensor
(
HideC
,
Subtensor
):
class
GpuSubtensor
(
HideC
,
Subtensor
):
_f16_ok
=
True
def
make_node
(
self
,
x
,
*
inputs
):
def
make_node
(
self
,
x
,
*
inputs
):
rval
=
tensor
.
Subtensor
.
make_node
(
self
,
x
,
*
inputs
)
rval
=
tensor
.
Subtensor
.
make_node
(
self
,
x
,
*
inputs
)
otype
=
GpuArrayType
(
dtype
=
rval
.
outputs
[
0
]
.
type
.
dtype
,
otype
=
GpuArrayType
(
dtype
=
rval
.
outputs
[
0
]
.
type
.
dtype
,
...
...
theano/sandbox/gpuarray/tests/test_basic_ops.py
浏览文件 @
1547ecc6
...
@@ -436,7 +436,7 @@ def test_gpueye():
...
@@ -436,7 +436,7 @@ def test_gpueye():
assert
any
([
isinstance
(
node
.
op
,
GpuEye
)
assert
any
([
isinstance
(
node
.
op
,
GpuEye
)
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
for
dtype
in
[
'float32'
,
'int32'
]:
for
dtype
in
[
'float32'
,
'int32'
,
'float16'
]:
yield
check
,
dtype
,
3
yield
check
,
dtype
,
3
# M != N, k = 0
# M != N, k = 0
yield
check
,
dtype
,
3
,
5
yield
check
,
dtype
,
3
,
5
...
...
theano/sandbox/gpuarray/type.py
浏览文件 @
1547ecc6
...
@@ -185,6 +185,7 @@ class GpuArrayType(Type):
...
@@ -185,6 +185,7 @@ class GpuArrayType(Type):
# complex64, etc.
# complex64, etc.
try
:
try
:
return
{
return
{
'float16'
:
(
float
,
'npy_float16'
,
'NPY_FLOAT16'
),
'float32'
:
(
float
,
'npy_float32'
,
'NPY_FLOAT32'
),
'float32'
:
(
float
,
'npy_float32'
,
'NPY_FLOAT32'
),
'float64'
:
(
float
,
'npy_float64'
,
'NPY_FLOAT64'
),
'float64'
:
(
float
,
'npy_float64'
,
'NPY_FLOAT64'
),
'uint8'
:
(
int
,
'npy_uint8'
,
'NPY_UINT8'
),
'uint8'
:
(
int
,
'npy_uint8'
,
'NPY_UINT8'
),
...
@@ -309,7 +310,11 @@ class GpuArrayConstant(_operators, Constant):
...
@@ -309,7 +310,11 @@ class GpuArrayConstant(_operators, Constant):
def
__str__
(
self
):
def
__str__
(
self
):
if
self
.
name
is
not
None
:
if
self
.
name
is
not
None
:
return
self
.
name
return
self
.
name
return
"GpuArrayConstant{
%
s}"
%
numpy
.
asarray
(
self
.
data
)
try
:
np_data
=
numpy
.
asarray
(
self
.
data
)
except
gpuarray
.
GpuArrayException
:
np_data
=
self
.
data
return
"GpuArrayConstant{
%
s}"
%
np_data
GpuArrayType
.
Constant
=
GpuArrayConstant
GpuArrayType
.
Constant
=
GpuArrayConstant
...
...
theano/sandbox/test_rng_mrg.py
浏览文件 @
1547ecc6
...
@@ -613,18 +613,15 @@ def test_binomial():
...
@@ -613,18 +613,15 @@ def test_binomial():
# test empty size (scalar)
# test empty size (scalar)
((),
(),
[],
[]),
((),
(),
[],
[]),
]:
]:
yield
(
t_binomial
,
mean
,
size
,
const_size
,
var_input
,
input
,
steps
,
rtol
)
# print ''
# print 'ON CPU with size=(%s) and mean(%d):' % (str(size), mean)
def
t_binomial
(
mean
,
size
,
const_size
,
var_input
,
input
,
steps
,
rtol
):
R
=
MRG_RandomStreams
(
234
,
use_cuda
=
False
)
R
=
MRG_RandomStreams
(
234
,
use_cuda
=
False
)
# Note: we specify `nstreams` to avoid a warning.
u
=
R
.
binomial
(
size
=
size
,
p
=
mean
)
u
=
R
.
binomial
(
size
=
size
,
p
=
mean
,
nstreams
=
rng_mrg
.
guess_n_streams
(
size
,
warn
=
False
))
f
=
theano
.
function
(
var_input
,
u
,
mode
=
mode
)
f
=
theano
.
function
(
var_input
,
u
,
mode
=
mode
)
# theano.printing.debugprint(f)
out
=
f
(
*
input
)
out
=
f
(
*
input
)
# print 'random?[:10]\n', out[0, 0:10]
# print 'random?[-1,-10:]\n', out[-1, -10:]
# Increase the number of steps if sizes implies only a few samples
# Increase the number of steps if sizes implies only a few samples
if
numpy
.
prod
(
const_size
)
<
10
:
if
numpy
.
prod
(
const_size
)
<
10
:
...
@@ -636,30 +633,21 @@ def test_binomial():
...
@@ -636,30 +633,21 @@ def test_binomial():
target_avg
=
mean
,
mean_rtol
=
rtol
)
target_avg
=
mean
,
mean_rtol
=
rtol
)
if
mode
!=
'FAST_COMPILE'
and
cuda_available
:
if
mode
!=
'FAST_COMPILE'
and
cuda_available
:
# print ''
# print 'ON GPU with size=(%s) and mean(%d):' % (str(size), mean)
R
=
MRG_RandomStreams
(
234
,
use_cuda
=
True
)
R
=
MRG_RandomStreams
(
234
,
use_cuda
=
True
)
u
=
R
.
binomial
(
size
=
size
,
p
=
mean
,
dtype
=
'float32'
,
u
=
R
.
binomial
(
size
=
size
,
p
=
mean
,
dtype
=
'float32'
)
nstreams
=
rng_mrg
.
guess_n_streams
(
size
,
warn
=
False
))
# well, it's really that this test w GPU doesn't make sense otw
# well, it's really that this test w GPU doesn't make sense otw
assert
u
.
dtype
==
'float32'
assert
u
.
dtype
==
'float32'
f
=
theano
.
function
(
var_input
,
theano
.
Out
(
f
=
theano
.
function
(
var_input
,
theano
.
Out
(
theano
.
sandbox
.
cuda
.
basic_ops
.
gpu_from_host
(
u
),
theano
.
sandbox
.
cuda
.
basic_ops
.
gpu_from_host
(
u
),
borrow
=
True
),
mode
=
mode_with_gpu
)
borrow
=
True
),
mode
=
mode_with_gpu
)
# theano.printing.debugprint(f)
gpu_out
=
numpy
.
asarray
(
f
(
*
input
))
gpu_out
=
numpy
.
asarray
(
f
(
*
input
))
# print 'random?[:10]\n', gpu_out[0, 0:10]
# print 'random?[-1,-10:]\n', gpu_out[-1, -10:]
basictest
(
f
,
steps_
,
const_size
,
prefix
=
'mrg gpu'
,
basictest
(
f
,
steps_
,
const_size
,
prefix
=
'mrg gpu'
,
inputs
=
input
,
allow_01
=
True
,
inputs
=
input
,
allow_01
=
True
,
target_avg
=
mean
,
mean_rtol
=
rtol
)
target_avg
=
mean
,
mean_rtol
=
rtol
)
numpy
.
testing
.
assert_array_almost_equal
(
out
,
gpu_out
,
numpy
.
testing
.
assert_array_almost_equal
(
out
,
gpu_out
,
decimal
=
6
)
decimal
=
6
)
# print ''
# print 'ON CPU w NUMPY with size=(%s) and mean(%d):' % (str(size),
# mean)
RR
=
theano
.
tensor
.
shared_randomstreams
.
RandomStreams
(
234
)
RR
=
theano
.
tensor
.
shared_randomstreams
.
RandomStreams
(
234
)
uu
=
RR
.
binomial
(
size
=
size
,
p
=
mean
)
uu
=
RR
.
binomial
(
size
=
size
,
p
=
mean
)
...
...
theano/scalar/basic.py
浏览文件 @
1547ecc6
...
@@ -50,25 +50,33 @@ class IntegerDivisionError(Exception):
...
@@ -50,25 +50,33 @@ class IntegerDivisionError(Exception):
def
upcast
(
dtype
,
*
dtypes
):
def
upcast
(
dtype
,
*
dtypes
):
# Should we try to keep float32 instead of float64? This is used so that
# This tries to keep data in floatX or lower precision, unless we
# for instance mixing int64 with float32 yields float32 instead of float64.
# explicitely request a higher precision datatype.
# Note that we store this boolean as a one-element list so that it can be
# modified within `make_array`.
keep_float32
=
[(
config
.
cast_policy
==
'numpy+floatX'
and
keep_float32
=
[(
config
.
cast_policy
==
'numpy+floatX'
and
config
.
floatX
==
'float32'
)]
config
.
floatX
==
'float32'
)]
keep_float16
=
[(
config
.
cast_policy
==
'numpy+floatX'
and
config
.
floatX
==
'float16'
)]
def
make_array
(
dt
):
def
make_array
(
dt
):
if
dt
==
'float64'
:
if
dt
==
'float64'
:
# There is an explicit float64 dtype: we cannot keep float32.
# There is an explicit float64 dtype: we cannot keep float32.
keep_float32
[
0
]
=
False
keep_float32
[
0
]
=
False
keep_float16
[
0
]
=
False
if
dt
==
'float32'
:
keep_float16
[
0
]
=
False
return
numpy
.
zeros
((),
dtype
=
dt
)
return
numpy
.
zeros
((),
dtype
=
dt
)
z
=
make_array
(
dtype
)
z
=
make_array
(
dtype
)
for
dt
in
dtypes
:
for
dt
in
dtypes
:
z
=
z
+
make_array
(
dt
=
dt
)
z
=
z
+
make_array
(
dt
=
dt
)
rval
=
str
(
z
.
dtype
)
rval
=
str
(
z
.
dtype
)
if
rval
==
'float64'
and
keep_float32
[
0
]:
if
rval
==
'float64'
:
if
keep_float16
[
0
]:
return
'float16'
if
keep_float32
[
0
]:
return
'float32'
return
'float32'
else
:
elif
rval
==
'float32'
:
if
keep_float16
[
0
]:
return
'float16'
return
rval
return
rval
...
@@ -232,6 +240,7 @@ class Scalar(Type):
...
@@ -232,6 +240,7 @@ class Scalar(Type):
print(dtype, np.zeros(1, dtype=dtype).dtype.num)
print(dtype, np.zeros(1, dtype=dtype).dtype.num)
"""
"""
return
{
# dtype: (py_type, c_type, cls_name)
return
{
# dtype: (py_type, c_type, cls_name)
'float16'
:
(
numpy
.
float16
,
'npy_float16'
,
'Float16'
),
'float32'
:
(
numpy
.
float32
,
'npy_float32'
,
'Float32'
),
'float32'
:
(
numpy
.
float32
,
'npy_float32'
,
'Float32'
),
'float64'
:
(
numpy
.
float64
,
'npy_float64'
,
'Float64'
),
'float64'
:
(
numpy
.
float64
,
'npy_float64'
,
'Float64'
),
'complex128'
:
(
numpy
.
complex128
,
'theano_complex128'
,
'complex128'
:
(
numpy
.
complex128
,
'theano_complex128'
,
...
@@ -501,6 +510,7 @@ uint8 = get_scalar_type('uint8')
...
@@ -501,6 +510,7 @@ uint8 = get_scalar_type('uint8')
uint16
=
get_scalar_type
(
'uint16'
)
uint16
=
get_scalar_type
(
'uint16'
)
uint32
=
get_scalar_type
(
'uint32'
)
uint32
=
get_scalar_type
(
'uint32'
)
uint64
=
get_scalar_type
(
'uint64'
)
uint64
=
get_scalar_type
(
'uint64'
)
float16
=
get_scalar_type
(
'float16'
)
float32
=
get_scalar_type
(
'float32'
)
float32
=
get_scalar_type
(
'float32'
)
float64
=
get_scalar_type
(
'float64'
)
float64
=
get_scalar_type
(
'float64'
)
complex64
=
get_scalar_type
(
'complex64'
)
complex64
=
get_scalar_type
(
'complex64'
)
...
@@ -508,7 +518,7 @@ complex128 = get_scalar_type('complex128')
...
@@ -508,7 +518,7 @@ complex128 = get_scalar_type('complex128')
int_types
=
int8
,
int16
,
int32
,
int64
int_types
=
int8
,
int16
,
int32
,
int64
uint_types
=
uint8
,
uint16
,
uint32
,
uint64
uint_types
=
uint8
,
uint16
,
uint32
,
uint64
float_types
=
float32
,
float64
float_types
=
float
16
,
float
32
,
float64
complex_types
=
complex64
,
complex128
complex_types
=
complex64
,
complex128
discrete_types
=
int_types
+
uint_types
discrete_types
=
int_types
+
uint_types
...
@@ -1995,6 +2005,7 @@ convert_to_uint8 = Cast(uint8, name='convert_to_uint8')
...
@@ -1995,6 +2005,7 @@ convert_to_uint8 = Cast(uint8, name='convert_to_uint8')
convert_to_uint16
=
Cast
(
uint16
,
name
=
'convert_to_uint16'
)
convert_to_uint16
=
Cast
(
uint16
,
name
=
'convert_to_uint16'
)
convert_to_uint32
=
Cast
(
uint32
,
name
=
'convert_to_uint32'
)
convert_to_uint32
=
Cast
(
uint32
,
name
=
'convert_to_uint32'
)
convert_to_uint64
=
Cast
(
uint64
,
name
=
'convert_to_uint64'
)
convert_to_uint64
=
Cast
(
uint64
,
name
=
'convert_to_uint64'
)
convert_to_float16
=
Cast
(
float16
,
name
=
'convert_to_float16'
)
convert_to_float32
=
Cast
(
float32
,
name
=
'convert_to_float32'
)
convert_to_float32
=
Cast
(
float32
,
name
=
'convert_to_float32'
)
convert_to_float64
=
Cast
(
float64
,
name
=
'convert_to_float64'
)
convert_to_float64
=
Cast
(
float64
,
name
=
'convert_to_float64'
)
convert_to_complex64
=
Cast
(
complex64
,
name
=
'convert_to_complex64'
)
convert_to_complex64
=
Cast
(
complex64
,
name
=
'convert_to_complex64'
)
...
@@ -2009,6 +2020,7 @@ _cast_mapping = {
...
@@ -2009,6 +2020,7 @@ _cast_mapping = {
'uint16'
:
convert_to_uint16
,
'uint16'
:
convert_to_uint16
,
'uint32'
:
convert_to_uint32
,
'uint32'
:
convert_to_uint32
,
'uint64'
:
convert_to_uint64
,
'uint64'
:
convert_to_uint64
,
'float16'
:
convert_to_float16
,
'float32'
:
convert_to_float32
,
'float32'
:
convert_to_float32
,
'float64'
:
convert_to_float64
,
'float64'
:
convert_to_float64
,
'complex64'
:
convert_to_complex64
,
'complex64'
:
convert_to_complex64
,
...
@@ -3286,14 +3298,20 @@ class Composite(ScalarOp):
...
@@ -3286,14 +3298,20 @@ class Composite(ScalarOp):
+
zip
(
self
.
fgraph
.
outputs
,
+
zip
(
self
.
fgraph
.
outputs
,
[
"
%%
(o
%
i)s"
%
i
for
i
in
xrange
(
len
(
self
.
fgraph
.
outputs
))]))
[
"
%%
(o
%
i)s"
%
i
for
i
in
xrange
(
len
(
self
.
fgraph
.
outputs
))]))
for
orphan
in
self
.
fgraph
.
variables
:
# fgraph.orphans:
for
var
in
self
.
fgraph
.
variables
:
if
orphan
.
owner
is
None
and
orphan
not
in
self
.
fgraph
.
inputs
:
if
var
.
owner
is
None
:
if
isinstance
(
orphan
,
Constant
):
if
var
not
in
self
.
fgraph
.
inputs
:
subd
[
orphan
]
=
orphan
.
type
.
c_literal
(
orphan
.
data
)
# This is an orphan
if
isinstance
(
var
,
Constant
):
subd
[
var
]
=
var
.
type
.
c_literal
(
var
.
data
)
else
:
else
:
raise
ValueError
(
raise
ValueError
(
"All orphans in the fgraph to Composite must"
"All orphans in the fgraph to Composite must"
" be Constant instances."
)
" be Constant instances."
)
elif
(
any
(
i
.
dtype
==
'float16'
for
i
in
var
.
owner
.
inputs
)
or
any
(
o
.
dtype
==
'float16'
for
o
in
var
.
owner
.
outputs
)):
# flag for elemwise ops to check.
self
.
inner_float16
=
True
_c_code
=
"{
\n
"
_c_code
=
"{
\n
"
self
.
nodenames
=
[
"
%(nodename)
s_"
+
(
'subnode
%
i'
%
j
)
self
.
nodenames
=
[
"
%(nodename)
s_"
+
(
'subnode
%
i'
%
j
)
...
...
theano/sparse/tests/test_basic.py
浏览文件 @
1547ecc6
...
@@ -2370,6 +2370,9 @@ class CastTester(utt.InferShapeTester):
...
@@ -2370,6 +2370,9 @@ class CastTester(utt.InferShapeTester):
for
format
in
sparse
.
sparse_formats
:
for
format
in
sparse
.
sparse_formats
:
for
i_dtype
in
sparse
.
float_dtypes
:
for
i_dtype
in
sparse
.
float_dtypes
:
for
o_dtype
in
tensor
.
float_dtypes
:
for
o_dtype
in
tensor
.
float_dtypes
:
if
o_dtype
==
'float16'
:
# Don't test float16 output.
continue
_
,
data
=
sparse_random_inputs
(
_
,
data
=
sparse_random_inputs
(
format
,
format
,
shape
=
(
4
,
7
),
shape
=
(
4
,
7
),
...
...
theano/tensor/basic.py
浏览文件 @
1547ecc6
...
@@ -252,10 +252,10 @@ class NumpyAutocaster(object):
...
@@ -252,10 +252,10 @@ class NumpyAutocaster(object):
return
numpy
.
asarray
(
x
)
return
numpy
.
asarray
(
x
)
elif
config
.
cast_policy
==
'numpy+floatX'
:
elif
config
.
cast_policy
==
'numpy+floatX'
:
rval
=
numpy
.
asarray
(
x
)
rval
=
numpy
.
asarray
(
x
)
if
((
rval
.
dtype
==
'float64'
and
# numpy wants float64
if
((
not
hasattr
(
x
,
'dtype'
)
and
config
.
floatX
==
'float32'
and
# but we prefer float32
rval
.
dtype
in
(
'float64'
,
'float32'
)
and
not
hasattr
(
x
,
'dtype'
))):
# and `x` was not typed
rval
.
dtype
!=
config
.
floatX
)):
rval
=
theano
.
_asarray
(
rval
,
dtype
=
'float32'
)
rval
=
theano
.
_asarray
(
rval
,
dtype
=
config
.
floatX
)
return
rval
return
rval
# The following is the original code, corresponding to the 'custom'
# The following is the original code, corresponding to the 'custom'
...
@@ -278,11 +278,14 @@ class NumpyAutocaster(object):
...
@@ -278,11 +278,14 @@ class NumpyAutocaster(object):
# recall: float is numpy.float
# recall: float is numpy.float
if
((
isinstance
(
x
,
float
)
and
if
((
isinstance
(
x
,
float
)
and
config
.
floatX
in
self
.
dtypes
and
config
.
floatX
in
self
.
dtypes
and
config
.
floatX
==
'float32'
)):
config
.
floatX
!=
'float64'
)):
return
theano
.
_asarray
(
x
,
dtype
=
config
.
floatX
)
return
theano
.
_asarray
(
x
,
dtype
=
'float32'
)
# Don't autocast to float16 unless config.floatX is float16
try_dtypes
=
[
d
for
d
in
self
.
dtypes
if
config
.
floatX
==
'float16'
or
d
!=
'float16'
]
for
dtype
in
self
.
dtypes
:
for
dtype
in
try_
dtypes
:
x_
=
theano
.
_asarray
(
x
,
dtype
=
dtype
)
x_
=
theano
.
_asarray
(
x
,
dtype
=
dtype
)
if
numpy
.
all
(
x
==
x_
):
if
numpy
.
all
(
x
==
x_
):
break
break
...
@@ -290,7 +293,7 @@ class NumpyAutocaster(object):
...
@@ -290,7 +293,7 @@ class NumpyAutocaster(object):
return
x_
return
x_
autocast_int
=
NumpyAutocaster
((
'int8'
,
'int16'
,
'int32'
,
'int64'
))
autocast_int
=
NumpyAutocaster
((
'int8'
,
'int16'
,
'int32'
,
'int64'
))
autocast_float
=
NumpyAutocaster
((
'float32'
,
'float64'
))
autocast_float
=
NumpyAutocaster
((
'float
16'
,
'float
32'
,
'float64'
))
# autocast_float dtypes might be manipulated in tensor.__init__
# autocast_float dtypes might be manipulated in tensor.__init__
...
@@ -313,7 +316,7 @@ class autocast_float_as(object):
...
@@ -313,7 +316,7 @@ class autocast_float_as(object):
If `config.cast_policy` is not 'custom', an exception is raised.
If `config.cast_policy` is not 'custom', an exception is raised.
For example:
For example:
>>> with autocast_float_as('float32')
as _dummy
:
>>> with autocast_float_as('float32'):
... assert (fvector() + 1.1).dtype == 'float32' # temporary downcasting
... assert (fvector() + 1.1).dtype == 'float32' # temporary downcasting
>>> assert (fvector() + 1.1).dtype == 'float64' # back to default behaviour
>>> assert (fvector() + 1.1).dtype == 'float64' # back to default behaviour
...
@@ -1137,6 +1140,10 @@ _convert_to_uint64 = _conversion(
...
@@ -1137,6 +1140,10 @@ _convert_to_uint64 = _conversion(
elemwise
.
Elemwise
(
scal
.
convert_to_uint64
),
'uint64'
)
elemwise
.
Elemwise
(
scal
.
convert_to_uint64
),
'uint64'
)
"""Cast to unsigned 64-bit integer"""
"""Cast to unsigned 64-bit integer"""
_convert_to_float16
=
_conversion
(
elemwise
.
Elemwise
(
scal
.
convert_to_float16
),
'float16'
)
"""Cast to half-precision floating point"""
_convert_to_float32
=
_conversion
(
_convert_to_float32
=
_conversion
(
elemwise
.
Elemwise
(
scal
.
convert_to_float32
),
'float32'
)
elemwise
.
Elemwise
(
scal
.
convert_to_float32
),
'float32'
)
"""Cast to single-precision floating point"""
"""Cast to single-precision floating point"""
...
@@ -1162,6 +1169,7 @@ _cast_mapping = {
...
@@ -1162,6 +1169,7 @@ _cast_mapping = {
'uint16'
:
_convert_to_uint16
,
'uint16'
:
_convert_to_uint16
,
'uint32'
:
_convert_to_uint32
,
'uint32'
:
_convert_to_uint32
,
'uint64'
:
_convert_to_uint64
,
'uint64'
:
_convert_to_uint64
,
'float16'
:
_convert_to_float16
,
'float32'
:
_convert_to_float32
,
'float32'
:
_convert_to_float32
,
'float64'
:
_convert_to_float64
,
'float64'
:
_convert_to_float64
,
'complex64'
:
_convert_to_complex64
,
'complex64'
:
_convert_to_complex64
,
...
@@ -2757,9 +2765,13 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
...
@@ -2757,9 +2765,13 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
# sum() will complain if it is not suitable.
# sum() will complain if it is not suitable.
sum_dtype
=
dtype
sum_dtype
=
dtype
else
:
else
:
# Let sum() infer the appropriate dtype.
sum_dtype
=
None
sum_dtype
=
None
# float16 overflows way too fast for sum
if
((
sum_dtype
==
'float16'
or
input
.
dtype
==
'float16'
)
and
acc_dtype
!=
'float16'
):
sum_dtype
==
'float32'
s
=
sum
(
input
,
axis
=
axis
,
dtype
=
sum_dtype
,
keepdims
=
keepdims
,
s
=
sum
(
input
,
axis
=
axis
,
dtype
=
sum_dtype
,
keepdims
=
keepdims
,
acc_dtype
=
acc_dtype
)
acc_dtype
=
acc_dtype
)
shp
=
shape
(
input
)
shp
=
shape
(
input
)
...
@@ -2767,7 +2779,7 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
...
@@ -2767,7 +2779,7 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
# Cast shp into a float type
# Cast shp into a float type
# TODO Once we have a consistent casting policy, we could simply
# TODO Once we have a consistent casting policy, we could simply
# use true_div.
# use true_div.
if
s
.
dtype
in
(
'float32'
,
'complex64'
):
if
s
.
dtype
in
(
'float
16'
,
'float
32'
,
'complex64'
):
shp
=
cast
(
shp
,
'float32'
)
shp
=
cast
(
shp
,
'float32'
)
else
:
else
:
shp
=
cast
(
shp
,
'float64'
)
shp
=
cast
(
shp
,
'float64'
)
...
@@ -2785,6 +2797,9 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
...
@@ -2785,6 +2797,9 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
for
i
in
axis
:
for
i
in
axis
:
s
=
true_div
(
s
,
shp
[
i
])
s
=
true_div
(
s
,
shp
[
i
])
if
dtype
==
'float16'
or
(
dtype
is
None
and
input
.
dtype
==
'float16'
):
s
=
cast
(
s
,
'float16'
)
return
s
return
s
...
...
theano/tensor/blas.py
浏览文件 @
1547ecc6
...
@@ -417,7 +417,8 @@ class Gemv(Op):
...
@@ -417,7 +417,8 @@ class Gemv(Op):
def
perform
(
self
,
node
,
inputs
,
out_storage
):
def
perform
(
self
,
node
,
inputs
,
out_storage
):
y
,
alpha
,
A
,
x
,
beta
=
inputs
y
,
alpha
,
A
,
x
,
beta
=
inputs
if
have_fblas
and
y
.
shape
[
0
]
!=
0
and
x
.
shape
[
0
]
!=
0
:
if
(
have_fblas
and
y
.
shape
[
0
]
!=
0
and
x
.
shape
[
0
]
!=
0
and
y
.
dtype
in
_blas_gemv_fns
):
gemv
=
_blas_gemv_fns
[
y
.
dtype
]
gemv
=
_blas_gemv_fns
[
y
.
dtype
]
if
(
A
.
shape
[
0
]
!=
y
.
shape
[
0
]
or
A
.
shape
[
1
]
!=
x
.
shape
[
0
]):
if
(
A
.
shape
[
0
]
!=
y
.
shape
[
0
]
or
A
.
shape
[
1
]
!=
x
.
shape
[
0
]):
...
@@ -1727,7 +1728,7 @@ def local_dot_to_dot22(node):
...
@@ -1727,7 +1728,7 @@ def local_dot_to_dot22(node):
x
,
y
,
x
.
type
,
y
.
type
)
x
,
y
,
x
.
type
,
y
.
type
)
return
return
if
y
.
type
.
dtype
.
startswith
(
'float'
)
or
y
.
type
.
dtype
.
startswith
(
'complex'
)
:
if
y
.
type
.
dtype
in
[
'float32'
,
'float64'
,
'complex64'
,
'complex128'
]
:
if
x
.
ndim
==
2
and
y
.
ndim
==
2
:
if
x
.
ndim
==
2
and
y
.
ndim
==
2
:
# print "local_dot_to_dot22: MM"
# print "local_dot_to_dot22: MM"
return
[
_dot22
(
*
node
.
inputs
)]
return
[
_dot22
(
*
node
.
inputs
)]
...
...
theano/tensor/elemwise.py
浏览文件 @
1547ecc6
...
@@ -95,6 +95,7 @@ class DimShuffle(Op):
...
@@ -95,6 +95,7 @@ class DimShuffle(Op):
transpose function.
transpose function.
Adding, subtracting dimensions can be done with reshape.
Adding, subtracting dimensions can be done with reshape.
"""
"""
_f16_ok
=
True
check_input
=
False
check_input
=
False
...
@@ -1171,6 +1172,12 @@ class Elemwise(OpenMPOp):
...
@@ -1171,6 +1172,12 @@ class Elemwise(OpenMPOp):
return
decl
,
checks
,
alloc
,
loop
return
decl
,
checks
,
alloc
,
loop
def
c_code
(
self
,
node
,
nodename
,
inames
,
onames
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inames
,
onames
,
sub
):
if
(
any
(
i
.
dtype
==
'float16'
for
i
in
node
.
inputs
)
or
any
(
o
.
dtype
==
'float16'
for
o
in
node
.
outputs
)
or
# This is for Composite
getattr
(
self
.
scalar_op
,
'inner_float16'
,
False
)):
# Disable C code for float16 vars
super
(
Elemwise
,
self
)
.
c_code
(
node
,
nodename
,
inames
,
onames
,
sub
)
code
=
"
\n
"
.
join
(
self
.
_c_all
(
node
,
nodename
,
inames
,
onames
,
sub
))
code
=
"
\n
"
.
join
(
self
.
_c_all
(
node
,
nodename
,
inames
,
onames
,
sub
))
return
code
return
code
...
@@ -1186,7 +1193,7 @@ class Elemwise(OpenMPOp):
...
@@ -1186,7 +1193,7 @@ class Elemwise(OpenMPOp):
return
support_code
return
support_code
def
c_code_cache_version_apply
(
self
,
node
):
def
c_code_cache_version_apply
(
self
,
node
):
version
=
[
1
1
]
# the version corresponding to the c code in this Op
version
=
[
1
2
]
# the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend...
# now we insert versions for the ops on which we depend...
scalar_node
=
Apply
(
self
.
scalar_op
,
scalar_node
=
Apply
(
self
.
scalar_op
,
...
@@ -1806,6 +1813,7 @@ class CAReduceDtype(CAReduce):
...
@@ -1806,6 +1813,7 @@ class CAReduceDtype(CAReduce):
uint8
=
'uint64'
,
uint8
=
'uint64'
,
uint16
=
'uint64'
,
uint16
=
'uint64'
,
uint32
=
'uint64'
,
uint32
=
'uint64'
,
float16
=
'float32'
,
float32
=
'float64'
,
float32
=
'float64'
,
complex64
=
'complex128'
,
complex64
=
'complex128'
,
)
.
get
(
idtype
,
idtype
)
)
.
get
(
idtype
,
idtype
)
...
...
theano/tensor/nnet/nnet.py
浏览文件 @
1547ecc6
...
@@ -1746,7 +1746,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
...
@@ -1746,7 +1746,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
# Check z is zeros_like(log(sm))
# Check z is zeros_like(log(sm))
if
not
_is_const
(
z
,
0
):
if
not
_is_const
(
z
,
0
):
return
return
if
z
.
type
not
in
(
dmatrix
,
fmatrix
):
if
z
.
broadcastable
!=
(
False
,
False
):
if
not
(
vector_softmax
and
z
.
broadcastable
==
(
True
,
False
)):
if
not
(
vector_softmax
and
z
.
broadcastable
==
(
True
,
False
)):
return
return
# here we know that we are incrementing a matrix of zeros
# here we know that we are incrementing a matrix of zeros
...
@@ -1758,14 +1758,15 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
...
@@ -1758,14 +1758,15 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
if
incr
.
ndim
!=
1
or
incr
.
dtype
not
in
tensor
.
float_dtypes
:
if
incr
.
ndim
!=
1
or
incr
.
dtype
not
in
tensor
.
float_dtypes
:
return
return
# here we know that we are incrementing some part of matrix z by a vector
# here we know that we are incrementing some part of
# matrix z by a vector
# unless the user has taken care to mark that the data and
labels have the
# unless the user has taken care to mark that the data and
#
same number of rows, we cannot be sure here that
#
labels have the same number of rows, we cannot be sure
#
len(y) == len(z)
#
here that len(y) == len(z) However, in the common case
#
However, in the common case that these are predictions and labels it is true.
#
that these are predictions and labels it is true. We
#
We leave it to the Op to crash (and the user to complain) if this assumption is
#
leave it to the Op to crash (and the user to complain)
# ever not true.
#
if this assumption is
ever not true.
out_grad
=
-
incr
out_grad
=
-
incr
...
...
theano/tensor/opt.py
浏览文件 @
1547ecc6
...
@@ -1564,6 +1564,7 @@ class Assert(T.Op):
...
@@ -1564,6 +1564,7 @@ class Assert(T.Op):
used in the function computing the graph, but it doesn't have to be
used in the function computing the graph, but it doesn't have to be
returned.
returned.
"""
"""
__props__
=
(
'msg'
,)
view_map
=
{
0
:
[
0
]}
view_map
=
{
0
:
[
0
]}
check_input
=
False
check_input
=
False
...
@@ -1583,24 +1584,18 @@ class Assert(T.Op):
...
@@ -1583,24 +1584,18 @@ class Assert(T.Op):
assert
numpy
.
all
([
c
.
type
.
ndim
==
0
for
c
in
cond
])
assert
numpy
.
all
([
c
.
type
.
ndim
==
0
for
c
in
cond
])
return
gof
.
Apply
(
self
,
[
value
]
+
cond
,
[
value
.
type
()])
return
gof
.
Apply
(
self
,
[
value
]
+
cond
,
[
value
.
type
()])
def
__str__
(
self
):
return
self
.
__class__
.
__name__
def
perform
(
self
,
node
,
inputs
,
out_
):
def
perform
(
self
,
node
,
inputs
,
out_
):
out
,
=
out_
out
,
=
out_
v
=
inputs
[
0
]
v
=
inputs
[
0
]
out
[
0
]
=
v
out
[
0
]
=
v
assert
numpy
.
all
(
inputs
[
1
:]),
self
.
msg
assert
numpy
.
all
(
inputs
[
1
:]),
self
.
msg
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
and
self
.
msg
==
other
.
msg
def
__hash__
(
self
):
return
hash
(
type
(
self
))
^
hash
(
self
.
msg
)
def
grad
(
self
,
input
,
output_gradients
):
def
grad
(
self
,
input
,
output_gradients
):
return
output_gradients
+
[
DisconnectedType
()()]
*
(
len
(
input
)
-
1
)
return
output_gradients
+
[
DisconnectedType
()()]
*
(
len
(
input
)
-
1
)
def
connection_pattern
(
self
,
node
):
return
[[
1
]]
+
[[
0
]]
*
(
len
(
node
.
inputs
)
-
1
)
def
c_code
(
self
,
node
,
name
,
inames
,
onames
,
sub
):
def
c_code
(
self
,
node
,
name
,
inames
,
onames
,
sub
):
value
=
inames
[
0
]
value
=
inames
[
0
]
out
=
onames
[
0
]
out
=
onames
[
0
]
...
...
theano/tensor/tests/test_basic.py
浏览文件 @
1547ecc6
...
@@ -5820,40 +5820,24 @@ def _test_autocast_custom():
...
@@ -5820,40 +5820,24 @@ def _test_autocast_custom():
orig_autocast
=
autocast_float
.
dtypes
orig_autocast
=
autocast_float
.
dtypes
# Test that autocast_float_as sets the autocast dtype correctly
# Test that autocast_float_as sets the autocast dtype correctly
try
:
# ghetto 2.4 version of with
with
autocast_float_as
(
'float32'
):
ac
=
autocast_float_as
(
'float32'
)
ac
.
__enter__
()
assert
autocast_float
.
dtypes
==
(
'float32'
,)
assert
autocast_float
.
dtypes
==
(
'float32'
,)
finally
:
ac
.
__exit__
()
assert
autocast_float
.
dtypes
==
orig_autocast
assert
autocast_float
.
dtypes
==
orig_autocast
try
:
# ghetto 2.4 version of with
ac
=
autocast_float_as
(
'float64'
)
with
autocast_float_as
(
'float64'
):
ac
.
__enter__
()
assert
autocast_float
.
dtypes
==
(
'float64'
,)
assert
autocast_float
.
dtypes
==
(
'float64'
,)
finally
:
ac
.
__exit__
()
assert
autocast_float
.
dtypes
==
orig_autocast
assert
autocast_float
.
dtypes
==
orig_autocast
# Test that we can set it back to something, and nest it
# Test that we can set it back to something, and nest it
try
:
# ghetto 2.4 version of with
with
autocast_float_as
(
'float32'
):
ac
=
autocast_float_as
(
'float32'
)
ac
.
__enter__
()
assert
autocast_float
.
dtypes
==
(
'float32'
,)
assert
autocast_float
.
dtypes
==
(
'float32'
,)
try
:
# ghetto 2.4 version of with
with
autocast_float_as
(
'float64'
):
ac2
=
autocast_float_as
(
'float64'
)
ac2
.
__enter__
()
assert
autocast_float
.
dtypes
==
(
'float64'
,)
assert
autocast_float
.
dtypes
==
(
'float64'
,)
finally
:
ac2
.
__exit__
()
assert
autocast_float
.
dtypes
==
(
'float32'
,)
assert
autocast_float
.
dtypes
==
(
'float32'
,)
finally
:
ac
.
__exit__
()
assert
autocast_float
.
dtypes
==
orig_autocast
assert
autocast_float
.
dtypes
==
orig_autocast
# Test that the autocasting dtype is used correctly in expression-building
# Test that the autocasting dtype is used correctly in expression-building
try
:
# ghetto 2.4 version of with
with
autocast_float_as
(
'float32'
):
ac
=
autocast_float_as
(
'float32'
)
ac
.
__enter__
()
assert
(
dvector
()
+
1.1
)
.
dtype
==
'float64'
assert
(
dvector
()
+
1.1
)
.
dtype
==
'float64'
assert
(
fvector
()
+
1.1
)
.
dtype
==
'float32'
assert
(
fvector
()
+
1.1
)
.
dtype
==
'float32'
assert
(
fvector
()
+
theano
.
_asarray
(
1.1
,
dtype
=
'float64'
))
.
dtype
==
\
assert
(
fvector
()
+
theano
.
_asarray
(
1.1
,
dtype
=
'float64'
))
.
dtype
==
\
...
@@ -5863,13 +5847,9 @@ def _test_autocast_custom():
...
@@ -5863,13 +5847,9 @@ def _test_autocast_custom():
assert
(
dvector
()
+
1
)
.
dtype
==
'float64'
assert
(
dvector
()
+
1
)
.
dtype
==
'float64'
assert
(
fvector
()
+
1
)
.
dtype
==
'float32'
assert
(
fvector
()
+
1
)
.
dtype
==
'float32'
finally
:
ac
.
__exit__
()
# Test that the autocasting dtype is used correctly in expression-building
# Test that the autocasting dtype is used correctly in expression-building
try
:
# ghetto 2.4 version of with
with
autocast_float_as
(
'float64'
):
ac
=
autocast_float_as
(
'float64'
)
ac
.
__enter__
()
assert
(
dvector
()
+
1.1
)
.
dtype
==
'float64'
assert
(
dvector
()
+
1.1
)
.
dtype
==
'float64'
assert
(
fvector
()
+
1.1
)
.
dtype
==
'float64'
assert
(
fvector
()
+
1.1
)
.
dtype
==
'float64'
assert
(
fvector
()
+
1.0
)
.
dtype
==
'float64'
assert
(
fvector
()
+
1.0
)
.
dtype
==
'float64'
...
@@ -5880,13 +5860,9 @@ def _test_autocast_custom():
...
@@ -5880,13 +5860,9 @@ def _test_autocast_custom():
assert
(
dvector
()
+
1
)
.
dtype
==
'float64'
assert
(
dvector
()
+
1
)
.
dtype
==
'float64'
assert
(
fvector
()
+
1
)
.
dtype
==
'float32'
assert
(
fvector
()
+
1
)
.
dtype
==
'float32'
finally
:
ac
.
__exit__
()
# Test that the autocasting dtype is used correctly in expression-building
# Test that the autocasting dtype is used correctly in expression-building
try
:
# ghetto 2.4 version of with
with
autocast_float_as
(
'float32'
,
'float64'
):
ac
=
autocast_float_as
(
'float32'
,
'float64'
)
ac
.
__enter__
()
assert
(
dvector
()
+
1.1
)
.
dtype
==
'float64'
assert
(
dvector
()
+
1.1
)
.
dtype
==
'float64'
assert
(
fvector
()
+
1.1
)
.
dtype
==
theano
.
config
.
floatX
assert
(
fvector
()
+
1.1
)
.
dtype
==
theano
.
config
.
floatX
assert
(
fvector
()
+
1.0
)
.
dtype
==
'float32'
assert
(
fvector
()
+
1.0
)
.
dtype
==
'float32'
...
@@ -5903,14 +5879,8 @@ def _test_autocast_custom():
...
@@ -5903,14 +5879,8 @@ def _test_autocast_custom():
assert
(
ivector
()
+
numpy
.
int8
(
1
))
.
dtype
==
'int32'
assert
(
ivector
()
+
numpy
.
int8
(
1
))
.
dtype
==
'int32'
assert
(
wvector
()
+
numpy
.
int8
(
1
))
.
dtype
==
'int16'
assert
(
wvector
()
+
numpy
.
int8
(
1
))
.
dtype
==
'int16'
assert
(
bvector
()
+
numpy
.
int8
(
1
))
.
dtype
==
'int8'
assert
(
bvector
()
+
numpy
.
int8
(
1
))
.
dtype
==
'int8'
try
:
# ghetto 2.4 version of with
with
autocast_float_as
(
'float64'
):
ac2
=
autocast_float_as
(
'float64'
)
ac2
.
__enter__
()
assert
(
fvector
()
+
1.0
)
.
dtype
==
'float64'
assert
(
fvector
()
+
1.0
)
.
dtype
==
'float64'
finally
:
ac2
.
__exit__
()
finally
:
ac
.
__exit__
()
def
_test_autocast_numpy
():
def
_test_autocast_numpy
():
...
@@ -6036,17 +6006,8 @@ class test_arithmetic_cast(unittest.TestCase):
...
@@ -6036,17 +6006,8 @@ class test_arithmetic_cast(unittest.TestCase):
config
.
int_division
==
'raise'
)
config
.
int_division
==
'raise'
)
# This is the expected behavior.
# This is the expected behavior.
continue
continue
# For numpy we have a problem:
numpy_dtype
=
op
(
numpy_args
[
0
](
a_type
),
# http://projects.scipy.org/numpy/ticket/1827
numpy_args
[
1
](
b_type
))
.
dtype
# As a result we only consider the highest data
# type that numpy may return.
numpy_dtypes
=
[
op
(
numpy_args
[
0
](
a_type
),
numpy_args
[
1
](
b_type
))
.
dtype
,
op
(
numpy_args
[
1
](
b_type
),
numpy_args
[
0
](
a_type
))
.
dtype
]
numpy_dtype
=
theano
.
scalar
.
upcast
(
*
map
(
str
,
numpy_dtypes
))
if
numpy_dtype
==
theano_dtype
:
if
numpy_dtype
==
theano_dtype
:
# Same data type found, all is good!
# Same data type found, all is good!
continue
continue
...
@@ -6078,9 +6039,7 @@ class test_arithmetic_cast(unittest.TestCase):
...
@@ -6078,9 +6039,7 @@ class test_arithmetic_cast(unittest.TestCase):
# Theano upcasted the result array.
# Theano upcasted the result array.
theano_dtype
==
up_type
and
theano_dtype
==
up_type
and
# But Numpy kept its original type.
# But Numpy kept its original type.
# (not an equality because of numpy bug
array_type
==
numpy_dtype
):
# mentioned above).
array_type
in
numpy_dtypes
):
# Then we accept this difference in
# Then we accept this difference in
# behavior.
# behavior.
continue
continue
...
@@ -6092,17 +6051,20 @@ class test_arithmetic_cast(unittest.TestCase):
...
@@ -6092,17 +6051,20 @@ class test_arithmetic_cast(unittest.TestCase):
numpy
.
__version__
.
split
(
'.'
)[:
2
]]
numpy
.
__version__
.
split
(
'.'
)[:
2
]]
if
(
cfg
==
'numpy+floatX'
and
if
(
cfg
==
'numpy+floatX'
and
a_type
==
'complex128'
and
a_type
==
'complex128'
and
b_type
==
'float32'
and
(
b_type
==
'float32'
or
b_type
==
'float16'
)
and
combo
==
(
'scalar'
,
'array'
)
and
combo
==
(
'scalar'
,
'array'
)
and
bool
(
numpy_version
>=
[
1
,
6
])
and
bool
(
numpy_version
>=
[
1
,
6
])
and
theano_dtype
==
'complex128'
and
theano_dtype
==
'complex128'
and
numpy_dtypes
==
[
'complex64'
,
numpy_dtype
==
'complex64'
):
'complex64'
]):
# In numpy 1.6.x adding a
# In numpy 1.6.x adding a complex128 with
# complex128 with a float32 or
# a float32 may result in a complex64. This
# float16 may result in a
# may be a bug (investigation is currently
# complex64. This may be a bug
# in progress), so in the meantime we just
# (investigation is currently in
# mark this test as a known failure.
# progress), so in the meantime we
# just mark this test as a known
# failure.
raise
KnownFailureTest
(
'Known issue with '
raise
KnownFailureTest
(
'Known issue with '
'numpy >= 1.6.x see #761'
)
'numpy >= 1.6.x see #761'
)
...
...
theano/tensor/tests/test_elemwise.py
浏览文件 @
1547ecc6
...
@@ -1024,6 +1024,7 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
...
@@ -1024,6 +1024,7 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
uint8
=
'uint64'
,
uint8
=
'uint64'
,
uint16
=
'uint64'
,
uint16
=
'uint64'
,
uint32
=
'uint64'
,
uint32
=
'uint64'
,
float16
=
'float32'
,
float32
=
'float64'
,
float32
=
'float64'
,
complex64
=
'complex128'
complex64
=
'complex128'
)
.
get
(
dtype
,
dtype
)
)
.
get
(
dtype
,
dtype
)
...
...
theano/tensor/type.py
浏览文件 @
1547ecc6
...
@@ -234,6 +234,7 @@ class TensorType(Type):
...
@@ -234,6 +234,7 @@ class TensorType(Type):
# complex64, etc.
# complex64, etc.
try
:
try
:
return
{
return
{
'float16'
:
(
float
,
'npy_float16'
,
'NPY_FLOAT16'
),
'float32'
:
(
float
,
'npy_float32'
,
'NPY_FLOAT32'
),
'float32'
:
(
float
,
'npy_float32'
,
'NPY_FLOAT32'
),
'float64'
:
(
float
,
'npy_float64'
,
'NPY_FLOAT64'
),
'float64'
:
(
float
,
'npy_float64'
,
'NPY_FLOAT64'
),
'uint8'
:
(
int
,
'npy_uint8'
,
'NPY_UINT8'
),
'uint8'
:
(
int
,
'npy_uint8'
,
'NPY_UINT8'
),
...
...
theano/tests/test_flake8.py
浏览文件 @
1547ecc6
...
@@ -6,6 +6,7 @@ from nose.plugins.skip import SkipTest
...
@@ -6,6 +6,7 @@ from nose.plugins.skip import SkipTest
import
os
import
os
from
fnmatch
import
fnmatch
from
fnmatch
import
fnmatch
import
theano
import
theano
from
theano.compat
import
PY3
try
:
try
:
import
flake8.engine
import
flake8.engine
import
flake8.main
import
flake8.main
...
@@ -227,7 +228,6 @@ whitelist_flake8 = [
...
@@ -227,7 +228,6 @@ whitelist_flake8 = [
"sandbox/gpuarray/elemwise.py"
,
"sandbox/gpuarray/elemwise.py"
,
"sandbox/gpuarray/type.py"
,
"sandbox/gpuarray/type.py"
,
"sandbox/gpuarray/__init__.py"
,
"sandbox/gpuarray/__init__.py"
,
"sandbox/gpuarray/opt.py"
,
"sandbox/gpuarray/blas.py"
,
"sandbox/gpuarray/blas.py"
,
"sandbox/gpuarray/kernel_codegen.py"
,
"sandbox/gpuarray/kernel_codegen.py"
,
"sandbox/gpuarray/conv.py"
,
"sandbox/gpuarray/conv.py"
,
...
@@ -347,6 +347,8 @@ def test_format_flake8():
...
@@ -347,6 +347,8 @@ def test_format_flake8():
"""
"""
if
not
flake8_available
:
if
not
flake8_available
:
raise
SkipTest
(
"flake8 is not installed"
)
raise
SkipTest
(
"flake8 is not installed"
)
if
PY3
:
raise
SkipTest
(
"not testing in python3 since 2to3 ran"
)
total_errors
=
0
total_errors
=
0
for
path
in
list_files
():
for
path
in
list_files
():
rel_path
=
os
.
path
.
relpath
(
path
,
theano
.
__path__
[
0
])
rel_path
=
os
.
path
.
relpath
(
path
,
theano
.
__path__
[
0
])
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论