Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
1547ecc6
提交
1547ecc6
authored
5月 07, 2015
作者:
Frédéric Bastien
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2800 from abergeron/fp16_merge
Fp16 merge
上级
03d0e784
3226ffd2
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
24 个修改的文件
包含
317 行增加
和
297 行删除
+317
-297
cop.txt
doc/extending/cop.txt
+15
-0
ops.py
theano/compile/ops.py
+4
-0
configdefaults.py
theano/configdefaults.py
+8
-3
op.py
theano/gof/op.py
+12
-1
basic_ops.py
theano/sandbox/gpuarray/basic_ops.py
+20
-34
elemwise.py
theano/sandbox/gpuarray/elemwise.py
+0
-0
fp16_help.py
theano/sandbox/gpuarray/fp16_help.py
+19
-0
nnet.py
theano/sandbox/gpuarray/nnet.py
+50
-68
opt.py
theano/sandbox/gpuarray/opt.py
+22
-26
subtensor.py
theano/sandbox/gpuarray/subtensor.py
+2
-0
test_basic_ops.py
theano/sandbox/gpuarray/tests/test_basic_ops.py
+1
-1
type.py
theano/sandbox/gpuarray/type.py
+6
-1
test_rng_mrg.py
theano/sandbox/test_rng_mrg.py
+40
-52
basic.py
theano/scalar/basic.py
+35
-17
test_basic.py
theano/sparse/tests/test_basic.py
+3
-0
basic.py
theano/tensor/basic.py
+26
-11
blas.py
theano/tensor/blas.py
+3
-2
elemwise.py
theano/tensor/elemwise.py
+9
-1
nnet.py
theano/tensor/nnet/nnet.py
+9
-8
opt.py
theano/tensor/opt.py
+4
-9
test_basic.py
theano/tensor/tests/test_basic.py
+24
-62
test_elemwise.py
theano/tensor/tests/test_elemwise.py
+1
-0
type.py
theano/tensor/type.py
+1
-0
test_flake8.py
theano/tests/test_flake8.py
+3
-1
没有找到文件。
doc/extending/cop.txt
浏览文件 @
1547ecc6
...
...
@@ -212,6 +212,21 @@ There are less methods to define for an Op than for a Type:
Op *must* have a `context_type` property with the Type to use
for the context variable.
.. attribute:: _f16_ok
(optional) If this attribute is absent or evaluates to `False`,
C code will be disabled for the op if any of its inputs or
outputs contain float16 data. This check guards against
computing wrong results: there is no hardware float16 type, so
special care must be taken to make sure operations are done
correctly.
If you don't intend to deal with float16 data you can leave
this undefined.
This attribute is internal and may go away at any point during
development if a better solution is found.
The ``name`` argument is currently given an invalid value, so steer
away from it. As was the case with Type, ``sub['fail']`` provides
failure code that you *must* use if you want to raise an exception,
...
...
theano/compile/ops.py
浏览文件 @
1547ecc6
...
...
@@ -215,6 +215,8 @@ class Shape(gof.Op):
@note: Non-differentiable.
"""
_f16_ok
=
True
# Mapping from Type to C code (and version) to use.
# In the C code, the name of the input variable is %(iname)s,
# the output variable is %(oname)s.
...
...
@@ -308,6 +310,8 @@ class Shape_i(gof.Op):
@note: Non-differentiable.
"""
_f16_ok
=
True
# Mapping from Type to C code (and version) to use.
# In the C code, the name of the input variable is %(iname)s,
# the output variable is %(oname)s.
...
...
theano/configdefaults.py
浏览文件 @
1547ecc6
...
...
@@ -18,12 +18,17 @@ def floatX_convert(s):
return
"float32"
elif
s
==
"64"
:
return
"float64"
elif
s
==
"16"
:
return
"float16"
else
:
return
s
AddConfigVar
(
'floatX'
,
"Default floating-point precision for python casts"
,
EnumStr
(
'float64'
,
'float32'
,
convert
=
floatX_convert
,),
"Default floating-point precision for python casts.
\n
"
"
\n
"
"Note: float16 support is experimental, use at your own risk."
,
EnumStr
(
'float64'
,
'float32'
,
'float16'
,
convert
=
floatX_convert
,),
)
AddConfigVar
(
'warn_float64'
,
...
...
@@ -39,7 +44,7 @@ AddConfigVar('cast_policy',
EnumStr
(
'custom'
,
'numpy+floatX'
,
# The 'numpy' policy was originally planned to provide a
# smooth transition from numpy. It was meant to behave the
# same asnumpy+floatX, but keeping float64 when numpy
# same as
numpy+floatX, but keeping float64 when numpy
# would. However the current implementation of some cast
# mechanisms makes it a bit more complex to add than what
# was expected, so it is currently not available.
...
...
theano/gof/op.py
浏览文件 @
1547ecc6
...
...
@@ -726,9 +726,20 @@ class Op(utils.object2, PureOp, CLinkerOp):
node_output_storage
=
[
storage_map
[
r
]
for
r
in
node
.
outputs
]
node_input_compute
=
[
compute_map
[
r
]
for
r
in
node
.
inputs
]
node_output_compute
=
[
compute_map
[
r
]
for
r
in
node
.
outputs
]
#logger.debug('Compiling node %i of graph' % node_idx)
if
self
.
_op_use_c_code
:
try
:
# float16 get special treatment since running
# unprepared C code will get bad results.
if
not
getattr
(
self
,
'_f16_ok'
,
False
):
def
is_f16
(
t
):
return
getattr
(
t
,
'dtype'
,
''
)
==
'float16'
if
(
any
(
is_f16
(
i
.
type
)
for
i
in
node
.
inputs
)
or
any
(
is_f16
(
o
.
type
)
for
o
in
node
.
outputs
)):
print
(
"Disabling C code for
%
s due to unsupported "
"float16"
%
(
self
,))
raise
NotImplementedError
(
"float16"
)
e
=
FunctionGraph
(
node
.
inputs
,
node
.
outputs
)
e_no_recycling
=
[
new_o
...
...
theano/sandbox/gpuarray/basic_ops.py
浏览文件 @
1547ecc6
...
...
@@ -20,6 +20,7 @@ except ImportError:
pass
from
.type
import
GpuArrayType
from
.fp16_help
import
write_w
def
as_gpuarray_variable
(
x
):
...
...
@@ -186,11 +187,8 @@ class GpuKernelBase(object):
class
HostFromGpu
(
Op
):
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
__props__
=
()
_f16_ok
=
True
def
__str__
(
self
):
return
'HostFromGpu(gpuarray)'
...
...
@@ -269,11 +267,8 @@ host_from_gpu = HostFromGpu()
class
GpuFromHost
(
Op
):
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
__props__
=
()
_f16_ok
=
True
def
__str__
(
self
):
return
'GpuFromHost(gpuarray)'
...
...
@@ -573,18 +568,15 @@ cuda_from_gpu = CudaFromGpu()
class
GpuAlloc
(
HideC
,
Alloc
):
__props__
=
(
'memset_0'
,)
_f16_ok
=
True
def
__init__
(
self
,
memset_0
=
False
):
"""memset_0 is only an optimized version. True, it mean the
value is always 0, so the c code call memset as it is faster.
"""
self
.
memset_0
=
memset_0
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
and
self
.
memset_0
==
other
.
memset_0
def
__hash__
(
self
):
return
hash
(
type
(
self
))
^
hash
(
self
.
memset_0
)
def
__str__
(
self
):
# Hide the memset parameter when not used to prevent confusion.
if
self
.
memset_0
:
...
...
@@ -728,25 +720,17 @@ class GpuContiguous(Op):
Always return a c contiguous output. Copy the input only if it is
not already c contiguous.
"""
__props__
=
()
view_map
=
{
0
:
[
0
]}
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
_f16_ok
=
True
def
grad
(
self
,
inputs
,
dout
):
x
,
=
inputs
dout
,
=
dout
dout
=
as_gpuarray_variable
(
dout
)
return
[
dout
]
def
__str__
(
self
):
return
self
.
__class__
.
__name__
def
make_node
(
self
,
input
):
input
=
as_gpuarray_variable
(
input
)
return
Apply
(
self
,
[
input
],
[
input
.
type
()])
...
...
@@ -794,6 +778,8 @@ class GpuReshape(HideC, tensor.Reshape):
"""
Implement Reshape on the gpu.
"""
_f16_ok
=
True
# __hash__, __eq__, __str__ come from tensor.Reshape
def
make_node
(
self
,
x
,
shp
):
x
=
as_gpuarray_variable
(
x
)
...
...
@@ -831,6 +817,8 @@ class GpuReshape(HideC, tensor.Reshape):
class
GpuJoin
(
HideC
,
Join
):
_f16_ok
=
True
def
make_node
(
self
,
axis
,
*
tensors
):
node
=
Join
.
make_node
(
self
,
axis
,
*
tensors
)
...
...
@@ -888,6 +876,9 @@ class GpuSplit(HideC, Split):
class
GpuEye
(
GpuKernelBase
,
Op
):
__props__
=
(
'dtype'
,)
_f16_ok
=
True
def
__init__
(
self
,
dtype
=
None
):
if
dtype
is
None
:
dtype
=
config
.
floatX
...
...
@@ -915,20 +906,15 @@ class GpuEye(GpuKernelBase, Op):
return
[
grad_undefined
(
self
,
i
,
inp
[
i
])
for
i
in
xrange
(
3
)]
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
and
self
.
dtype
==
other
.
dtype
def
__hash__
(
self
):
return
hash
(
self
.
dtype
)
^
hash
(
type
(
self
))
def
gpu_kernels
(
self
,
node
,
name
):
code
=
"""
KERNEL void k(GLOBAL_MEM
%(ctype)
s *a, ga_size n, ga_size m) {
ga_size nb = n < m ? n : m;
for (ga_size i = LID_0; i < nb; i += LDIM_0) {
a[i*m + i] =
1
;
a[i*m + i] =
%(write_a)
s(1)
;
}
}"""
%
dict
(
ctype
=
pygpu
.
gpuarray
.
dtype_to_ctype
(
self
.
dtype
),
name
=
name
)
}"""
%
dict
(
ctype
=
pygpu
.
gpuarray
.
dtype_to_ctype
(
self
.
dtype
),
name
=
name
,
write_a
=
write_w
(
self
.
dtype
))
return
[
Kernel
(
code
=
code
,
name
=
"k"
,
params
=
[
gpuarray
.
GpuArray
,
gpuarray
.
SIZE
,
gpuarray
.
SIZE
],
...
...
theano/sandbox/gpuarray/elemwise.py
浏览文件 @
1547ecc6
差异被折叠。
点击展开。
theano/sandbox/gpuarray/fp16_help.py
0 → 100644
浏览文件 @
1547ecc6
def work_dtype(dtype):
    """Return the dtype in which values of `dtype` should be computed.

    'float16' maps to 'float32'; every other dtype is returned unchanged.
    """
    if dtype == 'float16':
        return 'float32'
    return dtype
def load_w(dtype):
    """Return the name of the function to wrap around a load of `dtype`.

    The result is meant to be substituted into kernel source templates
    as ``%(load_x)s(x[0])``: for 'float16' it is '__half2float', and for
    any other dtype it is the empty string, which leaves a plain
    parenthesised expression.
    """
    if dtype == 'float16':
        return '__half2float'
    return ''
def write_w(dtype):
    """Return the name of the function to wrap around a store to `dtype`.

    The result is meant to be substituted into kernel source templates
    as ``%(write_x)s(value)``: for 'float16' it is '__float2half_rn',
    and for any other dtype it is the empty string, which leaves a plain
    parenthesised expression.
    """
    if dtype == 'float16':
        return '__float2half_rn'
    return ''
theano/sandbox/gpuarray/nnet.py
浏览文件 @
1547ecc6
...
...
@@ -16,6 +16,7 @@ from .type import GpuArrayType
from
.kernel_codegen
import
(
nvcc_kernel
,
inline_softmax
,
inline_softmax_fixed_shared
)
from
.fp16_help
import
work_dtype
,
load_w
,
write_w
class
GpuCrossentropySoftmaxArgmax1HotWithBias
(
Op
):
...
...
@@ -24,15 +25,8 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
"""
nin
=
3
nout
=
3
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
__str__
(
self
):
return
self
.
__class__
.
__name__
__props__
=
()
_f16_ok
=
True
def
make_node
(
self
,
x
,
b
,
y_idx
):
# N.B. won't work when we don't cast y_idx to float anymore
...
...
@@ -52,6 +46,12 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
dtype_x
=
node
.
inputs
[
0
]
.
dtype
dtype_b
=
node
.
inputs
[
1
]
.
dtype
dtype_y_idx
=
node
.
inputs
[
2
]
.
dtype
work_x
=
work_dtype
(
dtype_x
)
work_b
=
work_dtype
(
dtype_b
)
load_x
=
load_w
(
dtype_x
)
load_b
=
load_w
(
dtype_b
)
write_x
=
write_w
(
dtype_x
)
write_b
=
write_w
(
dtype_b
)
return
"""
__global__ void k_xent_sm_1hot_bias_
%(nodename)
s(int M, int N,
const npy_
%(dtype_x)
s* x_data, int xs0, int xs1,
...
...
@@ -67,12 +67,13 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
const npy_
%(dtype_y_idx)
s y_idx = y_idx_data[row * y_idxs0];
npy_
%(dtype_x)
s* sm = sm_data + sms0 * row;
npy_
%(
dtype
_x)
s sum = 0.0;
npy_
%(
work
_x)
s sum = 0.0;
int row_max_j = 0;
npy_
%(
dtype_x)
s row_max = x[0] + b[0]
;
npy_
%(
work_x)
s row_max =
%(load_x)
s(x[0]) +
%(load_b)
s(b[0])
;
for (int j = 1; j < N; ++j)
{
npy_
%(dtype_x)
s row_ij = x[j*xs1] + b[j*bs0];
npy_
%(work_x)
s row_ij =
%(load_x)
s(x[j*xs1]) +
%(load_b)
s(b[j*bs0]);
//todo: store to shared memory
row_max_j = (row_ij > row_max) ? j : row_max_j;
row_max = (row_ij > row_max) ? row_ij : row_max;
...
...
@@ -80,27 +81,30 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
//compute the exp
for (int j = 0; j < N; ++j)
{
npy_
%(dtype_x)
s row_ij = x[j*xs1] + b[j*bs0];
npy_
%(dtype_x)
s sm_ij = exp(row_ij - row_max);
npy_
%(work_x)
s row_ij =
%(load_x)
s(x[j*xs1]) +
%(load_b)
s(b[j*bs0]);
npy_
%(work_x)
s sm_ij = exp(row_ij - row_max);
sum += sm_ij;
sm[j * sms1] =
sm_ij
;
sm[j * sms1] =
%(write_x)
s(sm_ij)
;
}
npy_
%(
dtype
_x)
s sum_inv = 1.0 / sum;
npy_
%(
work
_x)
s sum_inv = 1.0 / sum;
for (int j = 0; j < N; ++j)
{
sm[j * sms1] *= sum_inv;
npy_
%(work_x)
s __tmp =
%(load_x)
s(sm[j * sms1]);
__tmp *= sum_inv;
sm[j * sms1] =
%(write_x)
s(__tmp);
}
if ((y_idx >= N) || (y_idx < 0))
{
//TODO: set raise an error bit in a global var?
nll_data[row*nlls0] =
0.0
; // raise some suspicion at least...
nll_data[row*nlls0] =
%(write_x)
s(0.0)
; // raise some suspicion at least...
}
else
{
nll_data[row*nlls0] =
- x[y_idx*xs1]
-
b[y_idx*bs0]
nll_data[row*nlls0] =
%(write_x)
s(-
%(load_x)
s(x[y_idx*xs1])
-
%(load_b)
s(b[y_idx*bs0])
+ row_max
+ log(sum);
+ log(sum)
)
;
}
am_data[row*ams0] = row_max_j;
}
...
...
@@ -259,8 +263,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
return
sio
.
getvalue
()
def
c_code_cache_version
(
self
):
# return ()
return
(
5
,)
return
(
6
,)
def
c_compiler
(
self
):
return
NVCC_compiler
...
...
@@ -272,21 +275,13 @@ gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1Ho
class
GpuCrossentropySoftmax1HotWithBiasDx
(
Op
):
"""
Implement CrossentropySoftmax1HotWithBiasDx on the gpu.
Gradient wrt x of the CrossentropySoftmax1Hot Op
"""
nin
=
3
nout
=
1
"""Gradient wrt x of the CrossentropySoftmax1Hot Op"""
def
__init__
(
self
,
**
kwargs
):
Op
.
__init__
(
self
,
**
kwargs
)
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
__str__
(
self
):
return
self
.
__class__
.
__name__
__props__
=
()
_f16_ok
=
True
def
make_node
(
self
,
dnll
,
sm
,
y_idx
):
dnll
=
as_gpuarray_variable
(
dnll
)
...
...
@@ -295,8 +290,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
return
Apply
(
self
,
[
dnll
,
sm
,
y_idx
],
[
sm
.
type
()])
def
c_code_cache_version
(
self
):
# return ()
return
(
8
,)
return
(
9
,)
def
c_headers
(
self
):
return
[
'cuda.h'
,
'<gpuarray/extension.h>'
,
'<numpy_compat.h>'
]
...
...
@@ -421,6 +415,10 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
dtype_sm
=
node
.
inputs
[
1
]
.
dtype
dtype_y_idx
=
node
.
inputs
[
2
]
.
dtype
dtype_dx
=
node
.
outputs
[
0
]
.
dtype
work_dnll
=
work_dtype
(
dtype_dnll
)
load_dnll
=
load_w
(
dtype_dnll
)
load_sm
=
load_w
(
dtype_sm
)
write_dx
=
write_w
(
dtype_dx
)
return
"""
__global__ void kCrossEntropySoftmax1HotWithBiasDx_
%(nodename)
s(
int N, int K,
...
...
@@ -431,7 +429,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
{
for (int i = blockIdx.x; i < N; i += gridDim.x)
{
npy_
%(
dtype_dnll)
s dnll_i = dnll[i * dnll_s0]
;
npy_
%(
work_dnll)
s dnll_i =
%(load_dnll)
s(dnll[i * dnll_s0])
;
npy_
%(dtype_y_idx)
s y_i = y_idx[i * y_idx_s0];
for (int j = threadIdx.x; j < K; j += blockDim.x)
...
...
@@ -439,16 +437,15 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
if (y_i == j)
{
dx[i * dx_s0 + j * dx_s1] =
dnll_i * (sm[i * sm_s0 + j * sm_s1]-1.0);
%(write_dx)
s(dnll_i *
(
%(load_sm)
s(sm[i * sm_s0 + j * sm_s1]) - 1.0));
}
else
{
dx[i * dx_s0 + j * dx_s1] =
dnll_i * sm[i * sm_s0 + j * sm_s1];
%(write_dx)
s(dnll_i *
%(load_sm)
s(sm[i * sm_s0 + j * sm_s1]));
}
//dx[i * dx_s0 + j * dx_s1] =
// dnll_i * sm[i * sm_s0 + j * sm_s1];
//dx[i*dx_s0+j*dx_s1] = 0;
}
}
}
...
...
@@ -466,14 +463,7 @@ class GpuSoftmax (Op):
"""
Implement Softmax on the gpu.
"""
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
__str__
(
self
):
return
self
.
__class__
.
__name__
__props__
=
()
def
make_node
(
self
,
x
):
x
=
as_gpuarray_variable
(
x
)
...
...
@@ -484,14 +474,14 @@ class GpuSoftmax (Op):
def
c_code_cache_version
(
self
):
return
(
12
,)
+
inline_softmax
.
code_version
def
c_headers
(
self
):
return
[
'cuda.h'
,
'<gpuarray/extension.h>'
,
'<numpy_compat.h>'
,
'<gpuarray/ext_cuda.h>'
]
def
c_compiler
(
self
):
return
NVCC_compiler
def
c_init_code
(
self
):
return
[
'setup_ext_cuda();'
]
...
...
@@ -527,7 +517,7 @@ class GpuSoftmax (Op):
pygpu_default_context(), Py_None);
if (!
%(z)
s) {
%(fail)
s
}
}
}
{
int n_blocks = std::min(PyGpuArray_DIMS(
%(x)
s)[0],
...
...
@@ -658,15 +648,7 @@ class GpuSoftmaxWithBias (Op):
"""
nin
=
2
nout
=
1
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
__str__
(
self
):
return
self
.
__class__
.
__name__
__props__
=
()
def
make_node
(
self
,
x
,
b
):
x
=
as_gpuarray_variable
(
x
)
...
...
@@ -675,20 +657,20 @@ class GpuSoftmaxWithBias (Op):
def
infer_shape
(
self
,
node
,
shape
):
return
[
shape
[
0
]]
def
c_code_cache_version
(
self
):
return
(
11
,)
+
inline_softmax
.
code_version
def
c_headers
(
self
):
return
[
'cuda.h'
,
'<gpuarray/extension.h>'
,
'<numpy_compat.h>'
,
'<gpuarray/ext_cuda.h>'
]
def
c_compiler
(
self
):
return
NVCC_compiler
def
c_init_code
(
self
):
return
[
'setup_ext_cuda();'
]
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
dtype_x
=
node
.
inputs
[
0
]
.
dtype
dtype_b
=
node
.
inputs
[
1
]
.
dtype
...
...
@@ -738,7 +720,7 @@ class GpuSoftmaxWithBias (Op):
pygpu_default_context(), Py_None);
if (!
%(z)
s) {
%(fail)
s
}
}
}
{
int n_blocks = std::min(PyGpuArray_DIMS(
%(x)
s)[0], (size_t)(32*1024));
...
...
theano/sandbox/gpuarray/opt.py
浏览文件 @
1547ecc6
...
...
@@ -10,9 +10,7 @@ except ImportError:
from
theano
import
tensor
,
scalar
,
gof
from
theano.compile
import
optdb
from
theano.gof
import
(
local_optimizer
,
EquilibriumDB
,
SequenceDB
,
ProxyDB
,
Optimizer
,
toolbox
,
InconsistencyError
,
EquilibriumOptimizer
)
SequenceDB
,
Optimizer
,
toolbox
)
from
theano.scan_module
import
scan_utils
,
scan_op
,
scan_opt
...
...
@@ -28,8 +26,7 @@ from .conv import GpuConv
from
.nnet
import
(
GpuCrossentropySoftmaxArgmax1HotWithBias
,
GpuCrossentropySoftmax1HotWithBiasDx
,
GpuSoftmaxWithBias
,
GpuSoftmax
)
from
.elemwise
import
(
GpuElemwise
,
_is_scalar
,
GpuDimShuffle
,
GpuCAReduceCuda
,
from
.elemwise
import
(
GpuElemwise
,
GpuDimShuffle
,
GpuCAReduceCuda
,
GpuCAReduceCPY
)
from
.subtensor
import
(
GpuIncSubtensor
,
GpuSubtensor
,
GpuAdvancedIncSubtensor1
,
...
...
@@ -134,7 +131,7 @@ class InputToGpuOptimizer(Optimizer):
new_input
=
host_from_gpu
(
gpu_from_host
(
input
))
fgraph
.
replace_validate
(
input
,
new_input
,
"InputToGpuOptimizer"
)
except
TypeError
as
e
:
except
TypeError
:
# This could fail if the inputs are not TensorTypes
pass
...
...
@@ -199,8 +196,8 @@ def local_gpualloc_memset_0(node):
if
isinstance
(
node
.
op
,
GpuAlloc
)
and
not
node
.
op
.
memset_0
:
inp
=
node
.
inputs
[
0
]
if
(
isinstance
(
inp
,
GpuArrayConstant
)
and
inp
.
data
.
size
==
1
and
(
numpy
.
asarray
(
inp
.
data
)
==
0
)
.
all
()):
inp
.
data
.
size
==
1
and
(
numpy
.
asarray
(
inp
.
data
)
==
0
)
.
all
()):
new_out
=
GpuAlloc
(
memset_0
=
True
)(
*
node
.
inputs
)
return
[
new_out
]
...
...
@@ -253,10 +250,11 @@ def local_gpuflatten(node):
@op_lifter
([
tensor
.
Elemwise
])
def
local_gpu_elemwise
(
node
):
op
=
node
.
op
scal_op
=
op
.
scalar_op
name
=
op
.
name
if
name
:
name
=
'Gpu'
+
name
res
=
GpuElemwise
(
op
.
scalar
_op
,
name
=
name
,
res
=
GpuElemwise
(
scal
_op
,
name
=
name
,
inplace_pattern
=
copy
.
copy
(
op
.
inplace_pattern
),
nfunc_spec
=
op
.
nfunc_spec
)
return
res
...
...
@@ -343,7 +341,7 @@ def local_gpua_join(node):
def
local_gpuajoin_1
(
node
):
# join of a single element
if
(
isinstance
(
node
.
op
,
GpuJoin
)
and
len
(
node
.
inputs
)
==
2
):
len
(
node
.
inputs
)
==
2
):
return
[
node
.
inputs
[
1
]]
...
...
@@ -360,13 +358,13 @@ def local_gpua_subtensor(node):
if
(
x
.
owner
and
isinstance
(
x
.
owner
.
op
,
HostFromGpu
)):
gpu_x
=
x
.
owner
.
inputs
[
0
]
if
(
gpu_x
.
owner
and
isinstance
(
gpu_x
.
owner
.
op
,
GpuFromHost
)
and
# And it is a shared var or an input of the graph.
not
gpu_x
.
owner
.
inputs
[
0
]
.
owner
):
isinstance
(
gpu_x
.
owner
.
op
,
GpuFromHost
)
and
# And it is a shared var or an input of the graph.
not
gpu_x
.
owner
.
inputs
[
0
]
.
owner
):
if
len
(
x
.
clients
)
==
1
:
if
any
([
n
==
'output'
or
any
([
isinstance
(
v
.
type
,
GpuArrayType
)
for
v
in
n
.
inputs
+
n
.
outputs
])
for
n
,
_
in
node
.
outputs
[
0
]
.
clients
]):
for
n
,
_
in
node
.
outputs
[
0
]
.
clients
]):
return
else
:
return
[
host_from_gpu
(
gpu_from_host
(
node
.
outputs
[
0
]))]
...
...
@@ -391,7 +389,6 @@ def local_gpua_advanced_incsubtensor(node):
return
None
x
,
y
=
node
.
inputs
[
0
:
2
]
coords
=
node
.
inputs
[
2
:]
set_instead_of_inc
=
node
.
op
.
set_instead_of_inc
active_device_no
=
theano
.
sandbox
.
cuda
.
active_device_number
()
device_properties
=
theano
.
sandbox
.
cuda
.
device_properties
...
...
@@ -429,7 +426,7 @@ def local_gpua_careduce(node):
# We need to have the make node called, otherwise the mask can
# be None
if
(
op
is
GpuCAReduceCPY
or
gvar
.
owner
.
op
.
supports_c_code
([
gpu_from_host
(
x
)])):
gvar
.
owner
.
op
.
supports_c_code
([
gpu_from_host
(
x
)])):
return
greduce
else
:
# Try to make a simpler pattern based on reshaping
...
...
@@ -573,7 +570,7 @@ def local_gpu_conv(node):
version
=
op
.
version
,
verbose
=
op
.
verbose
,
imshp
=
op
.
imshp
,
)
)
if
op
.
imshp_logical
is
not
None
:
logical_img_hw
=
op
.
imshp_logical
[
1
:
3
]
if
logical_img_hw
!=
op
.
imshp
[
1
:
3
]:
...
...
@@ -633,15 +630,14 @@ def local_gpu_conv(node):
def
local_gpu_elemwise_careduce
(
node
):
""" Merge some GpuCAReduceCuda and GPUElemwise"""
if
(
isinstance
(
node
.
op
,
GpuCAReduceCuda
)
and
node
.
op
.
pre_scalar_op
is
None
and
node
.
inputs
[
0
]
.
owner
and
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
,
GpuElemwise
)
and
# The Op supports all scalar ops with 1 input. We don't
# automatically add more cases, as some, like trigonometric
# operations with some reduction patterns, would probably result
# in a slowdown.
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
.
scalar_op
,
scalar
.
basic
.
Sqr
)
):
node
.
op
.
pre_scalar_op
is
None
and
node
.
inputs
[
0
]
.
owner
and
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
,
GpuElemwise
)
and
# The Op supports all scalar ops with 1 input. We don't
# automatically add more cases, as some, like trigonometric
# operations with some reduction patterns, would probably result
# in a slowdown.
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
.
scalar_op
,
scalar
.
basic
.
Sqr
)):
op
=
node
.
op
inp
=
node
.
inputs
[
0
]
.
owner
.
inputs
[
0
]
return
[
GpuCAReduceCuda
(
scalar_op
=
op
.
scalar_op
,
...
...
theano/sandbox/gpuarray/subtensor.py
浏览文件 @
1547ecc6
...
...
@@ -21,6 +21,8 @@ from .comp import NVCC_compiler
class
GpuSubtensor
(
HideC
,
Subtensor
):
_f16_ok
=
True
def
make_node
(
self
,
x
,
*
inputs
):
rval
=
tensor
.
Subtensor
.
make_node
(
self
,
x
,
*
inputs
)
otype
=
GpuArrayType
(
dtype
=
rval
.
outputs
[
0
]
.
type
.
dtype
,
...
...
theano/sandbox/gpuarray/tests/test_basic_ops.py
浏览文件 @
1547ecc6
...
...
@@ -436,7 +436,7 @@ def test_gpueye():
assert
any
([
isinstance
(
node
.
op
,
GpuEye
)
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
for
dtype
in
[
'float32'
,
'int32'
]:
for
dtype
in
[
'float32'
,
'int32'
,
'float16'
]:
yield
check
,
dtype
,
3
# M != N, k = 0
yield
check
,
dtype
,
3
,
5
...
...
theano/sandbox/gpuarray/type.py
浏览文件 @
1547ecc6
...
...
@@ -185,6 +185,7 @@ class GpuArrayType(Type):
# complex64, etc.
try
:
return
{
'float16'
:
(
float
,
'npy_float16'
,
'NPY_FLOAT16'
),
'float32'
:
(
float
,
'npy_float32'
,
'NPY_FLOAT32'
),
'float64'
:
(
float
,
'npy_float64'
,
'NPY_FLOAT64'
),
'uint8'
:
(
int
,
'npy_uint8'
,
'NPY_UINT8'
),
...
...
@@ -309,7 +310,11 @@ class GpuArrayConstant(_operators, Constant):
def __str__(self):
    """Return the constant's name, or a rendering of its data if unnamed."""
    if self.name is not None:
        return self.name
    try:
        np_data = numpy.asarray(self.data)
    except gpuarray.GpuArrayException:
        # The data could not be converted to a numpy array; fall back to
        # formatting the raw data object instead of failing in __str__.
        np_data = self.data
    return "GpuArrayConstant{%s}" % np_data
GpuArrayType
.
Constant
=
GpuArrayConstant
...
...
theano/sandbox/test_rng_mrg.py
浏览文件 @
1547ecc6
...
...
@@ -613,60 +613,48 @@ def test_binomial():
# test empty size (scalar)
((),
(),
[],
[]),
]:
yield
(
t_binomial
,
mean
,
size
,
const_size
,
var_input
,
input
,
steps
,
rtol
)
# print ''
# print 'ON CPU with size=(%s) and mean(%d):' % (str(size), mean)
R
=
MRG_RandomStreams
(
234
,
use_cuda
=
False
)
# Note: we specify `nstreams` to avoid a warning.
u
=
R
.
binomial
(
size
=
size
,
p
=
mean
,
nstreams
=
rng_mrg
.
guess_n_streams
(
size
,
warn
=
False
))
f
=
theano
.
function
(
var_input
,
u
,
mode
=
mode
)
# theano.printing.debugprint(f)
out
=
f
(
*
input
)
# print 'random?[:10]\n', out[0, 0:10]
# print 'random?[-1,-10:]\n', out[-1, -10:]
# Increase the number of steps if sizes implies only a few samples
if
numpy
.
prod
(
const_size
)
<
10
:
steps_
=
steps
*
100
else
:
steps_
=
steps
basictest
(
f
,
steps_
,
const_size
,
prefix
=
'mrg cpu'
,
inputs
=
input
,
allow_01
=
True
,
target_avg
=
mean
,
mean_rtol
=
rtol
)
if
mode
!=
'FAST_COMPILE'
and
cuda_available
:
# print ''
# print 'ON GPU with size=(%s) and mean(%d):' % (str(size), mean)
R
=
MRG_RandomStreams
(
234
,
use_cuda
=
True
)
u
=
R
.
binomial
(
size
=
size
,
p
=
mean
,
dtype
=
'float32'
,
nstreams
=
rng_mrg
.
guess_n_streams
(
size
,
warn
=
False
))
# well, it's really that this test w GPU doesn't make sense otw
assert
u
.
dtype
==
'float32'
f
=
theano
.
function
(
var_input
,
theano
.
Out
(
theano
.
sandbox
.
cuda
.
basic_ops
.
gpu_from_host
(
u
),
borrow
=
True
),
mode
=
mode_with_gpu
)
# theano.printing.debugprint(f)
gpu_out
=
numpy
.
asarray
(
f
(
*
input
))
# print 'random?[:10]\n', gpu_out[0, 0:10]
# print 'random?[-1,-10:]\n', gpu_out[-1, -10:]
basictest
(
f
,
steps_
,
const_size
,
prefix
=
'mrg gpu'
,
inputs
=
input
,
allow_01
=
True
,
target_avg
=
mean
,
mean_rtol
=
rtol
)
numpy
.
testing
.
assert_array_almost_equal
(
out
,
gpu_out
,
decimal
=
6
)
# print ''
# print 'ON CPU w NUMPY with size=(%s) and mean(%d):' % (str(size),
# mean)
RR
=
theano
.
tensor
.
shared_randomstreams
.
RandomStreams
(
234
)
uu
=
RR
.
binomial
(
size
=
size
,
p
=
mean
)
ff
=
theano
.
function
(
var_input
,
uu
,
mode
=
mode
)
# It's not our problem if numpy generates 0 or 1
basictest
(
ff
,
steps_
,
const_size
,
prefix
=
'numpy'
,
allow_01
=
True
,
inputs
=
input
,
target_avg
=
mean
,
mean_rtol
=
rtol
)
def t_binomial(mean, size, const_size, var_input, input, steps, rtol):
    """Run `basictest` on binomial samples from several generators.

    Samples are drawn with probability `mean` and shape `size` from
    MRG_RandomStreams without CUDA, from MRG_RandomStreams with CUDA when
    `cuda_available` and the mode is not 'FAST_COMPILE' (the GPU output
    is also compared with the CPU output), and from
    theano.tensor.shared_randomstreams.RandomStreams.

    NOTE(review): the block nesting was reconstructed from data flow
    (`out` and `steps_` are reused by the later sections) -- confirm
    against the repository.
    """
    R = MRG_RandomStreams(234, use_cuda=False)
    u = R.binomial(size=size, p=mean)
    f = theano.function(var_input, u, mode=mode)
    out = f(*input)

    # Increase the number of steps if sizes implies only a few samples
    if numpy.prod(const_size) < 10:
        steps_ = steps * 100
    else:
        steps_ = steps
    basictest(f, steps_, const_size, prefix='mrg cpu',
              inputs=input, allow_01=True,
              target_avg=mean, mean_rtol=rtol)

    if mode != 'FAST_COMPILE' and cuda_available:
        R = MRG_RandomStreams(234, use_cuda=True)
        u = R.binomial(size=size, p=mean, dtype='float32')
        # well, it's really that this test w GPU doesn't make sense otw
        assert u.dtype == 'float32'
        f = theano.function(var_input, theano.Out(
            theano.sandbox.cuda.basic_ops.gpu_from_host(u),
            borrow=True), mode=mode_with_gpu)
        gpu_out = numpy.asarray(f(*input))

        basictest(f, steps_, const_size, prefix='mrg gpu',
                  inputs=input, allow_01=True,
                  target_avg=mean, mean_rtol=rtol)
        # The CPU and GPU generators use the same seed, so their first
        # draws are compared directly.
        numpy.testing.assert_array_almost_equal(out, gpu_out,
                                                decimal=6)

    RR = theano.tensor.shared_randomstreams.RandomStreams(234)

    uu = RR.binomial(size=size, p=mean)
    ff = theano.function(var_input, uu, mode=mode)
    # It's not our problem if numpy generates 0 or 1
    basictest(ff, steps_, const_size, prefix='numpy', allow_01=True,
              inputs=input, target_avg=mean, mean_rtol=rtol)
@attr
(
'slow'
)
...
...
theano/scalar/basic.py
浏览文件 @
1547ecc6
...
...
@@ -50,26 +50,34 @@ class IntegerDivisionError(Exception):
def upcast(dtype, *dtypes):
    """Return the name of the dtype obtained by combining the given dtypes.

    The combined dtype is whatever numpy produces when zero-dimensional
    arrays of each dtype are added together. When `config.cast_policy`
    is 'numpy+floatX', a float64 or float32 result is lowered to floatX
    (float32 or float16), unless one of the arguments explicitly has a
    higher precision float dtype.
    """
    # This tries to keep data in floatX or lower precision, unless we
    # explicitly request a higher precision datatype.
    # The flags are stored in one-element lists so that the nested
    # `make_array` can modify them.
    keep_float32 = [(config.cast_policy == 'numpy+floatX' and
                     config.floatX == 'float32')]
    keep_float16 = [(config.cast_policy == 'numpy+floatX' and
                     config.floatX == 'float16')]

    def make_array(dt):
        if dt == 'float64':
            # There is an explicit float64 dtype: we cannot keep float32.
            keep_float32[0] = False
            keep_float16[0] = False
        if dt == 'float32':
            # There is an explicit float32 dtype: we cannot keep float16.
            keep_float16[0] = False
        return numpy.zeros((), dtype=dt)

    z = make_array(dtype)
    for dt in dtypes:
        z = z + make_array(dt=dt)
    rval = str(z.dtype)
    if rval == 'float64':
        if keep_float16[0]:
            return 'float16'
        if keep_float32[0]:
            return 'float32'
    elif rval == 'float32':
        if keep_float16[0]:
            return 'float16'
    return rval
def
get_scalar_type
(
dtype
):
...
...
@@ -232,6 +240,7 @@ class Scalar(Type):
print(dtype, np.zeros(1, dtype=dtype).dtype.num)
"""
return
{
# dtype: (py_type, c_type, cls_name)
'float16'
:
(
numpy
.
float16
,
'npy_float16'
,
'Float16'
),
'float32'
:
(
numpy
.
float32
,
'npy_float32'
,
'Float32'
),
'float64'
:
(
numpy
.
float64
,
'npy_float64'
,
'Float64'
),
'complex128'
:
(
numpy
.
complex128
,
'theano_complex128'
,
...
...
@@ -501,6 +510,7 @@ uint8 = get_scalar_type('uint8')
uint16
=
get_scalar_type
(
'uint16'
)
uint32
=
get_scalar_type
(
'uint32'
)
uint64
=
get_scalar_type
(
'uint64'
)
float16
=
get_scalar_type
(
'float16'
)
float32
=
get_scalar_type
(
'float32'
)
float64
=
get_scalar_type
(
'float64'
)
complex64
=
get_scalar_type
(
'complex64'
)
...
...
@@ -508,7 +518,7 @@ complex128 = get_scalar_type('complex128')
int_types
=
int8
,
int16
,
int32
,
int64
uint_types
=
uint8
,
uint16
,
uint32
,
uint64
float_types
=
float32
,
float64
float_types
=
float
16
,
float
32
,
float64
complex_types
=
complex64
,
complex128
discrete_types
=
int_types
+
uint_types
...
...
@@ -1995,6 +2005,7 @@ convert_to_uint8 = Cast(uint8, name='convert_to_uint8')
convert_to_uint16
=
Cast
(
uint16
,
name
=
'convert_to_uint16'
)
convert_to_uint32
=
Cast
(
uint32
,
name
=
'convert_to_uint32'
)
convert_to_uint64
=
Cast
(
uint64
,
name
=
'convert_to_uint64'
)
convert_to_float16
=
Cast
(
float16
,
name
=
'convert_to_float16'
)
convert_to_float32
=
Cast
(
float32
,
name
=
'convert_to_float32'
)
convert_to_float64
=
Cast
(
float64
,
name
=
'convert_to_float64'
)
convert_to_complex64
=
Cast
(
complex64
,
name
=
'convert_to_complex64'
)
...
...
@@ -2009,6 +2020,7 @@ _cast_mapping = {
'uint16'
:
convert_to_uint16
,
'uint32'
:
convert_to_uint32
,
'uint64'
:
convert_to_uint64
,
'float16'
:
convert_to_float16
,
'float32'
:
convert_to_float32
,
'float64'
:
convert_to_float64
,
'complex64'
:
convert_to_complex64
,
...
...
@@ -3286,14 +3298,20 @@ class Composite(ScalarOp):
+
zip
(
self
.
fgraph
.
outputs
,
[
"
%%
(o
%
i)s"
%
i
for
i
in
xrange
(
len
(
self
.
fgraph
.
outputs
))]))
for
orphan
in
self
.
fgraph
.
variables
:
# fgraph.orphans:
if
orphan
.
owner
is
None
and
orphan
not
in
self
.
fgraph
.
inputs
:
if
isinstance
(
orphan
,
Constant
):
subd
[
orphan
]
=
orphan
.
type
.
c_literal
(
orphan
.
data
)
else
:
raise
ValueError
(
"All orphans in the fgraph to Composite must"
" be Constant instances."
)
for
var
in
self
.
fgraph
.
variables
:
if
var
.
owner
is
None
:
if
var
not
in
self
.
fgraph
.
inputs
:
# This is an orphan
if
isinstance
(
var
,
Constant
):
subd
[
var
]
=
var
.
type
.
c_literal
(
var
.
data
)
else
:
raise
ValueError
(
"All orphans in the fgraph to Composite must"
" be Constant instances."
)
elif
(
any
(
i
.
dtype
==
'float16'
for
i
in
var
.
owner
.
inputs
)
or
any
(
o
.
dtype
==
'float16'
for
o
in
var
.
owner
.
outputs
)):
# flag for elemwise ops to check.
self
.
inner_float16
=
True
_c_code
=
"{
\n
"
self
.
nodenames
=
[
"
%(nodename)
s_"
+
(
'subnode
%
i'
%
j
)
...
...
theano/sparse/tests/test_basic.py
浏览文件 @
1547ecc6
...
...
@@ -2370,6 +2370,9 @@ class CastTester(utt.InferShapeTester):
for
format
in
sparse
.
sparse_formats
:
for
i_dtype
in
sparse
.
float_dtypes
:
for
o_dtype
in
tensor
.
float_dtypes
:
if
o_dtype
==
'float16'
:
# Don't test float16 output.
continue
_
,
data
=
sparse_random_inputs
(
format
,
shape
=
(
4
,
7
),
...
...
theano/tensor/basic.py
浏览文件 @
1547ecc6
...
...
@@ -252,10 +252,10 @@ class NumpyAutocaster(object):
return
numpy
.
asarray
(
x
)
elif
config
.
cast_policy
==
'numpy+floatX'
:
rval
=
numpy
.
asarray
(
x
)
if
((
rval
.
dtype
==
'float64'
and
# numpy wants float64
config
.
floatX
==
'float32'
and
# but we prefer float32
not
hasattr
(
x
,
'dtype'
))):
# and `x` was not typed
rval
=
theano
.
_asarray
(
rval
,
dtype
=
'float32'
)
if
((
not
hasattr
(
x
,
'dtype'
)
and
rval
.
dtype
in
(
'float64'
,
'float32'
)
and
rval
.
dtype
!=
config
.
floatX
)):
rval
=
theano
.
_asarray
(
rval
,
dtype
=
config
.
floatX
)
return
rval
# The following is the original code, corresponding to the 'custom'
...
...
@@ -278,11 +278,14 @@ class NumpyAutocaster(object):
# recall: float is numpy.float
if
((
isinstance
(
x
,
float
)
and
config
.
floatX
in
self
.
dtypes
and
config
.
floatX
==
'float32'
)):
config
.
floatX
!=
'float64'
)):
return
theano
.
_asarray
(
x
,
dtype
=
config
.
floatX
)
return
theano
.
_asarray
(
x
,
dtype
=
'float32'
)
# Don't autocast to float16 unless config.floatX is float16
try_dtypes
=
[
d
for
d
in
self
.
dtypes
if
config
.
floatX
==
'float16'
or
d
!=
'float16'
]
for
dtype
in
self
.
dtypes
:
for
dtype
in
try_
dtypes
:
x_
=
theano
.
_asarray
(
x
,
dtype
=
dtype
)
if
numpy
.
all
(
x
==
x_
):
break
...
...
@@ -290,7 +293,7 @@ class NumpyAutocaster(object):
return
x_
autocast_int
=
NumpyAutocaster
((
'int8'
,
'int16'
,
'int32'
,
'int64'
))
autocast_float
=
NumpyAutocaster
((
'float32'
,
'float64'
))
autocast_float
=
NumpyAutocaster
((
'float
16'
,
'float
32'
,
'float64'
))
# autocast_float dtypes might be manipulated in tensor.__init__
...
...
@@ -313,7 +316,7 @@ class autocast_float_as(object):
If `config.cast_policy` is not 'custom', an exception is raised.
For example:
>>> with autocast_float_as('float32')
as _dummy
:
>>> with autocast_float_as('float32'):
... assert (fvector() + 1.1).dtype == 'float32' # temporary downcasting
>>> assert (fvector() + 1.1).dtype == 'float64' # back to default behaviour
...
...
@@ -1137,6 +1140,10 @@ _convert_to_uint64 = _conversion(
elemwise
.
Elemwise
(
scal
.
convert_to_uint64
),
'uint64'
)
"""Cast to unsigned 64-bit integer"""
_convert_to_float16
=
_conversion
(
elemwise
.
Elemwise
(
scal
.
convert_to_float16
),
'float16'
)
"""Cast to half-precision floating point"""
_convert_to_float32
=
_conversion
(
elemwise
.
Elemwise
(
scal
.
convert_to_float32
),
'float32'
)
"""Cast to single-precision floating point"""
...
...
@@ -1162,6 +1169,7 @@ _cast_mapping = {
'uint16'
:
_convert_to_uint16
,
'uint32'
:
_convert_to_uint32
,
'uint64'
:
_convert_to_uint64
,
'float16'
:
_convert_to_float16
,
'float32'
:
_convert_to_float32
,
'float64'
:
_convert_to_float64
,
'complex64'
:
_convert_to_complex64
,
...
...
@@ -2757,9 +2765,13 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
# sum() will complain if it is not suitable.
sum_dtype
=
dtype
else
:
# Let sum() infer the appropriate dtype.
sum_dtype
=
None
# float16 overflows way too fast for sum
if
((
sum_dtype
==
'float16'
or
input
.
dtype
==
'float16'
)
and
acc_dtype
!=
'float16'
):
sum_dtype
==
'float32'
s
=
sum
(
input
,
axis
=
axis
,
dtype
=
sum_dtype
,
keepdims
=
keepdims
,
acc_dtype
=
acc_dtype
)
shp
=
shape
(
input
)
...
...
@@ -2767,7 +2779,7 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
# Cast shp into a float type
# TODO Once we have a consistent casting policy, we could simply
# use true_div.
if
s
.
dtype
in
(
'float32'
,
'complex64'
):
if
s
.
dtype
in
(
'float
16'
,
'float
32'
,
'complex64'
):
shp
=
cast
(
shp
,
'float32'
)
else
:
shp
=
cast
(
shp
,
'float64'
)
...
...
@@ -2785,6 +2797,9 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False,
for
i
in
axis
:
s
=
true_div
(
s
,
shp
[
i
])
if
dtype
==
'float16'
or
(
dtype
is
None
and
input
.
dtype
==
'float16'
):
s
=
cast
(
s
,
'float16'
)
return
s
...
...
theano/tensor/blas.py
浏览文件 @
1547ecc6
...
...
@@ -417,7 +417,8 @@ class Gemv(Op):
def
perform
(
self
,
node
,
inputs
,
out_storage
):
y
,
alpha
,
A
,
x
,
beta
=
inputs
if
have_fblas
and
y
.
shape
[
0
]
!=
0
and
x
.
shape
[
0
]
!=
0
:
if
(
have_fblas
and
y
.
shape
[
0
]
!=
0
and
x
.
shape
[
0
]
!=
0
and
y
.
dtype
in
_blas_gemv_fns
):
gemv
=
_blas_gemv_fns
[
y
.
dtype
]
if
(
A
.
shape
[
0
]
!=
y
.
shape
[
0
]
or
A
.
shape
[
1
]
!=
x
.
shape
[
0
]):
...
...
@@ -1727,7 +1728,7 @@ def local_dot_to_dot22(node):
x
,
y
,
x
.
type
,
y
.
type
)
return
if
y
.
type
.
dtype
.
startswith
(
'float'
)
or
y
.
type
.
dtype
.
startswith
(
'complex'
)
:
if
y
.
type
.
dtype
in
[
'float32'
,
'float64'
,
'complex64'
,
'complex128'
]
:
if
x
.
ndim
==
2
and
y
.
ndim
==
2
:
# print "local_dot_to_dot22: MM"
return
[
_dot22
(
*
node
.
inputs
)]
...
...
theano/tensor/elemwise.py
浏览文件 @
1547ecc6
...
...
@@ -95,6 +95,7 @@ class DimShuffle(Op):
transpose function.
Adding, subtracting dimensions can be done with reshape.
"""
_f16_ok
=
True
check_input
=
False
...
...
@@ -1171,6 +1172,12 @@ class Elemwise(OpenMPOp):
return
decl
,
checks
,
alloc
,
loop
def
c_code
(
self
,
node
,
nodename
,
inames
,
onames
,
sub
):
if
(
any
(
i
.
dtype
==
'float16'
for
i
in
node
.
inputs
)
or
any
(
o
.
dtype
==
'float16'
for
o
in
node
.
outputs
)
or
# This is for Composite
getattr
(
self
.
scalar_op
,
'inner_float16'
,
False
)):
# Disable C code for float16 vars
super
(
Elemwise
,
self
)
.
c_code
(
node
,
nodename
,
inames
,
onames
,
sub
)
code
=
"
\n
"
.
join
(
self
.
_c_all
(
node
,
nodename
,
inames
,
onames
,
sub
))
return
code
...
...
@@ -1186,7 +1193,7 @@ class Elemwise(OpenMPOp):
return
support_code
def
c_code_cache_version_apply
(
self
,
node
):
version
=
[
1
1
]
# the version corresponding to the c code in this Op
version
=
[
1
2
]
# the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend...
scalar_node
=
Apply
(
self
.
scalar_op
,
...
...
@@ -1806,6 +1813,7 @@ class CAReduceDtype(CAReduce):
uint8
=
'uint64'
,
uint16
=
'uint64'
,
uint32
=
'uint64'
,
float16
=
'float32'
,
float32
=
'float64'
,
complex64
=
'complex128'
,
)
.
get
(
idtype
,
idtype
)
...
...
theano/tensor/nnet/nnet.py
浏览文件 @
1547ecc6
...
...
@@ -1746,7 +1746,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
# Check z is zeros_like(log(sm))
if
not
_is_const
(
z
,
0
):
return
if
z
.
type
not
in
(
dmatrix
,
fmatrix
):
if
z
.
broadcastable
!=
(
False
,
False
):
if
not
(
vector_softmax
and
z
.
broadcastable
==
(
True
,
False
)):
return
# here we know that we are incrementing a matrix of zeros
...
...
@@ -1758,14 +1758,15 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
if
incr
.
ndim
!=
1
or
incr
.
dtype
not
in
tensor
.
float_dtypes
:
return
# here we know that we are incrementing some part of matrix z by a vector
# here we know that we are incrementing some part of
# matrix z by a vector
# unless the user has taken care to mark that the data and
labels have the
#
same number of rows, we cannot be sure here that
#
len(y) == len(z)
#
However, in the common case that these are predictions and labels it is true.
#
We leave it to the Op to crash (and the user to complain) if this assumption is
# ever not true.
# unless the user has taken care to mark that the data and
#
labels have the same number of rows, we cannot be sure
#
here that len(y) == len(z) However, in the common case
#
that these are predictions and labels it is true. We
#
leave it to the Op to crash (and the user to complain)
#
if this assumption is
ever not true.
out_grad
=
-
incr
...
...
theano/tensor/opt.py
浏览文件 @
1547ecc6
...
...
@@ -1564,6 +1564,7 @@ class Assert(T.Op):
used in the function computing the graph, but it doesn't have to be
returned.
"""
__props__
=
(
'msg'
,)
view_map
=
{
0
:
[
0
]}
check_input
=
False
...
...
@@ -1583,24 +1584,18 @@ class Assert(T.Op):
assert
numpy
.
all
([
c
.
type
.
ndim
==
0
for
c
in
cond
])
return
gof
.
Apply
(
self
,
[
value
]
+
cond
,
[
value
.
type
()])
def
__str__
(
self
):
return
self
.
__class__
.
__name__
def
perform
(
self
,
node
,
inputs
,
out_
):
out
,
=
out_
v
=
inputs
[
0
]
out
[
0
]
=
v
assert
numpy
.
all
(
inputs
[
1
:]),
self
.
msg
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
and
self
.
msg
==
other
.
msg
def
__hash__
(
self
):
return
hash
(
type
(
self
))
^
hash
(
self
.
msg
)
def
grad
(
self
,
input
,
output_gradients
):
return
output_gradients
+
[
DisconnectedType
()()]
*
(
len
(
input
)
-
1
)
def
connection_pattern
(
self
,
node
):
return
[[
1
]]
+
[[
0
]]
*
(
len
(
node
.
inputs
)
-
1
)
def
c_code
(
self
,
node
,
name
,
inames
,
onames
,
sub
):
value
=
inames
[
0
]
out
=
onames
[
0
]
...
...
theano/tensor/tests/test_basic.py
浏览文件 @
1547ecc6
...
...
@@ -5820,40 +5820,24 @@ def _test_autocast_custom():
orig_autocast
=
autocast_float
.
dtypes
# Test that autocast_float_as sets the autocast dtype correctly
try
:
# ghetto 2.4 version of with
ac
=
autocast_float_as
(
'float32'
)
ac
.
__enter__
()
with
autocast_float_as
(
'float32'
):
assert
autocast_float
.
dtypes
==
(
'float32'
,)
finally
:
ac
.
__exit__
()
assert
autocast_float
.
dtypes
==
orig_autocast
try
:
# ghetto 2.4 version of with
ac
=
autocast_float_as
(
'float64'
)
ac
.
__enter__
()
with
autocast_float_as
(
'float64'
):
assert
autocast_float
.
dtypes
==
(
'float64'
,)
finally
:
ac
.
__exit__
()
assert
autocast_float
.
dtypes
==
orig_autocast
# Test that we can set it back to something, and nest it
try
:
# ghetto 2.4 version of with
ac
=
autocast_float_as
(
'float32'
)
ac
.
__enter__
()
with
autocast_float_as
(
'float32'
):
assert
autocast_float
.
dtypes
==
(
'float32'
,)
try
:
# ghetto 2.4 version of with
ac2
=
autocast_float_as
(
'float64'
)
ac2
.
__enter__
()
with
autocast_float_as
(
'float64'
):
assert
autocast_float
.
dtypes
==
(
'float64'
,)
finally
:
ac2
.
__exit__
()
assert
autocast_float
.
dtypes
==
(
'float32'
,)
finally
:
ac
.
__exit__
()
assert
autocast_float
.
dtypes
==
orig_autocast
# Test that the autocasting dtype is used correctly in expression-building
try
:
# ghetto 2.4 version of with
ac
=
autocast_float_as
(
'float32'
)
ac
.
__enter__
()
with
autocast_float_as
(
'float32'
):
assert
(
dvector
()
+
1.1
)
.
dtype
==
'float64'
assert
(
fvector
()
+
1.1
)
.
dtype
==
'float32'
assert
(
fvector
()
+
theano
.
_asarray
(
1.1
,
dtype
=
'float64'
))
.
dtype
==
\
...
...
@@ -5863,13 +5847,9 @@ def _test_autocast_custom():
assert
(
dvector
()
+
1
)
.
dtype
==
'float64'
assert
(
fvector
()
+
1
)
.
dtype
==
'float32'
finally
:
ac
.
__exit__
()
# Test that the autocasting dtype is used correctly in expression-building
try
:
# ghetto 2.4 version of with
ac
=
autocast_float_as
(
'float64'
)
ac
.
__enter__
()
with
autocast_float_as
(
'float64'
):
assert
(
dvector
()
+
1.1
)
.
dtype
==
'float64'
assert
(
fvector
()
+
1.1
)
.
dtype
==
'float64'
assert
(
fvector
()
+
1.0
)
.
dtype
==
'float64'
...
...
@@ -5880,13 +5860,9 @@ def _test_autocast_custom():
assert
(
dvector
()
+
1
)
.
dtype
==
'float64'
assert
(
fvector
()
+
1
)
.
dtype
==
'float32'
finally
:
ac
.
__exit__
()
# Test that the autocasting dtype is used correctly in expression-building
try
:
# ghetto 2.4 version of with
ac
=
autocast_float_as
(
'float32'
,
'float64'
)
ac
.
__enter__
()
with
autocast_float_as
(
'float32'
,
'float64'
):
assert
(
dvector
()
+
1.1
)
.
dtype
==
'float64'
assert
(
fvector
()
+
1.1
)
.
dtype
==
theano
.
config
.
floatX
assert
(
fvector
()
+
1.0
)
.
dtype
==
'float32'
...
...
@@ -5903,14 +5879,8 @@ def _test_autocast_custom():
assert
(
ivector
()
+
numpy
.
int8
(
1
))
.
dtype
==
'int32'
assert
(
wvector
()
+
numpy
.
int8
(
1
))
.
dtype
==
'int16'
assert
(
bvector
()
+
numpy
.
int8
(
1
))
.
dtype
==
'int8'
try
:
# ghetto 2.4 version of with
ac2
=
autocast_float_as
(
'float64'
)
ac2
.
__enter__
()
with
autocast_float_as
(
'float64'
):
assert
(
fvector
()
+
1.0
)
.
dtype
==
'float64'
finally
:
ac2
.
__exit__
()
finally
:
ac
.
__exit__
()
def
_test_autocast_numpy
():
...
...
@@ -6036,17 +6006,8 @@ class test_arithmetic_cast(unittest.TestCase):
config
.
int_division
==
'raise'
)
# This is the expected behavior.
continue
# For numpy we have a problem:
# http://projects.scipy.org/numpy/ticket/1827
# As a result we only consider the highest data
# type that numpy may return.
numpy_dtypes
=
[
op
(
numpy_args
[
0
](
a_type
),
numpy_args
[
1
](
b_type
))
.
dtype
,
op
(
numpy_args
[
1
](
b_type
),
numpy_args
[
0
](
a_type
))
.
dtype
]
numpy_dtype
=
theano
.
scalar
.
upcast
(
*
map
(
str
,
numpy_dtypes
))
numpy_dtype
=
op
(
numpy_args
[
0
](
a_type
),
numpy_args
[
1
](
b_type
))
.
dtype
if
numpy_dtype
==
theano_dtype
:
# Same data type found, all is good!
continue
...
...
@@ -6078,9 +6039,7 @@ class test_arithmetic_cast(unittest.TestCase):
# Theano upcasted the result array.
theano_dtype
==
up_type
and
# But Numpy kept its original type.
# (not an equality because of numpy bug
# mentioned above).
array_type
in
numpy_dtypes
):
array_type
==
numpy_dtype
):
# Then we accept this difference in
# behavior.
continue
...
...
@@ -6092,17 +6051,20 @@ class test_arithmetic_cast(unittest.TestCase):
numpy
.
__version__
.
split
(
'.'
)[:
2
]]
if
(
cfg
==
'numpy+floatX'
and
a_type
==
'complex128'
and
b_type
==
'float32'
and
(
b_type
==
'float32'
or
b_type
==
'float16'
)
and
combo
==
(
'scalar'
,
'array'
)
and
bool
(
numpy_version
>=
[
1
,
6
])
and
theano_dtype
==
'complex128'
and
numpy_dtypes
==
[
'complex64'
,
'complex64'
]):
# In numpy 1.6.x adding a complex128 with
# a float32 may result in a complex64. This
# may be a bug (investigation is currently
# in progress), so in the meantime we just
# mark this test as a known failure.
numpy_dtype
==
'complex64'
):
# In numpy 1.6.x adding a
# complex128 with a float32 or
# float16 may result in a
# complex64. This may be a bug
# (investigation is currently in
# progress), so in the meantime we
# just mark this test as a known
# failure.
raise
KnownFailureTest
(
'Known issue with '
'numpy >= 1.6.x see #761'
)
...
...
theano/tensor/tests/test_elemwise.py
浏览文件 @
1547ecc6
...
...
@@ -1024,6 +1024,7 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
uint8
=
'uint64'
,
uint16
=
'uint64'
,
uint32
=
'uint64'
,
float16
=
'float32'
,
float32
=
'float64'
,
complex64
=
'complex128'
)
.
get
(
dtype
,
dtype
)
...
...
theano/tensor/type.py
浏览文件 @
1547ecc6
...
...
@@ -234,6 +234,7 @@ class TensorType(Type):
# complex64, etc.
try
:
return
{
'float16'
:
(
float
,
'npy_float16'
,
'NPY_FLOAT16'
),
'float32'
:
(
float
,
'npy_float32'
,
'NPY_FLOAT32'
),
'float64'
:
(
float
,
'npy_float64'
,
'NPY_FLOAT64'
),
'uint8'
:
(
int
,
'npy_uint8'
,
'NPY_UINT8'
),
...
...
theano/tests/test_flake8.py
浏览文件 @
1547ecc6
...
...
@@ -6,6 +6,7 @@ from nose.plugins.skip import SkipTest
import
os
from
fnmatch
import
fnmatch
import
theano
from
theano.compat
import
PY3
try
:
import
flake8.engine
import
flake8.main
...
...
@@ -227,7 +228,6 @@ whitelist_flake8 = [
"sandbox/gpuarray/elemwise.py"
,
"sandbox/gpuarray/type.py"
,
"sandbox/gpuarray/__init__.py"
,
"sandbox/gpuarray/opt.py"
,
"sandbox/gpuarray/blas.py"
,
"sandbox/gpuarray/kernel_codegen.py"
,
"sandbox/gpuarray/conv.py"
,
...
...
@@ -347,6 +347,8 @@ def test_format_flake8():
"""
if
not
flake8_available
:
raise
SkipTest
(
"flake8 is not installed"
)
if
PY3
:
raise
SkipTest
(
"not testing in python3 since 2to3 ran"
)
total_errors
=
0
for
path
in
list_files
():
rel_path
=
os
.
path
.
relpath
(
path
,
theano
.
__path__
[
0
])
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论