Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
a85a44fc
提交
a85a44fc
authored
11月 12, 2015
作者:
Xavier Bouthillier
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3361 from adbrebs/h_softmax
Two-layer hierarchical softmax
上级
f979d9e5
43a27b25
隐藏空白字符变更
内嵌
并排
正在显示
15 个修改的文件
包含
511 行增加
和
286 行删除
+511
-286
blocksparse.txt
doc/library/tensor/nnet/blocksparse.txt
+5
-7
index.txt
doc/library/tensor/nnet/index.txt
+1
-0
nnet.txt
doc/library/tensor/nnet/nnet.txt
+4
-0
__init__.py
theano/sandbox/__init__.py
+0
-1
blocksparse.py
theano/sandbox/blocksparse.py
+12
-266
opt.py
theano/sandbox/cuda/opt.py
+1
-1
test_blocksparse.py
theano/sandbox/cuda/tests/test_blocksparse.py
+2
-2
test_opt.py
theano/sandbox/cuda/tests/test_opt.py
+1
-1
__init__.py
theano/tensor/nnet/__init__.py
+24
-1
blocksparse.py
theano/tensor/nnet/blocksparse.py
+265
-0
nnet.py
theano/tensor/nnet/nnet.py
+123
-0
opt.py
theano/tensor/nnet/opt.py
+2
-2
test_blocksparse.py
theano/tensor/nnet/tests/test_blocksparse.py
+1
-1
test_nnet.py
theano/tensor/nnet/tests/test_nnet.py
+69
-3
test_opt.py
theano/tensor/nnet/tests/test_opt.py
+1
-1
没有找到文件。
doc/library/
sandbox
/blocksparse.txt
→
doc/library/
tensor/nnet
/blocksparse.txt
浏览文件 @
a85a44fc
.. _libdoc_blocksparse:
===========================================================================
:mod:`
sandbox.
blocksparse` -- Block sparse dot operations (gemv and outer)
===========================================================================
===========================================================================
====
:mod:`blocksparse` -- Block sparse dot operations (gemv and outer)
===========================================================================
====
.. module::
sandbox
.blocksparse
.. module::
tensor.nnet
.blocksparse
:platform: Unix, Windows
:synopsis: Block sparse dot
.. moduleauthor:: LISA
API
===
.. automodule:: theano.
sandbox
.blocksparse
.. automodule:: theano.
tensor.nnet
.blocksparse
:members:
doc/library/tensor/nnet/index.txt
浏览文件 @
a85a44fc
...
...
@@ -20,3 +20,4 @@ and ops which are particular to neural networks and deep learning.
nnet
neighbours
bn
blocksparse
doc/library/tensor/nnet/nnet.txt
浏览文件 @
a85a44fc
...
...
@@ -21,6 +21,7 @@
- :func:`relu() <theano.tensor.nnet.relu>`
- :func:`binary_crossentropy`
- :func:`.categorical_crossentropy`
- :func:`h_softmax() <theano.tensor.nnet.h_softmax>`
.. function:: sigmoid(x)
...
...
@@ -204,3 +205,6 @@
y = T.nnet.softmax(T.dot(W, x) + b)
cost = T.nnet.categorical_crossentropy(y, o)
# o is either the above-mentioned 1-of-N vector or 2D tensor
.. autofunction:: theano.tensor.nnet.h_softmax
theano/sandbox/__init__.py
浏览文件 @
a85a44fc
from
.
import
opt
theano/sandbox/blocksparse.py
浏览文件 @
a85a44fc
import
numpy
import
theano
from
theano
import
Op
,
Apply
from
theano
import
tensor
from
theano.tensor
import
discrete_dtypes
from
theano.gradient
import
grad_undefined
class SparseBlockGemv(Op):
    """
    This op computes the dot product of specified pieces of vectors
    and matrices, returning pieces of vectors::

        for b in range(batch_size):
            for j in range(o.shape[1]):
                for i in range(h.shape[1]):
                    o[b, j, :] += numpy.dot(h[b, i], W[iIdx[b, i], oIdx[b, j]])

    where b, h, W, o, iIdx, oIdx are defined in the docstring of make_node.

    .. image:: ../../images/blocksparse.png
        :scale: 50 %

    """

    registered_opts = []

    def __init__(self, inplace=False):
        """
        Parameters
        ----------
        inplace : bool
            When True, `perform` accumulates directly into the first input
            (`o`) instead of a copy, and `destroy_map` is set accordingly.
        """
        self.inplace = inplace
        if self.inplace:
            # Output 0 overwrites input 0 (`o`).
            self.destroy_map = {0: [0]}

    def make_node(self, o, W, h, inputIdx, outputIdx):
        """
        Compute the dot product of the specified pieces of vectors
        and matrices.

        The parameter types are actually their expected shapes
        relative to each other.

        Parameters
        ----------
        o : batch, oWin, oSize
            output vector
        W : iBlocks, oBlocks, iSize, oSize
            weight matrix
        h : batch, iWin, iSize
            input from lower layer (sparse)
        inputIdx : batch, iWin
            indexes of the input blocks
        outputIdx : batch, oWin
            indexes of the output blocks

        Returns
        -------
        (batch, oWin, oSize)
            dot(W[i, j], h[i]) + o[j]

        Raises
        ------
        TypeError
            If any input does not have the expected number of dimensions.

        Notes
        -----
        - `batch` is the number of examples in a minibatch (batch size).
        - `iBlocks` is the total number of blocks in the input (from lower
          layer).
        - `iSize` is the size of each of these input blocks.
        - `iWin` is the number of blocks that will be used as inputs. Which
          blocks will be used is specified in `inputIdx`.
        - `oBlocks` is the number of possible output blocks.
        - `oSize` is the size of each of these output blocks.
        - `oWin` is the number of output blocks that will actually be computed.
          Which blocks will be computed is specified in `outputIdx`.

        """
        o = theano.tensor.as_tensor_variable(o)
        W = theano.tensor.as_tensor_variable(W)
        h = theano.tensor.as_tensor_variable(h)
        inputIdx = theano.tensor.as_tensor_variable(inputIdx)
        outputIdx = theano.tensor.as_tensor_variable(outputIdx)

        if o.ndim != 3:
            # The message used to say "2D" although the check requires 3D.
            raise TypeError('The output o must be a 3D tensor')
        if W.ndim != 4:
            raise TypeError('The weight matrix W must be a 4D tensor')
        if h.ndim != 3:
            raise TypeError('The input h must be a 3D tensor')
        if inputIdx.ndim != 2:
            raise TypeError('The input indices inputIdx must be a 2D tensor')
        if outputIdx.ndim != 2:
            raise TypeError('The output indices outputIdx must be a 2D tensor')

        # Block indices must have a discrete (integer-like) dtype.
        assert inputIdx.type.dtype in discrete_dtypes
        assert outputIdx.type.dtype in discrete_dtypes

        # The output has the dtype and rank of `o`, with no broadcastable
        # dimension.
        output = o.type.__class__(dtype=o.type.dtype,
                                  broadcastable=(False,) * o.ndim)()

        return Apply(self, [o, W, h, inputIdx, outputIdx], [output])

    def perform(self, node, inp, out_):
        """
        Python implementation: accumulate every selected block product
        into `o` and store the result in the first output storage cell.
        """
        o, W, h, iIdx, oIdx = inp[:5]

        if not self.inplace:
            # Work on a copy so that the input buffer is left untouched.
            o = o.copy()

        for b in range(o.shape[0]):
            for j in range(o.shape[1]):
                outputIdx = oIdx[b, j]
                for i in range(h.shape[1]):
                    inputIdx = iIdx[b, i]
                    w = W[inputIdx, outputIdx]
                    o[b, j, :] += numpy.dot(h[b, i], w)
        out_[0][0] = o

    def grad(self, inputs, grads):
        """
        Return the gradients with respect to `o`, `W` and `h`; the
        gradients with respect to both index inputs are undefined.
        """
        o, W, h, inputIdx, outputIdx = inputs
        go = grads[0]

        outer_fun = SparseBlockOuter(self.inplace)
        gemv_fun = SparseBlockGemv(self.inplace)

        # Gradient on W: block outer products of h and go, accumulated
        # into a zero tensor shaped like W.
        Wgrad = outer_fun(W.zeros_like(), h, go, inputIdx, outputIdx)
        # Gradient on h: the same gemv with the block axes and the
        # within-block axes of W swapped, and the index roles exchanged.
        hgrad = gemv_fun(h.zeros_like(), W.dimshuffle((1, 0, 3, 2)),
                         go, outputIdx, inputIdx)
        return [go, Wgrad, hgrad,
                grad_undefined(self, 3, inputIdx,
                               "grad of inputIdx makes no sense"),
                grad_undefined(self, 4, outputIdx,
                               "grad of outputIdx makes no sense")]
class SparseBlockOuter(Op):
    """
    This computes the outer product of two sets of pieces of vectors
    updating a full matrix with the results::

        for b in range(batch_size):
            for i in range(xIdx.shape[1]):
                for j in range(yIdx.shape[1]):
                    o[xIdx[b, i], yIdx[b, j]] += (alpha * outer(x[b, i], y[b, j]))

    This op is involved in the gradient of SparseBlockGemv.

    """

    registered_opts = []

    def __init__(self, inplace=False):
        """
        Parameters
        ----------
        inplace : bool
            When True, `perform` accumulates directly into the first input
            (`o`) instead of a copy, and `destroy_map` is set accordingly.
        """
        self.inplace = inplace
        if self.inplace:
            # Output 0 overwrites input 0 (`o`).
            self.destroy_map = {0: [0]}

    def make_node(self, o, x, y, xIdx, yIdx, alpha=None):
        """
        Compute the outer product of the specified pieces of vectors
        and accumulate it into the specified blocks of a matrix.

        The parameter types are actually their expected shapes
        relative to each other.

        Parameters
        ----------
        o : xBlocks, yBlocks, xSize, ySize
        x : batch, xWin, xSize
        y : batch, yWin, ySize
        xIdx : batch, iWin
            indexes of the x blocks
        yIdx : batch, oWin
            indexes of the y blocks
        alpha : scalar, optional
            scaling factor applied to every outer product. Defaults to a
            float32 constant equal to 1.

        Returns
        -------
        (xBlocks, yBlocks, xSize, ySize)
            alpha * outer(x[i], y[j]) + o[i, j]

        Notes
        -----
        - `batch` is the number of examples in a minibatch (batch size).
        - `xBlocks` is the total number of blocks in x.
        - `xSize` is the size of each of these x blocks.
        - `xWin` is the number of blocks that will be used as x. Which blocks
          will be used is specified in `xIdx`.
        - `yBlocks` is the number of possible y blocks.
        - `ySize` is the size of each of these y blocks.
        - `yWin` is the number of y blocks that will actually be computed.
          Which blocks will be computed is specified in `yIdx`.

        """
        one = tensor.constant(numpy.asarray(1.0, dtype='float32'))
        o = theano.tensor.as_tensor_variable(o)
        x = theano.tensor.as_tensor_variable(x)
        y = theano.tensor.as_tensor_variable(y)
        # Every input of the Apply node must be a variable, so the index
        # arguments (and a user-supplied alpha) are converted as well.
        xIdx = theano.tensor.as_tensor_variable(xIdx)
        yIdx = theano.tensor.as_tensor_variable(yIdx)

        if alpha is None:
            alpha = one
        else:
            alpha = theano.tensor.as_tensor_variable(alpha)

        # The output has the dtype and rank of `o`, with no broadcastable
        # dimension.
        output = o.type.__class__(dtype=o.type.dtype,
                                  broadcastable=(False,) * o.ndim)()

        return Apply(self, [o, x, y, xIdx, yIdx, alpha], [output])

    def perform(self, node, inp, out_):
        """
        Python implementation: accumulate the scaled block outer products
        into `o` and store the result in the first output storage cell.
        """
        o, x, y, xIdx, yIdx, alpha = inp[:6]

        if not self.inplace:
            # Work on a copy so that the input buffer is left untouched.
            o = o.copy()

        for b in range(x.shape[0]):
            for i in range(xIdx.shape[1]):
                for j in range(yIdx.shape[1]):
                    # alpha was previously unpacked but never applied,
                    # contradicting the formula documented on the class.
                    o[xIdx[b, i], yIdx[b, j]] += (
                        alpha * numpy.outer(x[b, i], y[b, j, :]))
        out_[0][0] = o
# Ready-made op instances: a copying variant and an in-place variant of
# each of the two block-sparse ops.
sparse_block_gemv = SparseBlockGemv(inplace=False)
sparse_block_gemv_inplace = SparseBlockGemv(inplace=True)

sparse_block_outer = SparseBlockOuter(inplace=False)
sparse_block_outer_inplace = SparseBlockOuter(inplace=True)
def sparse_block_dot(W, h, inputIdx, b, outputIdx):
    """
    Block-sparse dot product with bias, built on SparseBlockGemv.

    The selected rows of the bias `b` are used as the initial value of
    the output, then the selected block products are accumulated on top
    of it. See SparseBlockGemv to get more information.

    The parameter types are actually their expected shapes relative to
    each other.

    Parameters
    ----------
    W : iBlocks, oBlocks, iSize, oSize
        weight matrix
    h : batch, iWin, iSize
        input from lower layer (sparse)
    inputIdx : batch, iWin
        indexes of the input blocks
    b : oBlocks, oSize
        bias vector
    outputIdx : batch, oWin
        indexes of the output blocks

    Returns
    -------
    (batch, oWin, oSize)
        dot(W[i, j], h[i]) + b[j] but b[j] is only added once

    Notes
    -----
    - `batch` is the number of examples in a minibatch (batch size).
    - `iBlocks` is the total number of blocks in the input (from lower layer).
    - `iSize` is the size of each of these input blocks.
    - `iWin` is the number of blocks that will be used as inputs. Which blocks
      will be used is specified in `inputIdx`.
    - `oBlocks` is the number of possible output blocks.
    - `oSize` is the size of each of these output blocks.
    - `oWin` is the number of output blocks that will actually be computed.
      Which blocks will be computed is specified in `outputIdx`.
    - If `h` has only two dimensions, a leading axis of length one is added
      to `h`, `inputIdx` and `outputIdx` before the op is applied.

    """
    # The index tensors carry one dimension less than h, and both index
    # tensors must have the same rank.
    assert inputIdx.ndim == h.ndim - 1
    assert outputIdx.ndim == inputIdx.ndim

    single_example = (h.ndim == 2)
    if single_example:
        # Prepend a broadcastable batch axis to every per-example input.
        h = h.dimshuffle('x', 0, 1)
        inputIdx = inputIdx.dimshuffle('x', 0)
        outputIdx = outputIdx.dimshuffle('x', 0)

    gemv_op = SparseBlockGemv()
    # Rows of the bias matching the requested output blocks.
    initial_output = b.take(outputIdx, axis=0)
    return gemv_op(initial_output, W, h, inputIdx, outputIdx)
import
warnings
from
theano.tensor.nnet.blocksparse
import
(
SparseBlockGemv
,
SparseBlockOuter
,
sparse_block_dot
,
sparse_block_gemv
,
sparse_block_gemv_inplace
,
sparse_block_outer
,
sparse_block_outer_inplace
)
# __all__ must list names as strings; listing the objects themselves makes
# `from theano.sandbox.blocksparse import *` fail.
__all__ = ['SparseBlockGemv', 'SparseBlockOuter', 'sparse_block_dot',
           'sparse_block_gemv', 'sparse_block_gemv_inplace',
           'sparse_block_outer', 'sparse_block_outer_inplace']

# Emitted on import of this compatibility module. The two literals are
# concatenated, so the first one needs a trailing space.
warnings.warn("DEPRECATION: theano.sandbox.blocksparse does not exist anymore, "
              "it has been moved to theano.tensor.nnet.blocksparse.",
              category=DeprecationWarning)
theano/sandbox/cuda/opt.py
浏览文件 @
a85a44fc
...
...
@@ -46,7 +46,7 @@ from theano.sandbox.cuda.blas import (
GpuDownsampleFactorMax
,
GpuDownsampleFactorMaxGrad
,
GpuDownsampleFactorMaxGradGrad
)
from
theano.
sandbox
.blocksparse
import
SparseBlockGemv
,
SparseBlockOuter
from
theano.
tensor.nnet
.blocksparse
import
SparseBlockGemv
,
SparseBlockOuter
from
theano.sandbox.cuda.blocksparse
import
(
GpuSparseBlockGemv
,
GpuSparseBlockOuter
,
...
...
theano/sandbox/cuda/tests/test_blocksparse.py
浏览文件 @
a85a44fc
...
...
@@ -4,7 +4,7 @@ from nose.plugins.skip import SkipTest
import
theano
from
theano
import
tensor
import
theano.tests.unittest_tools
as
utt
import
theano.
sandbox
.tests.test_blocksparse
import
theano.
tensor.nnet
.tests.test_blocksparse
import
theano.sandbox.cuda
as
cuda_ndarray
from
theano.sandbox.cuda.blocksparse
import
(
GpuSparseBlockOuter
,
...
...
@@ -22,7 +22,7 @@ else:
class
BlockSparse_Gemv_and_Outer
(
theano
.
sandbox
.
tests
.
test_blocksparse
.
BlockSparse_Gemv_and_Outer
):
theano
.
tensor
.
nnet
.
tests
.
test_blocksparse
.
BlockSparse_Gemv_and_Outer
):
def
setUp
(
self
):
utt
.
seed_rng
()
self
.
mode
=
mode_with_gpu
.
excluding
(
'constant_folding'
)
...
...
theano/sandbox/cuda/tests/test_opt.py
浏览文件 @
a85a44fc
...
...
@@ -29,7 +29,7 @@ from theano.sandbox.cuda import basic_ops
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.scalar.basic_scipy
import
erfinv
from
theano.
sandbox
.blocksparse
import
sparse_block_dot
from
theano.
tensor.nnet
.blocksparse
import
sparse_block_dot
from
theano.sandbox.cuda.blocksparse
import
GpuSparseBlockGemv
,
GpuSparseBlockOuter
...
...
theano/tensor/nnet/__init__.py
浏览文件 @
a85a44fc
from
.nnet
import
*
from
.nnet
import
(
CrossentropyCategorical1Hot
,
CrossentropyCategorical1HotGrad
,
CrossentropySoftmax1HotWithBiasDx
,
CrossentropySoftmaxArgmax1HotWithBias
,
Prepend_scalar_constant_to_each_row
,
Prepend_scalar_to_each_row
,
Softmax
,
SoftmaxGrad
,
SoftmaxWithBias
,
binary_crossentropy
,
categorical_crossentropy
,
crossentropy_categorical_1hot
,
crossentropy_categorical_1hot_grad
,
crossentropy_softmax_1hot
,
crossentropy_softmax_1hot_with_bias
,
crossentropy_softmax_1hot_with_bias_dx
,
crossentropy_softmax_argmax_1hot_with_bias
,
crossentropy_softmax_max_and_argmax_1hot
,
crossentropy_softmax_max_and_argmax_1hot_with_bias
,
crossentropy_to_crossentropy_with_softmax
,
crossentropy_to_crossentropy_with_softmax_with_bias
,
graph_merge_softmax_with_crossentropy_softmax
,
h_softmax
,
local_advanced_indexing_crossentropy_onehot
,
local_advanced_indexing_crossentropy_onehot_grad
,
local_argmax_pushdown
,
local_log_softmax
,
local_softmax_grad_to_crossentropy_with_softmax_grad
,
local_softmax_with_bias
,
local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc
,
make_out_pattern
,
prepend_0_to_each_row
,
prepend_1_to_each_row
,
prepend_scalar_to_each_row
,
relu
,
softmax
,
softmax_grad
,
softmax_graph
,
softmax_op
,
softmax_simplifier
,
softmax_with_bias
)
from
.
import
opt
from
.conv
import
conv2d
,
ConvOp
from
.Conv3D
import
*
from
.ConvGrad3D
import
*
...
...
theano/tensor/nnet/blocksparse.py
0 → 100644
浏览文件 @
a85a44fc
import
numpy
import
theano
from
theano
import
Op
,
Apply
from
theano.tensor
import
discrete_dtypes
from
theano.gradient
import
grad_undefined
class SparseBlockGemv(Op):
    """
    This op computes the dot product of specified pieces of vectors
    and matrices, returning pieces of vectors::

        for b in range(batch_size):
            for j in range(o.shape[1]):
                for i in range(h.shape[1]):
                    o[b, j, :] += numpy.dot(h[b, i], W[iIdx[b, i], oIdx[b, j]])

    where b, h, W, o, iIdx, oIdx are defined in the docstring of make_node.

    .. image:: ../../../images/blocksparse.png
        :scale: 50 %

    """

    registered_opts = []

    def __init__(self, inplace=False):
        """
        Parameters
        ----------
        inplace : bool
            When True, `perform` accumulates directly into the first input
            (`o`) instead of a copy, and `destroy_map` is set accordingly.
        """
        self.inplace = inplace
        if self.inplace:
            # Output 0 overwrites input 0 (`o`).
            self.destroy_map = {0: [0]}

    def make_node(self, o, W, h, inputIdx, outputIdx):
        """
        Compute the dot product of the specified pieces of vectors
        and matrices.

        The parameter types are actually their expected shapes
        relative to each other.

        Parameters
        ----------
        o : batch, oWin, oSize
            output vector
        W : iBlocks, oBlocks, iSize, oSize
            weight matrix
        h : batch, iWin, iSize
            input from lower layer (sparse)
        inputIdx : batch, iWin
            indexes of the input blocks
        outputIdx : batch, oWin
            indexes of the output blocks

        Returns
        -------
        (batch, oWin, oSize)
            dot(W[i, j], h[i]) + o[j]

        Raises
        ------
        TypeError
            If any input does not have the expected number of dimensions.

        Notes
        -----
        - `batch` is the number of examples in a minibatch (batch size).
        - `iBlocks` is the total number of blocks in the input (from lower
          layer).
        - `iSize` is the size of each of these input blocks.
        - `iWin` is the number of blocks that will be used as inputs. Which
          blocks will be used is specified in `inputIdx`.
        - `oBlocks` is the number of possible output blocks.
        - `oSize` is the size of each of these output blocks.
        - `oWin` is the number of output blocks that will actually be computed.
          Which blocks will be computed is specified in `outputIdx`.

        """
        o = theano.tensor.as_tensor_variable(o)
        W = theano.tensor.as_tensor_variable(W)
        h = theano.tensor.as_tensor_variable(h)
        inputIdx = theano.tensor.as_tensor_variable(inputIdx)
        outputIdx = theano.tensor.as_tensor_variable(outputIdx)

        if o.ndim != 3:
            # The message used to say "2D" although the check requires 3D.
            raise TypeError('The output o must be a 3D tensor')
        if W.ndim != 4:
            raise TypeError('The weight matrix W must be a 4D tensor')
        if h.ndim != 3:
            raise TypeError('The input h must be a 3D tensor')
        if inputIdx.ndim != 2:
            raise TypeError('The input indices inputIdx must be a 2D tensor')
        if outputIdx.ndim != 2:
            raise TypeError('The output indices outputIdx must be a 2D tensor')

        # Block indices must have a discrete (integer-like) dtype.
        assert inputIdx.type.dtype in discrete_dtypes
        assert outputIdx.type.dtype in discrete_dtypes

        # The output has the dtype and rank of `o`, with no broadcastable
        # dimension.
        output = o.type.__class__(dtype=o.type.dtype,
                                  broadcastable=(False,) * o.ndim)()

        return Apply(self, [o, W, h, inputIdx, outputIdx], [output])

    def perform(self, node, inp, out_):
        """
        Python implementation: accumulate every selected block product
        into `o` and store the result in the first output storage cell.
        """
        o, W, h, iIdx, oIdx = inp[:5]

        if not self.inplace:
            # Work on a copy so that the input buffer is left untouched.
            o = o.copy()

        for b in range(o.shape[0]):
            for j in range(o.shape[1]):
                outputIdx = oIdx[b, j]
                for i in range(h.shape[1]):
                    inputIdx = iIdx[b, i]
                    w = W[inputIdx, outputIdx]
                    o[b, j, :] += numpy.dot(h[b, i], w)
        out_[0][0] = o

    def grad(self, inputs, grads):
        """
        Return the gradients with respect to `o`, `W` and `h`; the
        gradients with respect to both index inputs are undefined.
        """
        o, W, h, inputIdx, outputIdx = inputs
        go = grads[0]

        outer_fun = SparseBlockOuter(self.inplace)
        gemv_fun = SparseBlockGemv(self.inplace)

        # Gradient on W: block outer products of h and go, accumulated
        # into a zero tensor shaped like W.
        Wgrad = outer_fun(W.zeros_like(), h, go, inputIdx, outputIdx)
        # Gradient on h: the same gemv with the block axes and the
        # within-block axes of W swapped, and the index roles exchanged.
        hgrad = gemv_fun(h.zeros_like(), W.dimshuffle((1, 0, 3, 2)),
                         go, outputIdx, inputIdx)
        return [go, Wgrad, hgrad,
                grad_undefined(self, 3, inputIdx,
                               "grad of inputIdx makes no sense"),
                grad_undefined(self, 4, outputIdx,
                               "grad of outputIdx makes no sense")]
class SparseBlockOuter(Op):
    """
    This computes the outer product of two sets of pieces of vectors
    updating a full matrix with the results::

        for b in range(batch_size):
            for i in range(xIdx.shape[1]):
                for j in range(yIdx.shape[1]):
                    o[xIdx[b, i], yIdx[b, j]] += (alpha * outer(x[b, i], y[b, j]))

    This op is involved in the gradient of SparseBlockGemv.

    """

    registered_opts = []

    def __init__(self, inplace=False):
        """
        Parameters
        ----------
        inplace : bool
            When True, `perform` accumulates directly into the first input
            (`o`) instead of a copy, and `destroy_map` is set accordingly.
        """
        self.inplace = inplace
        if self.inplace:
            # Output 0 overwrites input 0 (`o`).
            self.destroy_map = {0: [0]}

    def make_node(self, o, x, y, xIdx, yIdx, alpha=None):
        """
        Compute the outer product of the specified pieces of vectors
        and accumulate it into the specified blocks of a matrix.

        The parameter types are actually their expected shapes
        relative to each other.

        Parameters
        ----------
        o : xBlocks, yBlocks, xSize, ySize
        x : batch, xWin, xSize
        y : batch, yWin, ySize
        xIdx : batch, iWin
            indexes of the x blocks
        yIdx : batch, oWin
            indexes of the y blocks
        alpha : scalar, optional
            scaling factor applied to every outer product. Defaults to a
            float32 constant equal to 1.

        Returns
        -------
        (xBlocks, yBlocks, xSize, ySize)
            alpha * outer(x[i], y[j]) + o[i, j]

        Notes
        -----
        - `batch` is the number of examples in a minibatch (batch size).
        - `xBlocks` is the total number of blocks in x.
        - `xSize` is the size of each of these x blocks.
        - `xWin` is the number of blocks that will be used as x. Which blocks
          will be used is specified in `xIdx`.
        - `yBlocks` is the number of possible y blocks.
        - `ySize` is the size of each of these y blocks.
        - `yWin` is the number of y blocks that will actually be computed.
          Which blocks will be computed is specified in `yIdx`.

        """
        one = theano.tensor.constant(numpy.asarray(1.0, dtype='float32'))
        o = theano.tensor.as_tensor_variable(o)
        x = theano.tensor.as_tensor_variable(x)
        y = theano.tensor.as_tensor_variable(y)
        # Every input of the Apply node must be a variable, so the index
        # arguments (and a user-supplied alpha) are converted as well.
        xIdx = theano.tensor.as_tensor_variable(xIdx)
        yIdx = theano.tensor.as_tensor_variable(yIdx)

        if alpha is None:
            alpha = one
        else:
            alpha = theano.tensor.as_tensor_variable(alpha)

        # The output has the dtype and rank of `o`, with no broadcastable
        # dimension.
        output = o.type.__class__(dtype=o.type.dtype,
                                  broadcastable=(False,) * o.ndim)()

        return Apply(self, [o, x, y, xIdx, yIdx, alpha], [output])

    def perform(self, node, inp, out_):
        """
        Python implementation: accumulate the scaled block outer products
        into `o` and store the result in the first output storage cell.
        """
        o, x, y, xIdx, yIdx, alpha = inp[:6]

        if not self.inplace:
            # Work on a copy so that the input buffer is left untouched.
            o = o.copy()

        for b in range(x.shape[0]):
            for i in range(xIdx.shape[1]):
                for j in range(yIdx.shape[1]):
                    # alpha was previously unpacked but never applied,
                    # contradicting the formula documented on the class.
                    o[xIdx[b, i], yIdx[b, j]] += (
                        alpha * numpy.outer(x[b, i], y[b, j, :]))
        out_[0][0] = o
# Ready-made op instances: a copying variant and an in-place variant of
# each of the two block-sparse ops.
sparse_block_gemv = SparseBlockGemv(inplace=False)
sparse_block_gemv_inplace = SparseBlockGemv(inplace=True)

sparse_block_outer = SparseBlockOuter(inplace=False)
sparse_block_outer_inplace = SparseBlockOuter(inplace=True)
def sparse_block_dot(W, h, inputIdx, b, outputIdx):
    """
    Block-sparse dot product with bias, built on SparseBlockGemv.

    The selected rows of the bias `b` are used as the initial value of
    the output, then the selected block products are accumulated on top
    of it. See SparseBlockGemv to get more information.

    The parameter types are actually their expected shapes relative to
    each other.

    Parameters
    ----------
    W : iBlocks, oBlocks, iSize, oSize
        weight matrix
    h : batch, iWin, iSize
        input from lower layer (sparse)
    inputIdx : batch, iWin
        indexes of the input blocks
    b : oBlocks, oSize
        bias vector
    outputIdx : batch, oWin
        indexes of the output blocks

    Returns
    -------
    (batch, oWin, oSize)
        dot(W[i, j], h[i]) + b[j] but b[j] is only added once

    Notes
    -----
    - `batch` is the number of examples in a minibatch (batch size).
    - `iBlocks` is the total number of blocks in the input (from lower layer).
    - `iSize` is the size of each of these input blocks.
    - `iWin` is the number of blocks that will be used as inputs. Which blocks
      will be used is specified in `inputIdx`.
    - `oBlocks` is the number of possible output blocks.
    - `oSize` is the size of each of these output blocks.
    - `oWin` is the number of output blocks that will actually be computed.
      Which blocks will be computed is specified in `outputIdx`.
    - If `h` has only two dimensions, a leading axis of length one is added
      to `h`, `inputIdx` and `outputIdx` before the op is applied.

    """
    # The index tensors carry one dimension less than h, and both index
    # tensors must have the same rank.
    assert inputIdx.ndim == h.ndim - 1
    assert outputIdx.ndim == inputIdx.ndim

    single_example = (h.ndim == 2)
    if single_example:
        # Prepend a broadcastable batch axis to every per-example input.
        h = h.dimshuffle('x', 0, 1)
        inputIdx = inputIdx.dimshuffle('x', 0)
        outputIdx = outputIdx.dimshuffle('x', 0)

    gemv_op = SparseBlockGemv()
    # Rows of the bias matching the requested output blocks.
    initial_output = b.take(outputIdx, axis=0)
    return gemv_op(initial_output, W, h, inputIdx, outputIdx)
theano/tensor/nnet/nnet.py
浏览文件 @
a85a44fc
...
...
@@ -29,6 +29,7 @@ from theano.gof import Apply
from
theano.tensor.nnet.sigm
import
sigmoid
,
softplus
from
theano.gradient
import
DisconnectedType
from
theano.gradient
import
grad_not_implemented
from
theano.tensor.nnet.blocksparse
import
sparse_block_dot
from
theano.tensor.type
import
values_eq_approx_remove_nan
...
...
@@ -2041,3 +2042,125 @@ def relu(x, alpha=0):
f1
=
0.5
*
(
1
+
alpha
)
f2
=
0.5
*
(
1
-
alpha
)
return
f1
*
x
+
f2
*
abs
(
x
)
def h_softmax(x, batch_size, n_outputs, n_classes, n_outputs_per_class,
              W1, b1, W2, b2, target=None):
    """ Two-level hierarchical softmax.

    The architecture is composed of two softmax layers: the first predicts the
    class of the input x while the second predicts the output of the input x in
    the predicted class.
    More explanations can be found in the original paper [1]_.

    If target is specified, it will only compute the outputs of the
    corresponding targets. Otherwise, if target is None, it will compute all
    the outputs.

    The outputs are grouped in the same order as they are initially defined.

    .. versionadded:: 0.7.1

    Parameters
    ----------
    x: tensor of shape (batch_size, number of features)
        the minibatch input of the two-layer hierarchical softmax.
    batch_size: int
        the size of the minibatch input x.
    n_outputs: int
        the number of outputs.
    n_classes: int
        the number of classes of the two-layer hierarchical softmax. It
        corresponds to the number of outputs of the first softmax. See note at
        the end.
    n_outputs_per_class: int
        the number of outputs per class. See note at the end.
    W1: tensor of shape (number of features of the input x, n_classes)
        the weight matrix of the first softmax, which maps the input x to the
        probabilities of the classes.
    b1: tensor of shape (n_classes,)
        the bias vector of the first softmax layer.
    W2: tensor of shape (n_classes, number of features of the input x, n_outputs_per_class)
        the weight matrix of the second softmax, which maps the input x to
        the probabilities of the outputs.
    b2: tensor of shape (n_classes, n_outputs_per_class)
        the bias vector of the second softmax layer.
    target: tensor of shape either (batch_size,) or (batch_size, 1)
        (optional, default None)
        contains the indices of the targets for the minibatch
        input x. For each input, the function computes the output for its
        corresponding target. If target is None, then all the outputs are
        computed for each input.

    Returns
    -------
    output_probs: tensor of shape (batch_size, n_outputs) or (batch_size,)
        Output of the two-layer hierarchical softmax for input x. If target is
        not specified (None), then all the outputs are computed and the
        returned tensor has shape (batch_size, n_outputs). Otherwise, when
        target is specified, only the corresponding outputs are computed and
        the returned tensor is a vector of shape (batch_size,) (the target
        is flattened before use).

    Notes
    -----
    The product of n_outputs_per_class and n_classes has to be greater or equal
    to n_outputs. If it is strictly greater, then the irrelevant outputs will
    be ignored.
    n_outputs_per_class and n_classes have to be the same as the corresponding
    dimensions of the tensors of W1, b1, W2 and b2.
    The most computational efficient configuration is when n_outputs_per_class
    and n_classes are equal to the square root of n_outputs.

    References
    ----------
    .. [1] J. Goodman, "Classes for Fast Maximum Entropy Training,"
        ICASSP, 2001, http://arxiv.org/abs/cs/0108006.
    """

    # First softmax that computes the probabilities of belonging to each class
    class_probs = theano.tensor.nnet.softmax(tensor.dot(x, W1) + b1)

    # Inputs of the block-sparse product shared by both branches: W2 is
    # seen as a single input block feeding n_classes output blocks, and
    # every example uses input block 0.
    W2_blocks = W2[None, :, :, :]
    x_blocks = x[:, None, :]
    input_block_ids = tensor.zeros((batch_size, 1), dtype='int32')

    if target is None:  # Computes the probabilities of all the outputs

        # Every example requests every class.
        class_ids = tensor.tile(
            tensor.arange(n_classes, dtype="int32")[None, :], (batch_size, 1))

        # Second softmax that computes the output probabilities
        activations = sparse_block_dot(
            W2_blocks, x_blocks, input_block_ids, b2, class_ids)

        # One softmax per (example, class) pair.
        output_probs = theano.tensor.nnet.softmax(
            activations.reshape((-1, n_outputs_per_class)))
        output_probs = output_probs.reshape((batch_size, n_classes, -1))
        # Weight the within-class probabilities by the class probabilities.
        output_probs = class_probs[:, :, None] * output_probs
        output_probs = output_probs.reshape((batch_size, -1))
        # output_probs.shape[1] is n_classes * n_outputs_per_class, which might
        # be greater than n_outputs, so we ignore the potential irrelevant
        # outputs with the next line:
        output_probs = output_probs[:, :n_outputs]

    else:  # Computes the probabilities of the outputs specified by the targets

        target = target.flatten()

        # Classes to which belong each target
        target_classes = target // n_outputs_per_class

        # Outputs to which belong each target inside a class
        target_outputs_in_class = target % n_outputs_per_class

        # Second softmax that computes the output probabilities
        activations = sparse_block_dot(
            W2_blocks, x_blocks, input_block_ids, b2,
            target_classes[:, None])

        output_probs = theano.tensor.nnet.softmax(activations[:, 0, :])
        # Pick, for each example, the probability of its target class and
        # of its target output inside that class.
        target_class_probs = class_probs[tensor.arange(batch_size),
                                         target_classes]
        output_probs = output_probs[tensor.arange(batch_size),
                                    target_outputs_in_class]
        output_probs = target_class_probs * output_probs

    return output_probs
theano/
sandbox
/opt.py
→
theano/
tensor/nnet
/opt.py
浏览文件 @
a85a44fc
"""
Optimizations addressing the ops in
sandbox
root directory
Optimizations addressing the ops in
nnet
root directory
"""
from
theano
import
compile
# to register the optimizer built by this file
from
theano
import
gof
from
theano.
sandbox
.blocksparse
import
(
from
theano.
tensor.nnet
.blocksparse
import
(
SparseBlockGemv
,
SparseBlockOuter
,
sparse_block_gemv_inplace
,
...
...
theano/
sandbox
/tests/test_blocksparse.py
→
theano/
tensor/nnet
/tests/test_blocksparse.py
浏览文件 @
a85a44fc
...
...
@@ -10,7 +10,7 @@ import theano
from
theano
import
tensor
import
theano.tests.unittest_tools
as
utt
from
theano.
sandbox
.blocksparse
import
sparse_block_dot
,
\
from
theano.
tensor.nnet
.blocksparse
import
sparse_block_dot
,
\
sparse_block_gemv
,
sparse_block_outer
...
...
theano/tensor/nnet/tests/test_nnet.py
浏览文件 @
a85a44fc
...
...
@@ -28,7 +28,8 @@ from theano.tensor.nnet import (categorical_crossentropy,
softmax_with_bias
,
SoftmaxGrad
,
Prepend_scalar_constant_to_each_row
,
Prepend_scalar_to_each_row
,
relu
)
relu
,
h_softmax
)
from
theano.tensor
import
matrix
,
vector
,
lvector
,
scalar
...
...
@@ -1437,5 +1438,70 @@ def test_relu():
assert
numpy
.
allclose
(
y
,
numpy
.
where
(
X
>
0
,
X
,
A
*
X
),
rtol
=
3e-5
)
if
__name__
==
'__main__'
:
unittest
.
main
()
def test_h_softmax():
    """
    Tests the output dimensions of the h_softmax when a target is provided or
    not, and that the probabilities computed for the targets are the same in
    both modes.
    """

    #############
    # Config
    #############

    input_size = 4
    batch_size = 2
    h_softmax_level1_size = 5
    h_softmax_level2_size = 3
    output_size = h_softmax_level1_size * h_softmax_level2_size

    #############
    # Initialize shared variables
    #############

    floatX = theano.config.floatX
    shared = theano.shared

    # First level of h_softmax
    W1 = numpy.asarray(numpy.random.normal(
        size=(input_size, h_softmax_level1_size)), dtype=floatX)
    W1 = shared(W1)
    b1 = shared(numpy.asarray(numpy.zeros((h_softmax_level1_size,)),
                              dtype=floatX))

    # Second level of h_softmax
    W2 = numpy.asarray(numpy.random.normal(
        size=(h_softmax_level1_size, input_size, h_softmax_level2_size)),
        dtype=floatX)
    W2 = shared(W2)
    b2 = shared(
        numpy.asarray(numpy.zeros((h_softmax_level1_size,
                                   h_softmax_level2_size)), dtype=floatX))

    #############
    # Build graph
    #############
    x = tensor.matrix('x')
    y = tensor.ivector('y')

    # This only computes the output corresponding to the target
    y_hat_tg = h_softmax(x, batch_size, output_size, h_softmax_level1_size,
                         h_softmax_level2_size, W1, b1, W2, b2, y)

    # This computes all the outputs
    y_hat_all = h_softmax(x, batch_size, output_size, h_softmax_level1_size,
                          h_softmax_level2_size, W1, b1, W2, b2)

    #############
    # Compile functions
    #############
    fun_output_tg = theano.function([x, y], y_hat_tg)
    fun_output = theano.function([x], y_hat_all)

    #############
    # Test
    #############
    x_mat = numpy.random.normal(size=(batch_size, input_size)).astype(floatX)
    y_mat = numpy.random.randint(0, output_size, batch_size).astype('int32')

    tg_output = fun_output_tg(x_mat, y_mat)
    all_outputs = fun_output(x_mat)

    assert tg_output.shape == (batch_size,)
    assert all_outputs.shape == (batch_size, output_size)

    # The probability computed for each target must match the entry for
    # that target in the full output distribution.
    assert numpy.allclose(
        all_outputs[numpy.arange(batch_size), y_mat], tg_output, rtol=1e-4)
theano/
sandbox
/tests/test_opt.py
→
theano/
tensor/nnet
/tests/test_opt.py
浏览文件 @
a85a44fc
import
theano
from
theano
import
tensor
from
theano.
sandbox
.blocksparse
import
sparse_block_dot
from
theano.
tensor.nnet
.blocksparse
import
sparse_block_dot
def
test_blocksparse_inplace_gemv_opt
():
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论