Commit a4d527ca authored by Melanie Ducoffe

optimisation and blas

Parent 88924540
...@@ -554,6 +554,8 @@ def local_gpu_dot22(node): ...@@ -554,6 +554,8 @@ def local_gpu_dot22(node):
@local_optimizer([gpu_from_host, tensor.blas.Dot22Scalar]) @local_optimizer([gpu_from_host, tensor.blas.Dot22Scalar])
def local_gpu_dot22scalar(node): def local_gpu_dot22scalar(node):
""" """
    Deprecated: _dot22scalar has been replaced by gemm;
    see Dot22Scalar for more details.
gpu_from_host(dot22scalar) -> gpudot(gpu_from_host) gpu_from_host(dot22scalar) -> gpudot(gpu_from_host)
dot(host_from_gpu) -> host_from_gpu(gpudot22scalar) dot(host_from_gpu) -> host_from_gpu(gpudot22scalar)
......
...@@ -1818,13 +1818,11 @@ def local_dot22_to_ger_or_gemv(node): ...@@ -1818,13 +1818,11 @@ def local_dot22_to_ger_or_gemv(node):
xb = x.broadcastable xb = x.broadcastable
yb = y.broadcastable yb = y.broadcastable
one = T.as_tensor_variable(numpy.asarray(1, dtype=x.dtype)) one = T.as_tensor_variable(numpy.asarray(1, dtype=x.dtype))
# zero = T.as_tensor_variable(numpy.asarray(0, dtype=x.dtype)) zero = T.as_tensor_variable(numpy.asarray(0, dtype=x.dtype))
zero = T.AllocEmpty(x.dtype)(1)
if xb[1] and yb[0]: if xb[1] and yb[0]:
        # x and y are both vectors so this might qualify for a GER         # x and y are both vectors so this might qualify for a GER
xv = x.dimshuffle(0) xv = x.dimshuffle(0)
yv = y.dimshuffle(1) yv = y.dimshuffle(1)
zeros = T.zeros([x.shape[0], y.shape[1]], dtype=x.dtype) zeros = T.zeros([x.shape[0], y.shape[1]], dtype=x.dtype)
rval = ger(zeros, one, xv, yv) rval = ger(zeros, one, xv, yv)
return [rval] return [rval]
...@@ -1832,19 +1830,22 @@ def local_dot22_to_ger_or_gemv(node): ...@@ -1832,19 +1830,22 @@ def local_dot22_to_ger_or_gemv(node):
# x and y are both vectors so this qualifies for a sdot / ddot # x and y are both vectors so this qualifies for a sdot / ddot
# TODO: Theano doesn't have a sdot, but gemv is better than _dot22 # TODO: Theano doesn't have a sdot, but gemv is better than _dot22
xv = x.dimshuffle(1) xv = x.dimshuffle(1)
zeros = T.zeros([1], x.dtype) #zeros = T.zeros([1], x.dtype)
zeros = T.AllocEmpty(x.dtype)(1)
rval = gemv_no_inplace(zeros, one, y.T, xv, zero) rval = gemv_no_inplace(zeros, one, y.T, xv, zero)
return [rval.dimshuffle('x', 0)] return [rval.dimshuffle('x', 0)]
if xb[0] and not yb[0] and not yb[1]: if xb[0] and not yb[0] and not yb[1]:
# x is vector, y is matrix so try gemv # x is vector, y is matrix so try gemv
xv = x.dimshuffle(1) xv = x.dimshuffle(1)
zeros = T.zeros([y.shape[1]], x.dtype) #zeros = T.zeros([y.shape[1]], x.dtype)
zeros = T.AllocEmpty(x.dtype)(y.shape[1])
rval = gemv_no_inplace(zeros, one, y.T, xv, zero) rval = gemv_no_inplace(zeros, one, y.T, xv, zero)
return [rval.dimshuffle('x', 0)] return [rval.dimshuffle('x', 0)]
if not xb[0] and not xb[1] and yb[1]: if not xb[0] and not xb[1] and yb[1]:
# x is matrix, y is vector, try gemv # x is matrix, y is vector, try gemv
yv = y.dimshuffle(0) yv = y.dimshuffle(0)
zeros = T.zeros([x.shape[0]], dtype=x.dtype) #zeros = T.zeros([x.shape[0]], dtype=x.dtype)
zeros = T.AllocEmpty(x.dtype)(x.shape[0])
rval = gemv_no_inplace(zeros, one, x, yv, zero) rval = gemv_no_inplace(zeros, one, x, yv, zero)
return [rval.dimshuffle(0, 'x')] return [rval.dimshuffle(0, 'x')]
...@@ -2043,8 +2044,14 @@ def local_dot22_to_dot22scalar(node): ...@@ -2043,8 +2044,14 @@ def local_dot22_to_dot22scalar(node):
a = T.cast(_as_scalar(m.owner.inputs[scalar_idx], a = T.cast(_as_scalar(m.owner.inputs[scalar_idx],
dtype=d.dtype), d.type.dtype) dtype=d.dtype), d.type.dtype)
assert not a.type.ndim assert not a.type.ndim
dot = _dot22scalar(d.owner.inputs[0], d.owner.inputs[1], a)
# Deprecated :
#dot = _dot22scalar(d.owner.inputs[0], d.owner.inputs[1], a)
z = T.AllocEmpty(d.owner.inputs[0].dtype)(d.owner.inputs[0].shape[0],
d.owner.inputs[1].shape[1])
zero = T.as_tensor_variable(numpy.asarray(0, dtype=a.dtype))
dot = gemm(z, a, d.owner.inputs[0], d.owner.inputs[1], zero)
# The other inputs to the original node that were # The other inputs to the original node that were
# neither part of the dot22 or this mul should be # neither part of the dot22 or this mul should be
# factors in the returned "mul" node. # factors in the returned "mul" node.
...@@ -2079,10 +2086,22 @@ def local_dot22_to_dot22scalar(node): ...@@ -2079,10 +2086,22 @@ def local_dot22_to_dot22scalar(node):
a = T.cast(i_scalar[scalar_idx], d.type.dtype) a = T.cast(i_scalar[scalar_idx], d.type.dtype)
assert not a.type.ndim assert not a.type.ndim
if len(o) == 0: if len(o) == 0:
return [_dot22scalar(d.owner.inputs[0], d.owner.inputs[1], a)] # Deprecated
#return [_dot22scalar(d.owner.inputs[0], d.owner.inputs[1], a)]
z = T.AllocEmpty(d.owner.inputs[0].dtype)(d.owner.inputs[0].shape[0],
d.owner.inputs[1].shape[1])
zero = T.as_tensor_variable(numpy.asarray(0, dtype=a.dtype))
return [gemm(z, a, d.owner.inputs[0], d.owner.inputs[1], zero)]
else: else:
return [T.mul(_dot22scalar(d.owner.inputs[0], # Deprecated
d.owner.inputs[1], a), *o)] #return [T.mul(_dot22scalar(d.owner.inputs[0],
# d.owner.inputs[1], a), *o)]
z = T.AllocEmpty(d.owner.inputs[0].dtype)(d.owner.inputs[0].shape[0],
d.owner.inputs[1].shape[1])
zero = T.as_tensor_variable(numpy.asarray(0, dtype=a.dtype))
return [T.mul(gemm(z, a, d.owner.inputs[0], d.owner.inputs[1],
zero), *o)]
    # must happen after gemm as the gemm optimizer doesn't understand     # must happen after gemm as the gemm optimizer doesn't understand
    # dot22scalar, and gemm gives more speedup than dot22scalar     # dot22scalar, and gemm gives more speedup than dot22scalar
......
Markdown is supported
0%
You are adding 0 people to this discussion. Please proceed carefully.
Please finish editing this comment first!
Register or sign in to post a comment