Commit 149263a9 authored by Arnaud Bergeron

Use shape_i when inserting Gemm16 to avoid recomputing stuff for their shapes.

Parent 7f7749d2
@@ -3,6 +3,7 @@ import theano
 from theano import Op, Apply, Variable, tensor
 from theano.compile import optdb
+from theano.compile.ops import shape_i
 from theano.gof import local_optimizer
 from theano.scalar import as_scalar, constant
@@ -96,10 +97,11 @@ def local_dot_to_gemm16(node):
             node.inputs[0].dtype == 'float16' and
             node.inputs[1].dtype == 'float16' and
             node.inputs[0].ndim == 2 and node.inputs[1].ndim == 2):
+        fgraph = node.inputs[0].fgraph
         A = gpu_from_host(node.inputs[0])
         B = gpu_from_host(node.inputs[1])
         C = gpu_alloc(numpy.asarray(0, dtype='float16'),
-                      A.shape[0], B.shape[1])
+                      shape_i(A, 0, fgraph), shape_i(B, 1, fgraph))
         return [host_from_gpu(Gemm16()(C, 1.0, A, B, 0.0))]
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论