提交 17388820 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

- Clean up imports.

- Add an optimization to lift HostFromGpu through Rebroadcast.

- Add a broadcast pattern check to the GpuAlloc optimization.
上级 ac8ed0a7
...@@ -161,7 +161,7 @@ class HostFromGpu(Op): ...@@ -161,7 +161,7 @@ class HostFromGpu(Op):
raise TypeError(x) raise TypeError(x)
return Apply(self, [x], return Apply(self, [x],
[tensor.TensorType(dtype=x.dtype, [tensor.TensorType(dtype=x.dtype,
broadcastable=x.broadcastable,)()]) broadcastable=x.broadcastable)()])
def perform(self, node, inp, out): def perform(self, node, inp, out):
x, = inp x, = inp
......
...@@ -13,18 +13,17 @@ from theano.scan_module import scan_utils, scan_op, scan_opt ...@@ -13,18 +13,17 @@ from theano.scan_module import scan_utils, scan_op, scan_opt
from theano.gof.python25 import all, any from theano.gof.python25 import all, any
from theano.tensor.nnet.conv import ConvOp from theano.tensor.nnet.conv import ConvOp
from theano.sandbox.gpuarray.type import GpuArrayType from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, from theano.sandbox.gpuarray.basic_ops import (
gpu_from_host, host_from_gpu, gpu_from_host, HostFromGpu,
gpu_alloc, gpu_alloc, GpuAlloc, GpuReshape, GpuEye
GpuAlloc, )
GpuReshape,
GpuEye)
from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer
from theano.sandbox.gpuarray.conv import GpuConv from theano.sandbox.gpuarray.conv import GpuConv
from theano.sandbox.gpuarray.nnet import (GpuCrossentropySoftmaxArgmax1HotWithBias, from theano.sandbox.gpuarray.nnet import (
GpuCrossentropySoftmax1HotWithBiasDx, GpuCrossentropySoftmaxArgmax1HotWithBias,
GpuSoftmaxWithBias, GpuCrossentropySoftmax1HotWithBiasDx,
GpuSoftmax) GpuSoftmaxWithBias, GpuSoftmax
)
from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar, from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar,
GpuDimShuffle, GpuCAReduceCuda) GpuDimShuffle, GpuCAReduceCuda)
from theano.sandbox.gpuarray.subtensor import GpuIncSubtensor, GpuSubtensor from theano.sandbox.gpuarray.subtensor import GpuIncSubtensor, GpuSubtensor
...@@ -134,7 +133,17 @@ optdb['canonicalize'].register('local_cut_gpua_host_gpua', ...@@ -134,7 +133,17 @@ optdb['canonicalize'].register('local_cut_gpua_host_gpua',
@register_opt() @register_opt()
@op_lifter([tensor.Alloc]) @op_lifter([tensor.Alloc])
def local_gpualloc(node): def local_gpualloc(node):
return gpu_alloc new_out = gpu_alloc(*node.inputs)
# We need to hide new broadcastable dimensions because
# ReplaceValidate doesn't like when they change.
if new_out.broadcastable != node.outputs[0].broadcastable:
# but if a dim is suddenly not broadcastable anymore then that's a bug
for b_old, b_new in zip(node.outputs[0].broadcastable,
new_out.broadcastable):
assert b_new or (not b_old)
new_out = tensor.patternbroadcast(new_out,
node.outputs[0].broadcastable)
return (new_out,)
@register_opt() @register_opt()
...@@ -160,6 +169,13 @@ def local_gpureshape(node): ...@@ -160,6 +169,13 @@ def local_gpureshape(node):
return res return res
@register_opt()
@op_lifter([tensor.Rebroadcast])
def local_gpu_rebroadcast(node):
    """Lift a ``HostFromGpu`` transfer through a ``Rebroadcast`` node.

    When the input of the ``Rebroadcast`` is produced by ``HostFromGpu``,
    the same ``Rebroadcast`` op is applied directly to the variable that
    fed the transfer, so the rebroadcast no longer sits behind the
    transfer to the host.

    :param node: the Apply node whose op is a ``tensor.Rebroadcast``.
    :return: the result of applying ``node.op`` to the input of the
        ``HostFromGpu`` node, or ``None`` when the pattern does not match.
    """
    producer = node.inputs[0].owner
    # The input may have no producing Apply node (presumably graph inputs
    # and constants -- confirm against op_lifter's trigger conditions);
    # dereferencing ``.op`` on None would raise AttributeError.
    if producer is not None and isinstance(producer.op, HostFromGpu):
        return node.op(producer.inputs[0])
    return None
@register_opt() @register_opt()
@op_lifter([tensor.Flatten]) @op_lifter([tensor.Flatten])
def local_gpuflatten(node): def local_gpuflatten(node):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论