Commit ff1bbc05, authored by Frederic Bastien

Fix the GPU conv optimizer for the case where the CPU and GPU conv ops don't have the same broadcast pattern.

Parent 22896568
...@@ -497,7 +497,9 @@ def local_gpu_conv(node): ...@@ -497,7 +497,9 @@ def local_gpu_conv(node):
if host_input.owner and isinstance(host_input.owner.op, conv.ConvOp): if host_input.owner and isinstance(host_input.owner.op, conv.ConvOp):
gpu_conv = GpuConvOp_from_ConvOp(host_input.owner.op) gpu_conv = GpuConvOp_from_ConvOp(host_input.owner.op)
img, kern = host_input.owner.inputs img, kern = host_input.owner.inputs
return [gpu_conv(gpu_from_host(img), gpu_from_host(kern))] #in some case the ConvOp broadcast the last 2 dimensions differently then the gpu ConvOp
return [tensor.patternbroadcast(gpu_conv(gpu_from_host(img), gpu_from_host(kern)),
node.outputs[0].broadcastable)]
if isinstance(node.op, conv.ConvOp): if isinstance(node.op, conv.ConvOp):
#conv(host_from_gpu) -> host_from_gpu(gpu_conv) #conv(host_from_gpu) -> host_from_gpu(gpu_conv)
...@@ -506,7 +508,10 @@ def local_gpu_conv(node): ...@@ -506,7 +508,10 @@ def local_gpu_conv(node):
kern_on_gpu = (kern.owner and kern.owner.op == host_from_gpu) kern_on_gpu = (kern.owner and kern.owner.op == host_from_gpu)
if img_on_gpu or kern_on_gpu: if img_on_gpu or kern_on_gpu:
gpu_conv = GpuConvOp_from_ConvOp(node.op) gpu_conv = GpuConvOp_from_ConvOp(node.op)
return [host_from_gpu(gpu_conv(gpu_from_host(img), gpu_from_host(kern)))] #in some case the ConvOp broadcast the last 2 dimensions differently then the gpu ConvOp
return [tensor.patternbroadcast(host_from_gpu(gpu_conv(gpu_from_host(img),
gpu_from_host(kern))),
node.outputs[0].broadcastable)]
import theano.tensor.signal.downsample as downsample import theano.tensor.signal.downsample as downsample
@register_opt() @register_opt()
......
...@@ -2744,6 +2744,14 @@ def unbroadcast(x, *axes): ...@@ -2744,6 +2744,14 @@ def unbroadcast(x, *axes):
rval = Rebroadcast(*[(axis, False) for axis in axes])(x) rval = Rebroadcast(*[(axis, False) for axis in axes])(x)
return theano.tensor.opt.apply_rebroadcast_opt(rval) return theano.tensor.opt.apply_rebroadcast_opt(rval)
def patternbroadcast(x, broadcastable):
    """
    Impose the given broadcastable pattern on `x`, axis by axis.

    :param x: a tensor variable whose broadcastable flags should be
        adjusted.
    :param broadcastable: a sequence of booleans, one per dimension of
        `x`; each entry is the broadcastable flag to impose on the
        corresponding axis (True and False entries are both applied,
        unlike `unbroadcast`, which only clears flags).
    :returns: a variable equivalent to `x` carrying the requested
        broadcastable pattern.

    We apply the rebroadcast optimization immediately so the Rebroadcast
    node does not pollute the graph, especially during the GPU
    optimizations.
    """
    # Rebroadcast takes one (axis, flag) pair per dimension, all at once.
    rval = Rebroadcast(*[(axis, flag)
                         for axis, flag in enumerate(broadcastable)])(x)
    return theano.tensor.opt.apply_rebroadcast_opt(rval)
class Join(Op): class Join(Op):
""" """
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论