Commit e5e40aec, authored by: Frederic

make fast_compile work with the new gpu back-end.

Parent commit: c10087da
...@@ -44,9 +44,9 @@ gpu_cut_copies = EquilibriumDB() ...@@ -44,9 +44,9 @@ gpu_cut_copies = EquilibriumDB()
gpu_seqopt = SequenceDB() gpu_seqopt = SequenceDB()
gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1, gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1,
'fast_run', 'inplace', 'gpuarray') 'fast_compile', 'fast_run', 'inplace', 'gpuarray')
gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2, gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2,
'fast_run', 'gpuarray') 'fast_compile', 'fast_run', 'gpuarray')
# do not add 'fast_run' to these two as this would always enable gpuarray mode # do not add 'fast_run' to these two as this would always enable gpuarray mode
optdb.register('gpuarray_opt', gpu_seqopt, optdb.register('gpuarray_opt', gpu_seqopt,
...@@ -61,7 +61,7 @@ def register_opt(*tags, **kwargs): ...@@ -61,7 +61,7 @@ def register_opt(*tags, **kwargs):
return local_opt return local_opt
return f return f
register_opt()(theano.tensor.opt.local_track_shape_i) register_opt('fast_compile')(theano.tensor.opt.local_track_shape_i)
def safe_to_gpu(x): def safe_to_gpu(x):
...@@ -145,19 +145,20 @@ def local_cut_gpu_host_gpu(node): ...@@ -145,19 +145,20 @@ def local_cut_gpu_host_gpu(node):
return [node.inputs[0].owner.inputs[0]] return [node.inputs[0].owner.inputs[0]]
return False return False
gpu_cut_copies.register('cut_gpua_host_transfers', local_cut_gpu_host_gpu, gpu_cut_copies.register('cut_gpua_host_transfers', local_cut_gpu_host_gpu,
'fast_run', 'inplace', 'gpuarray') 'fast_compile', 'fast_run', 'inplace', 'gpuarray')
gpu_cut_copies.register('cut_gpua_constant_transfers', gpu_cut_copies.register('cut_gpua_constant_transfers',
tensor.opt.constant_folding, tensor.opt.constant_folding,
'fast_run', 'gpuarray') 'fast_compile', 'fast_run', 'gpuarray')
optdb['canonicalize'].register('local_cut_gpua_host_gpua', optdb['canonicalize'].register('local_cut_gpua_host_gpua',
local_cut_gpu_host_gpu, 'fast_run', 'gpuarray') local_cut_gpu_host_gpu,
'fast_compile', 'fast_run', 'gpuarray')
@register_opt() @register_opt('fast_compile')
@local_optimizer([tensor.Alloc]) @local_optimizer([tensor.Alloc])
def local_gpuaalloc2(node): def local_gpuaalloc2(node):
""" """
Join(axis, Alloc, Alloc, ...) -> Join(axis, GpuAlloc, Alloc, ...) Join(axis, {Alloc or HostFromGPU}, ...) -> Join(axis, GpuAlloc, Alloc, ...)
Moves an alloc that is an input to join to the gpu. Moves an alloc that is an input to join to the gpu.
""" """
...@@ -171,7 +172,7 @@ def local_gpuaalloc2(node): ...@@ -171,7 +172,7 @@ def local_gpuaalloc2(node):
return [host_from_gpu(gpu_alloc(*node.inputs))] return [host_from_gpu(gpu_alloc(*node.inputs))]
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Alloc]) @op_lifter([tensor.Alloc])
def local_gpuaalloc(node): def local_gpuaalloc(node):
new_out = gpu_alloc(*node.inputs) new_out = gpu_alloc(*node.inputs)
...@@ -199,7 +200,7 @@ def local_gpualloc_memset_0(node): ...@@ -199,7 +200,7 @@ def local_gpualloc_memset_0(node):
return [new_out] return [new_out]
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Reshape]) @op_lifter([tensor.Reshape])
def local_gpureshape(node): def local_gpureshape(node):
op = node.op op = node.op
...@@ -210,14 +211,14 @@ def local_gpureshape(node): ...@@ -210,14 +211,14 @@ def local_gpureshape(node):
return res return res
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Rebroadcast]) @op_lifter([tensor.Rebroadcast])
def local_gpu_rebroadcast(node): def local_gpu_rebroadcast(node):
if isinstance(node.inputs[0].owner.op, HostFromGpu): if isinstance(node.inputs[0].owner.op, HostFromGpu):
return node.op(node.inputs[0].owner.inputs[0]) return node.op(node.inputs[0].owner.inputs[0])
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Flatten]) @op_lifter([tensor.Flatten])
def local_gpuflatten(node): def local_gpuflatten(node):
op = node.op op = node.op
...@@ -230,7 +231,7 @@ def local_gpuflatten(node): ...@@ -230,7 +231,7 @@ def local_gpuflatten(node):
return o return o
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Elemwise]) @op_lifter([tensor.Elemwise])
def local_gpu_elemwise(node): def local_gpu_elemwise(node):
op = node.op op = node.op
...@@ -273,14 +274,14 @@ optdb.register('gpua_inplace_opt', inplace_gpu_elemwise_opt, 75, ...@@ -273,14 +274,14 @@ optdb.register('gpua_inplace_opt', inplace_gpu_elemwise_opt, 75,
'inplace_elemwise_optimizer', 'fast_run', 'inplace', 'gpuarray') 'inplace_elemwise_optimizer', 'fast_run', 'inplace', 'gpuarray')
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.DimShuffle]) @op_lifter([tensor.DimShuffle])
def local_gpua_dimshuffle(node): def local_gpua_dimshuffle(node):
return GpuDimShuffle(node.op.input_broadcastable, return GpuDimShuffle(node.op.input_broadcastable,
node.op.new_order) node.op.new_order)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.SpecifyShape]) @op_lifter([tensor.SpecifyShape])
def local_gpua_specifyShape(node): def local_gpua_specifyShape(node):
if isinstance(node.inputs[0].type, GpuArrayType): if isinstance(node.inputs[0].type, GpuArrayType):
...@@ -293,7 +294,7 @@ def gpu_print_wrapper(op, cnda): ...@@ -293,7 +294,7 @@ def gpu_print_wrapper(op, cnda):
op.old_op.global_fn(op.old_op, numpy.asarray(cnda)) op.old_op.global_fn(op.old_op, numpy.asarray(cnda))
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.printing.Print]) @op_lifter([tensor.printing.Print])
def local_gpu_print_op(node): def local_gpu_print_op(node):
x, = node.inputs x, = node.inputs
...@@ -303,13 +304,13 @@ def local_gpu_print_op(node): ...@@ -303,13 +304,13 @@ def local_gpu_print_op(node):
return new_op(gpu_x) return new_op(gpu_x)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Join]) @op_lifter([tensor.Join])
def local_gpua_join(node): def local_gpua_join(node):
return gpu_join return gpu_join
@register_opt() @register_opt('fast_compile')
@local_optimizer([GpuJoin]) @local_optimizer([GpuJoin])
def local_gpuajoin_1(node): def local_gpuajoin_1(node):
# join of a single element # join of a single element
...@@ -318,19 +319,19 @@ def local_gpuajoin_1(node): ...@@ -318,19 +319,19 @@ def local_gpuajoin_1(node):
return [node.inputs[1]] return [node.inputs[1]]
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Split]) @op_lifter([tensor.Split])
def local_gpua_split(node): def local_gpua_split(node):
return GpuSplit(node.op.len_splits) return GpuSplit(node.op.len_splits)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Subtensor]) @op_lifter([tensor.Subtensor])
def local_gpua_subtensor(node): def local_gpua_subtensor(node):
return GpuSubtensor(node.op.idx_list) return GpuSubtensor(node.op.idx_list)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.IncSubtensor]) @op_lifter([tensor.IncSubtensor])
def local_gpua_incsubtensor(node): def local_gpua_incsubtensor(node):
return GpuIncSubtensor(node.op.idx_list, node.op.inplace, return GpuIncSubtensor(node.op.idx_list, node.op.inplace,
...@@ -338,7 +339,7 @@ def local_gpua_incsubtensor(node): ...@@ -338,7 +339,7 @@ def local_gpua_incsubtensor(node):
node.op.destroyhandler_tolerate_aliased) node.op.destroyhandler_tolerate_aliased)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.AdvancedIncSubtensor1]) @op_lifter([tensor.AdvancedIncSubtensor1])
def local_gpua_advanced_incsubtensor(node): def local_gpua_advanced_incsubtensor(node):
...@@ -362,7 +363,7 @@ def local_gpua_advanced_incsubtensor(node): ...@@ -362,7 +363,7 @@ def local_gpua_advanced_incsubtensor(node):
set_instead_of_inc=set_instead_of_inc) set_instead_of_inc=set_instead_of_inc)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.CAReduce, tensor.Sum, tensor.elemwise.Prod]) @op_lifter([tensor.CAReduce, tensor.Sum, tensor.elemwise.Prod])
def local_gpua_careduce(node): def local_gpua_careduce(node):
if isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul, if isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul,
...@@ -442,71 +443,67 @@ def local_gpua_careduce(node): ...@@ -442,71 +443,67 @@ def local_gpua_careduce(node):
return [unreshaped_reduce] return [unreshaped_reduce]
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.blas.Gemv, tensor.blas_c.CGemv]) @op_lifter([tensor.blas.Gemv, tensor.blas_c.CGemv])
def local_gpua_gemv(node): def local_gpua_gemv(node):
return GpuGemv(inplace=node.op.inplace) return GpuGemv(inplace=node.op.inplace)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.blas.Gemm]) @op_lifter([tensor.blas.Gemm])
def local_gpua_gemm(node): def local_gpua_gemm(node):
return GpuGemm(inplace=node.op.inplace) return GpuGemm(inplace=node.op.inplace)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.blas.Ger, tensor.blas_c.CGer, tensor.blas_scipy.ScipyGer]) @op_lifter([tensor.blas.Ger, tensor.blas_c.CGer, tensor.blas_scipy.ScipyGer])
def local_gpua_ger(node): def local_gpua_ger(node):
return GpuGer(destructive=node.op.destructive) return GpuGer(destructive=node.op.destructive)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.blas.Dot22]) @op_lifter([tensor.blas.Dot22])
def local_gpua_dot22(node): def local_gpua_dot22(node):
return gpu_dot22 return gpu_dot22
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.basic.Eye]) @op_lifter([tensor.basic.Eye])
def local_gpua_eye(node): def local_gpua_eye(node):
return GpuEye(dtype=node.op.dtype) return GpuEye(dtype=node.op.dtype)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias]) @op_lifter([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias])
def local_gpua_crossentropysoftmaxargmax1hotwithbias(node): def local_gpua_crossentropysoftmaxargmax1hotwithbias(node):
return GpuCrossentropySoftmaxArgmax1HotWithBias() return GpuCrossentropySoftmaxArgmax1HotWithBias()
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.nnet.CrossentropySoftmax1HotWithBiasDx]) @op_lifter([tensor.nnet.CrossentropySoftmax1HotWithBiasDx])
def local_gpua_crossentropysoftmax1hotwithbiasdx(node): def local_gpua_crossentropysoftmax1hotwithbiasdx(node):
return GpuCrossentropySoftmax1HotWithBiasDx() return GpuCrossentropySoftmax1HotWithBiasDx()
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.nnet.Softmax]) @op_lifter([tensor.nnet.Softmax])
def local_gpua_softmax(node): def local_gpua_softmax(node):
return GpuSoftmax() return GpuSoftmax()
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.nnet.SoftmaxWithBias]) @op_lifter([tensor.nnet.SoftmaxWithBias])
def local_gpua_softmaxwithbias(node): def local_gpua_softmaxwithbias(node):
return GpuSoftmaxWithBias() return GpuSoftmaxWithBias()
@register_opt() @register_opt('fast_compile')
@local_optimizer([theano.tensor.opt.Assert]) @op_lifter([theano.tensor.opt.Assert])
def local_assert(node): def local_assert(node):
if (isinstance(node.op, theano.tensor.opt.Assert) and
node.inputs[0].owner and
isinstance(node.inputs[0].owner.op,
HostFromGpu)):
return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0]))] return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0]))]
@register_opt() @register_opt('fast_compile')
@op_lifter([gpu_from_host, ConvOp]) @op_lifter([gpu_from_host, ConvOp])
def local_gpu_conv(node): def local_gpu_conv(node):
""" """
...@@ -654,7 +651,7 @@ def gpu_reconstruct_graph(inputs, outputs, tag=None): ...@@ -654,7 +651,7 @@ def gpu_reconstruct_graph(inputs, outputs, tag=None):
return (nw_inputs, nw_outputs) return (nw_inputs, nw_outputs)
@register_opt('scan') @register_opt('scan', 'fast_compile')
@op_lifter([scan_op.Scan]) @op_lifter([scan_op.Scan])
def local_scan_to_gpua(node): def local_scan_to_gpua(node):
info = copy.deepcopy(node.op.info) info = copy.deepcopy(node.op.info)
......
Markdown formatting is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment