Commit e5e40aec, authored by: Frederic

make fast_compile work with the new gpu back-end.

Parent commit: c10087da
...@@ -44,9 +44,9 @@ gpu_cut_copies = EquilibriumDB() ...@@ -44,9 +44,9 @@ gpu_cut_copies = EquilibriumDB()
gpu_seqopt = SequenceDB() gpu_seqopt = SequenceDB()
gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1, gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1,
'fast_run', 'inplace', 'gpuarray') 'fast_compile', 'fast_run', 'inplace', 'gpuarray')
gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2, gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2,
'fast_run', 'gpuarray') 'fast_compile', 'fast_run', 'gpuarray')
# do not add 'fast_run' to these two as this would always enable gpuarray mode # do not add 'fast_run' to these two as this would always enable gpuarray mode
optdb.register('gpuarray_opt', gpu_seqopt, optdb.register('gpuarray_opt', gpu_seqopt,
...@@ -61,7 +61,7 @@ def register_opt(*tags, **kwargs): ...@@ -61,7 +61,7 @@ def register_opt(*tags, **kwargs):
return local_opt return local_opt
return f return f
register_opt()(theano.tensor.opt.local_track_shape_i) register_opt('fast_compile')(theano.tensor.opt.local_track_shape_i)
def safe_to_gpu(x): def safe_to_gpu(x):
...@@ -145,19 +145,20 @@ def local_cut_gpu_host_gpu(node): ...@@ -145,19 +145,20 @@ def local_cut_gpu_host_gpu(node):
return [node.inputs[0].owner.inputs[0]] return [node.inputs[0].owner.inputs[0]]
return False return False
gpu_cut_copies.register('cut_gpua_host_transfers', local_cut_gpu_host_gpu, gpu_cut_copies.register('cut_gpua_host_transfers', local_cut_gpu_host_gpu,
'fast_run', 'inplace', 'gpuarray') 'fast_compile', 'fast_run', 'inplace', 'gpuarray')
gpu_cut_copies.register('cut_gpua_constant_transfers', gpu_cut_copies.register('cut_gpua_constant_transfers',
tensor.opt.constant_folding, tensor.opt.constant_folding,
'fast_run', 'gpuarray') 'fast_compile', 'fast_run', 'gpuarray')
optdb['canonicalize'].register('local_cut_gpua_host_gpua', optdb['canonicalize'].register('local_cut_gpua_host_gpua',
local_cut_gpu_host_gpu, 'fast_run', 'gpuarray') local_cut_gpu_host_gpu,
'fast_compile', 'fast_run', 'gpuarray')
@register_opt() @register_opt('fast_compile')
@local_optimizer([tensor.Alloc]) @local_optimizer([tensor.Alloc])
def local_gpuaalloc2(node): def local_gpuaalloc2(node):
""" """
Join(axis, Alloc, Alloc, ...) -> Join(axis, GpuAlloc, Alloc, ...) Join(axis, {Alloc or HostFromGPU}, ...) -> Join(axis, GpuAlloc, Alloc, ...)
Moves an alloc that is an input to join to the gpu. Moves an alloc that is an input to join to the gpu.
""" """
...@@ -171,7 +172,7 @@ def local_gpuaalloc2(node): ...@@ -171,7 +172,7 @@ def local_gpuaalloc2(node):
return [host_from_gpu(gpu_alloc(*node.inputs))] return [host_from_gpu(gpu_alloc(*node.inputs))]
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Alloc]) @op_lifter([tensor.Alloc])
def local_gpuaalloc(node): def local_gpuaalloc(node):
new_out = gpu_alloc(*node.inputs) new_out = gpu_alloc(*node.inputs)
...@@ -199,7 +200,7 @@ def local_gpualloc_memset_0(node): ...@@ -199,7 +200,7 @@ def local_gpualloc_memset_0(node):
return [new_out] return [new_out]
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Reshape]) @op_lifter([tensor.Reshape])
def local_gpureshape(node): def local_gpureshape(node):
op = node.op op = node.op
...@@ -210,14 +211,14 @@ def local_gpureshape(node): ...@@ -210,14 +211,14 @@ def local_gpureshape(node):
return res return res
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Rebroadcast]) @op_lifter([tensor.Rebroadcast])
def local_gpu_rebroadcast(node): def local_gpu_rebroadcast(node):
if isinstance(node.inputs[0].owner.op, HostFromGpu): if isinstance(node.inputs[0].owner.op, HostFromGpu):
return node.op(node.inputs[0].owner.inputs[0]) return node.op(node.inputs[0].owner.inputs[0])
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Flatten]) @op_lifter([tensor.Flatten])
def local_gpuflatten(node): def local_gpuflatten(node):
op = node.op op = node.op
...@@ -230,7 +231,7 @@ def local_gpuflatten(node): ...@@ -230,7 +231,7 @@ def local_gpuflatten(node):
return o return o
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Elemwise]) @op_lifter([tensor.Elemwise])
def local_gpu_elemwise(node): def local_gpu_elemwise(node):
op = node.op op = node.op
...@@ -273,14 +274,14 @@ optdb.register('gpua_inplace_opt', inplace_gpu_elemwise_opt, 75, ...@@ -273,14 +274,14 @@ optdb.register('gpua_inplace_opt', inplace_gpu_elemwise_opt, 75,
'inplace_elemwise_optimizer', 'fast_run', 'inplace', 'gpuarray') 'inplace_elemwise_optimizer', 'fast_run', 'inplace', 'gpuarray')
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.DimShuffle]) @op_lifter([tensor.DimShuffle])
def local_gpua_dimshuffle(node): def local_gpua_dimshuffle(node):
return GpuDimShuffle(node.op.input_broadcastable, return GpuDimShuffle(node.op.input_broadcastable,
node.op.new_order) node.op.new_order)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.SpecifyShape]) @op_lifter([tensor.SpecifyShape])
def local_gpua_specifyShape(node): def local_gpua_specifyShape(node):
if isinstance(node.inputs[0].type, GpuArrayType): if isinstance(node.inputs[0].type, GpuArrayType):
...@@ -293,7 +294,7 @@ def gpu_print_wrapper(op, cnda): ...@@ -293,7 +294,7 @@ def gpu_print_wrapper(op, cnda):
op.old_op.global_fn(op.old_op, numpy.asarray(cnda)) op.old_op.global_fn(op.old_op, numpy.asarray(cnda))
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.printing.Print]) @op_lifter([tensor.printing.Print])
def local_gpu_print_op(node): def local_gpu_print_op(node):
x, = node.inputs x, = node.inputs
...@@ -303,13 +304,13 @@ def local_gpu_print_op(node): ...@@ -303,13 +304,13 @@ def local_gpu_print_op(node):
return new_op(gpu_x) return new_op(gpu_x)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Join]) @op_lifter([tensor.Join])
def local_gpua_join(node): def local_gpua_join(node):
return gpu_join return gpu_join
@register_opt() @register_opt('fast_compile')
@local_optimizer([GpuJoin]) @local_optimizer([GpuJoin])
def local_gpuajoin_1(node): def local_gpuajoin_1(node):
# join of a single element # join of a single element
...@@ -318,19 +319,19 @@ def local_gpuajoin_1(node): ...@@ -318,19 +319,19 @@ def local_gpuajoin_1(node):
return [node.inputs[1]] return [node.inputs[1]]
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Split]) @op_lifter([tensor.Split])
def local_gpua_split(node): def local_gpua_split(node):
return GpuSplit(node.op.len_splits) return GpuSplit(node.op.len_splits)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.Subtensor]) @op_lifter([tensor.Subtensor])
def local_gpua_subtensor(node): def local_gpua_subtensor(node):
return GpuSubtensor(node.op.idx_list) return GpuSubtensor(node.op.idx_list)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.IncSubtensor]) @op_lifter([tensor.IncSubtensor])
def local_gpua_incsubtensor(node): def local_gpua_incsubtensor(node):
return GpuIncSubtensor(node.op.idx_list, node.op.inplace, return GpuIncSubtensor(node.op.idx_list, node.op.inplace,
...@@ -338,7 +339,7 @@ def local_gpua_incsubtensor(node): ...@@ -338,7 +339,7 @@ def local_gpua_incsubtensor(node):
node.op.destroyhandler_tolerate_aliased) node.op.destroyhandler_tolerate_aliased)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.AdvancedIncSubtensor1]) @op_lifter([tensor.AdvancedIncSubtensor1])
def local_gpua_advanced_incsubtensor(node): def local_gpua_advanced_incsubtensor(node):
...@@ -362,7 +363,7 @@ def local_gpua_advanced_incsubtensor(node): ...@@ -362,7 +363,7 @@ def local_gpua_advanced_incsubtensor(node):
set_instead_of_inc=set_instead_of_inc) set_instead_of_inc=set_instead_of_inc)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.CAReduce, tensor.Sum, tensor.elemwise.Prod]) @op_lifter([tensor.CAReduce, tensor.Sum, tensor.elemwise.Prod])
def local_gpua_careduce(node): def local_gpua_careduce(node):
if isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul, if isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul,
...@@ -442,71 +443,67 @@ def local_gpua_careduce(node): ...@@ -442,71 +443,67 @@ def local_gpua_careduce(node):
return [unreshaped_reduce] return [unreshaped_reduce]
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.blas.Gemv, tensor.blas_c.CGemv]) @op_lifter([tensor.blas.Gemv, tensor.blas_c.CGemv])
def local_gpua_gemv(node): def local_gpua_gemv(node):
return GpuGemv(inplace=node.op.inplace) return GpuGemv(inplace=node.op.inplace)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.blas.Gemm]) @op_lifter([tensor.blas.Gemm])
def local_gpua_gemm(node): def local_gpua_gemm(node):
return GpuGemm(inplace=node.op.inplace) return GpuGemm(inplace=node.op.inplace)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.blas.Ger, tensor.blas_c.CGer, tensor.blas_scipy.ScipyGer]) @op_lifter([tensor.blas.Ger, tensor.blas_c.CGer, tensor.blas_scipy.ScipyGer])
def local_gpua_ger(node): def local_gpua_ger(node):
return GpuGer(destructive=node.op.destructive) return GpuGer(destructive=node.op.destructive)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.blas.Dot22]) @op_lifter([tensor.blas.Dot22])
def local_gpua_dot22(node): def local_gpua_dot22(node):
return gpu_dot22 return gpu_dot22
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.basic.Eye]) @op_lifter([tensor.basic.Eye])
def local_gpua_eye(node): def local_gpua_eye(node):
return GpuEye(dtype=node.op.dtype) return GpuEye(dtype=node.op.dtype)
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias]) @op_lifter([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias])
def local_gpua_crossentropysoftmaxargmax1hotwithbias(node): def local_gpua_crossentropysoftmaxargmax1hotwithbias(node):
return GpuCrossentropySoftmaxArgmax1HotWithBias() return GpuCrossentropySoftmaxArgmax1HotWithBias()
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.nnet.CrossentropySoftmax1HotWithBiasDx]) @op_lifter([tensor.nnet.CrossentropySoftmax1HotWithBiasDx])
def local_gpua_crossentropysoftmax1hotwithbiasdx(node): def local_gpua_crossentropysoftmax1hotwithbiasdx(node):
return GpuCrossentropySoftmax1HotWithBiasDx() return GpuCrossentropySoftmax1HotWithBiasDx()
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.nnet.Softmax]) @op_lifter([tensor.nnet.Softmax])
def local_gpua_softmax(node): def local_gpua_softmax(node):
return GpuSoftmax() return GpuSoftmax()
@register_opt() @register_opt('fast_compile')
@op_lifter([tensor.nnet.SoftmaxWithBias]) @op_lifter([tensor.nnet.SoftmaxWithBias])
def local_gpua_softmaxwithbias(node): def local_gpua_softmaxwithbias(node):
return GpuSoftmaxWithBias() return GpuSoftmaxWithBias()
@register_opt() @register_opt('fast_compile')
@local_optimizer([theano.tensor.opt.Assert]) @op_lifter([theano.tensor.opt.Assert])
def local_assert(node): def local_assert(node):
if (isinstance(node.op, theano.tensor.opt.Assert) and
node.inputs[0].owner and
isinstance(node.inputs[0].owner.op,
HostFromGpu)):
return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0]))] return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0]))]
@register_opt() @register_opt('fast_compile')
@op_lifter([gpu_from_host, ConvOp]) @op_lifter([gpu_from_host, ConvOp])
def local_gpu_conv(node): def local_gpu_conv(node):
""" """
...@@ -654,7 +651,7 @@ def gpu_reconstruct_graph(inputs, outputs, tag=None): ...@@ -654,7 +651,7 @@ def gpu_reconstruct_graph(inputs, outputs, tag=None):
return (nw_inputs, nw_outputs) return (nw_inputs, nw_outputs)
@register_opt('scan') @register_opt('scan', 'fast_compile')
@op_lifter([scan_op.Scan]) @op_lifter([scan_op.Scan])
def local_scan_to_gpua(node): def local_scan_to_gpua(node):
info = copy.deepcopy(node.op.info) info = copy.deepcopy(node.op.info)
......
Markdown formatting is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment