Fix flake8 errors in gpuarray optimizations

上级 5c271602
......@@ -162,6 +162,7 @@ def register_inplace(*tags, **kwargs):
return local_opt
return f
# Register the shape-tracking optimization from theano.tensor.opt under the
# 'fast_compile' tag.
register_opt('fast_compile')(theano.tensor.opt.local_track_shape_i)
# Register constant folding under the explicit name 'gpua_constant_folding',
# passing final_opt=True to register_opt.
register_opt(final_opt=True, name='gpua_constant_folding')(
    tensor.opt.constant_folding)
......@@ -582,6 +583,7 @@ def local_cut_gpu_transfers(node):
else:
return [node.op(n2.inputs[0])]
gpu_cut_copies.register('cut_gpua_host_transfers', local_cut_gpu_transfers,
'fast_compile', 'fast_run', 'gpuarray')
gpu_cut_copies.register('cut_gpua_constant_transfers',
......@@ -652,6 +654,8 @@ def local_gpua_alloc_empty_to_zeros(node):
z = np.asarray(0, dtype=node.outputs[0].dtype)
return [GpuAlloc(context_name)(as_gpuarray_variable(z, context_name),
*node.inputs)]
optdb.register('local_gpua_alloc_empty_to_zeros',
theano.tensor.opt.in2out(local_gpua_alloc_empty_to_zeros),
# After move to gpu and merge2, before inplace.
......@@ -1521,6 +1525,8 @@ def local_conv_gpu_conv(node):
return [tensor.as_tensor_variable(out)]
else:
return [out]
register_opt()(local_conv_gpu_conv)
......@@ -1792,6 +1798,8 @@ def local_gpu_pool(op, ctx_name, inputs, outputs):
inp_padded = pad_dims(inp, 2, nd)
ret_padded = op(inp_padded, ws, stride, pad)
return unpad_dims(ret_padded, inp, 2, nd)
# Two optimizer databases for the pooling optimizations: one LocalGroupDB built
# with its defaults, and one built with GraphToGPULocalOptGroup as local_opt.
pool_db = LocalGroupDB()
pool_db2 = LocalGroupDB(local_opt=theano.gof.opt.GraphToGPULocalOptGroup)
# Give the second database an explicit __name__.
pool_db2.__name__ = "pool_db2"
......@@ -1829,6 +1837,8 @@ def local_gpu_max_pool_grad(op, ctx_name, inputs, outputs):
ret_padded = op(inp_padded, out_padded, out_grad_padded,
ws, stride, pad)
return unpad_dims(ret_padded, inp, 2, nd)
lifter = op_lifter([pool.MaxPoolGrad])(local_gpu_max_pool_grad)
pool_db.register("local_gpu_max_pool_grad", lifter,
'gpuarray', 'fast_compile', 'fast_run',
......@@ -1859,6 +1869,8 @@ def local_gpu_average_pool_grad(op, ctx_name, inputs, outputs):
ret_padded = op(inp_padded, out_grad_padded,
ws, stride, pad)
return unpad_dims(ret_padded, inp, 2, nd)
lifter = op_lifter([pool.AveragePoolGrad])(local_gpu_average_pool_grad)
pool_db.register("local_gpu_average_pool_grad", lifter,
'gpuarray', 'fast_compile', 'fast_run',
......@@ -1956,6 +1968,7 @@ def local_assert_no_cpu_op(node):
elif config.assert_no_cpu_op == "pdb":
pdb.set_trace()
# Register the local_assert_no_cpu_op:
assert_no_cpu_op = theano.tensor.opt.in2out(local_assert_no_cpu_op,
name='assert_no_cpu_op')
......@@ -2280,15 +2293,17 @@ def local_gpu_magma_svd(op, context_name, inputs, outputs):
out = [out.astype('float16')]
return out
@register_opt('ctc', 'fast_compile')
@op_lifter([theano.tensor.nnet.ctc.ConnectionistTemporalClassification])
@register_opt2([ConnectionistTemporalClassification], 'ctc', 'fast_compile')
def local_gpu_ctc(op, context_name, inputs, outputs):
    """Swap a ConnectionistTemporalClassification op for its GPU version.

    Returns ``None`` (no replacement) when ``config.ctc.enabled`` is falsy.
    Otherwise builds a ``GpuConnectionistTemporalClassification`` with the
    same ``compute_grad`` setting as ``op``, applies it to ``inputs`` and
    returns the resulting outputs as a list.

    The function is registered exactly once through ``register_opt2``; the
    previous fully-qualified duplicate of that decorator (which also exceeded
    the flake8 line-length limit) has been removed.
    """
    if not config.ctc.enabled:
        return None
    gpu_op = GpuConnectionistTemporalClassification(
        compute_grad=op.compute_grad)
    return list(gpu_op(*inputs))
# Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled.
optdb.register('gpua_scanOp_make_inplace',
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论