Fix flake8 errors in gpuarray optimizations

上级 5c271602
...@@ -162,6 +162,7 @@ def register_inplace(*tags, **kwargs): ...@@ -162,6 +162,7 @@ def register_inplace(*tags, **kwargs):
return local_opt return local_opt
return f return f
register_opt('fast_compile')(theano.tensor.opt.local_track_shape_i) register_opt('fast_compile')(theano.tensor.opt.local_track_shape_i)
register_opt(final_opt=True, name='gpua_constant_folding')( register_opt(final_opt=True, name='gpua_constant_folding')(
tensor.opt.constant_folding) tensor.opt.constant_folding)
...@@ -582,6 +583,7 @@ def local_cut_gpu_transfers(node): ...@@ -582,6 +583,7 @@ def local_cut_gpu_transfers(node):
else: else:
return [node.op(n2.inputs[0])] return [node.op(n2.inputs[0])]
gpu_cut_copies.register('cut_gpua_host_transfers', local_cut_gpu_transfers, gpu_cut_copies.register('cut_gpua_host_transfers', local_cut_gpu_transfers,
'fast_compile', 'fast_run', 'gpuarray') 'fast_compile', 'fast_run', 'gpuarray')
gpu_cut_copies.register('cut_gpua_constant_transfers', gpu_cut_copies.register('cut_gpua_constant_transfers',
...@@ -652,6 +654,8 @@ def local_gpua_alloc_empty_to_zeros(node): ...@@ -652,6 +654,8 @@ def local_gpua_alloc_empty_to_zeros(node):
z = np.asarray(0, dtype=node.outputs[0].dtype) z = np.asarray(0, dtype=node.outputs[0].dtype)
return [GpuAlloc(context_name)(as_gpuarray_variable(z, context_name), return [GpuAlloc(context_name)(as_gpuarray_variable(z, context_name),
*node.inputs)] *node.inputs)]
optdb.register('local_gpua_alloc_empty_to_zeros', optdb.register('local_gpua_alloc_empty_to_zeros',
theano.tensor.opt.in2out(local_gpua_alloc_empty_to_zeros), theano.tensor.opt.in2out(local_gpua_alloc_empty_to_zeros),
# After move to gpu and merge2, before inplace. # After move to gpu and merge2, before inplace.
...@@ -1521,6 +1525,8 @@ def local_conv_gpu_conv(node): ...@@ -1521,6 +1525,8 @@ def local_conv_gpu_conv(node):
return [tensor.as_tensor_variable(out)] return [tensor.as_tensor_variable(out)]
else: else:
return [out] return [out]
register_opt()(local_conv_gpu_conv) register_opt()(local_conv_gpu_conv)
...@@ -1792,6 +1798,8 @@ def local_gpu_pool(op, ctx_name, inputs, outputs): ...@@ -1792,6 +1798,8 @@ def local_gpu_pool(op, ctx_name, inputs, outputs):
inp_padded = pad_dims(inp, 2, nd) inp_padded = pad_dims(inp, 2, nd)
ret_padded = op(inp_padded, ws, stride, pad) ret_padded = op(inp_padded, ws, stride, pad)
return unpad_dims(ret_padded, inp, 2, nd) return unpad_dims(ret_padded, inp, 2, nd)
pool_db = LocalGroupDB() pool_db = LocalGroupDB()
pool_db2 = LocalGroupDB(local_opt=theano.gof.opt.GraphToGPULocalOptGroup) pool_db2 = LocalGroupDB(local_opt=theano.gof.opt.GraphToGPULocalOptGroup)
pool_db2.__name__ = "pool_db2" pool_db2.__name__ = "pool_db2"
...@@ -1829,6 +1837,8 @@ def local_gpu_max_pool_grad(op, ctx_name, inputs, outputs): ...@@ -1829,6 +1837,8 @@ def local_gpu_max_pool_grad(op, ctx_name, inputs, outputs):
ret_padded = op(inp_padded, out_padded, out_grad_padded, ret_padded = op(inp_padded, out_padded, out_grad_padded,
ws, stride, pad) ws, stride, pad)
return unpad_dims(ret_padded, inp, 2, nd) return unpad_dims(ret_padded, inp, 2, nd)
lifter = op_lifter([pool.MaxPoolGrad])(local_gpu_max_pool_grad) lifter = op_lifter([pool.MaxPoolGrad])(local_gpu_max_pool_grad)
pool_db.register("local_gpu_max_pool_grad", lifter, pool_db.register("local_gpu_max_pool_grad", lifter,
'gpuarray', 'fast_compile', 'fast_run', 'gpuarray', 'fast_compile', 'fast_run',
...@@ -1859,6 +1869,8 @@ def local_gpu_average_pool_grad(op, ctx_name, inputs, outputs): ...@@ -1859,6 +1869,8 @@ def local_gpu_average_pool_grad(op, ctx_name, inputs, outputs):
ret_padded = op(inp_padded, out_grad_padded, ret_padded = op(inp_padded, out_grad_padded,
ws, stride, pad) ws, stride, pad)
return unpad_dims(ret_padded, inp, 2, nd) return unpad_dims(ret_padded, inp, 2, nd)
lifter = op_lifter([pool.AveragePoolGrad])(local_gpu_average_pool_grad) lifter = op_lifter([pool.AveragePoolGrad])(local_gpu_average_pool_grad)
pool_db.register("local_gpu_average_pool_grad", lifter, pool_db.register("local_gpu_average_pool_grad", lifter,
'gpuarray', 'fast_compile', 'fast_run', 'gpuarray', 'fast_compile', 'fast_run',
...@@ -1956,6 +1968,7 @@ def local_assert_no_cpu_op(node): ...@@ -1956,6 +1968,7 @@ def local_assert_no_cpu_op(node):
elif config.assert_no_cpu_op == "pdb": elif config.assert_no_cpu_op == "pdb":
pdb.set_trace() pdb.set_trace()
# Register the local_assert_no_cpu_op: # Register the local_assert_no_cpu_op:
assert_no_cpu_op = theano.tensor.opt.in2out(local_assert_no_cpu_op, assert_no_cpu_op = theano.tensor.opt.in2out(local_assert_no_cpu_op,
name='assert_no_cpu_op') name='assert_no_cpu_op')
...@@ -2280,15 +2293,17 @@ def local_gpu_magma_svd(op, context_name, inputs, outputs): ...@@ -2280,15 +2293,17 @@ def local_gpu_magma_svd(op, context_name, inputs, outputs):
out = [out.astype('float16')] out = [out.astype('float16')]
return out return out
@register_opt('ctc', 'fast_compile') @register_opt('ctc', 'fast_compile')
@op_lifter([theano.tensor.nnet.ctc.ConnectionistTemporalClassification]) @op_lifter([theano.tensor.nnet.ctc.ConnectionistTemporalClassification])
@register_opt2([theano.tensor.nnet.ctc.ConnectionistTemporalClassification], 'ctc', 'fast_compile') @register_opt2([ConnectionistTemporalClassification], 'ctc', 'fast_compile')
def local_gpu_ctc(op, context_name, inputs, outputs): def local_gpu_ctc(op, context_name, inputs, outputs):
if not config.ctc.enabled: if not config.ctc.enabled:
return return
op = GpuConnectionistTemporalClassification(compute_grad=op.compute_grad) op = GpuConnectionistTemporalClassification(compute_grad=op.compute_grad)
return list(op(*inputs)) return list(op(*inputs))
# Do not register in fast_run or fast_compile. # Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled. # It will be added to fast_run if the GPU is enabled.
optdb.register('gpua_scanOp_make_inplace', optdb.register('gpua_scanOp_make_inplace',
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论