提交 d0dfb0be authored 作者: sentient07's avatar sentient07

Cleaned up and fixed pep8

上级 c3e8f153
...@@ -25,7 +25,7 @@ from theano.tensor.signal.pool import ( ...@@ -25,7 +25,7 @@ from theano.tensor.signal.pool import (
from . import pygpu from . import pygpu
from .type import get_context, gpu_context_type, list_contexts, GpuArrayType from .type import get_context, gpu_context_type, list_contexts, GpuArrayType
from .basic_ops import (as_gpuarray_variable, infer_context_name, from .basic_ops import (as_gpuarray_variable, infer_context_name,
gpu_contiguous, GpuAllocEmpty, gpu_alloc_empty, gpu_contiguous, gpu_alloc_empty,
empty_like) empty_like)
from .elemwise import GpuElemwise from .elemwise import GpuElemwise
...@@ -1462,8 +1462,8 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs): ...@@ -1462,8 +1462,8 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs):
@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights, @local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs]) AbstractConv2d_gradInputs])
def local_abstractconv_cudnn(node): def local_abstractconv_cudnn(node):
ctx = infer_context(*node.inputs) ctx = infer_context_name(*node.inputs)
return local_abstractconv_dnn_graph(node.op, ctx, node.inputs) return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs)
conv_groupopt.register('local_abstractconv_cudnn_graph', conv_groupopt.register('local_abstractconv_cudnn_graph',
local_abstractconv_cudnn_graph, 20, local_abstractconv_cudnn_graph, 20,
......
...@@ -9,7 +9,7 @@ except ImportError: ...@@ -9,7 +9,7 @@ except ImportError:
pass pass
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel,
infer_context_name, GpuFromHost) infer_context_name)
from .opt import register_opt, op_lifter, register_opt2 from .opt import register_opt, op_lifter, register_opt2
...@@ -450,10 +450,11 @@ class GpuCumsum(GpuKernelBase, Op): ...@@ -450,10 +450,11 @@ class GpuCumsum(GpuKernelBase, Op):
""" % locals() """ % locals()
return super(GpuCumsum, self).c_support_code_struct(node, nodename) + code return super(GpuCumsum, self).c_support_code_struct(node, nodename) + code
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([CumsumOp]) @op_lifter([CumsumOp])
@register_opt2([CumsumOp], 'fast_compile') @register_opt2([CumsumOp], 'fast_compile')
def use_gpu_cumsumop(op, ctx_name, inputs, ): def use_gpu_cumsumop(op, ctx_name, inputs):
if inputs[0].dtype == 'float32': if inputs[0].dtype == 'float32':
axis = op.axis axis = op.axis
x = inputs[0] x = inputs[0]
......
...@@ -10,7 +10,7 @@ from theano.scalar import as_scalar, constant ...@@ -10,7 +10,7 @@ from theano.scalar import as_scalar, constant
from . import opt from . import opt
from .basic_ops import (as_gpuarray_variable, GpuAllocEmpty, from .basic_ops import (as_gpuarray_variable, GpuAllocEmpty,
infer_context_name) infer_context_name, gpu_alloc_empty)
from .type import gpu_context_type from .type import gpu_context_type
from .opt_util import alpha_merge, output_merge from .opt_util import alpha_merge, output_merge
......
...@@ -8,15 +8,15 @@ from six import itervalues, iteritems ...@@ -8,15 +8,15 @@ from six import itervalues, iteritems
from six.moves import xrange from six.moves import xrange
import theano import theano
from theano.compat import OrderedDict
from theano import tensor, scalar, gof, config from theano import tensor, scalar, gof, config
from theano.compile import optdb from theano.compile import optdb
from theano.compile.ops import shape_i from theano.compile.ops import shape_i
from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer, from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer,
SequenceDB, Optimizer, DB, toolbox, graph) SequenceDB, Optimizer, DB, toolbox, graph)
from theano.gof.opt import ChangeTracker, NavigatorOptimizer from theano.gof.opt import NavigatorOptimizer
from theano.gof.optdb import LocalGroupDB from theano.gof.optdb import LocalGroupDB
from theano.ifelse import IfElse from theano.ifelse import IfElse
from theano.misc.ordered_set import OrderedSet
from theano.scalar.basic import Scalar, Pow, Cast from theano.scalar.basic import Scalar, Pow, Cast
from theano.scan_module import scan_utils, scan_op, scan_opt from theano.scan_module import scan_utils, scan_op, scan_opt
...@@ -30,7 +30,7 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d, ...@@ -30,7 +30,7 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
from theano.tests.breakpoint import PdbBreakpoint from theano.tests.breakpoint import PdbBreakpoint
from .type import (GpuArrayType, GpuArrayConstant, get_context, from .type import (GpuArrayType, GpuArrayConstant, get_context,
ContextNotDefined, GpuArrayVariable, GpuArraySharedVariable) ContextNotDefined, GpuArraySharedVariable, GpuArrayVariable)
from .basic_ops import (as_gpuarray_variable, infer_context_name, from .basic_ops import (as_gpuarray_variable, infer_context_name,
host_from_gpu, GpuToGpu, host_from_gpu, GpuToGpu,
HostFromGpu, GpuFromHost, HostFromGpu, GpuFromHost,
...@@ -55,8 +55,6 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor, ...@@ -55,8 +55,6 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedIncSubtensor1_dev20) GpuAdvancedIncSubtensor1_dev20)
from .opt_util import alpha_merge, output_merge from .opt_util import alpha_merge, output_merge
_logger = logging.getLogger("theano.gpuarray.opt") _logger = logging.getLogger("theano.gpuarray.opt")
...@@ -263,7 +261,6 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -263,7 +261,6 @@ class GraphToGPU(NavigatorOptimizer):
self.local_optimizers_all = local_optimizers_all self.local_optimizers_all = local_optimizers_all
self.local_optimizers_map = local_optimizers_map self.local_optimizers_map = local_optimizers_map
self.failure_callback = None self.failure_callback = None
self.new_opts = []
def add_requirements(self, fgraph): def add_requirements(self, fgraph):
fgraph.attach_feature(toolbox.ReplaceValidate()) fgraph.attach_feature(toolbox.ReplaceValidate())
...@@ -281,13 +278,10 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -281,13 +278,10 @@ class GraphToGPU(NavigatorOptimizer):
def apply(self, fgraph): def apply(self, fgraph):
mapping = {} mapping = {}
start_nb_nodes = len(fgraph.apply_nodes)
max_nb_nodes = len(fgraph.apply_nodes)
io_toposort_timing = []
nb_nodes = []
time_opts = {} time_opts = {}
node_created = {} node_created = {}
process_count = {} process_count = {}
io_toposort_timing = []
# Building a new graph # Building a new graph
# Iterating through inputs of graph # Iterating through inputs of graph
for i in fgraph.inputs: for i in fgraph.inputs:
...@@ -307,29 +301,25 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -307,29 +301,25 @@ class GraphToGPU(NavigatorOptimizer):
node_created.setdefault(lopt, 0) node_created.setdefault(lopt, 0)
t_topo = time.time() t_topo = time.time()
topo = fgraph.toposort() fgraph.toposort()
time_topo = time.time() - t_topo time_topo = time.time() - t_topo
io_toposort_timing.append(time_topo - t_topo)
for node in fgraph.toposort(): for node in fgraph.toposort():
t0 = time.time()
if isinstance(node.op, HostFromGpu): if isinstance(node.op, HostFromGpu):
mapping[node.outputs[0]] = node.inputs[0] mapping[node.outputs[0]] = node.inputs[0]
continue continue
# Move only if any of the inputs are on the GPU. # Move only if any of the inputs are on the GPU.
move_to_GPU = True move_to_GPU = False
'''
if any([isinstance(i, GpuArrayVariable) or if any([isinstance(i, GpuArrayVariable) or
isinstance(i, GpuArraySharedVariable) isinstance(i, GpuArraySharedVariable)
for i in [mapping[v] for v in node.inputs] + for i in [mapping[v] for v in node.inputs] +
node.outputs]): node.outputs]):
move_to_GPU = True move_to_GPU = True
'''
out_clients = [o.clients for o in node.outputs]
context_name = None context_name = None
for i in [mapping[i] for i in node.inputs]: for i in [mapping[i] for i in node.inputs]:
...@@ -340,18 +330,15 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -340,18 +330,15 @@ class GraphToGPU(NavigatorOptimizer):
new_ops = None new_ops = None
outputs = [] outputs = []
ex_opt_time = None
# Apply the lifter # Apply the lifter
for lopt in (self.local_optimizers_all + for lopt in (self.local_optimizers_all +
self.local_optimizers_map.get(type(node.op), []) + self.local_optimizers_map.get(type(node.op), []) +
self.local_optimizers_map.get(node.op, [])): self.local_optimizers_map.get(node.op, [])):
process_count[lopt] += 1
if move_to_GPU: if move_to_GPU:
t_opt = time.time() t_opt = time.time()
try: try:
new_ops = lopt.transform( new_ops = lopt.transform(node.op, context_name,
node.op, context_name,
[mapping[i] for i in node.inputs]) [mapping[i] for i in node.inputs])
except TypeError: except TypeError:
# Updating again because else we'd be counting # Updating again because else we'd be counting
...@@ -362,7 +349,9 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -362,7 +349,9 @@ class GraphToGPU(NavigatorOptimizer):
node.outputs) node.outputs)
finally: finally:
t_opt2 = time.time() t_opt2 = time.time()
time_opts[lopt] += t_opt2 - t_opt
if new_ops: if new_ops:
process_count[lopt] += 1
break break
if not new_ops: if not new_ops:
newnode = node.clone_with_new_inputs([mapping.get(i) newnode = node.clone_with_new_inputs([mapping.get(i)
...@@ -385,9 +374,7 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -385,9 +374,7 @@ class GraphToGPU(NavigatorOptimizer):
return_list=True) return_list=True)
if new_ops: if new_ops:
node_created[lopt] += len(theano.gof.graph.ops([mapping[i] for i in node.inputs], outputs)) node_created[lopt] += len(graph.ops([mapping[i] for i in node.inputs], outputs))
self.new_opts.append(lopt)
time_opts[lopt] = t_opt2 - t_opt
for new_o, old_o in zip(outputs, node.outputs): for new_o, old_o in zip(outputs, node.outputs):
mapping[old_o] = new_o mapping[old_o] = new_o
...@@ -402,47 +389,26 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -402,47 +389,26 @@ class GraphToGPU(NavigatorOptimizer):
new_nodes.append(new_o) new_nodes.append(new_o)
fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes)) fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes))
end_nb_nodes = len(fgraph.apply_nodes) return (self, io_toposort_timing, time_opts, node_created, process_count)
return (self, start_nb_nodes, end_nb_nodes, max_nb_nodes, io_toposort_timing,
nb_nodes, time_opts, node_created)
@staticmethod @staticmethod
def print_profile(stream, prof, level=0): def print_profile(stream, prof, level=0):
(opt, start_nb_nodes, end_nb_nodes, max_nb_nodes, io_toposort_timing, (opt, io_toposort_timing, time_opts, node_created, process_count) = prof
nb_nodes, time_opts, node_created) = prof
blanc = (' ' * level) blanc = (' ' * level)
print(blanc, "GraphToGPUOptimizer", end=' ', file=stream) print(blanc, "GraphToGPUOptimizer", end=' ', file=stream)
print(blanc, getattr(opt, "name", print(blanc, getattr(opt, "name",
getattr(opt, "__name__", "")), file=stream) getattr(opt, "__name__", "")), file=stream)
print(blanc, " nb nodes (start, end, max) %d %d %d" % (
start_nb_nodes, end_nb_nodes, max_nb_nodes), file=stream)
print(blanc, " time io_toposort %.3fs" % sum( print(blanc, " time io_toposort %.3fs" % sum(
io_toposort_timing), file=stream) io_toposort_timing), file=stream)
s = sum([time_opts[o] for o in opt.new_opts]) s = sum([v for k, v in time_opts.iteritems()])
print(blanc, "Total time taken by local optimizers %.3fs " % s, file=stream)
print(blanc, " time in local optimizers %.3fs" % s, file=stream)
# Build a dictionary of opt and time taken
opt_time_dict = dict()
for o in opt.new_opts:
if o not in opt_time_dict:
opt_time_dict[o] = time_opts[o]
else:
opt_time_dict[o] += time_opts[o]
# print time per each optimizer
for k,v in opt_time_dict.iteritems():
print(blanc, "Local Optimizer :" + str(k) + " takes time : %.3f" %v, file=stream)
count_opt = [] count_opt = []
not_used = [] not_used = []
not_used_time = 0 not_used_time = 0
process_count = {}
for o in (opt.new_opts):
process_count.setdefault(o, 0)
process_count[o] + 1
for o, count in iteritems(process_count): for o, count in iteritems(process_count):
if count > 0: if count > 0:
...@@ -454,13 +420,13 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -454,13 +420,13 @@ class GraphToGPU(NavigatorOptimizer):
if count_opt: if count_opt:
print(blanc, print(blanc,
' times - times applied - nb node created - name:', ' times - times applied - Node created - name:',
file=stream) file=stream)
count_opt.sort() count_opt.sort()
for (t, count, n_created, o) in count_opt[::-1]: for (t, count, n_created, o) in count_opt[::-1]:
print(blanc, ' %.3fs - %d - %d - %s' % ( print(blanc, ' %.3fs - %d - %d - %s' % (
t, count, n_created, o), file=stream) t, count, n_created, o), file=stream)
print(blanc, ' %.3fs - in %d optimization that where not used (display only those with a runtime > 0)' % ( print(blanc, ' %.3fs - in %d optimization that were not used (display only those with a runtime > 0)' % (
not_used_time, len(not_used)), file=stream) not_used_time, len(not_used)), file=stream)
not_used.sort(key=lambda nu: (nu[0], str(nu[1]))) not_used.sort(key=lambda nu: (nu[0], str(nu[1])))
for (t, o) in not_used[::-1]: for (t, o) in not_used[::-1]:
...@@ -469,7 +435,6 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -469,7 +435,6 @@ class GraphToGPU(NavigatorOptimizer):
print(blanc + " ", ' %.3fs - %s' % (t, o), file=stream) print(blanc + " ", ' %.3fs - %s' % (t, o), file=stream)
print(file=stream) print(file=stream)
@staticmethod @staticmethod
def merge_profile(prof1, prof2): def merge_profile(prof1, prof2):
# (opt, loop_timing, loop_process_count, max_nb_nodes, # (opt, loop_timing, loop_process_count, max_nb_nodes,
...@@ -491,8 +456,7 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -491,8 +456,7 @@ class GraphToGPU(NavigatorOptimizer):
local_optimizers_map = merge_dict(prof1[0].local_optimizers_map, local_optimizers_map = merge_dict(prof1[0].local_optimizers_map,
prof2[0].local_optimizers_map) prof2[0].local_optimizers_map)
new_opt = GraphToGPU(local_optimizers, local_optimizers_map)
new_opt = GraphToGPU(local_optimizers,local_optimizers_map)
def merge_list(l1, l2): def merge_list(l1, l2):
l = copy.copy(l1) l = copy.copy(l1)
...@@ -503,21 +467,15 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -503,21 +467,15 @@ class GraphToGPU(NavigatorOptimizer):
l.append(nb) l.append(nb)
return l return l
max_nb_nodes = max(prof1[3], prof2[3]) io_toposort_timing = merge_list(prof1[1], prof2[1])
time_opts = merge_dict(prof1[2], prof2[2])
io_toposort_timing = merge_list(prof1[4], prof2[4]) node_created = merge_dict(prof1[3], prof2[3])
process_count = merge_dict(prof1[4], prof2[4])
nb_nodes = merge_list(prof1[5], prof2[5])
time_opts = merge_dict(prof1[6], prof2[6])
node_created = merge_dict(prof1[7], prof2[7])
return (new_opt, return (new_opt,
max_nb_nodes,
io_toposort_timing, io_toposort_timing,
nb_nodes,
time_opts, time_opts,
node_created) node_created,
process_count)
@local_optimizer([GpuFromHost, GpuToGpu, HostFromGpu]) @local_optimizer([GpuFromHost, GpuToGpu, HostFromGpu])
...@@ -917,7 +875,7 @@ def local_gpuajoin_1(node): ...@@ -917,7 +875,7 @@ def local_gpuajoin_1(node):
@op_lifter([tensor.Split]) @op_lifter([tensor.Split])
@register_opt2([tensor.Split], 'fast_compile') @register_opt2([tensor.Split], 'fast_compile')
def local_gpua_split(op, context_name, inputs): def local_gpua_split(op, context_name, inputs):
#TODO use props # TODO use props
return GpuSplit(op.len_splits) return GpuSplit(op.len_splits)
...@@ -1233,7 +1191,6 @@ def local_assert(op, context_name, inputs): ...@@ -1233,7 +1191,6 @@ def local_assert(op, context_name, inputs):
*inputs[1:]))] *inputs[1:]))]
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([ConvOp]) @op_lifter([ConvOp])
def local_error_convop(op, context_name, inputs): def local_error_convop(op, context_name, inputs):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论