提交 de33e6ad authored 作者: Frederic's avatar Frederic

Replace numpy.all/any with the built-in all/any; the built-ins are faster for short Python lists of booleans.

Also a few PEP 8 fixes; these are valid now that we support Python 2.6+.
上级 180a6e40
...@@ -185,10 +185,10 @@ def local_gpu_elemwise_0(node): ...@@ -185,10 +185,10 @@ def local_gpu_elemwise_0(node):
""" """
if (isinstance(node.op, tensor.Elemwise) and if (isinstance(node.op, tensor.Elemwise) and
dtype_in_elemwise_supported(node.op)): dtype_in_elemwise_supported(node.op)):
if numpy.any([i.owner and if any([i.owner and
isinstance(i.owner.op, HostFromGpu) isinstance(i.owner.op, HostFromGpu)
for i in node.inputs]): for i in node.inputs]):
if numpy.all([o.type.dtype == 'float32' for o in node.outputs]): if all([o.type.dtype == 'float32' for o in node.outputs]):
# Don't set any inplace pattern. # Don't set any inplace pattern.
# gpu_inplace_elemwise_optimizer will do it later # gpu_inplace_elemwise_optimizer will do it later
...@@ -205,14 +205,14 @@ def local_gpu_elemwise_0(node): ...@@ -205,14 +205,14 @@ def local_gpu_elemwise_0(node):
upcastable = set(['float32', 'int8', 'int16', 'uint8', upcastable = set(['float32', 'int8', 'int16', 'uint8',
'uint16']) 'uint16'])
# case 1 - all inputs are already float32 # case 1 - all inputs are already float32
if numpy.all([i.type.dtype == 'float32' for i in node.inputs]): if all([i.type.dtype == 'float32' for i in node.inputs]):
#TODO: change this when fusion makes Elemwise with multiple #TODO: change this when fusion makes Elemwise with multiple
# outputs # outputs
gpu_elemwise = new_op(*(gpu_from_host(i) gpu_elemwise = new_op(*(gpu_from_host(i)
for i in node.inputs)) for i in node.inputs))
# case 2 - it is still ok if some inputs were upcast to float32 # case 2 - it is still ok if some inputs were upcast to float32
elif numpy.all([i.type.dtype in upcastable elif all([i.type.dtype in upcastable
for i in node.inputs]): for i in node.inputs]):
# second - establish that a new node with upcasted inputs # second - establish that a new node with upcasted inputs
# has the same outputs types as the original node # has the same outputs types as the original node
upcasted = node.op.make_node(*[tensor.cast(i, 'float32') upcasted = node.op.make_node(*[tensor.cast(i, 'float32')
...@@ -361,8 +361,8 @@ def local_gpu_dot_to_dot22(node): ...@@ -361,8 +361,8 @@ def local_gpu_dot_to_dot22(node):
if node.op == tensor.basic.dot: if node.op == tensor.basic.dot:
if node.outputs[0].type.dtype != 'float32': if node.outputs[0].type.dtype != 'float32':
return False return False
if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) if any([i.owner and isinstance(i.owner.op, HostFromGpu)
for i in node.inputs]): for i in node.inputs]):
x, y = node.inputs x, y = node.inputs
if _is_real_vector(x) and _is_real_matrix(y): if _is_real_vector(x) and _is_real_matrix(y):
new_op = GpuDimShuffle((False,), ['x', 0]) new_op = GpuDimShuffle((False,), ['x', 0])
...@@ -395,10 +395,10 @@ def local_gpu_lazy_ifelse(node): ...@@ -395,10 +395,10 @@ def local_gpu_lazy_ifelse(node):
gpu_ifelse = theano.ifelse.IfElse(node.op.n_outs, gpu=True) gpu_ifelse = theano.ifelse.IfElse(node.op.n_outs, gpu=True)
outs_clients = reduce(list.__add__, outs_clients = reduce(list.__add__,
[out.clients for out in node.outputs]) [out.clients for out in node.outputs])
if numpy.any([(i.owner and isinstance(i.owner.op, HostFromGpu)) if any([(i.owner and isinstance(i.owner.op, HostFromGpu))
for i in node.inputs]) or numpy.any( for i in node.inputs]) or any(
[c != 'output' and c.op == gpu_from_host for c, idx [c != 'output' and c.op == gpu_from_host for c, idx
in outs_clients]): in outs_clients]):
c = node.inputs[0] c = node.inputs[0]
outs = node.inputs[1:] outs = node.inputs[1:]
...@@ -456,8 +456,8 @@ def local_gpu_dot22(node): ...@@ -456,8 +456,8 @@ def local_gpu_dot22(node):
x, y = host_input.owner.inputs x, y = host_input.owner.inputs
return [gpu_dot22(gpu_from_host(x), gpu_from_host(y))] return [gpu_dot22(gpu_from_host(x), gpu_from_host(y))]
if isinstance(node.op, tensor.blas.Dot22): if isinstance(node.op, tensor.blas.Dot22):
if numpy.any([(i.owner and isinstance(i.owner.op, HostFromGpu)) if any([(i.owner and isinstance(i.owner.op, HostFromGpu))
for i in node.inputs]): for i in node.inputs]):
x, y = node.inputs x, y = node.inputs
return [host_from_gpu(gpu_dot22(gpu_from_host(x), return [host_from_gpu(gpu_dot22(gpu_from_host(x),
gpu_from_host(y)))] gpu_from_host(y)))]
...@@ -481,8 +481,8 @@ def local_gpu_dot22scalar(node): ...@@ -481,8 +481,8 @@ def local_gpu_dot22scalar(node):
return [gpu_dot22scalar(gpu_from_host(x), gpu_from_host(y), return [gpu_dot22scalar(gpu_from_host(x), gpu_from_host(y),
tensor.blas._as_scalar(scalar))] tensor.blas._as_scalar(scalar))]
if isinstance(node.op, tensor.blas.Dot22Scalar): if isinstance(node.op, tensor.blas.Dot22Scalar):
if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu) if any([i.owner and isinstance(i.owner.op, HostFromGpu)
for i in node.inputs]): for i in node.inputs]):
x, y, scalar = node.inputs x, y, scalar = node.inputs
return [host_from_gpu( return [host_from_gpu(
gpu_dot22scalar(gpu_from_host(x), gpu_dot22scalar(gpu_from_host(x),
...@@ -585,10 +585,10 @@ def local_gpu_gemm(node): ...@@ -585,10 +585,10 @@ def local_gpu_gemm(node):
op = host_input.owner.op op = host_input.owner.op
z, a, x, y, b = host_input.owner.inputs z, a, x, y, b = host_input.owner.inputs
return [gpu_gemm_no_inplace(gpu_from_host(z), return [gpu_gemm_no_inplace(gpu_from_host(z),
a, a,
gpu_from_host(x), gpu_from_host(x),
gpu_from_host(y), gpu_from_host(y),
b)] b)]
if isinstance(node.op, tensor.blas.Gemm): if isinstance(node.op, tensor.blas.Gemm):
z, a, x, y, b = node.inputs z, a, x, y, b = node.inputs
x_on_gpu = (x.owner and isinstance(x.owner.op, HostFromGpu)) x_on_gpu = (x.owner and isinstance(x.owner.op, HostFromGpu))
...@@ -1195,7 +1195,7 @@ def local_gpu_join(node): ...@@ -1195,7 +1195,7 @@ def local_gpu_join(node):
#print "OPT: matches =", matches #print "OPT: matches =", matches
# if all input tensors are host_from_gpu'ified # if all input tensors are host_from_gpu'ified
if numpy.all(matches): if all(matches):
# the extra gpu_from_host introduced here will # the extra gpu_from_host introduced here will
# be removed by further optimizations # be removed by further optimizations
new_tensors = [gpu_from_host(t) for t in axis_and_tensors[1:]] new_tensors = [gpu_from_host(t) for t in axis_and_tensors[1:]]
...@@ -1372,15 +1372,15 @@ def local_gpualloc(node): ...@@ -1372,15 +1372,15 @@ def local_gpualloc(node):
isinstance(node.inputs[0].owner.op, HostFromGpu): isinstance(node.inputs[0].owner.op, HostFromGpu):
replace = True replace = True
elif all([c != 'output' and c.op == gpu_from_host elif all([c != 'output' and c.op == gpu_from_host
for c, idx in node.outputs[0].clients]): for c, idx in node.outputs[0].clients]):
# if all clients are on gpu # if all clients are on gpu
replace = True replace = True
elif all([c != 'output' and elif all([c != 'output' and
c.op == tensor.join and c.op == tensor.join and
all([i.owner and all([i.owner and
i.owner.op in [host_from_gpu, tensor.alloc] i.owner.op in [host_from_gpu, tensor.alloc]
for i in c.inputs[1:]]) for i in c.inputs[1:]])
for c, idx in node.outputs[0].clients]): for c, idx in node.outputs[0].clients]):
# if the client is a subtensor with input on gpu or alloc # if the client is a subtensor with input on gpu or alloc
replace = True replace = True
if replace and node.inputs[0].dtype != 'float32': if replace and node.inputs[0].dtype != 'float32':
...@@ -1437,8 +1437,8 @@ def local_gpu_eye(node): ...@@ -1437,8 +1437,8 @@ def local_gpu_eye(node):
host_input.owner.op.dtype == "float32"): host_input.owner.op.dtype == "float32"):
return [gpu_eye(*host_input.owner.inputs)] return [gpu_eye(*host_input.owner.inputs)]
if isinstance(node.op, tensor.Eye) and node.op.dtype == "float32": if isinstance(node.op, tensor.Eye) and node.op.dtype == "float32":
if numpy.any([(i.owner and isinstance(i.owner.op, HostFromGpu)) if any([(i.owner and isinstance(i.owner.op, HostFromGpu))
for i in node.inputs]): for i in node.inputs]):
return [host_from_gpu(gpu_eye(*node.inputs))] return [host_from_gpu(gpu_eye(*node.inputs))]
return False return False
...@@ -1606,8 +1606,8 @@ def gpuScanOptimization(node): ...@@ -1606,8 +1606,8 @@ def gpuScanOptimization(node):
#scan(host_from_gpu) -> host_from_gpu(GPUscan) #scan(host_from_gpu) -> host_from_gpu(GPUscan)
if (type(node.op) == scan_op.Scan if (type(node.op) == scan_op.Scan
and not node.op.info['gpu']): and not node.op.info['gpu']):
if numpy.any([(i.owner and isinstance(i.owner.op, HostFromGpu)) if any([(i.owner and isinstance(i.owner.op, HostFromGpu))
for i in node.inputs]): for i in node.inputs]):
thescan = node.op thescan = node.op
info = copy.deepcopy(thescan.info) info = copy.deepcopy(thescan.info)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论