提交 e532ac67 authored 作者: Frederic's avatar Frederic

Faster GPU optimizations: match ops with isinstance() instead of ==, as it is faster.

Also import a module that is needed inside an optimization only once, instead of on every call.
上级 cefb3421
...@@ -18,7 +18,7 @@ from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, ProxyDB, ...@@ -18,7 +18,7 @@ from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, ProxyDB,
from theano.gof.python25 import all, any from theano.gof.python25 import all, any
from theano.sandbox.cuda.basic_ops import ( from theano.sandbox.cuda.basic_ops import (
device_properties, gpu_eye, device_properties, gpu_eye,
gpu_from_host, host_from_gpu, HostFromGpu, gpu_from_host, host_from_gpu, GpuFromHost, HostFromGpu,
GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce, GpuFlatten, GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce, GpuFlatten,
GpuSubtensor, GpuAdvancedSubtensor1, GpuSubtensor, GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20, GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20,
...@@ -42,6 +42,7 @@ from theano.sandbox.cuda.elemwise import erfinv_gpu ...@@ -42,6 +42,7 @@ from theano.sandbox.cuda.elemwise import erfinv_gpu
from theano.sandbox.cuda.var import CudaNdarrayConstant from theano.sandbox.cuda.var import CudaNdarrayConstant
from theano.scan_module import scan_utils, scan_op, scan_opt from theano.scan_module import scan_utils, scan_op, scan_opt
from theano.tensor.blas import _is_real_vector, _is_real_matrix from theano.tensor.blas import _is_real_vector, _is_real_matrix
linalg = None
#optdb.print_summary() # shows what is currently registered #optdb.print_summary() # shows what is currently registered
...@@ -236,7 +237,7 @@ def local_gpu_elemwise_1(node): ...@@ -236,7 +237,7 @@ def local_gpu_elemwise_1(node):
""" """
gpu_from_host(Elemwise)) -> GpuElemwise(gpu_from_host(...)) gpu_from_host(Elemwise)) -> GpuElemwise(gpu_from_host(...))
""" """
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
host_i, = node.inputs host_i, = node.inputs
if (host_i.owner and if (host_i.owner and
isinstance(host_i.owner.op, tensor.Elemwise) and isinstance(host_i.owner.op, tensor.Elemwise) and
...@@ -280,7 +281,7 @@ def local_gpu_dimshuffle_0(node): ...@@ -280,7 +281,7 @@ def local_gpu_dimshuffle_0(node):
new_op = GpuDimShuffle(node.op.input_broadcastable, new_op = GpuDimShuffle(node.op.input_broadcastable,
node.op.new_order) node.op.new_order)
return [host_from_gpu(new_op(gpu_from_host(input)))] return [host_from_gpu(new_op(gpu_from_host(input)))]
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
if host_input.owner and isinstance(host_input.owner.op, if host_input.owner and isinstance(host_input.owner.op,
tensor.DimShuffle): tensor.DimShuffle):
...@@ -303,7 +304,7 @@ def local_gpu_specifyShape_0(node): ...@@ -303,7 +304,7 @@ def local_gpu_specifyShape_0(node):
if input.owner and isinstance(input.owner.op, HostFromGpu): if input.owner and isinstance(input.owner.op, HostFromGpu):
return [host_from_gpu(tensor.specify_shape(gpu_from_host(input), return [host_from_gpu(tensor.specify_shape(gpu_from_host(input),
*node.inputs[1:]))] *node.inputs[1:]))]
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
if host_input.owner and isinstance(host_input.owner.op, if host_input.owner and isinstance(host_input.owner.op,
tensor.SpecifyShape): tensor.SpecifyShape):
...@@ -330,7 +331,7 @@ def local_gpu_dot_to_dot22(node): ...@@ -330,7 +331,7 @@ def local_gpu_dot_to_dot22(node):
# In case the dot does an input upcast, we must check that we can # In case the dot does an input upcast, we must check that we can
# make it run on the gpu. # make it run on the gpu.
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
if node.outputs[0].type.dtype != 'float32': if node.outputs[0].type.dtype != 'float32':
return False return False
host_input = node.inputs[0] host_input = node.inputs[0]
...@@ -355,7 +356,7 @@ def local_gpu_dot_to_dot22(node): ...@@ -355,7 +356,7 @@ def local_gpu_dot_to_dot22(node):
if node.op == tensor.basic.dot: if node.op == tensor.basic.dot:
if node.outputs[0].type.dtype != 'float32': if node.outputs[0].type.dtype != 'float32':
return False return False
if numpy.any([(i.owner and i.owner.op == host_from_gpu) if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu)
for i in node.inputs]): for i in node.inputs]):
x, y = node.inputs x, y = node.inputs
if _is_real_vector(x) and _is_real_matrix(y): if _is_real_vector(x) and _is_real_matrix(y):
...@@ -389,7 +390,7 @@ def local_gpu_lazy_ifelse(node): ...@@ -389,7 +390,7 @@ def local_gpu_lazy_ifelse(node):
gpu_ifelse = theano.ifelse.IfElse(node.op.n_outs, gpu=True) gpu_ifelse = theano.ifelse.IfElse(node.op.n_outs, gpu=True)
outs_clients = reduce(list.__add__, outs_clients = reduce(list.__add__,
[out.clients for out in node.outputs]) [out.clients for out in node.outputs])
if numpy.any([(i.owner and i.owner.op == host_from_gpu) if numpy.any([(i.owner and isinstance(i.owner.op, HostFromGpu))
for i in node.inputs]) or numpy.any( for i in node.inputs]) or numpy.any(
[c != 'output' and c.op == gpu_from_host for c, idx [c != 'output' and c.op == gpu_from_host for c, idx
in outs_clients]): in outs_clients]):
...@@ -406,7 +407,7 @@ def local_gpu_lazy_ifelse(node): ...@@ -406,7 +407,7 @@ def local_gpu_lazy_ifelse(node):
return [host_from_gpu(out) for out in return [host_from_gpu(out) for out in
gpu_ifelse.make_node(c, *outs).outputs] gpu_ifelse.make_node(c, *outs).outputs]
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
if (host_input.owner and if (host_input.owner and
isinstance(host_input.owner.op, theano.ifelse.IfElse) and isinstance(host_input.owner.op, theano.ifelse.IfElse) and
...@@ -443,13 +444,14 @@ def local_gpu_dot22(node): ...@@ -443,13 +444,14 @@ def local_gpu_dot22(node):
dot(host_from_gpu) -> host_from_gpu(gpudot22) dot(host_from_gpu) -> host_from_gpu(gpudot22)
""" """
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
if host_input.owner and host_input.owner.op == tensor.blas._dot22: if host_input.owner and isinstance(host_input.owner.op,
tensor.blas.Dot22):
x, y = host_input.owner.inputs x, y = host_input.owner.inputs
return [gpu_dot22(gpu_from_host(x), gpu_from_host(y))] return [gpu_dot22(gpu_from_host(x), gpu_from_host(y))]
if node.op == tensor.blas._dot22: if isinstance(node.op, tensor.blas.Dot22):
if numpy.any([(i.owner and i.owner.op == host_from_gpu) if numpy.any([(i.owner and isinstance(i.owner.op, HostFromGpu))
for i in node.inputs]): for i in node.inputs]):
x, y = node.inputs x, y = node.inputs
return [host_from_gpu(gpu_dot22(gpu_from_host(x), return [host_from_gpu(gpu_dot22(gpu_from_host(x),
...@@ -465,15 +467,16 @@ def local_gpu_dot22scalar(node): ...@@ -465,15 +467,16 @@ def local_gpu_dot22scalar(node):
dot(host_from_gpu) -> host_from_gpu(gpudot22scalar) dot(host_from_gpu) -> host_from_gpu(gpudot22scalar)
""" """
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
if (host_input.owner and if (host_input.owner and
host_input.owner.op == tensor.blas._dot22scalar): isinstance(host_input.owner.op,
tensor.blas.Dot22Scalar)):
x, y, scalar = host_input.owner.inputs x, y, scalar = host_input.owner.inputs
return [gpu_dot22scalar(gpu_from_host(x), gpu_from_host(y), return [gpu_dot22scalar(gpu_from_host(x), gpu_from_host(y),
tensor.blas._as_scalar(scalar))] tensor.blas._as_scalar(scalar))]
if node.op == tensor.blas._dot22scalar: if isinstance(node.op, tensor.blas.Dot22Scalar):
if numpy.any([(i.owner and i.owner.op == host_from_gpu) if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu)
for i in node.inputs]): for i in node.inputs]):
x, y, scalar = node.inputs x, y, scalar = node.inputs
return [host_from_gpu( return [host_from_gpu(
...@@ -491,14 +494,12 @@ def local_gpu_gemv(node): ...@@ -491,14 +494,12 @@ def local_gpu_gemv(node):
gemv(host_from_gpu) -> host_from_gpu(gpu_gemv) gemv(host_from_gpu) -> host_from_gpu(gpu_gemv)
""" """
gemvs = [tensor.blas.gemv_inplace, gemvs = (tensor.blas.Gemv,
tensor.blas.gemv_no_inplace, tensor.blas_c.CGemv,
tensor.blas_c.cgemv_inplace, )
tensor.blas_c.cgemv_no_inplace, if isinstance(node.op, GpuFromHost):
]
if node.op == gpu_from_host:
host_input = node.inputs[0] host_input = node.inputs[0]
if host_input.owner and host_input.owner.op in gemvs: if host_input.owner and isinstance(host_input.owner.op, gemvs):
op = host_input.owner.op op = host_input.owner.op
z, a, x, y, b = host_input.owner.inputs z, a, x, y, b = host_input.owner.inputs
return [gpu_gemv_no_inplace( return [gpu_gemv_no_inplace(
...@@ -507,11 +508,11 @@ def local_gpu_gemv(node): ...@@ -507,11 +508,11 @@ def local_gpu_gemv(node):
gpu_from_host(x), gpu_from_host(x),
gpu_from_host(y), gpu_from_host(y),
b)] b)]
if node.op in gemvs: if isinstance(node.op, gemvs):
z, a, x, y, b = node.inputs z, a, x, y, b = node.inputs
x_on_gpu = (x.owner and x.owner.op == host_from_gpu) x_on_gpu = (x.owner and isinstance(x.owner.op, HostFromGpu))
y_on_gpu = (y.owner and y.owner.op == host_from_gpu) y_on_gpu = (y.owner and isinstance(y.owner.op, HostFromGpu))
z_on_gpu = (z.owner and z.owner.op == host_from_gpu) z_on_gpu = (z.owner and isinstance(z.owner.op, HostFromGpu))
if x_on_gpu or y_on_gpu or z_on_gpu: if x_on_gpu or y_on_gpu or z_on_gpu:
return [host_from_gpu( return [host_from_gpu(
gpu_gemv_no_inplace( gpu_gemv_no_inplace(
...@@ -532,17 +533,14 @@ def local_gpu_ger(node): ...@@ -532,17 +533,14 @@ def local_gpu_ger(node):
ger(host_from_gpu) -> host_from_gpu(gpu_ger) ger(host_from_gpu) -> host_from_gpu(gpu_ger)
""" """
gers = [tensor.blas_c.cger_inplace, gers = (tensor.blas_c.CGer,
tensor.blas_c.cger_no_inplace, tensor.blas.Ger,
tensor.blas.ger_destructive, tensor.blas_scipy.ScipyGer,
tensor.blas.ger, )
tensor.blas_scipy.scipy_ger_inplace,
tensor.blas_scipy.scipy_ger_no_inplace, if isinstance(node.op, GpuFromHost):
]
if node.op == gpu_from_host:
host_input = node.inputs[0] host_input = node.inputs[0]
if host_input.owner and host_input.owner.op in gers: if host_input.owner and isinstance(host_input.owner.op, gers):
op = host_input.owner.op op = host_input.owner.op
z, a, x, y = host_input.owner.inputs z, a, x, y = host_input.owner.inputs
return [gpu_ger_no_inplace( return [gpu_ger_no_inplace(
...@@ -551,11 +549,11 @@ def local_gpu_ger(node): ...@@ -551,11 +549,11 @@ def local_gpu_ger(node):
gpu_from_host(x), gpu_from_host(x),
gpu_from_host(y) gpu_from_host(y)
)] )]
if node.op in gers: if isinstance(node.op, gers):
z, a, x, y = node.inputs z, a, x, y = node.inputs
x_on_gpu = (x.owner and x.owner.op == host_from_gpu) x_on_gpu = (x.owner and isinstance(x.owner.op, HostFromGpu))
y_on_gpu = (y.owner and y.owner.op == host_from_gpu) y_on_gpu = (y.owner and isinstance(y.owner.op, HostFromGpu))
z_on_gpu = (z.owner and z.owner.op == host_from_gpu) z_on_gpu = (z.owner and isinstance(z.owner.op, HostFromGpu))
if x_on_gpu or y_on_gpu or z_on_gpu: if x_on_gpu or y_on_gpu or z_on_gpu:
return [host_from_gpu( return [host_from_gpu(
gpu_ger_no_inplace( gpu_ger_no_inplace(
...@@ -575,13 +573,10 @@ def local_gpu_gemm(node): ...@@ -575,13 +573,10 @@ def local_gpu_gemm(node):
gemm(host_from_gpu) -> host_from_gpu(gpu_gemm) gemm(host_from_gpu) -> host_from_gpu(gpu_gemm)
""" """
gemms = [ if isinstance(node.op, GpuFromHost):
tensor.blas.gemm_inplace,
tensor.blas.gemm_no_inplace,
]
if node.op == gpu_from_host:
host_input = node.inputs[0] host_input = node.inputs[0]
if host_input.owner and host_input.owner.op in gemms: if host_input.owner and isinstance(host_input.owner.op,
tensor.blas.Gemm):
op = host_input.owner.op op = host_input.owner.op
z, a, x, y, b = host_input.owner.inputs z, a, x, y, b = host_input.owner.inputs
return [gpu_gemm_no_inplace(gpu_from_host(z), return [gpu_gemm_no_inplace(gpu_from_host(z),
...@@ -589,11 +584,11 @@ def local_gpu_gemm(node): ...@@ -589,11 +584,11 @@ def local_gpu_gemm(node):
gpu_from_host(x), gpu_from_host(x),
gpu_from_host(y), gpu_from_host(y),
b)] b)]
if node.op in gemms: if isinstance(node.op, tensor.blas.Gemm):
z, a, x, y, b = node.inputs z, a, x, y, b = node.inputs
x_on_gpu = (x.owner and x.owner.op == host_from_gpu) x_on_gpu = (x.owner and isinstance(x.owner.op, HostFromGpu))
y_on_gpu = (y.owner and y.owner.op == host_from_gpu) y_on_gpu = (y.owner and isinstance(y.owner.op, HostFromGpu))
z_on_gpu = (z.owner and z.owner.op == host_from_gpu) z_on_gpu = (z.owner and isinstance(z.owner.op, HostFromGpu))
if x_on_gpu or y_on_gpu or z_on_gpu: if x_on_gpu or y_on_gpu or z_on_gpu:
return [host_from_gpu(gpu_gemm_no_inplace(gpu_from_host(z), return [host_from_gpu(gpu_gemm_no_inplace(gpu_from_host(z),
a, a,
...@@ -618,7 +613,7 @@ def local_gpu_careduce(node): ...@@ -618,7 +613,7 @@ def local_gpu_careduce(node):
# and max does not support all combinations of axes # and max does not support all combinations of axes
if node.op.scalar_op in [scal.add, scal.mul, scal.maximum, scal.minimum]: if node.op.scalar_op in [scal.add, scal.mul, scal.maximum, scal.minimum]:
x, = node.inputs x, = node.inputs
if x.owner and x.owner.op == host_from_gpu: if x.owner and isinstance(x.owner.op, HostFromGpu):
if node.op.axis is None: if node.op.axis is None:
reduce_mask = [1] * x.type.ndim reduce_mask = [1] * x.type.ndim
else: else:
...@@ -688,7 +683,7 @@ def local_gpu_careduce(node): ...@@ -688,7 +683,7 @@ def local_gpu_careduce(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.Reshape]) @local_optimizer([gpu_from_host, tensor.Reshape])
def local_gpu_reshape(node): def local_gpu_reshape(node):
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
if host_input.owner and \ if host_input.owner and \
isinstance(host_input.owner.op, tensor.Reshape): isinstance(host_input.owner.op, tensor.Reshape):
...@@ -705,7 +700,7 @@ def local_gpu_reshape(node): ...@@ -705,7 +700,7 @@ def local_gpu_reshape(node):
return [gpu_reshape] return [gpu_reshape]
if isinstance(node.op, tensor.Reshape): if isinstance(node.op, tensor.Reshape):
x, shp = node.inputs x, shp = node.inputs
if x.owner and x.owner.op == host_from_gpu: if x.owner and isinstance(x.owner.op, HostFromGpu):
gpu_x, = x.owner.inputs gpu_x, = x.owner.inputs
gpu_reshape = GpuReshape(node.op.ndim)(gpu_x, shp) gpu_reshape = GpuReshape(node.op.ndim)(gpu_x, shp)
if gpu_reshape.broadcastable != node.outputs[0].broadcastable: if gpu_reshape.broadcastable != node.outputs[0].broadcastable:
...@@ -722,7 +717,7 @@ def local_gpu_reshape(node): ...@@ -722,7 +717,7 @@ def local_gpu_reshape(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.Flatten]) @local_optimizer([gpu_from_host, tensor.Flatten])
def local_gpu_flatten(node): def local_gpu_flatten(node):
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
if host_input.owner and \ if host_input.owner and \
isinstance(host_input.owner.op, tensor.Flatten): isinstance(host_input.owner.op, tensor.Flatten):
...@@ -732,7 +727,7 @@ def local_gpu_flatten(node): ...@@ -732,7 +727,7 @@ def local_gpu_flatten(node):
if isinstance(node.op, tensor.Flatten): if isinstance(node.op, tensor.Flatten):
x, = node.inputs x, = node.inputs
outdim = node.op.outdim outdim = node.op.outdim
if x.owner and x.owner.op == host_from_gpu: if x.owner and isinstance(x.owner.op, HostFromGpu):
gpu_x, = x.owner.inputs gpu_x, = x.owner.inputs
return [host_from_gpu(GpuFlatten(outdim)(gpu_x))] return [host_from_gpu(GpuFlatten(outdim)(gpu_x))]
return False return False
...@@ -741,7 +736,7 @@ def local_gpu_flatten(node): ...@@ -741,7 +736,7 @@ def local_gpu_flatten(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.Subtensor]) @local_optimizer([gpu_from_host, tensor.Subtensor])
def local_gpu_subtensor(node): def local_gpu_subtensor(node):
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
if host_input.owner and \ if host_input.owner and \
isinstance(host_input.owner.op, tensor.Subtensor): isinstance(host_input.owner.op, tensor.Subtensor):
...@@ -751,9 +746,11 @@ def local_gpu_subtensor(node): ...@@ -751,9 +746,11 @@ def local_gpu_subtensor(node):
return [GpuSubtensor(subt.idx_list)(gpu_from_host(x), *coords)] return [GpuSubtensor(subt.idx_list)(gpu_from_host(x), *coords)]
if isinstance(node.op, tensor.Subtensor): if isinstance(node.op, tensor.Subtensor):
x = node.inputs[0] x = node.inputs[0]
coords = node.inputs[1:] if (x.owner and
if x.owner and x.owner.op == host_from_gpu and x.dtype == "float32": isinstance(x.owner.op, HostFromGpu) and
x.dtype == "float32"):
gpu_x, = x.owner.inputs gpu_x, = x.owner.inputs
coords = node.inputs[1:]
return [host_from_gpu(GpuSubtensor( return [host_from_gpu(GpuSubtensor(
node.op.idx_list)(gpu_x, *coords))] node.op.idx_list)(gpu_x, *coords))]
return False return False
...@@ -762,7 +759,7 @@ def local_gpu_subtensor(node): ...@@ -762,7 +759,7 @@ def local_gpu_subtensor(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.AdvancedSubtensor1]) @local_optimizer([gpu_from_host, tensor.AdvancedSubtensor1])
def local_gpu_advanced_subtensor1(node): def local_gpu_advanced_subtensor1(node):
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
if host_input.owner and \ if host_input.owner and \
host_input.owner.op.__class__ is tensor.AdvancedSubtensor1: host_input.owner.op.__class__ is tensor.AdvancedSubtensor1:
...@@ -772,7 +769,7 @@ def local_gpu_advanced_subtensor1(node): ...@@ -772,7 +769,7 @@ def local_gpu_advanced_subtensor1(node):
if node.op.__class__ is tensor.AdvancedSubtensor1: if node.op.__class__ is tensor.AdvancedSubtensor1:
x = node.inputs[0] x = node.inputs[0]
coords = node.inputs[1:] coords = node.inputs[1:]
if x.owner and x.owner.op == host_from_gpu and x.dtype == "float32": if x.owner and isinstance(x.owner.op, HostFromGpu) and x.dtype == "float32":
gpu_x, = x.owner.inputs gpu_x, = x.owner.inputs
return [host_from_gpu(GpuAdvancedSubtensor1()(gpu_x, *coords))] return [host_from_gpu(GpuAdvancedSubtensor1()(gpu_x, *coords))]
return False return False
...@@ -781,7 +778,7 @@ def local_gpu_advanced_subtensor1(node): ...@@ -781,7 +778,7 @@ def local_gpu_advanced_subtensor1(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.AdvancedIncSubtensor1]) @local_optimizer([gpu_from_host, tensor.AdvancedIncSubtensor1])
def local_gpu_advanced_incsubtensor1(node): def local_gpu_advanced_incsubtensor1(node):
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
# Should not execute for GpuAdvancedIncSubtensor1 # Should not execute for GpuAdvancedIncSubtensor1
if host_input.owner and \ if host_input.owner and \
...@@ -816,12 +813,12 @@ def local_gpu_advanced_incsubtensor1(node): ...@@ -816,12 +813,12 @@ def local_gpu_advanced_incsubtensor1(node):
x, y = node.inputs[0:2] x, y = node.inputs[0:2]
coords = node.inputs[2:] coords = node.inputs[2:]
go_gpu = False go_gpu = False
if x.owner and x.owner.op == host_from_gpu: if x.owner and isinstance(x.owner.op, HostFromGpu):
go_gpu = True go_gpu = True
gpu_x, = x.owner.inputs gpu_x, = x.owner.inputs
else: else:
gpu_x = gpu_from_host(x) gpu_x = gpu_from_host(x)
if y.owner and y.owner.op == host_from_gpu: if y.owner and isinstance(y.owner.op, HostFromGpu):
go_gpu = True go_gpu = True
gpu_y, = y.owner.inputs gpu_y, = y.owner.inputs
else: else:
...@@ -855,7 +852,7 @@ def local_gpu_advanced_incsubtensor1(node): ...@@ -855,7 +852,7 @@ def local_gpu_advanced_incsubtensor1(node):
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.IncSubtensor]) @local_optimizer([gpu_from_host, tensor.IncSubtensor])
def local_gpu_incsubtensor(node): def local_gpu_incsubtensor(node):
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
host_output = node.inputs[0] host_output = node.inputs[0]
if host_output.owner and \ if host_output.owner and \
type(host_output.owner.op) == tensor.IncSubtensor: type(host_output.owner.op) == tensor.IncSubtensor:
...@@ -879,12 +876,12 @@ def local_gpu_incsubtensor(node): ...@@ -879,12 +876,12 @@ def local_gpu_incsubtensor(node):
assert isinstance(y.type, tensor.TensorType) assert isinstance(y.type, tensor.TensorType)
coords = node.inputs[2:] coords = node.inputs[2:]
go_gpu = False go_gpu = False
if x.owner and x.owner.op == host_from_gpu: if x.owner and isinstance(x.owner.op, HostFromGpu):
go_gpu = True go_gpu = True
gpu_x, = x.owner.inputs gpu_x, = x.owner.inputs
else: else:
gpu_x = gpu_from_host(x) gpu_x = gpu_from_host(x)
if y.owner and y.owner.op == host_from_gpu: if y.owner and isinstance(y.owner.op, HostFromGpu):
go_gpu = True go_gpu = True
gpu_y, = y.owner.inputs gpu_y, = y.owner.inputs
else: else:
...@@ -904,7 +901,7 @@ def local_gpu_incsubtensor(node): ...@@ -904,7 +901,7 @@ def local_gpu_incsubtensor(node):
def local_gpu_shape(node): def local_gpu_shape(node):
if isinstance(node.op, tensor.Shape): if isinstance(node.op, tensor.Shape):
x, = node.inputs x, = node.inputs
if x.owner and x.owner.op == host_from_gpu: if x.owner and isinstance(x.owner.op, HostFromGpu):
gpu_x, = x.owner.inputs gpu_x, = x.owner.inputs
return [gpu_shape(gpu_x)] return [gpu_shape(gpu_x)]
return False return False
...@@ -916,7 +913,7 @@ def local_gpu_rebroadcast(node): ...@@ -916,7 +913,7 @@ def local_gpu_rebroadcast(node):
'''rebroadcast(host_from_gpu(x)) -> host_from_gpu(rebroadcast(x))''' '''rebroadcast(host_from_gpu(x)) -> host_from_gpu(rebroadcast(x))'''
if isinstance(node.op, tensor.Rebroadcast): if isinstance(node.op, tensor.Rebroadcast):
x, = node.inputs x, = node.inputs
if (x.owner and x.owner.op == host_from_gpu): if (x.owner and isinstance(x.owner.op, HostFromGpu)):
gpu_x = x.owner.inputs[0] gpu_x = x.owner.inputs[0]
return [host_from_gpu(node.op(gpu_x))] return [host_from_gpu(node.op(gpu_x))]
...@@ -930,7 +927,7 @@ def gpu_print_wrapper(op, cnda): ...@@ -930,7 +927,7 @@ def gpu_print_wrapper(op, cnda):
def local_gpu_print_op(node): def local_gpu_print_op(node):
if isinstance(node.op, tensor.printing.Print): if isinstance(node.op, tensor.printing.Print):
x, = node.inputs x, = node.inputs
if x.owner and x.owner.op == host_from_gpu: if x.owner and isinstance(x.owner.op, HostFromGpu):
gpu_x, = x.owner.inputs gpu_x, = x.owner.inputs
new_op = node.op.__class__(global_fn=gpu_print_wrapper) new_op = node.op.__class__(global_fn=gpu_print_wrapper)
new_op.old_op = node.op new_op.old_op = node.op
...@@ -951,7 +948,7 @@ import theano.tensor.nnet ...@@ -951,7 +948,7 @@ import theano.tensor.nnet
def local_gpu_crossentorpy_softmax_argmax_1hot_with_bias(node): def local_gpu_crossentorpy_softmax_argmax_1hot_with_bias(node):
if isinstance(node.op, tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias): if isinstance(node.op, tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias):
x, b, y = node.inputs x, b, y = node.inputs
if x.owner and x.owner.op == host_from_gpu: if x.owner and isinstance(x.owner.op, HostFromGpu):
gpu_x, = x.owner.inputs gpu_x, = x.owner.inputs
# if y is a cast to integers, we can go to the underlying # if y is a cast to integers, we can go to the underlying
# thing if we want, since this gpu op will cast to integers # thing if we want, since this gpu op will cast to integers
...@@ -981,7 +978,7 @@ def local_gpu_crossentorpy_softmax_argmax_1hot_with_bias(node): ...@@ -981,7 +978,7 @@ def local_gpu_crossentorpy_softmax_argmax_1hot_with_bias(node):
def local_gpu_crossentorpy_softmax_1hot_with_bias_dx(node): def local_gpu_crossentorpy_softmax_1hot_with_bias_dx(node):
if isinstance(node.op, tensor.nnet.CrossentropySoftmax1HotWithBiasDx): if isinstance(node.op, tensor.nnet.CrossentropySoftmax1HotWithBiasDx):
dnll, sm, yidx = node.inputs dnll, sm, yidx = node.inputs
if sm.owner and sm.owner.op == host_from_gpu: if sm.owner and isinstance(sm.owner.op, HostFromGpu):
gpu_sm, = sm.owner.inputs gpu_sm, = sm.owner.inputs
gpu_dx = GpuCrossentropySoftmax1HotWithBiasDx()( gpu_dx = GpuCrossentropySoftmax1HotWithBiasDx()(
gpu_from_host(dnll), gpu_from_host(dnll),
...@@ -996,7 +993,7 @@ def local_gpu_crossentorpy_softmax_1hot_with_bias_dx(node): ...@@ -996,7 +993,7 @@ def local_gpu_crossentorpy_softmax_1hot_with_bias_dx(node):
def local_gpu_softmax(node): def local_gpu_softmax(node):
if isinstance(node.op, tensor.nnet.Softmax): if isinstance(node.op, tensor.nnet.Softmax):
x, = node.inputs x, = node.inputs
if x.owner and x.owner.op == host_from_gpu: if x.owner and isinstance(x.owner.op, HostFromGpu):
gpu_x, = x.owner.inputs gpu_x, = x.owner.inputs
gpu_sm = GpuSoftmax()(gpu_x) gpu_sm = GpuSoftmax()(gpu_x)
return [host_from_gpu(gpu_sm)] return [host_from_gpu(gpu_sm)]
...@@ -1008,8 +1005,8 @@ def local_gpu_softmax(node): ...@@ -1008,8 +1005,8 @@ def local_gpu_softmax(node):
def local_gpu_softmax_with_bias(node): def local_gpu_softmax_with_bias(node):
if isinstance(node.op, tensor.nnet.SoftmaxWithBias): if isinstance(node.op, tensor.nnet.SoftmaxWithBias):
x, b = node.inputs x, b = node.inputs
x_on_gpu = x.owner and x.owner.op == host_from_gpu x_on_gpu = x.owner and isinstance(x.owner.op, HostFromGpu)
b_on_gpu = b.owner and b.owner.op == host_from_gpu b_on_gpu = b.owner and isinstance(b.owner.op, HostFromGpu)
if x_on_gpu or b_on_gpu: if x_on_gpu or b_on_gpu:
gpu_sm = GpuSoftmaxWithBias()(gpu_from_host(x), gpu_from_host(b)) gpu_sm = GpuSoftmaxWithBias()(gpu_from_host(x), gpu_from_host(b))
return [host_from_gpu(gpu_sm)] return [host_from_gpu(gpu_sm)]
...@@ -1081,7 +1078,7 @@ def local_gpu_conv(node): ...@@ -1081,7 +1078,7 @@ def local_gpu_conv(node):
atol = 3e-5 atol = 3e-5
return CudaNdarrayType.values_eq_approx(a, b, atol=atol) return CudaNdarrayType.values_eq_approx(a, b, atol=atol)
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
#gpu_from_host(conv) -> gpu_conv(gpu_from_host) #gpu_from_host(conv) -> gpu_conv(gpu_from_host)
host_input = node.inputs[0] host_input = node.inputs[0]
if host_input.owner and isinstance(host_input.owner.op, conv.ConvOp): if host_input.owner and isinstance(host_input.owner.op, conv.ConvOp):
...@@ -1101,8 +1098,8 @@ def local_gpu_conv(node): ...@@ -1101,8 +1098,8 @@ def local_gpu_conv(node):
if isinstance(node.op, conv.ConvOp): if isinstance(node.op, conv.ConvOp):
#conv(host_from_gpu) -> host_from_gpu(gpu_conv) #conv(host_from_gpu) -> host_from_gpu(gpu_conv)
img, kern = node.inputs img, kern = node.inputs
img_on_gpu = (img.owner and img.owner.op == host_from_gpu) img_on_gpu = (img.owner and isinstance(img.owner.op, HostFromGpu))
kern_on_gpu = (kern.owner and kern.owner.op == host_from_gpu) kern_on_gpu = (kern.owner and isinstance(kern.owner.op, HostFromGpu))
if img_on_gpu or kern_on_gpu: if img_on_gpu or kern_on_gpu:
gpu_conv = GpuConvOp_from_ConvOp(node.op) gpu_conv = GpuConvOp_from_ConvOp(node.op)
if gpu_conv is None: if gpu_conv is None:
...@@ -1125,7 +1122,7 @@ import theano.tensor.signal.downsample as downsample ...@@ -1125,7 +1122,7 @@ import theano.tensor.signal.downsample as downsample
def local_gpu_downsample_factor_max(node): def local_gpu_downsample_factor_max(node):
if isinstance(node.op, downsample.DownsampleFactorMax): if isinstance(node.op, downsample.DownsampleFactorMax):
x, = node.inputs x, = node.inputs
if (x.owner and x.owner.op == host_from_gpu): if (x.owner and isinstance(x.owner.op, HostFromGpu)):
gpu_ds = GpuDownsampleFactorMax(node.op.ds, node.op.ignore_border) gpu_ds = GpuDownsampleFactorMax(node.op.ds, node.op.ignore_border)
return [host_from_gpu(gpu_ds(x.owner.inputs[0]))] return [host_from_gpu(gpu_ds(x.owner.inputs[0]))]
...@@ -1135,7 +1132,7 @@ def local_gpu_downsample_factor_max(node): ...@@ -1135,7 +1132,7 @@ def local_gpu_downsample_factor_max(node):
def local_gpu_downsample_factor_max_grad(node): def local_gpu_downsample_factor_max_grad(node):
if isinstance(node.op, downsample.DownsampleFactorMaxGrad): if isinstance(node.op, downsample.DownsampleFactorMaxGrad):
x, z, gz = node.inputs x, z, gz = node.inputs
if (x.owner and x.owner.op == host_from_gpu): if (x.owner and isinstance(x.owner.op, HostFromGpu)):
gpu_ds_grad = GpuDownsampleFactorMaxGrad(node.op.ds, gpu_ds_grad = GpuDownsampleFactorMaxGrad(node.op.ds,
node.op.ignore_border) node.op.ignore_border)
return [host_from_gpu(gpu_ds_grad(x.owner.inputs[0], return [host_from_gpu(gpu_ds_grad(x.owner.inputs[0],
...@@ -1187,7 +1184,7 @@ def local_gpu_join(node): ...@@ -1187,7 +1184,7 @@ def local_gpu_join(node):
#print "OPT: axis_and_tensors=", axis_and_tensors #print "OPT: axis_and_tensors=", axis_and_tensors
matches = [(not t.owner is None and t.owner.op == host_from_gpu) or matches = [(not t.owner is None and isinstance(t.owner.op, HostFromGpu)) or
isinstance(t, gof.Constant) for t in axis_and_tensors[1:]] isinstance(t, gof.Constant) for t in axis_and_tensors[1:]]
#print "OPT: matches =", matches #print "OPT: matches =", matches
...@@ -1366,7 +1363,7 @@ def local_gpualloc(node): ...@@ -1366,7 +1363,7 @@ def local_gpualloc(node):
replace = False replace = False
if node.op == tensor.alloc: if node.op == tensor.alloc:
if node.inputs[0].owner and \ if node.inputs[0].owner and \
node.inputs[0].owner.op == host_from_gpu: isinstance(node.inputs[0].owner.op, HostFromGpu):
replace = True replace = True
elif all([c != 'output' and c.op == gpu_from_host elif all([c != 'output' and c.op == gpu_from_host
for c, idx in node.outputs[0].clients]): for c, idx in node.outputs[0].clients]):
...@@ -1427,14 +1424,14 @@ def local_gpu_eye(node): ...@@ -1427,14 +1424,14 @@ def local_gpu_eye(node):
eye(host_from_gpu) -> host_from_gpu(gpueye) eye(host_from_gpu) -> host_from_gpu(gpueye)
""" """
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
if (host_input.owner and if (host_input.owner and
isinstance(host_input.owner.op, tensor.Eye) and isinstance(host_input.owner.op, tensor.Eye) and
host_input.owner.op.dtype == "float32"): host_input.owner.op.dtype == "float32"):
return [gpu_eye(*host_input.owner.inputs)] return [gpu_eye(*host_input.owner.inputs)]
if isinstance(node.op, tensor.Eye) and node.op.dtype == "float32": if isinstance(node.op, tensor.Eye) and node.op.dtype == "float32":
if numpy.any([(i.owner and i.owner.op == host_from_gpu) if numpy.any([(i.owner and isinstance(i.owner.op, HostFromGpu))
for i in node.inputs]): for i in node.inputs]):
return [host_from_gpu(gpu_eye(*node.inputs))] return [host_from_gpu(gpu_eye(*node.inputs))]
return False return False
...@@ -1510,14 +1507,18 @@ def local_gpu_extract_diagonal(node): ...@@ -1510,14 +1507,18 @@ def local_gpu_extract_diagonal(node):
extract_diagonal(host_from_gpu()) -> host_from_gpu(extract_diagonal) extract_diagonal(host_from_gpu()) -> host_from_gpu(extract_diagonal)
gpu_from_host(extract_diagonal) -> extract_diagonal(gpu_from_host) gpu_from_host(extract_diagonal) -> extract_diagonal(gpu_from_host)
""" """
global linalg
if linalg is None:
from theano.sandbox import linalg from theano.sandbox import linalg
linalg = theano.sandbox.linalg
if (isinstance(node.op, linalg.ops.ExtractDiag) and if (isinstance(node.op, linalg.ops.ExtractDiag) and
isinstance(node.inputs[0].type, isinstance(node.inputs[0].type,
theano.tensor.TensorType)): theano.tensor.TensorType)):
inp = node.inputs[0] inp = node.inputs[0]
if inp.owner and isinstance(inp.owner.op, HostFromGpu): if inp.owner and isinstance(inp.owner.op, HostFromGpu):
return [host_from_gpu(linalg.extract_diag(gpu_from_host(inp)))] return [host_from_gpu(linalg.extract_diag(gpu_from_host(inp)))]
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
if (host_input.owner and if (host_input.owner and
isinstance(host_input.owner.op, linalg.ops.ExtractDiag) and isinstance(host_input.owner.op, linalg.ops.ExtractDiag) and
...@@ -1538,7 +1539,7 @@ def gpuScanOptimization(node): ...@@ -1538,7 +1539,7 @@ def gpuScanOptimization(node):
""" """
#gpu_from_host(scan) -> GPUscan(gpu_from_host) #gpu_from_host(scan) -> GPUscan(gpu_from_host)
if node.op == gpu_from_host: if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
if (host_input.owner and if (host_input.owner and
isinstance(host_input.owner.op, scan_op.Scan) and isinstance(host_input.owner.op, scan_op.Scan) and
...@@ -1599,7 +1600,7 @@ def gpuScanOptimization(node): ...@@ -1599,7 +1600,7 @@ def gpuScanOptimization(node):
#scan(host_from_gpu) -> host_from_gpu(GPUscan) #scan(host_from_gpu) -> host_from_gpu(GPUscan)
if (type(node.op) == scan_op.Scan if (type(node.op) == scan_op.Scan
and not node.op.info['gpu']): and not node.op.info['gpu']):
if numpy.any([(i.owner and i.owner.op == host_from_gpu) if numpy.any([(i.owner and isinstance(i.owner.op, HostFromGpu))
for i in node.inputs]): for i in node.inputs]):
thescan = node.op thescan = node.op
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论