提交 8ea065e6 authored 作者: sentient07's avatar sentient07

Made the stricter optimization consistent

上级 8dfe6847
......@@ -23,7 +23,7 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
from theano.tensor.signal.pool import (
Pool, MaxPoolGrad, AveragePoolGrad)
from . import pygpu
from .type import get_context, gpu_context_type, list_contexts, GpuArrayType
from .type import get_context, gpu_context_type, list_contexts
from .basic_ops import (as_gpuarray_variable, infer_context_name,
gpu_contiguous, gpu_alloc_empty,
empty_like)
......@@ -1428,21 +1428,18 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
@register_opt2([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs], 'conv_dnn', 'cudnn', 'gpuarray', 'fast_compile')
AbstractConv2d_gradInputs], 'fast_compile')
def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
if (not isinstance(op, (AbstractConv2d,
AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs))):
return None
inp1 = inputs[0]
inp2 = inputs[1]
return
if (node.op.filter_dilation != (1, 1)):
if (op.filter_dilation != (1, 1)):
return None
if not isinstance(inp1.type, GpuArrayType):
return None
inp1 = as_gpuarray_variable(inputs[0], context_name)
inp2 = as_gpuarray_variable(inputs[1], context_name)
if not dnn_available(inp1.type.context_name):
raise_no_cudnn()
......
......@@ -30,7 +30,7 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
from theano.tests.breakpoint import PdbBreakpoint
from .type import (GpuArrayType, GpuArrayConstant, get_context,
ContextNotDefined, GpuArrayVariable, GpuArraySharedVariable)
ContextNotDefined)
from .basic_ops import (as_gpuarray_variable, infer_context_name,
host_from_gpu, GpuToGpu,
HostFromGpu, GpuFromHost,
......@@ -291,10 +291,9 @@ class GraphToGPU(NavigatorOptimizer):
target = infer_context_name(*fgraph.inputs)
for i in fgraph.inputs:
# Do not move *int* scalar to the GPU.
target = getattr(i.tag, 'target', None)
if (isinstance(i.type, tensor.TensorType) and
(i.ndim > 0 or 'int' not in i.dtype)):
mapping[i] = i.transfer(target)
mapping[i] = as_gpuarray_variable(i, target)
else:
mapping[i] = i
for i in fgraph.variables:
......@@ -316,12 +315,6 @@ class GraphToGPU(NavigatorOptimizer):
# Move only if any of the inputs are on the GPU.
move_to_GPU = False
if any([isinstance(i, GpuArrayVariable) or
isinstance(i, GpuArraySharedVariable)
for i in [mapping[v] for v in node.inputs] +
node.outputs]):
move_to_GPU = True
context_name = None
for i in [mapping[i] for i in node.inputs]:
......@@ -346,20 +339,20 @@ class GraphToGPU(NavigatorOptimizer):
new_ops = None
outputs = []
# Apply the lifter
for lopt in (self.local_optimizers_map.get(node.op, []) +
self.local_optimizers_map.get(type(node.op), []) +
self.local_optimizers_all):
if move_to_GPU:
t_opt = time.time()
new_ops = lopt.transform(node.op, context_name,
[mapping[i] for i in node.inputs],
node.outputs)
t_opt2 = time.time()
time_opts[lopt] += t_opt2 - t_opt
if new_ops:
process_count[lopt] += 1
break
if move_to_GPU:
for lopt in (self.local_optimizers_map.get(node.op, []) +
self.local_optimizers_map.get(type(node.op), []) +
self.local_optimizers_all):
t_opt = time.time()
new_ops = lopt.transform(node.op, context_name,
[mapping[i] for i in node.inputs],
node.outputs)
t_opt2 = time.time()
time_opts[lopt] += t_opt2 - t_opt
if new_ops:
process_count[lopt] += 1
break
if not new_ops:
newnode = node.clone_with_new_inputs([mapping.get(i)
for i in node.inputs])
......@@ -754,7 +747,7 @@ def local_gpua_dimshuffle(op, context_name, inputs, outputs):
@register_opt('fast_compile')
@op_lifter([tensor.SpecifyShape])
@register_opt2([tensor.SpecifyShape], 'fast_compile')
# @register_opt2([tensor.SpecifyShape], 'fast_compile')
def local_gpua_specifyShape(op, context_name, inputs, outputs):
if isinstance(inputs[0].type, GpuArrayType):
return
......@@ -763,9 +756,15 @@ def local_gpua_specifyShape(op, context_name, inputs, outputs):
return tensor.specify_shape(*inp)
@register_opt2([tensor.SpecifyShape], 'fast_compile')
def local_gpua_specifyShape_graph(op, context_name, inputs, outputs):
    """Rebuild a SpecifyShape whose first input is a GPU array variable.

    Registered through ``register_opt2`` for ``tensor.SpecifyShape`` under
    the 'fast_compile' tag.

    The first entry of ``inputs`` is passed through
    ``as_gpuarray_variable`` for ``context_name``; every remaining entry is
    forwarded untouched to ``tensor.specify_shape``.  ``op`` and
    ``outputs`` are not used.
    """
    data, shape_args = inputs[0], inputs[1:]
    gpu_data = as_gpuarray_variable(data, context_name)
    return tensor.specify_shape(gpu_data, *shape_args)
@register_opt('fast_compile')
@op_lifter([theano.compile.ops.Shape])
@register_opt2([tensor.compile.ops.Shape], 'fast_compile')
def local_gpua_shape(op, context_name, inputs, outputs):
# op_lifter will call this opt too frequently as the output is
# always on the CPU.
......@@ -774,6 +773,13 @@ def local_gpua_shape(op, context_name, inputs, outputs):
return [as_gpuarray_variable(inputs[0], context_name).shape]
@register_opt2([tensor.compile.ops.Shape], 'fast_compile')
def local_gpua_shape_graph(op, context_name, inputs, outputs):
    """Return the shape of the first input taken as a GPU array variable.

    Registered through ``register_opt2`` for ``tensor.compile.ops.Shape``
    under the 'fast_compile' tag.

    The first entry of ``inputs`` is converted with
    ``as_gpuarray_variable`` for ``context_name`` and its ``shape``
    attribute is returned in a one-element list.  ``op`` and ``outputs``
    are not used.
    """
    gpu_input = as_gpuarray_variable(inputs[0], context_name)
    return [gpu_input.shape]
def gpu_print_wrapper(op, cnda):
    """Forward ``cnda`` to the wrapped op's ``global_fn`` as a numpy array.

    ``op.old_op.global_fn`` is called with ``op.old_op`` as its first
    argument and ``numpy.asarray(cnda)`` as its second.  Nothing is
    returned.
    """
    wrapped = op.old_op
    host_value = numpy.asarray(cnda)
    wrapped.global_fn(wrapped, host_value)
......@@ -863,15 +869,10 @@ def local_gpu_pdbbreakpoint_op(node):
def local_gpua_lazy_ifelse(op, context_name, inputs, outputs):
if op.gpu:
return
# this node is already on GPU, so don't change the graph
if isinstance(inputs[0].type, GpuArrayType):
return
c = inputs[0]
inps = []
for v in inputs[1:]:
if isinstance(v.type, GpuArrayType):
return
elif isinstance(v.type, tensor.TensorType):
if isinstance(v.type, tensor.TensorType):
inps.append(as_gpuarray_variable(v, context_name))
else:
inps.append(v)
......@@ -1230,15 +1231,19 @@ def local_gpua_softmaxwithbias(op, context_name, inputs, outputs):
@register_opt('fast_compile')
@op_lifter([theano.tensor.opt.Assert])
@register_opt2([theano.tensor.opt.Assert], 'fast_compile')
def local_assert(op, context_name, inputs, outputs):
# Check if input nodes are already on the GPU
if isinstance(inputs[0].type, GpuArrayType):
return
return [op(as_gpuarray_variable(inputs[0], context_name),
*inputs[1:])]
@register_opt2([theano.tensor.opt.Assert], 'fast_compile')
def local_assert_graph(op, context_name, inputs, outputs):
    """Re-apply an Assert op with its first input as a GPU array variable.

    Registered through ``register_opt2`` for ``theano.tensor.opt.Assert``
    under the 'fast_compile' tag.

    The first entry of ``inputs`` is converted with
    ``as_gpuarray_variable`` for ``context_name``; the remaining entries
    are passed to ``op`` unchanged.  The result of calling ``op`` is
    returned in a one-element list.  ``outputs`` is not used.
    """
    value, conditions = inputs[0], inputs[1:]
    gpu_value = as_gpuarray_variable(value, context_name)
    return [op(gpu_value, *conditions)]
@register_opt('fast_compile')
@op_lifter([ConvOp])
@register_opt2([ConvOp], 'fast_compile')
......@@ -1286,15 +1291,12 @@ def local_inplace_sparseblockouter(node):
# This deals with any abstract convs that have a transfer somewhere
@register_opt('fast_compile')
@register_opt('fast_compile', 'conv_dnn')
@op_lifter([AbstractConv2d,
AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs])
@register_opt2([AbstractConv2d,
AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs], 'fast_compile')
def local_lift_abstractconv2d(op, context_name, inputs, outputs):
if isinstance(inputs[0].type, GpuArrayType):
if isinstance(outputs[0].type, GpuArrayType):
# Don't handle this node here, it's already on the GPU.
return
inps = list(inputs)
......@@ -1304,6 +1306,18 @@ def local_lift_abstractconv2d(op, context_name, inputs, outputs):
context_name=context_name)
return [op(*inps)]
@register_opt2([AbstractConv2d,
                AbstractConv2d_gradWeights,
                AbstractConv2d_gradInputs], 'fast_compile')
def local_lift_abstractconv2d_graph(op, context_name, inputs, outputs):
    """Re-apply an abstract conv op with its first two inputs on the GPU.

    Registered through ``register_opt2`` for ``AbstractConv2d``,
    ``AbstractConv2d_gradWeights`` and ``AbstractConv2d_gradInputs`` under
    the 'fast_compile' tag.

    ``inputs[0]`` and ``inputs[1]`` are each converted with
    ``as_gpuarray_variable`` for ``context_name``; any further inputs are
    handed to ``op`` as they are.  The result of calling ``op`` is returned
    in a one-element list.  ``outputs`` is not used.
    """
    # Index explicitly (rather than slicing) so that a too-short input list
    # fails exactly where it always did.
    first = as_gpuarray_variable(inputs[0], context_name=context_name)
    second = as_gpuarray_variable(inputs[1], context_name=context_name)
    return [op(first, second, *inputs[2:])]
# Register this here so that it goes after the abstract lifting
register_opt('fast_compile')(conv_groupopt)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论