提交 8ea065e6，作者：sentient07

Made the stricter optimization consistent

上级 8dfe6847
...@@ -23,7 +23,7 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d, ...@@ -23,7 +23,7 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
from theano.tensor.signal.pool import ( from theano.tensor.signal.pool import (
Pool, MaxPoolGrad, AveragePoolGrad) Pool, MaxPoolGrad, AveragePoolGrad)
from . import pygpu from . import pygpu
from .type import get_context, gpu_context_type, list_contexts, GpuArrayType from .type import get_context, gpu_context_type, list_contexts
from .basic_ops import (as_gpuarray_variable, infer_context_name, from .basic_ops import (as_gpuarray_variable, infer_context_name,
gpu_contiguous, gpu_alloc_empty, gpu_contiguous, gpu_alloc_empty,
empty_like) empty_like)
...@@ -1428,21 +1428,18 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase): ...@@ -1428,21 +1428,18 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
@register_opt2([AbstractConv2d, AbstractConv2d_gradWeights, @register_opt2([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs], 'conv_dnn', 'cudnn', 'gpuarray', 'fast_compile') AbstractConv2d_gradInputs], 'fast_compile')
def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
if (not isinstance(op, (AbstractConv2d, if (not isinstance(op, (AbstractConv2d,
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs))): AbstractConv2d_gradInputs))):
return None return
inp1 = inputs[0]
inp2 = inputs[1]
if (node.op.filter_dilation != (1, 1)): if (op.filter_dilation != (1, 1)):
return None return None
if not isinstance(inp1.type, GpuArrayType): inp1 = as_gpuarray_variable(inputs[0], context_name)
return None inp2 = as_gpuarray_variable(inputs[1], context_name)
if not dnn_available(inp1.type.context_name): if not dnn_available(inp1.type.context_name):
raise_no_cudnn() raise_no_cudnn()
......
...@@ -30,7 +30,7 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d, ...@@ -30,7 +30,7 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
from theano.tests.breakpoint import PdbBreakpoint from theano.tests.breakpoint import PdbBreakpoint
from .type import (GpuArrayType, GpuArrayConstant, get_context, from .type import (GpuArrayType, GpuArrayConstant, get_context,
ContextNotDefined, GpuArrayVariable, GpuArraySharedVariable) ContextNotDefined)
from .basic_ops import (as_gpuarray_variable, infer_context_name, from .basic_ops import (as_gpuarray_variable, infer_context_name,
host_from_gpu, GpuToGpu, host_from_gpu, GpuToGpu,
HostFromGpu, GpuFromHost, HostFromGpu, GpuFromHost,
...@@ -291,10 +291,9 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -291,10 +291,9 @@ class GraphToGPU(NavigatorOptimizer):
target = infer_context_name(*fgraph.inputs) target = infer_context_name(*fgraph.inputs)
for i in fgraph.inputs: for i in fgraph.inputs:
# Do not move *int* scalar to the GPU. # Do not move *int* scalar to the GPU.
target = getattr(i.tag, 'target', None)
if (isinstance(i.type, tensor.TensorType) and if (isinstance(i.type, tensor.TensorType) and
(i.ndim > 0 or 'int' not in i.dtype)): (i.ndim > 0 or 'int' not in i.dtype)):
mapping[i] = i.transfer(target) mapping[i] = as_gpuarray_variable(i, target)
else: else:
mapping[i] = i mapping[i] = i
for i in fgraph.variables: for i in fgraph.variables:
...@@ -316,12 +315,6 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -316,12 +315,6 @@ class GraphToGPU(NavigatorOptimizer):
# Move only if any of the inputs are on the GPU. # Move only if any of the inputs are on the GPU.
move_to_GPU = False move_to_GPU = False
if any([isinstance(i, GpuArrayVariable) or
isinstance(i, GpuArraySharedVariable)
for i in [mapping[v] for v in node.inputs] +
node.outputs]):
move_to_GPU = True
context_name = None context_name = None
for i in [mapping[i] for i in node.inputs]: for i in [mapping[i] for i in node.inputs]:
...@@ -346,20 +339,20 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -346,20 +339,20 @@ class GraphToGPU(NavigatorOptimizer):
new_ops = None new_ops = None
outputs = [] outputs = []
# Apply the lifter # Apply the lifter
for lopt in (self.local_optimizers_map.get(node.op, []) + if move_to_GPU:
self.local_optimizers_map.get(type(node.op), []) + for lopt in (self.local_optimizers_map.get(node.op, []) +
self.local_optimizers_all): self.local_optimizers_map.get(type(node.op), []) +
if move_to_GPU: self.local_optimizers_all):
t_opt = time.time() t_opt = time.time()
new_ops = lopt.transform(node.op, context_name, new_ops = lopt.transform(node.op, context_name,
[mapping[i] for i in node.inputs], [mapping[i] for i in node.inputs],
node.outputs) node.outputs)
t_opt2 = time.time() t_opt2 = time.time()
time_opts[lopt] += t_opt2 - t_opt time_opts[lopt] += t_opt2 - t_opt
if new_ops: if new_ops:
process_count[lopt] += 1 process_count[lopt] += 1
break break
if not new_ops: if not new_ops:
newnode = node.clone_with_new_inputs([mapping.get(i) newnode = node.clone_with_new_inputs([mapping.get(i)
for i in node.inputs]) for i in node.inputs])
...@@ -754,7 +747,7 @@ def local_gpua_dimshuffle(op, context_name, inputs, outputs): ...@@ -754,7 +747,7 @@ def local_gpua_dimshuffle(op, context_name, inputs, outputs):
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.SpecifyShape]) @op_lifter([tensor.SpecifyShape])
@register_opt2([tensor.SpecifyShape], 'fast_compile') # @register_opt2([tensor.SpecifyShape], 'fast_compile')
def local_gpua_specifyShape(op, context_name, inputs, outputs): def local_gpua_specifyShape(op, context_name, inputs, outputs):
if isinstance(inputs[0].type, GpuArrayType): if isinstance(inputs[0].type, GpuArrayType):
return return
...@@ -763,9 +756,15 @@ def local_gpua_specifyShape(op, context_name, inputs, outputs): ...@@ -763,9 +756,15 @@ def local_gpua_specifyShape(op, context_name, inputs, outputs):
return tensor.specify_shape(*inp) return tensor.specify_shape(*inp)
@register_opt2([tensor.SpecifyShape], 'fast_compile')
def local_gpua_specifyShape_graph(op, context_name, inputs, outputs):
inp = [as_gpuarray_variable(inputs[0], context_name)]
inp += inputs[1:]
return tensor.specify_shape(*inp)
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([theano.compile.ops.Shape]) @op_lifter([theano.compile.ops.Shape])
@register_opt2([tensor.compile.ops.Shape], 'fast_compile')
def local_gpua_shape(op, context_name, inputs, outputs): def local_gpua_shape(op, context_name, inputs, outputs):
# op_lifter will call this opt too frequently as the output is # op_lifter will call this opt too frequently as the output is
# always on the CPU. # always on the CPU.
...@@ -774,6 +773,13 @@ def local_gpua_shape(op, context_name, inputs, outputs): ...@@ -774,6 +773,13 @@ def local_gpua_shape(op, context_name, inputs, outputs):
return [as_gpuarray_variable(inputs[0], context_name).shape] return [as_gpuarray_variable(inputs[0], context_name).shape]
@register_opt2([tensor.compile.ops.Shape], 'fast_compile')
def local_gpua_shape_graph(op, context_name, inputs, outputs):
# op_lifter will call this opt too frequently as the output is
# always on the CPU.
return [as_gpuarray_variable(inputs[0], context_name).shape]
def gpu_print_wrapper(op, cnda): def gpu_print_wrapper(op, cnda):
op.old_op.global_fn(op.old_op, numpy.asarray(cnda)) op.old_op.global_fn(op.old_op, numpy.asarray(cnda))
...@@ -863,15 +869,10 @@ def local_gpu_pdbbreakpoint_op(node): ...@@ -863,15 +869,10 @@ def local_gpu_pdbbreakpoint_op(node):
def local_gpua_lazy_ifelse(op, context_name, inputs, outputs): def local_gpua_lazy_ifelse(op, context_name, inputs, outputs):
if op.gpu: if op.gpu:
return return
# this node is already on GPU, so don't change the graph
if isinstance(inputs[0].type, GpuArrayType):
return
c = inputs[0] c = inputs[0]
inps = [] inps = []
for v in inputs[1:]: for v in inputs[1:]:
if isinstance(v.type, GpuArrayType): if isinstance(v.type, tensor.TensorType):
return
elif isinstance(v.type, tensor.TensorType):
inps.append(as_gpuarray_variable(v, context_name)) inps.append(as_gpuarray_variable(v, context_name))
else: else:
inps.append(v) inps.append(v)
...@@ -1230,15 +1231,19 @@ def local_gpua_softmaxwithbias(op, context_name, inputs, outputs): ...@@ -1230,15 +1231,19 @@ def local_gpua_softmaxwithbias(op, context_name, inputs, outputs):
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([theano.tensor.opt.Assert]) @op_lifter([theano.tensor.opt.Assert])
@register_opt2([theano.tensor.opt.Assert], 'fast_compile')
def local_assert(op, context_name, inputs, outputs): def local_assert(op, context_name, inputs, outputs):
# Check if input nodes are already on the GPU
if isinstance(inputs[0].type, GpuArrayType): if isinstance(inputs[0].type, GpuArrayType):
return return
return [op(as_gpuarray_variable(inputs[0], context_name), return [op(as_gpuarray_variable(inputs[0], context_name),
*inputs[1:])] *inputs[1:])]
@register_opt2([theano.tensor.opt.Assert], 'fast_compile')
def local_assert_graph(op, context_name, inputs, outputs):
return [op(as_gpuarray_variable(inputs[0], context_name),
*inputs[1:])]
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([ConvOp]) @op_lifter([ConvOp])
@register_opt2([ConvOp], 'fast_compile') @register_opt2([ConvOp], 'fast_compile')
...@@ -1286,15 +1291,12 @@ def local_inplace_sparseblockouter(node): ...@@ -1286,15 +1291,12 @@ def local_inplace_sparseblockouter(node):
# This deals with any abstract convs that have a transfer somewhere # This deals with any abstract convs that have a transfer somewhere
@register_opt('fast_compile') @register_opt('fast_compile', 'conv_dnn')
@op_lifter([AbstractConv2d, @op_lifter([AbstractConv2d,
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs]) AbstractConv2d_gradInputs])
@register_opt2([AbstractConv2d,
AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs], 'fast_compile')
def local_lift_abstractconv2d(op, context_name, inputs, outputs): def local_lift_abstractconv2d(op, context_name, inputs, outputs):
if isinstance(inputs[0].type, GpuArrayType): if isinstance(outputs[0].type, GpuArrayType):
# Don't handle this node here, it's already on the GPU. # Don't handle this node here, it's already on the GPU.
return return
inps = list(inputs) inps = list(inputs)
...@@ -1304,6 +1306,18 @@ def local_lift_abstractconv2d(op, context_name, inputs, outputs): ...@@ -1304,6 +1306,18 @@ def local_lift_abstractconv2d(op, context_name, inputs, outputs):
context_name=context_name) context_name=context_name)
return [op(*inps)] return [op(*inps)]
@register_opt2([AbstractConv2d,
AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs], 'fast_compile')
def local_lift_abstractconv2d_graph(op, context_name, inputs, outputs):
inps = list(inputs)
inps[0] = as_gpuarray_variable(inputs[0],
context_name=context_name)
inps[1] = as_gpuarray_variable(inputs[1],
context_name=context_name)
return [op(*inps)]
# Register this here so that it goes after the abstract lifting # Register this here so that it goes after the abstract lifting
register_opt('fast_compile')(conv_groupopt) register_opt('fast_compile')(conv_groupopt)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论