提交 4ebe109a 作者: sentient07

Made suggested changes, fixed travis

上级 643f5b1e
...@@ -870,7 +870,7 @@ class GpuAlloc(HideC, Alloc): ...@@ -870,7 +870,7 @@ class GpuAlloc(HideC, Alloc):
return True return True
#Caching GPUAlloc # Caching GPUAlloc
def gpu_alloc(ctx, memset_0=False): def gpu_alloc(ctx, memset_0=False):
key = (ctx, memset_0) key = (ctx, memset_0)
if key not in gpu_alloc.cache: if key not in gpu_alloc.cache:
......
...@@ -1427,10 +1427,10 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase): ...@@ -1427,10 +1427,10 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
return Apply(self, [dy, sm], [sm.type()]) return Apply(self, [dy, sm], [sm.type()])
@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights, @op_lifter([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs]) AbstractConv2d_gradInputs])
@register_opt2([AbstractConv2d, AbstractConv2d_gradWeights, @register_opt2([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs], 'fast_compile') AbstractConv2d_gradInputs], 'conv_dnn', 'cudnn', 'gpuarray', 'fast_compile')
def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
if (not isinstance(op, (AbstractConv2d, if (not isinstance(op, (AbstractConv2d,
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
......
...@@ -474,4 +474,3 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -474,4 +474,3 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
def use_gpu_images2neibs(op, context_name, inputs): def use_gpu_images2neibs(op, context_name, inputs):
if op.mode in ['valid', 'ignore_borders', 'wrap_centered']: if op.mode in ['valid', 'ignore_borders', 'wrap_centered']:
return GpuImages2Neibs(op.mode) return GpuImages2Neibs(op.mode)
...@@ -305,12 +305,12 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -305,12 +305,12 @@ class GraphToGPU(NavigatorOptimizer):
for node in topo: for node in topo:
if isinstance(node.op, HostFromGpu): if isinstance(node.op, HostFromGpu):
mapping[node.outputs[0]] = node.inputs[0] mapping[node.outputs[0]] = mapping[node.inputs[0]]
continue continue
# Move only if any of the inputs are on the GPU. # Move only if any of the inputs are on the GPU.
move_to_GPU = False move_to_GPU = False
from .type import GpuArrayVariable, GpuArraySharedVariable #when you uncomment from .type import GpuArrayVariable, GpuArraySharedVariable
if any([isinstance(i, GpuArrayVariable) or if any([isinstance(i, GpuArrayVariable) or
isinstance(i, GpuArraySharedVariable) isinstance(i, GpuArraySharedVariable)
for i in [mapping[v] for v in node.inputs] + for i in [mapping[v] for v in node.inputs] +
...@@ -364,14 +364,8 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -364,14 +364,8 @@ class GraphToGPU(NavigatorOptimizer):
elif isinstance(new_ops, (tuple, list)): elif isinstance(new_ops, (tuple, list)):
outputs = [] outputs = []
for o in new_ops: for o in new_ops:
if o.owner and isinstance(o.owner.op, HostFromGpu):
outputs.append(o.owner.inputs[0])
else:
outputs.append(o) outputs.append(o)
elif isinstance(new_ops, theano.Variable): elif isinstance(new_ops, theano.Variable):
if new_ops.owner and isinstance(new_ops.owner.op, HostFromGpu):
outputs = new_ops.owner.inputs
else:
outputs = [new_ops] outputs = [new_ops]
else: else:
outputs = new_ops(*[mapping[i] for i in node.inputs], outputs = new_ops(*[mapping[i] for i in node.inputs],
...@@ -427,9 +421,9 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -427,9 +421,9 @@ class GraphToGPU(NavigatorOptimizer):
for s in list(set(old_not_transferred)): for s in list(set(old_not_transferred)):
print(blanc, 'Nodes not transferred by old opt : ' + str(s), file=stream) print(blanc, 'Nodes not transferred by old opt : ' + str(s), file=stream)
for n in list(set(new_not_transferred)): for n in list(set(new_not_transferred)):
print(blanc, 'Nodes not transferred by new optimizer : ' +str(n), file=stream) print(blanc, 'Nodes not transferred by new optimizer : ' + str(n), file=stream)
for d in list(set(set(new_not_transferred) - set(old_not_transferred))): for d in list(set(set(new_not_transferred) - set(old_not_transferred))):
print(blanc, 'Not transferred difference : ' , str(d), file=stream) print(blanc, 'Not transferred difference : ', str(d), file=stream)
for o, count in iteritems(process_count): for o, count in iteritems(process_count):
if count > 0: if count > 0:
...@@ -592,7 +586,7 @@ def local_gpuaallocempty(op, context_name, inputs, outputs): ...@@ -592,7 +586,7 @@ def local_gpuaallocempty(op, context_name, inputs, outputs):
# We use _props_dict() to make sure that the GPU op know all the # We use _props_dict() to make sure that the GPU op know all the
# CPU op props. # CPU op props.
dtype = op._props_dict().get('dtype') dtype = op._props_dict().get('dtype')
return gpu_alloc_empty(dtype,context_name)(*inputs) return gpu_alloc_empty(dtype, context_name)(*inputs)
@register_opt() @register_opt()
...@@ -889,11 +883,12 @@ def local_gpua_join(op, context_name, inputs, outputs): ...@@ -889,11 +883,12 @@ def local_gpua_join(op, context_name, inputs, outputs):
@register_opt('fast_compile') @register_opt('fast_compile')
@local_optimizer([GpuJoin]) @local_optimizer([GpuJoin])
def local_gpuajoin_1(node): @register_opt2([GpuJoin], 'fast_compile')
def local_gpuajoin_1(op, context_name, inputs, outputs):
# join of a single element # join of a single element
if (isinstance(node.op, GpuJoin) and if (isinstance(op, GpuJoin) and
len(node.inputs) == 2): len(inputs) == 2):
return [node.inputs[1]] return [inputs[1]]
@register_opt('fast_compile') @register_opt('fast_compile')
...@@ -1311,7 +1306,7 @@ def local_lift_abstractconv2d(op, context_name, inputs, outputs): ...@@ -1311,7 +1306,7 @@ def local_lift_abstractconv2d(op, context_name, inputs, outputs):
register_opt('fast_compile')(conv_groupopt) register_opt('fast_compile')(conv_groupopt)
@register_opt("low_memory") @register_opt("low_memory", 'fast_compile')
@local_optimizer([GpuCAReduceCuda]) @local_optimizer([GpuCAReduceCuda])
def local_gpu_elemwise_careduce(node): def local_gpu_elemwise_careduce(node):
""" """
......
...@@ -1553,7 +1553,7 @@ class MRG_RandomStreams(object): ...@@ -1553,7 +1553,7 @@ class MRG_RandomStreams(object):
@register_opt2([mrg_uniform], 'fast_compile') @register_opt2([mrg_uniform], 'fast_compile')
def local_gpua_mrg(op, context_name, inputs, outputs): def local_gpua_mrg1(op, context_name, inputs, outputs):
if (type(op) == mrg_uniform and if (type(op) == mrg_uniform and
isinstance(inputs[0].type, GpuArrayType)): isinstance(inputs[0].type, GpuArrayType)):
outs = GPUA_mrg_uniform.new(inputs[0], outs = GPUA_mrg_uniform.new(inputs[0],
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论