提交 4ebe109a 作者:sentient07

Made suggested changes, fixed travis

上级 643f5b1e
......@@ -870,7 +870,7 @@ class GpuAlloc(HideC, Alloc):
return True
#Caching GPUAlloc
# Caching GPUAlloc
def gpu_alloc(ctx, memset_0=False):
key = (ctx, memset_0)
if key not in gpu_alloc.cache:
......
......@@ -1427,10 +1427,10 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
return Apply(self, [dy, sm], [sm.type()])
@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights,
@op_lifter([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs])
@register_opt2([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs], 'fast_compile')
AbstractConv2d_gradInputs], 'conv_dnn', 'cudnn', 'gpuarray', 'fast_compile')
def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
if (not isinstance(op, (AbstractConv2d,
AbstractConv2d_gradWeights,
......
......@@ -474,4 +474,3 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
def use_gpu_images2neibs(op, context_name, inputs):
if op.mode in ['valid', 'ignore_borders', 'wrap_centered']:
return GpuImages2Neibs(op.mode)
......@@ -305,12 +305,12 @@ class GraphToGPU(NavigatorOptimizer):
for node in topo:
if isinstance(node.op, HostFromGpu):
mapping[node.outputs[0]] = node.inputs[0]
mapping[node.outputs[0]] = mapping[node.inputs[0]]
continue
# Move only if any of the inputs are on the GPU.
move_to_GPU = False
from .type import GpuArrayVariable, GpuArraySharedVariable #when you uncomment
from .type import GpuArrayVariable, GpuArraySharedVariable
if any([isinstance(i, GpuArrayVariable) or
isinstance(i, GpuArraySharedVariable)
for i in [mapping[v] for v in node.inputs] +
......@@ -364,14 +364,8 @@ class GraphToGPU(NavigatorOptimizer):
elif isinstance(new_ops, (tuple, list)):
outputs = []
for o in new_ops:
if o.owner and isinstance(o.owner.op, HostFromGpu):
outputs.append(o.owner.inputs[0])
else:
outputs.append(o)
elif isinstance(new_ops, theano.Variable):
if new_ops.owner and isinstance(new_ops.owner.op, HostFromGpu):
outputs = new_ops.owner.inputs
else:
outputs = [new_ops]
else:
outputs = new_ops(*[mapping[i] for i in node.inputs],
......@@ -427,9 +421,9 @@ class GraphToGPU(NavigatorOptimizer):
for s in list(set(old_not_transferred)):
print(blanc, 'Nodes not transferred by old opt : ' + str(s), file=stream)
for n in list(set(new_not_transferred)):
print(blanc, 'Nodes not transferred by new optimizer : ' +str(n), file=stream)
print(blanc, 'Nodes not transferred by new optimizer : ' + str(n), file=stream)
for d in list(set(set(new_not_transferred) - set(old_not_transferred))):
print(blanc, 'Not transferred difference : ' , str(d), file=stream)
print(blanc, 'Not transferred difference : ', str(d), file=stream)
for o, count in iteritems(process_count):
if count > 0:
......@@ -592,7 +586,7 @@ def local_gpuaallocempty(op, context_name, inputs, outputs):
# We use _props_dict() to make sure that the GPU op know all the
# CPU op props.
dtype = op._props_dict().get('dtype')
return gpu_alloc_empty(dtype,context_name)(*inputs)
return gpu_alloc_empty(dtype, context_name)(*inputs)
@register_opt()
......@@ -889,11 +883,12 @@ def local_gpua_join(op, context_name, inputs, outputs):
@register_opt('fast_compile')
@local_optimizer([GpuJoin])
def local_gpuajoin_1(node):
@register_opt2([GpuJoin], 'fast_compile')
def local_gpuajoin_1(op, context_name, inputs, outputs):
# join of a single element
if (isinstance(node.op, GpuJoin) and
len(node.inputs) == 2):
return [node.inputs[1]]
if (isinstance(op, GpuJoin) and
len(inputs) == 2):
return [inputs[1]]
@register_opt('fast_compile')
......@@ -1311,7 +1306,7 @@ def local_lift_abstractconv2d(op, context_name, inputs, outputs):
register_opt('fast_compile')(conv_groupopt)
@register_opt("low_memory")
@register_opt("low_memory", 'fast_compile')
@local_optimizer([GpuCAReduceCuda])
def local_gpu_elemwise_careduce(node):
"""
......
......@@ -1553,7 +1553,7 @@ class MRG_RandomStreams(object):
@register_opt2([mrg_uniform], 'fast_compile')
def local_gpua_mrg(op, context_name, inputs, outputs):
def local_gpua_mrg1(op, context_name, inputs, outputs):
if (type(op) == mrg_uniform and
isinstance(inputs[0].type, GpuArrayType)):
outs = GPUA_mrg_uniform.new(inputs[0],
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论