提交 4ebe109a，作者：sentient07

Made suggested changes, fixed travis

上级 643f5b1e
......@@ -870,7 +870,7 @@ class GpuAlloc(HideC, Alloc):
return True
#Caching GPUAlloc
# Caching GPUAlloc
def gpu_alloc(ctx, memset_0=False):
key = (ctx, memset_0)
if key not in gpu_alloc.cache:
......
......@@ -968,7 +968,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns)
desc = gpu_dnn_conv_desc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode, precision=precision)(kerns.shape)
conv_mode=conv_mode, precision=precision)(kerns.shape)
desc_op = desc.owner.op
# We can use Shape_i and bypass the infer_shape here as this is on
# the input of node and it will always be present.
......@@ -990,7 +990,7 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
topgrad = gpu_contiguous(topgrad)
kerns_shp = as_tensor_variable(kerns_shp)
desc = gpu_dnn_conv_desc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(kerns_shp)
conv_mode=conv_mode)(kerns_shp)
out = gpu_alloc_empty(img.dtype, ctx_name)(*kerns_shp)
return gpu_dnn_conv_gradW()(img, topgrad, out, desc)
......@@ -1004,7 +1004,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
topgrad = gpu_contiguous(topgrad)
img_shp = as_tensor_variable(img_shp)
desc = gpu_dnn_conv_desc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(kerns.shape)
conv_mode=conv_mode)(kerns.shape)
out = gpu_alloc_empty(kerns.dtype, ctx_name)(*img_shp)
return gpu_dnn_conv_gradI()(kerns, topgrad, out, desc)
......@@ -1427,10 +1427,10 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
return Apply(self, [dy, sm], [sm.type()])
@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs])
@op_lifter([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs])
@register_opt2([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs], 'fast_compile')
AbstractConv2d_gradInputs], 'conv_dnn', 'cudnn', 'gpuarray', 'fast_compile')
def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
if (not isinstance(op, (AbstractConv2d,
AbstractConv2d_gradWeights,
......
......@@ -474,4 +474,3 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
def use_gpu_images2neibs(op, context_name, inputs):
if op.mode in ['valid', 'ignore_borders', 'wrap_centered']:
return GpuImages2Neibs(op.mode)
......@@ -285,9 +285,9 @@ class GraphToGPU(NavigatorOptimizer):
for i in fgraph.inputs:
# Do not move *int* scalar to the GPU.
target = getattr(i.tag, 'target', None)
if (target != 'cpu' and
isinstance(i.type, tensor.TensorType) and
(i.ndim > 0 or 'int' not in i.dtype)):
if (target != 'cpu' and
isinstance(i.type, tensor.TensorType) and
(i.ndim > 0 or 'int' not in i.dtype)):
mapping[i] = as_gpuarray_variable(i, target)
else:
mapping[i] = i
......@@ -305,12 +305,12 @@ class GraphToGPU(NavigatorOptimizer):
for node in topo:
if isinstance(node.op, HostFromGpu):
mapping[node.outputs[0]] = node.inputs[0]
mapping[node.outputs[0]] = mapping[node.inputs[0]]
continue
# Move only if any of the inputs are on the GPU.
move_to_GPU = False
from .type import GpuArrayVariable, GpuArraySharedVariable #when you uncomment
from .type import GpuArrayVariable, GpuArraySharedVariable
if any([isinstance(i, GpuArrayVariable) or
isinstance(i, GpuArraySharedVariable)
for i in [mapping[v] for v in node.inputs] +
......@@ -364,15 +364,9 @@ class GraphToGPU(NavigatorOptimizer):
elif isinstance(new_ops, (tuple, list)):
outputs = []
for o in new_ops:
if o.owner and isinstance(o.owner.op, HostFromGpu):
outputs.append(o.owner.inputs[0])
else:
outputs.append(o)
outputs.append(o)
elif isinstance(new_ops, theano.Variable):
if new_ops.owner and isinstance(new_ops.owner.op, HostFromGpu):
outputs = new_ops.owner.inputs
else:
outputs = [new_ops]
outputs = [new_ops]
else:
outputs = new_ops(*[mapping[i] for i in node.inputs],
return_list=True)
......@@ -427,9 +421,9 @@ class GraphToGPU(NavigatorOptimizer):
for s in list(set(old_not_transferred)):
print(blanc, 'Nodes not transferred by old opt : ' + str(s), file=stream)
for n in list(set(new_not_transferred)):
print(blanc, 'Nodes not transferred by new optimizer : ' +str(n), file=stream)
print(blanc, 'Nodes not transferred by new optimizer : ' + str(n), file=stream)
for d in list(set(set(new_not_transferred) - set(old_not_transferred))):
print(blanc, 'Not transferred difference : ' , str(d), file=stream)
print(blanc, 'Not transferred difference : ', str(d), file=stream)
for o, count in iteritems(process_count):
if count > 0:
......@@ -592,7 +586,7 @@ def local_gpuaallocempty(op, context_name, inputs, outputs):
# We use _props_dict() to make sure that the GPU op know all the
# CPU op props.
dtype = op._props_dict().get('dtype')
return gpu_alloc_empty(dtype,context_name)(*inputs)
return gpu_alloc_empty(dtype, context_name)(*inputs)
@register_opt()
......@@ -614,7 +608,7 @@ def local_gpua_alloc_empty_to_zeros(node):
context_name = infer_context_name(*node.inputs)
z = numpy.asarray(0, dtype=node.outputs[0].dtype)
return [gpu_alloc(None)(as_gpuarray_variable(z, context_name),
*node.inputs)]
*node.inputs)]
optdb.register('local_gpua_alloc_empty_to_zeros',
theano.tensor.opt.in2out(local_gpua_alloc_empty_to_zeros),
# After move to gpu and merge2, before inplace.
......@@ -889,11 +883,12 @@ def local_gpua_join(op, context_name, inputs, outputs):
@register_opt('fast_compile')
@local_optimizer([GpuJoin])
def local_gpuajoin_1(node):
@register_opt2([GpuJoin], 'fast_compile')
def local_gpuajoin_1(op, context_name, inputs, outputs):
# join of a single element
if (isinstance(node.op, GpuJoin) and
len(node.inputs) == 2):
return [node.inputs[1]]
if (isinstance(op, GpuJoin) and
len(inputs) == 2):
return [inputs[1]]
@register_opt('fast_compile')
......@@ -1311,7 +1306,7 @@ def local_lift_abstractconv2d(op, context_name, inputs, outputs):
register_opt('fast_compile')(conv_groupopt)
@register_opt("low_memory")
@register_opt("low_memory", 'fast_compile')
@local_optimizer([GpuCAReduceCuda])
def local_gpu_elemwise_careduce(node):
"""
......
......@@ -1553,7 +1553,7 @@ class MRG_RandomStreams(object):
@register_opt2([mrg_uniform], 'fast_compile')
def local_gpua_mrg(op, context_name, inputs, outputs):
def local_gpua_mrg1(op, context_name, inputs, outputs):
if (type(op) == mrg_uniform and
isinstance(inputs[0].type, GpuArrayType)):
outs = GPUA_mrg_uniform.new(inputs[0],
......
Markdown 格式
0%
您将向此讨论添加 0 人。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论