提交 a90c7e81，作者：sentient07

Added new optimizer to local_abstractconv_cudnn

上级 3c5c1506
......@@ -1384,14 +1384,16 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs])
def local_abstractconv_cudnn(node):
if (not isinstance(node.op, (AbstractConv2d,
@register_opt2([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs], 'fast_compile')
def local_abstractconv_cudnn(op, context_name, inputs):
if (not isinstance(op, (AbstractConv2d,
AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs))):
return None
inp1 = node.inputs[0]
inp2 = node.inputs[1]
inp1 = inputs[0]
inp2 = inputs[1]
if (node.op.filter_dilation != (1, 1)):
return None
......@@ -1402,30 +1404,30 @@ def local_abstractconv_cudnn(node):
if not dnn_available(inp1.type.context_name):
raise_no_cudnn()
if node.op.filter_flip:
if op.filter_flip:
conv_mode = 'conv'
else:
conv_mode = 'cross'
if isinstance(node.op, AbstractConv2d):
if isinstance(op, AbstractConv2d):
rval = dnn_conv(inp1, inp2,
border_mode=node.op.border_mode,
subsample=node.op.subsample,
border_mode=op.border_mode,
subsample=op.subsample,
direction_hint='forward!',
conv_mode=conv_mode)
if isinstance(node.op, AbstractConv2d_gradWeights):
if isinstance(op, AbstractConv2d_gradWeights):
shape = (inp2.shape[1], inp1.shape[1],
node.inputs[2][0], node.inputs[2][1])
inputs[2][0], inputs[2][1])
rval = dnn_gradweight(inp1, inp2, shape,
border_mode=node.op.border_mode,
subsample=node.op.subsample,
border_mode=op.border_mode,
subsample=op.subsample,
conv_mode=conv_mode)
if isinstance(node.op, AbstractConv2d_gradInputs):
if isinstance(op, AbstractConv2d_gradInputs):
shape = (inp2.shape[0], inp1.shape[1],
node.inputs[2][0], node.inputs[2][1])
inputs[2][0], inputs[2][1])
rval = dnn_gradinput(inp1, inp2, shape,
border_mode=node.op.border_mode,
subsample=node.op.subsample,
border_mode=op.border_mode,
subsample=op.subsample,
conv_mode=conv_mode)
return [rval]
......@@ -1633,7 +1635,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
@register_opt('cudnn', 'fast_compile')
@op_lifter([SoftmaxGrad])
#@register_opt2([SoftmaxGrad], 'fast_compile')
@register_opt2([SoftmaxGrad], 'cudnn', 'fast_compile')
def local_softmax_dnn_grad(op, ctx_name, inputs):
if not dnn_available(ctx_name):
raise_no_cudnn("cuDNN needed for SoftmaxGrad")
......
......@@ -452,8 +452,8 @@ class GpuCumsum(GpuKernelBase, Op):
@register_opt('fast_compile')
@op_lifter([CumsumOp])
@register_opt2([CumsumOp], 'fast_compile')
def use_gpu_cumsumop(op, ctx_name, inputs):
#@register_opt2([CumsumOp], 'fast_compile')
def use_gpu_cumsumop(op, ctx_name, inputs, ):
if inputs[0].dtype == 'float32':
axis = op.axis
x = inputs[0]
......@@ -464,7 +464,7 @@ def use_gpu_cumsumop(op, ctx_name, inputs):
if axis is None and x.ndim > 1:
x = x.flatten()
x = GpuFromHost(ctx_name)(x)
x = as_gpuarray_variable(x, ctx_name)
# ``gpu_cumsum`` assume array has been flattened if needed.
if axis is None:
......@@ -473,4 +473,4 @@ def use_gpu_cumsumop(op, ctx_name, inputs):
return GpuCumsum(axis)(x)
#register_opt('fast_compile')(use_gpu_cumsumop)
#register_opt2([CumsumOp], 'fast_compile')(use_gpu_cumsumop)
\ No newline at end of file
#
\ No newline at end of file
......@@ -282,15 +282,17 @@ class GraphToGPU(Optimizer):
continue
# Move only if any of the inputs are on the GPU.
move_to_GPU = False
move_to_GPU = True
'''
if any([isinstance(i, GpuArrayVariable) or
isinstance(i, GpuArraySharedVariable)
for i in [mapping[v] for v in node.inputs] +
node.outputs]):
move_to_GPU = True
'''
out_clients = [o.clients for o in node.outputs]
context_name = None
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论