提交 a90c7e81 作者: sentient07

Added new optimizer to local_abstractconv_cudnn

上级 3c5c1506
...@@ -1384,14 +1384,16 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase): ...@@ -1384,14 +1384,16 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights, @local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs]) AbstractConv2d_gradInputs])
def local_abstractconv_cudnn(node): @register_opt2([AbstractConv2d, AbstractConv2d_gradWeights,
if (not isinstance(node.op, (AbstractConv2d, AbstractConv2d_gradInputs], 'fast_compile')
def local_abstractconv_cudnn(op, context_name, inputs):
if (not isinstance(op, (AbstractConv2d,
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs))): AbstractConv2d_gradInputs))):
return None return None
inp1 = node.inputs[0] inp1 = inputs[0]
inp2 = node.inputs[1] inp2 = inputs[1]
if (node.op.filter_dilation != (1, 1)): if (node.op.filter_dilation != (1, 1)):
return None return None
...@@ -1402,30 +1404,30 @@ def local_abstractconv_cudnn(node): ...@@ -1402,30 +1404,30 @@ def local_abstractconv_cudnn(node):
if not dnn_available(inp1.type.context_name): if not dnn_available(inp1.type.context_name):
raise_no_cudnn() raise_no_cudnn()
if node.op.filter_flip: if op.filter_flip:
conv_mode = 'conv' conv_mode = 'conv'
else: else:
conv_mode = 'cross' conv_mode = 'cross'
if isinstance(node.op, AbstractConv2d): if isinstance(op, AbstractConv2d):
rval = dnn_conv(inp1, inp2, rval = dnn_conv(inp1, inp2,
border_mode=node.op.border_mode, border_mode=op.border_mode,
subsample=node.op.subsample, subsample=op.subsample,
direction_hint='forward!', direction_hint='forward!',
conv_mode=conv_mode) conv_mode=conv_mode)
if isinstance(node.op, AbstractConv2d_gradWeights): if isinstance(op, AbstractConv2d_gradWeights):
shape = (inp2.shape[1], inp1.shape[1], shape = (inp2.shape[1], inp1.shape[1],
node.inputs[2][0], node.inputs[2][1]) inputs[2][0], inputs[2][1])
rval = dnn_gradweight(inp1, inp2, shape, rval = dnn_gradweight(inp1, inp2, shape,
border_mode=node.op.border_mode, border_mode=op.border_mode,
subsample=node.op.subsample, subsample=op.subsample,
conv_mode=conv_mode) conv_mode=conv_mode)
if isinstance(node.op, AbstractConv2d_gradInputs): if isinstance(op, AbstractConv2d_gradInputs):
shape = (inp2.shape[0], inp1.shape[1], shape = (inp2.shape[0], inp1.shape[1],
node.inputs[2][0], node.inputs[2][1]) inputs[2][0], inputs[2][1])
rval = dnn_gradinput(inp1, inp2, shape, rval = dnn_gradinput(inp1, inp2, shape,
border_mode=node.op.border_mode, border_mode=op.border_mode,
subsample=node.op.subsample, subsample=op.subsample,
conv_mode=conv_mode) conv_mode=conv_mode)
return [rval] return [rval]
...@@ -1633,7 +1635,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn') ...@@ -1633,7 +1635,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
@register_opt('cudnn', 'fast_compile') @register_opt('cudnn', 'fast_compile')
@op_lifter([SoftmaxGrad]) @op_lifter([SoftmaxGrad])
#@register_opt2([SoftmaxGrad], 'fast_compile') @register_opt2([SoftmaxGrad], 'cudnn', 'fast_compile')
def local_softmax_dnn_grad(op, ctx_name, inputs): def local_softmax_dnn_grad(op, ctx_name, inputs):
if not dnn_available(ctx_name): if not dnn_available(ctx_name):
raise_no_cudnn("cuDNN needed for SoftmaxGrad") raise_no_cudnn("cuDNN needed for SoftmaxGrad")
......
...@@ -452,8 +452,8 @@ class GpuCumsum(GpuKernelBase, Op): ...@@ -452,8 +452,8 @@ class GpuCumsum(GpuKernelBase, Op):
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([CumsumOp]) @op_lifter([CumsumOp])
@register_opt2([CumsumOp], 'fast_compile') #@register_opt2([CumsumOp], 'fast_compile')
def use_gpu_cumsumop(op, ctx_name, inputs): def use_gpu_cumsumop(op, ctx_name, inputs, ):
if inputs[0].dtype == 'float32': if inputs[0].dtype == 'float32':
axis = op.axis axis = op.axis
x = inputs[0] x = inputs[0]
...@@ -464,7 +464,7 @@ def use_gpu_cumsumop(op, ctx_name, inputs): ...@@ -464,7 +464,7 @@ def use_gpu_cumsumop(op, ctx_name, inputs):
if axis is None and x.ndim > 1: if axis is None and x.ndim > 1:
x = x.flatten() x = x.flatten()
x = GpuFromHost(ctx_name)(x) x = as_gpuarray_variable(x, ctx_name)
# ``gpu_cumsum`` assumes the array has been flattened if needed. # ``gpu_cumsum`` assumes the array has been flattened if needed.
if axis is None: if axis is None:
...@@ -473,4 +473,4 @@ def use_gpu_cumsumop(op, ctx_name, inputs): ...@@ -473,4 +473,4 @@ def use_gpu_cumsumop(op, ctx_name, inputs):
return GpuCumsum(axis)(x) return GpuCumsum(axis)(x)
#register_opt('fast_compile')(use_gpu_cumsumop) #register_opt('fast_compile')(use_gpu_cumsumop)
#register_opt2([CumsumOp], 'fast_compile')(use_gpu_cumsumop) #
\ No newline at end of file \ No newline at end of file
...@@ -282,15 +282,17 @@ class GraphToGPU(Optimizer): ...@@ -282,15 +282,17 @@ class GraphToGPU(Optimizer):
continue continue
# Move only if any of the inputs are on the GPU. # Move only if any of the inputs are on the GPU.
move_to_GPU = False move_to_GPU = True
'''
if any([isinstance(i, GpuArrayVariable) or if any([isinstance(i, GpuArrayVariable) or
isinstance(i, GpuArraySharedVariable) isinstance(i, GpuArraySharedVariable)
for i in [mapping[v] for v in node.inputs] + for i in [mapping[v] for v in node.inputs] +
node.outputs]): node.outputs]):
move_to_GPU = True move_to_GPU = True
'''
out_clients = [o.clients for o in node.outputs] out_clients = [o.clients for o in node.outputs]
context_name = None context_name = None
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论