提交 e01db583 authored 作者: sentient07's avatar sentient07

Cache GpuAllocEmpty

上级 a90c7e81
......@@ -31,7 +31,9 @@ from .elemwise import GpuElemwise
# These don't exist in gpuarray
# GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
from .nnet import GpuSoftmax
from .opt import gpu_seqopt, register_opt, conv_groupopt, op_lifter, register_opt2
from .opt import (gpu_seqopt, register_opt, conv_groupopt,
op_lifter, register_opt2, gpu_alloc_empty)
from .opt_util import alpha_merge, output_merge, inplace_allocempty
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
......@@ -896,7 +898,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
shape2 = shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1
shape3 = shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1
out = GpuAllocEmpty(img.dtype, ctx_name)(
out = gpu_alloc_empty(img.dtype, ctx_name)(
shape_i(kerns, 1, fgraph),
shape_i(img, 1, fgraph), shape2, shape3)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
......@@ -914,7 +916,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1
shape3 = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1
out = GpuAllocEmpty(img.dtype, ctx_name)(shape_i(img, 0, fgraph),
out = gpu_alloc_empty(img.dtype, ctx_name)(shape_i(img, 0, fgraph),
shape_i(kerns, 1, fgraph),
shape2, shape3)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
......@@ -932,7 +934,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
out_shp = GpuDnnConv.get_out_shape(img.shape, kerns.shape,
desc_op.border_mode,
desc_op.subsample)
out = GpuAllocEmpty(img.dtype, ctx_name)(*out_shp)
out = gpu_alloc_empty(img.dtype, ctx_name)(*out_shp)
return GpuDnnConv(algo=algo)(img, kerns, out, desc)
......@@ -946,7 +948,7 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
kerns_shp = as_tensor_variable(kerns_shp)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(kerns_shp)
out = GpuAllocEmpty(img.dtype, ctx_name)(*kerns_shp)
out = gpu_alloc_empty(img.dtype, ctx_name)(*kerns_shp)
return GpuDnnConvGradW()(img, topgrad, out, desc)
......@@ -960,7 +962,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
img_shp = as_tensor_variable(img_shp)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(kerns.shape)
out = GpuAllocEmpty(kerns.dtype, ctx_name)(*img_shp)
out = gpu_alloc_empty(kerns.dtype, ctx_name)(*img_shp)
return GpuDnnConvGradI()(kerns, topgrad, out, desc)
......
......@@ -452,7 +452,7 @@ class GpuCumsum(GpuKernelBase, Op):
@register_opt('fast_compile')
@op_lifter([CumsumOp])
#@register_opt2([CumsumOp], 'fast_compile')
@register_opt2([CumsumOp], 'fast_compile')
def use_gpu_cumsumop(op, ctx_name, inputs, ):
if inputs[0].dtype == 'float32':
axis = op.axis
......@@ -471,6 +471,3 @@ def use_gpu_cumsumop(op, ctx_name, inputs, ):
axis = 0
return GpuCumsum(axis)(x)
#register_opt('fast_compile')(use_gpu_cumsumop)
#
\ No newline at end of file
......@@ -51,6 +51,8 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedIncSubtensor1_dev20)
from .opt_util import alpha_merge, output_merge
_logger = logging.getLogger("theano.gpuarray.opt")
......@@ -59,6 +61,14 @@ gpu_optimizer2 = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()
def gpu_alloc_empty(dtype, ctx):
    """Return a memoized ``GpuAllocEmpty`` op for ``(dtype, ctx)``.

    Only one op instance is created per distinct ``(dtype, context)``
    pair; subsequent calls with the same pair return the same object,
    avoiding the cost of rebuilding an identical op each time.
    """
    cache = gpu_alloc_empty.cache
    lookup = (dtype, ctx)
    try:
        # Fast path: op already built for this (dtype, ctx) pair.
        return cache[lookup]
    except KeyError:
        op = GpuAllocEmpty(dtype, ctx)
        cache[lookup] = op
        return op
# Process-wide cache of op instances, keyed by (dtype, ctx).
gpu_alloc_empty.cache = {}
class GraphToGPUDB(DB):
"""
Retrieves the list local optimizers based on the optimizer flag's value
......@@ -292,7 +302,7 @@ class GraphToGPU(Optimizer):
move_to_GPU = True
'''
out_clients = [o.clients for o in node.outputs]
context_name = None
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论