Commit d0d53136 authored by sentient07

Fixed a few error messages and cached GPUAlloc

Parent 7946e06d
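
The central change below replaces direct construction of GpuAlloc Ops with a small memoized factory, gpu_alloc, so that every request for the same (context, memset_0) pair returns the same Op instance instead of a fresh one. As a minimal, standalone sketch of that pattern (illustrative names only, not Theano's):

    class DummyOp(object):
        """Stand-in for an Op we want to build at most once per config."""
        def __init__(self, ctx, memset_0):
            self.ctx = ctx
            self.memset_0 = memset_0

    def make_op(ctx, memset_0=False):
        key = (ctx, memset_0)
        if key not in make_op.cache:    # construct only on first request
            make_op.cache[key] = DummyOp(ctx, memset_0)
        return make_op.cache[key]
    make_op.cache = {}  # attribute on the function doubles as a module-level cache

    assert make_op('dev0') is make_op('dev0')  # reused instance
    assert make_op('dev0') is not make_op('dev0', memset_0=True)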
@@ -870,6 +870,15 @@ class GpuAlloc(HideC, Alloc):
         return True
 
 
+# Caching GPUAlloc
+def gpu_alloc(ctx, memset_0=False):
+    key = (ctx, memset_0)
+    if key not in gpu_alloc.cache:
+        gpu_alloc.cache[key] = GpuAlloc(ctx, memset_0)
+    return gpu_alloc.cache[key]
+gpu_alloc.cache = {}
+
+
 class GpuAllocEmpty(HideC, Alloc):
     """
     Allocate uninitialized memory on the GPU.
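
Assuming gpu_alloc is exported from theano/gpuarray/basic_ops.py, as the import hunk further down suggests, call sites now share Op instances for free; presumably the point is to avoid rebuilding identical GpuAlloc Ops on every optimizer pass:

    from theano.gpuarray.basic_ops import gpu_alloc

    a = gpu_alloc(None)    # default context, no memset
    b = gpu_alloc(None)
    assert a is b          # one GpuAlloc per (ctx, memset_0) key
    assert gpu_alloc(None, memset_0=True) is not a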
@@ -13,7 +13,7 @@ except ImportError:
 from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel,
                         infer_context_name)
-from .opt import register_opt as register_gpu_opt, op_lifter
+from .opt import register_opt2, op_lifter, register_opt
 from .type import GpuArrayType
@@ -468,9 +468,10 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
         Op.perform(self, node, inp, out, ctx)
 
 
+@register_opt('fast_compile')
 @op_lifter([Images2Neibs])
+@register_opt2([Images2Neibs], 'fast_compile')
 def use_gpu_images2neibs(op, context_name, inputs):
     if op.mode in ['valid', 'ignore_borders', 'wrap_centered']:
         return GpuImages2Neibs(op.mode)
-
-register_gpu_opt()(use_gpu_images2neibs)
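
This hunk also swaps the post-hoc registration call register_gpu_opt()(use_gpu_images2neibs) for stacked decorators. The idiom works because each decorator returns the function it receives, so they compose; a generic sketch with hypothetical stand-ins for register_opt/op_lifter/register_opt2:

    REGISTRY = {}

    def register(tag):
        """Record fn under tag and return it unchanged so decorators stack."""
        def wrapper(fn):
            REGISTRY.setdefault(tag, []).append(fn)
            return fn
        return wrapper

    def lifter(cpu_ops):
        """Tag fn with the CPU Ops it can lift to the GPU."""
        def wrapper(fn):
            fn.lifts = cpu_ops
            return fn
        return wrapper

    @register('fast_compile')
    @lifter(['Images2Neibs'])
    def use_gpu_images2neibs(op):
        return 'GpuImages2Neibs'

    assert REGISTRY['fast_compile'] == [use_gpu_images2neibs]
    assert use_gpu_images2neibs.lifts == ['Images2Neibs']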
@@ -36,7 +36,7 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name,
                         HostFromGpu, GpuFromHost,
                         GpuSplit, GpuContiguous, gpu_contiguous,
                         GpuAlloc, GpuAllocEmpty, GpuReshape,
-                        GpuEye, gpu_join, GpuJoin, gpu_alloc_empty)
+                        GpuEye, gpu_join, GpuJoin, gpu_alloc_empty, gpu_alloc)
 from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch,
                    gpugemm_no_inplace, gpugemm_inplace, gpugemmbatch_no_inplace,
                    gpugemv_no_inplace, gpugemv_inplace)
@@ -281,12 +281,13 @@ class GraphToGPU(NavigatorOptimizer):
         # Building a new graph
         # Iterating through inputs of graph
         target = infer_context_name(*fgraph.inputs)
         for i in fgraph.inputs:
             # Do not move *int* scalar to the GPU.
             target = getattr(i.tag, 'target', None)
-            if (target != 'cpu' and
-                isinstance(i.type, tensor.TensorType) and
-                (i.ndim > 0 or 'int' not in i.dtype)):
+            if (target != 'cpu' and
+                    isinstance(i.type, tensor.TensorType) and
+                    (i.ndim > 0 or 'int' not in i.dtype)):
                 mapping[i] = as_gpuarray_variable(i, target)
             else:
                 mapping[i] = i
@@ -574,14 +575,14 @@ def local_gpuaalloc2(node):
                 i.owner.op in [host_from_gpu, tensor.alloc]
                 for i in c.inputs[1:])
             for c, idx in node.outputs[0].clients)):
-        return [host_from_gpu(GpuAlloc(None)(*node.inputs))]
+        return [host_from_gpu(gpu_alloc(None)(*node.inputs))]
 
 
 @register_opt('fast_compile')
 @op_lifter([tensor.Alloc])
 @register_opt2([tensor.Alloc], 'fast_compile')
 def local_gpuaalloc(op, context_name, inputs, outputs):
-    return GpuAlloc(context_name)(*inputs)
+    return gpu_alloc(context_name)(*inputs)
 
 
 @register_opt('fast_compile')
@@ -590,8 +591,8 @@ def local_gpuaalloc(op, context_name, inputs, outputs):
 def local_gpuaallocempty(op, context_name, inputs, outputs):
     # We use _props_dict() to make sure that the GPU op know all the
     # CPU op props.
-    return GpuAllocEmpty(context_name=context_name,
-                         **op._props_dict())(*inputs)
+    dtype = op._props_dict().get('dtype')
+    return gpu_alloc_empty(dtype, context_name)(*inputs)
 
 
 @register_opt()
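
gpu_alloc_empty itself is not defined in this diff, but the new positional call gpu_alloc_empty(dtype, context_name) suggests it is (or becomes) a cached factory analogous to gpu_alloc above. A sketch under that assumption, sitting next to GpuAllocEmpty in basic_ops.py:

    def gpu_alloc_empty(dtype, ctx):
        # Assumed counterpart to gpu_alloc: one GpuAllocEmpty per (dtype, ctx).
        key = (dtype, ctx)
        if key not in gpu_alloc_empty.cache:
            gpu_alloc_empty.cache[key] = GpuAllocEmpty(dtype, context_name=ctx)
        return gpu_alloc_empty.cache[key]
    gpu_alloc_empty.cache = {}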
@@ -602,7 +603,7 @@ def local_gpualloc_memset_0(node):
         if (isinstance(inp, GpuArrayConstant) and
                 inp.data.size == 1 and
                 (numpy.asarray(inp.data) == 0).all()):
-            new_op = GpuAlloc(node.op.context_name, memset_0=True)
+            new_op = gpu_alloc(node.op.context_name, memset_0=True)
             return [new_op(*node.inputs)]
@@ -612,7 +613,7 @@ def local_gpua_alloc_empty_to_zeros(node):
     if isinstance(node.op, GpuAllocEmpty):
         context_name = infer_context_name(*node.inputs)
         z = numpy.asarray(0, dtype=node.outputs[0].dtype)
-        return [GpuAlloc()(as_gpuarray_variable(z, context_name),
+        return [gpu_alloc(None)(as_gpuarray_variable(z, context_name),
                            *node.inputs)]
 
 optdb.register('local_gpua_alloc_empty_to_zeros',
                theano.tensor.opt.in2out(local_gpua_alloc_empty_to_zeros),
@@ -1140,7 +1141,7 @@ def local_gpua_hgemm(op, context_name, inputs, outputs):
     if (A.ndim == 2 and B.ndim == 2 and
             A.dtype == 'float16' and B.dtype == 'float16'):
         fgraph = inputs[0].fgraph
-        C = gpu_alloc_empty(dtype='float16', context_name=context_name)(
+        C = gpu_alloc_empty('float16', context_name)(
             shape_i(A, 0, fgraph),
             shape_i(B, 1, fgraph))
         return gpugemm_no_inplace(C, 1.0, A, B, 0.0)
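
With the factory form, the keyword call is reduced to positional (dtype, context_name), and repeated float16 output allocations on one context reuse a single Op, assuming gpu_alloc_empty caches as sketched earlier:

    assert (gpu_alloc_empty('float16', context_name) is
            gpu_alloc_empty('float16', context_name))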