提交 1d7b9bdb authored 作者: sentient07's avatar sentient07 提交者: Reyhane Askari

Removed caching instances

Conflicts: theano/gpuarray/basic_ops.py theano/gpuarray/dnn.py theano/gpuarray/elemwise.py theano/gpuarray/extra_ops.py theano/gpuarray/opt.py theano/gpuarray/opt_util.py
上级 eb2c7226
......@@ -73,7 +73,7 @@ def as_gpuarray_variable(x, context_name):
# If we couldn't deal with transfers, then maybe it's a tensor
if isinstance(x.type, tensor.TensorType):
return gpu_from_host(context_name)(x)
return GpuFromHost(context_name)(x)
# Try _as_GpuArrayVariable if possible
if hasattr(x, '_as_GpuArrayVariable'):
......@@ -617,7 +617,7 @@ class HostFromGpu(Op):
def grad(self, inputs, grads):
gz, = grads
return [gpu_from_host(inputs[0].type.context_name)(gz)]
return [GpuFromHost(inputs[0].type.context_name)(gz)]
def R_op(self, inputs, eval_points):
ev, = eval_points
......@@ -722,14 +722,6 @@ class GpuFromHost(Op):
return (9,)
# Caching GpuFromHost: one shared op instance per context name.
def gpu_from_host(ctx):
    """Return the cached ``GpuFromHost`` op for the context ``ctx``.

    Parameters
    ----------
    ctx : context name
        The GPU context the returned op transfers host data to.

    Returns
    -------
    GpuFromHost
        The shared op instance for ``ctx``.
    """
    # BUG FIX: membership was tested on gpu_alloc.cache while the
    # instance was stored in (and returned from) gpu_from_host.cache,
    # so a ctx present in the former but missing from the latter raised
    # KeyError.  Always consult this function's own cache.
    if ctx not in gpu_from_host.cache:
        gpu_from_host.cache[ctx] = GpuFromHost(ctx)
    return gpu_from_host.cache[ctx]
gpu_from_host.cache = {}
class GpuToGpu(Op):
"""
Transfer data between GPUs.
......@@ -953,15 +945,6 @@ class GpuAlloc(HideC, Alloc):
return True
# Cache of GpuAlloc op instances, keyed on (context, memset_0).
def gpu_alloc(ctx, memset_0=False):
    """Return the shared ``GpuAlloc`` op for ``(ctx, memset_0)``.

    A single instance per key is kept in ``gpu_alloc.cache`` so that
    repeated requests hand back the same op object.
    """
    cache_key = (ctx, memset_0)
    try:
        return gpu_alloc.cache[cache_key]
    except KeyError:
        op = GpuAlloc(ctx, memset_0)
        gpu_alloc.cache[cache_key] = op
        return op
gpu_alloc.cache = {}
class GpuAllocEmpty(HideC, AllocEmpty):
"""
Allocate uninitialized memory on the GPU.
......@@ -971,7 +954,7 @@ class GpuAllocEmpty(HideC, AllocEmpty):
_f16_ok = True
params_type = gpu_context_type
def __init__(self, dtype, context_name):
def __init__(self, dtype, context_name=None):
self.dtype = dtype
self.context_name = context_name
......@@ -1048,14 +1031,6 @@ def empty_like(var):
return GpuAllocEmpty(var.type.dtype, var.type.context_name)(*var.shape)
def gpu_alloc_empty(ctx, dtype):
    """Return the shared ``GpuAllocEmpty`` op for ``(dtype, ctx)``.

    Instances are memoized in ``gpu_alloc_empty.cache``, keyed on
    ``(dtype, ctx)``, so equal requests reuse one op object.
    """
    memo = gpu_alloc_empty.cache
    memo_key = (dtype, ctx)
    if memo_key in memo:
        return memo[memo_key]
    op = GpuAllocEmpty(dtype, ctx)
    memo[memo_key] = op
    return op
gpu_alloc_empty.cache = {}
class GpuContiguous(Op):
"""
Return a C contiguous version of the input.
......
差异被折叠。
......@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division
import os
from theano import Apply, Op
from theano.tensor.extra_ops import CumOp
from .basic_ops import infer_context_name
try:
from pygpu import gpuarray
except ImportError:
......
......@@ -10,7 +10,7 @@ from theano.scalar import as_scalar, constant
from . import opt
from .basic_ops import (as_gpuarray_variable, GpuAllocEmpty,
infer_context_name, gpu_alloc_empty)
infer_context_name)
from .type import gpu_context_type
from .opt_util import alpha_merge, output_merge
......@@ -157,8 +157,8 @@ def local_gpua_dot_to_gemm16(op, ctx_name, inputs, outputs):
B = inputs[1]
if (A.ndim == 2 and B.ndim == 2 and
A.dtype == 'float16' and B.dtype == 'float16'):
fgraph = getattr(outputs[0], 'fgraph', None)
C = gpu_alloc_empty(ctx_name, dtype='float16')(
fgraph = inputs[0].fgraph
C = GpuAllocEmpty('float16', ctx_name)(
shape_i(A, 0, fgraph), shape_i(B, 1, fgraph))
return Gemm16()(C, 1.0, A, B, 0.0)
......
......@@ -44,8 +44,7 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name,
HostFromGpu, GpuFromHost,
GpuSplit, GpuContiguous, gpu_contiguous,
GpuAlloc, GpuAllocEmpty, GpuReshape,
GpuEye, gpu_join, GpuJoin, gpu_alloc_empty,
gpu_alloc, gpu_from_host)
GpuEye, gpu_join, GpuJoin)
from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch,
gpugemm_no_inplace, gpugemm_inplace,
gpugemmbatch_no_inplace,
......@@ -61,7 +60,6 @@ from .blocksparse import (GpuSparseBlockGemv, GpuSparseBlockOuter,
from .nnet import (gpu_crossentropy_softmax_1hot_with_bias_dx,
gpu_crossentropy_softmax_argmax_1hot_with_bias,
gpu_softmax_with_bias, gpu_softmax)
from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda,
GpuCAReduceCPY, gpu_ca_reduce_cuda, gpu_erfinv, gpu_erfcinv,
max_inputs_to_GpuElemwise)
......@@ -165,7 +163,7 @@ gpu_optimizer.register('local_remove_all_assert',
def safe_to_gpu(x, ctx_name):
    """Move ``x`` to the GPU context ``ctx_name`` when possible.

    Parameters
    ----------
    x : Variable
        Variable to transfer; only host ``TensorType`` variables are moved.
    ctx_name : context name
        Target GPU context.

    Returns
    -------
    Variable
        ``GpuFromHost(ctx_name)(x)`` for host tensors, otherwise ``x``
        unchanged.
    """
    # BUG FIX: a stale duplicate "return gpu_from_host(ctx_name)(x)"
    # line preceded the intended return, making the GpuFromHost call
    # unreachable (and referencing a helper removed by this change).
    if isinstance(x.type, tensor.TensorType):
        return GpuFromHost(ctx_name)(x)
    else:
        return x
......@@ -269,7 +267,7 @@ class InputToGpuOptimizer(Optimizer):
continue
try:
new_input = gpu_from_host(target)(input).transfer('cpu')
new_input = GpuFromHost(target)(input).transfer('cpu')
fgraph.replace_validate(input, new_input,
"InputToGpuOptimizer")
except TypeError:
......@@ -600,14 +598,14 @@ def local_gpua_alloc2(node):
i.owner.op in [host_from_gpu, tensor.alloc]
for i in c.inputs[1:])
for c, idx in node.outputs[0].clients)):
return [gpu_alloc(None)(*node.inputs).transfer('cpu')]
return [GpuAlloc(None)(*node.inputs).transfer('cpu')]
@register_opt('fast_compile')
@op_lifter([tensor.Alloc])
@register_opt2([tensor.Alloc], 'fast_compile')
def local_gpua_alloc(op, context_name, inputs, outputs):
return gpu_alloc(context_name)
def local_gpuaalloc(op, context_name, inputs, outputs):
return GpuAlloc(context_name)(*inputs)
@register_opt('fast_compile')
......@@ -616,7 +614,7 @@ def local_gpua_alloc(op, context_name, inputs, outputs):
def local_gpua_alloc_empty(op, context_name, inputs, outputs):
# We use _props_dict() to make sure that the GPU op know all the
# CPU op props.
return gpu_alloc_empty(context_name, **op._props_dict())
return GpuAllocEmpty(**op._props_dict())(*inputs)
@register_opt()
......@@ -627,7 +625,7 @@ def local_gpualloc_memset_0(node):
if (isinstance(inp, GpuArrayConstant) and
inp.data.size == 1 and
(np.asarray(inp.data) == 0).all()):
new_op = gpu_alloc(node.op.context_name, memset_0=True)
new_op = GpuAlloc(node.op.context_name, memset_0=True)
return [new_op(*node.inputs)]
......@@ -637,8 +635,8 @@ def local_gpua_alloc_empty_to_zeros(node):
if isinstance(node.op, GpuAllocEmpty):
context_name = infer_context_name(*node.inputs)
z = np.asarray(0, dtype=node.outputs[0].dtype)
return [gpu_alloc(context_name)(as_gpuarray_variable(z, context_name),
*node.inputs)]
return [GpuAlloc(context_name)(as_gpuarray_variable(z, context_name),
*node.inputs)]
optdb.register('local_gpua_alloc_empty_to_zeros',
theano.tensor.opt.in2out(local_gpua_alloc_empty_to_zeros),
# After move to gpu and merge2, before inplace.
......@@ -1234,7 +1232,7 @@ def local_gpua_dot22scalar(op, context_name, inputs, outputs):
x, y, a = inputs
x = as_gpuarray_variable(x, context_name)
y = as_gpuarray_variable(y, context_name)
z = gpu_alloc_empty(context_name, dtype=x.dtype)(x.shape[0], y.shape[1])
z = GpuAllocEmpty(x.dtype, context_name)(x.shape[0], y.shape[1])
return [gpugemm_no_inplace(z, a, x, y, 0)]
......@@ -1804,10 +1802,10 @@ def local_gpu_elemwise_careduce(node):
isinstance(node.inputs[0].owner.op.scalar_op, scalar.basic.Sqr)):
op = node.op
inp = node.inputs[0].owner.inputs[0]
return [gpu_ca_reduce_cuda(scalar_op=op.scalar_op,
axis=op.axis,
reduce_mask=op.reduce_mask,
pre_scalar_op=scalar.basic.sqr)(inp)]
return [GpuCAReduceCuda(scalar_op=op.scalar_op,
axis=op.axis,
reduce_mask=op.reduce_mask,
pre_scalar_op=scalar.basic.sqr)(inp)]
@local_optimizer(None)
......
......@@ -8,7 +8,7 @@ from theano.gof import local_optimizer
from theano.tensor import (DimShuffle, get_scalar_constant_value,
NotScalarConstantError)
from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty, GpuReshape, gpu_alloc_empty
from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty, GpuReshape
from .elemwise import GpuDimShuffle, GpuElemwise
_one = scal.constant(np.asarray(1.0, dtype='float32'))
......@@ -324,7 +324,7 @@ def inplace_allocempty(op, idx):
if (alloc.owner and
isinstance(alloc.owner.op, GpuAllocEmpty) and
len(alloc.clients) > 1):
alloc_op = gpu_alloc_empty(alloc.owner.op.context_name, dtype=alloc.owner.op.dtype)
alloc_op = GpuAllocEmpty(alloc.owner.op.dtype, alloc.owner.op.context_name)
inputs[idx] = alloc_op(*alloc.owner.inputs)
return maker(node, inputs)
return opt
......
......@@ -271,7 +271,7 @@ class GpuArrayType(Type):
return data
def filter_variable(self, other, allow_convert=True):
from theano.gpuarray.basic_ops import gpu_from_host
from theano.gpuarray.basic_ops import GpuFromHost
if hasattr(other, '_as_GpuArrayVariable'):
other = other._as_GpuArrayVariable(self.context_name)
......@@ -303,7 +303,7 @@ class GpuArrayType(Type):
str(self.broadcastable)))
other = other2
return gpu_from_host(self.context_name)(other)
return GpuFromHost(self.context_name)(other)
@staticmethod
def values_eq(a, b, force_same_dtype=True):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论