提交 de536bd5 authored 作者: sentient07's avatar sentient07

Reshape crashfix and added caching at other places

上级 e01db583
...@@ -956,6 +956,14 @@ def empty_like(var): ...@@ -956,6 +956,14 @@ def empty_like(var):
return GpuAllocEmpty(var.type.dtype, var.type.context_name)(*var.shape) return GpuAllocEmpty(var.type.dtype, var.type.context_name)(*var.shape)
def gpu_alloc_empty(dtype, context_name):
    """Return a cached ``GpuAllocEmpty`` Op for ``(dtype, context_name)``.

    Constructing a ``GpuAllocEmpty`` per call site creates many identical
    Op instances; this memoizes one instance per (dtype, context) pair and
    reuses it.

    Parameters
    ----------
    dtype : str
        The dtype of the Op to build (e.g. ``'float16'``).
    context_name : str or None
        The GPU context name, forwarded to ``GpuAllocEmpty``.

    Returns
    -------
    GpuAllocEmpty
        The cached Op instance for this key.
    """
    # NOTE: the second parameter must be named ``context_name`` so the
    # keyword call sites (e.g. ``gpu_alloc_empty(dtype='float16',
    # context_name=ctx_name)``) resolve; a positional second argument
    # keeps working as well.
    key = (dtype, context_name)
    if key not in gpu_alloc_empty.cache:
        gpu_alloc_empty.cache[key] = GpuAllocEmpty(dtype, context_name)
    return gpu_alloc_empty.cache[key]
# Module-level memo table; keyed by (dtype, context_name).
gpu_alloc_empty.cache = {}
class GpuContiguous(Op): class GpuContiguous(Op):
""" """
Return a C contiguous version of the input. Return a C contiguous version of the input.
...@@ -1031,6 +1039,7 @@ class GpuReshape(HideC, tensor.Reshape): ...@@ -1031,6 +1039,7 @@ class GpuReshape(HideC, tensor.Reshape):
def make_node(self, x, shp): def make_node(self, x, shp):
ctx_name = infer_context_name(x) ctx_name = infer_context_name(x)
x = as_gpuarray_variable(x, context_name=ctx_name) x = as_gpuarray_variable(x, context_name=ctx_name)
shp = tensor.as_tensor_variable(shp)
res = host_from_gpu(x).reshape(shp, ndim=self.ndim) res = host_from_gpu(x).reshape(shp, ndim=self.ndim)
otype = GpuArrayType(dtype=res.dtype, otype = GpuArrayType(dtype=res.dtype,
broadcastable=res.broadcastable, broadcastable=res.broadcastable,
......
...@@ -25,14 +25,15 @@ from theano.tensor.signal.pool import ( ...@@ -25,14 +25,15 @@ from theano.tensor.signal.pool import (
from . import pygpu from . import pygpu
from .type import get_context, gpu_context_type, list_contexts, GpuArrayType from .type import get_context, gpu_context_type, list_contexts, GpuArrayType
from .basic_ops import (as_gpuarray_variable, infer_context_name, from .basic_ops import (as_gpuarray_variable, infer_context_name,
gpu_contiguous, GpuAllocEmpty, empty_like) gpu_contiguous, GpuAllocEmpty, gpu_alloc_empty,
empty_like)
from .elemwise import GpuElemwise from .elemwise import GpuElemwise
# These don't exist in gpuarray # These don't exist in gpuarray
# GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad # GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
from .nnet import GpuSoftmax from .nnet import GpuSoftmax
from .opt import (gpu_seqopt, register_opt, conv_groupopt, from .opt import (gpu_seqopt, register_opt, conv_groupopt,
op_lifter, register_opt2, gpu_alloc_empty) op_lifter, register_opt2)
from .opt_util import alpha_merge, output_merge, inplace_allocempty from .opt_util import alpha_merge, output_merge, inplace_allocempty
......
...@@ -158,7 +158,7 @@ def local_dot_to_gemm16(op, ctx_name, inputs): ...@@ -158,7 +158,7 @@ def local_dot_to_gemm16(op, ctx_name, inputs):
if (A.ndim == 2 and B.ndim == 2 and if (A.ndim == 2 and B.ndim == 2 and
A.dtype == 'float16' and B.dtype == 'float16'): A.dtype == 'float16' and B.dtype == 'float16'):
fgraph = inputs[0].fgraph fgraph = inputs[0].fgraph
C = GpuAllocEmpty(dtype='float16', context_name=ctx_name)( C = gpu_alloc_empty(dtype='float16', context_name=ctx_name)(
shape_i(A, 0, fgraph), shape_i(B, 1, fgraph)) shape_i(A, 0, fgraph), shape_i(B, 1, fgraph))
return Gemm16()(C, 1.0, A, B, 0.0) return Gemm16()(C, 1.0, A, B, 0.0)
......
...@@ -32,7 +32,7 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name, ...@@ -32,7 +32,7 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name,
HostFromGpu, GpuFromHost, HostFromGpu, GpuFromHost,
GpuSplit, GpuContiguous, gpu_contiguous, GpuSplit, GpuContiguous, gpu_contiguous,
GpuAlloc, GpuAllocEmpty, GpuReshape, GpuAlloc, GpuAllocEmpty, GpuReshape,
GpuEye, gpu_join, GpuJoin) GpuEye, gpu_join, GpuJoin, gpu_alloc_empty)
from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch, from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch,
gpugemm_no_inplace, gpugemm_inplace, gpugemmbatch_no_inplace, gpugemm_no_inplace, gpugemm_inplace, gpugemmbatch_no_inplace,
gpugemv_no_inplace, gpugemv_inplace) gpugemv_no_inplace, gpugemv_inplace)
...@@ -61,14 +61,6 @@ gpu_optimizer2 = EquilibriumDB() ...@@ -61,14 +61,6 @@ gpu_optimizer2 = EquilibriumDB()
gpu_cut_copies = EquilibriumDB() gpu_cut_copies = EquilibriumDB()
def gpu_alloc_empty(dtype, context_name):
    """Return a cached ``GpuAllocEmpty`` Op for ``(dtype, context_name)``.

    Memoizes one ``GpuAllocEmpty`` instance per (dtype, context) pair so
    repeated rewrites reuse the same Op object instead of allocating a new
    one each time.

    Parameters
    ----------
    dtype : str
        The dtype of the Op to build.
    context_name : str or None
        The GPU context name, forwarded to ``GpuAllocEmpty``.

    Returns
    -------
    GpuAllocEmpty
        The cached Op instance for this key.
    """
    # ``context_name`` (not ``ctx``) so keyword callers such as
    # ``gpu_alloc_empty(context_name=context_name, **op._props_dict())``
    # do not raise TypeError.
    key = (dtype, context_name)
    op = gpu_alloc_empty.cache.get(key)
    if op is None:
        op = GpuAllocEmpty(dtype, context_name)
        gpu_alloc_empty.cache[key] = op
    return op
# Module-level memo table; keyed by (dtype, context_name).
gpu_alloc_empty.cache = {}
class GraphToGPUDB(DB): class GraphToGPUDB(DB):
""" """
Retrieves the list local optimizers based on the optimizer flag's value Retrieves the list local optimizers based on the optimizer flag's value
...@@ -456,7 +448,7 @@ def local_gpuaalloc(op, context_name, inputs): ...@@ -456,7 +448,7 @@ def local_gpuaalloc(op, context_name, inputs):
def local_gpuaallocempty(op, context_name, inputs): def local_gpuaallocempty(op, context_name, inputs):
# We use _props_dict() to make sure that the GPU op know all the # We use _props_dict() to make sure that the GPU op know all the
# CPU op props. # CPU op props.
return GpuAllocEmpty(context_name=context_name, return gpu_alloc_empty(context_name=context_name,
**op._props_dict())(*inputs) **op._props_dict())(*inputs)
...@@ -975,7 +967,7 @@ def local_gpua_hgemm(op, context_name, inputs): ...@@ -975,7 +967,7 @@ def local_gpua_hgemm(op, context_name, inputs):
if (A.ndim == 2 and B.ndim == 2 and if (A.ndim == 2 and B.ndim == 2 and
A.dtype == 'float16' and B.dtype == 'float16'): A.dtype == 'float16' and B.dtype == 'float16'):
fgraph = inputs[0].fgraph fgraph = inputs[0].fgraph
C = GpuAllocEmpty(dtype='float16', context_name=context_name)( C = gpu_alloc_empty(dtype='float16', context_name=context_name)(
shape_i(A, 0, fgraph), shape_i(A, 0, fgraph),
shape_i(B, 1, fgraph)) shape_i(B, 1, fgraph))
return gpugemm_no_inplace(C, 1.0, A, B, 0.0) return gpugemm_no_inplace(C, 1.0, A, B, 0.0)
...@@ -1024,7 +1016,7 @@ def local_gpua_dot22scalar(op, context_name, inputs): ...@@ -1024,7 +1016,7 @@ def local_gpua_dot22scalar(op, context_name, inputs):
x, y, a = inputs x, y, a = inputs
x = as_gpuarray_variable(x, context_name) x = as_gpuarray_variable(x, context_name)
y = as_gpuarray_variable(y, context_name) y = as_gpuarray_variable(y, context_name)
z = GpuAllocEmpty(x.dtype, context_name)(x.shape[0], y.shape[1]) z = gpu_alloc_empty(x.dtype, context_name)(x.shape[0], y.shape[1])
return [gpugemm_no_inplace(z, a, x, y, 0)] return [gpugemm_no_inplace(z, a, x, y, 0)]
......
...@@ -8,7 +8,7 @@ from theano.gof import local_optimizer ...@@ -8,7 +8,7 @@ from theano.gof import local_optimizer
from theano.tensor import (DimShuffle, get_scalar_constant_value, from theano.tensor import (DimShuffle, get_scalar_constant_value,
NotScalarConstantError) NotScalarConstantError)
from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty, gpu_alloc_empty
from .elemwise import GpuDimShuffle, GpuElemwise from .elemwise import GpuDimShuffle, GpuElemwise
_one = scal.constant(numpy.asarray(1.0, dtype='float32')) _one = scal.constant(numpy.asarray(1.0, dtype='float32'))
...@@ -324,7 +324,7 @@ def inplace_allocempty(op, idx): ...@@ -324,7 +324,7 @@ def inplace_allocempty(op, idx):
if (alloc.owner and if (alloc.owner and
isinstance(alloc.owner.op, GpuAllocEmpty) and isinstance(alloc.owner.op, GpuAllocEmpty) and
len(alloc.clients) > 1): len(alloc.clients) > 1):
alloc_op = GpuAllocEmpty(alloc.owner.op.dtype, alloc_op = gpu_alloc_empty(alloc.owner.op.dtype,
alloc.owner.op.context_name) alloc.owner.op.context_name)
inputs[idx] = alloc_op(*alloc.owner.inputs) inputs[idx] = alloc_op(*alloc.owner.inputs)
return maker(node, inputs) return maker(node, inputs)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论