提交 cc9b423a authored 作者: Reyhane Askari

removed context_name set to None and fixed GpuAllocEmpty input order

上级 1d7b9bdb
@@ -954,7 +954,7 @@ class GpuAllocEmpty(HideC, AllocEmpty):
     _f16_ok = True
     params_type = gpu_context_type
-    def __init__(self, dtype, context_name=None):
+    def __init__(self, dtype, context_name):
         self.dtype = dtype
         self.context_name = context_name
......
@@ -966,7 +966,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
                    shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1,
                    shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1)
         out_shp = assert_conv_shape(out_shp)
-        out = GpuAllocEmpty(ctx_name, dtype=img.dtype)(*out_shp)
+        out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
         desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
                               conv_mode='cross', precision=precision)(out.shape)
         conv = GpuDnnConvGradW()(img, kerns, out, desc)
@@ -985,7 +985,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
                    shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1,
                    shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1)
         out_shp = assert_conv_shape(out_shp)
-        out = GpuAllocEmpty(ctx_name, dtype=img.dtype)(*out_shp)
+        out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
         desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
                               conv_mode=conv_mode, precision=precision)(kerns.shape)
         return GpuDnnConvGradI()(kerns, img, out, desc)
@@ -1006,7 +1006,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
                                   desc_op.border_mode,
                                   desc_op.subsample)
     out_shp = assert_conv_shape(out_shp)
-    out = GpuAllocEmpty(ctx_name, dtype=img.dtype)(*out_shp)
+    out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
     return GpuDnnConv(algo=algo)(img, kerns, out, desc)
@@ -1078,7 +1078,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
                    shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1,
                    shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1)
         out_shp = assert_conv_shape(out_shp)
-        out = GpuAllocEmpty(ctx_name, dtype=img.dtype)(*out_shp)
+        out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
         desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
                               conv_mode='cross', precision=precision)(out.shape)
         conv = GpuDnnConvGradW()(img, kerns, out, desc)
@@ -1098,7 +1098,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
                    shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1,
                    shape_i(img, 4, fgraph) + shape_i(kerns, 4, fgraph) - 1)
         out_shp = assert_conv_shape(out_shp)
-        out = GpuAllocEmpty(ctx_name, dtype=img.dtype)(*out_shp)
+        out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
         desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
                               conv_mode=conv_mode, precision=precision)(kerns.shape)
         return GpuDnnConvGradI()(kerns, img, out, desc)
@@ -1119,7 +1119,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
                                   desc_op.border_mode,
                                   desc_op.subsample)
     out_shp = assert_conv_shape(out_shp)
-    out = GpuAllocEmpty(ctx_name, dtype=img.dtype)(*out_shp)
+    out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
     return GpuDnnConv(algo=algo)(img, kerns, out, desc)
@@ -1151,7 +1151,6 @@ def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid',
     return dnn_gradweight(img, topgrad, kerns_shp, border_mode,
                           subsample, conv_mode, precision)
-
 def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
                   subsample=(1, 1), conv_mode='conv', precision=None):
     """
@@ -1180,7 +1179,6 @@ def dnn_gradinput3d(kerns, topgrad, img_shp, border_mode='valid',
     return dnn_gradinput(kerns, topgrad, img_shp, border_mode, subsample,
                          conv_mode, precision)
-
 class GpuDnnPoolDesc(Op):
     """
......
@@ -8,7 +8,7 @@ try:
 except ImportError:
     pass
-from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, GpuReshape)
+from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel, GpuReshape, infer_context_name)
 from .opt import register_opt, op_lifter, register_opt2
......
@@ -157,7 +157,7 @@ def local_gpua_dot_to_gemm16(op, ctx_name, inputs, outputs):
     B = inputs[1]
     if (A.ndim == 2 and B.ndim == 2 and
             A.dtype == 'float16' and B.dtype == 'float16'):
-        fgraph = inputs[0].fgraph
+        fgraph = getattr(outputs[0], 'fgraph', None)
         C = GpuAllocEmpty('float16', ctx_name)(
             shape_i(A, 0, fgraph), shape_i(B, 1, fgraph))
         return Gemm16()(C, 1.0, A, B, 0.0)
......
@@ -61,7 +61,7 @@ from .nnet import (gpu_crossentropy_softmax_1hot_with_bias_dx,
                    gpu_crossentropy_softmax_argmax_1hot_with_bias,
                    gpu_softmax_with_bias, gpu_softmax)
 from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda,
-                       GpuCAReduceCPY, gpu_ca_reduce_cuda, gpu_erfinv, gpu_erfcinv,
+                       GpuCAReduceCPY, gpu_erfinv, gpu_erfcinv,
                        max_inputs_to_GpuElemwise)
 from .subtensor import (GpuIncSubtensor, GpuSubtensor,
                         GpuAdvancedSubtensor,
@@ -614,7 +614,7 @@ def local_gpuaalloc(op, context_name, inputs, outputs):
 def local_gpua_alloc_empty(op, context_name, inputs, outputs):
     # We use _props_dict() to make sure that the GPU op know all the
     # CPU op props.
-    return GpuAllocEmpty(**op._props_dict())(*inputs)
+    return GpuAllocEmpty(context_name=context_name, **op._props_dict())(*inputs)

 @register_opt()
......
@@ -152,7 +152,7 @@ def traverse(out, x, x_copy, d, visited=None):
         return d
     visited.add(out)
     from theano.sandbox import cuda
-    from theano.gpuarray.basic_ops import gpu_from_host, host_from_gpu
+    from theano.gpuarray.basic_ops import GpuFromHost, host_from_gpu
     from theano.gpuarray import pygpu_activated
     from theano.gpuarray.type import GpuArrayType
     if out == x:
@@ -160,7 +160,7 @@ def traverse(out, x, x_copy, d, visited=None):
             d[out] = cuda.gpu_from_host(x_copy)
         else:
             assert isinstance(x.type, GpuArrayType)
-            d[out] = gpu_from_host(x.type.context_name)(x_copy)
+            d[out] = GpuFromHost(x.type.context_name)(x_copy)
         return d
     elif out.owner is None:
         return d
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论