提交 09928b08 作者: Arnaud Bergeron

Fixed conv.py for type context.

上级 f8de0e04
......@@ -1465,7 +1465,7 @@ PyGpuArray_Conv(PyGpuArrayObject *img, PyGpuArrayObject * kern,
rval = pygpu_zeros(4, out_dim,
img->ga.typecode, GA_C_ORDER,
pygpu_default_context(), Py_None);
img->ctx, Py_None);
//rval might be null
}
if ((rval==NULL)
......
import copy
import os
import theano
from theano import config, gof
from theano import gof
try:
from pygpu import gpuarray
......@@ -10,7 +9,8 @@ except ImportError:
pass
from .type import GpuArrayType
from .basic_ops import as_gpuarray_variable, GpuKernelBase, Kernel
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel,
infer_context_name)
from theano.gof import utils
......@@ -58,6 +58,9 @@ class GpuConv(GpuKernelBase, gof.Op):
them.
"""
__props__ = ('border_mode', 'subsample', 'logical_img_hw',
'logical_kern_hw', 'logical_kern_align_top', 'version',
'verbose', 'kshp', 'imshp', 'max_threads_dim0')
@staticmethod
def logical_output_shape_2d(imshp, kshp, mode):
......@@ -67,20 +70,13 @@ class GpuConv(GpuKernelBase, gof.Op):
return imshp[0] + kshp[0] - 1, imshp[1] + kshp[1] - 1
raise ValueError(mode)
def __init__(self, border_mode,
subsample=(1, 1),
logical_img_hw=None,
logical_kern_hw=None,
def __init__(self, border_mode, subsample=(1, 1),
logical_img_hw=None, logical_kern_hw=None,
logical_kern_align_top=True,
version=-1,
direction_hint=None,
verbose=0,
kshp=None,
imshp=None,
version=-1, direction_hint=None,
verbose=0, kshp=None, imshp=None,
max_threads_dim0=None,
nkern=None,
bsize=None,
fft_opt=True):
nkern=None, bsize=None, fft_opt=True):
self.border_mode = border_mode
self.subsample = subsample
if logical_img_hw is not None:
......@@ -108,19 +104,6 @@ class GpuConv(GpuKernelBase, gof.Op):
self.bsize = bsize
self.fft_opt = fft_opt
def __eq__(self, other):
    """Return True when `other` has the same type and configuration.

    Two ops compare equal only if their types match and every attribute
    listed below compares equal, checked pairwise in this order.
    """
    if not (type(self) == type(other)):
        return False
    compared_attrs = (
        'border_mode',
        'subsample',
        'logical_img_hw',
        'logical_kern_hw',
        'logical_kern_align_top',
        'version',
        'verbose',
        'kshp',
        'imshp',
        'max_threads_dim0',
    )
    for attr in compared_attrs:
        # Stop at the first attribute that differs.
        if not (getattr(self, attr) == getattr(other, attr)):
            return False
    return True
def __setstate__(self, d):
self.__dict__.update(d)
if not hasattr(self, "imshp"):
......@@ -136,32 +119,6 @@ class GpuConv(GpuKernelBase, gof.Op):
if not hasattr(self, "fft_opt"):
self.fft_opt = True
def __hash__(self):
    """Combine the type and configuration attributes into one hash by XOR.

    `version` is mixed in as its raw value rather than through `hash()`:
    in Python hash(-1) == -2 and hash(-2) == -2, so hashing it would make
    versions -1 and -2 indistinguishable.
    """
    digest = hash(type(self))
    for attr in ('border_mode', 'subsample', 'logical_img_hw',
                 'logical_kern_hw', 'logical_kern_align_top'):
        digest ^= hash(getattr(self, attr))
    # Raw value on purpose, see the docstring.
    digest ^= self.version
    for attr in ('verbose', 'kshp', 'imshp', 'max_threads_dim0'):
        digest ^= hash(getattr(self, attr))
    return digest
def __str__(self):
    """Render the op as ``ClassName{border_mode, subsample, ...}``.

    The fields shown, in order, are: border_mode, subsample,
    logical_img_hw, logical_kern_hw, logical_kern_align_top, imshp, kshp.
    """
    stringified = tuple(
        str(value)
        for value in (
            self.subsample,
            self.logical_img_hw,
            self.logical_kern_hw,
            self.logical_kern_align_top,
            self.imshp,
            self.kshp,
        )
    )
    # border_mode is handed to the format operator unconverted.
    fmt_args = (self.__class__.__name__, self.border_mode) + stringified
    return '%s{%s, %s, %s, %s, %s, %s, %s}' % fmt_args
def make_node(self, img, kern):
if img.dtype != "float32" or kern.dtype != "float32":
raise NotImplementedError("GpuConv currently only work"
......@@ -170,13 +127,17 @@ class GpuConv(GpuKernelBase, gof.Op):
raise TypeError('img must be 4D tensor')
if kern.type.ndim != 4:
raise TypeError('kern must be 4D tensor')
img = as_gpuarray_variable(img)
kern = as_gpuarray_variable(kern)
ctx_name = infer_context_name(img, kern)
img = as_gpuarray_variable(img, ctx_name)
kern = as_gpuarray_variable(kern, ctx_name)
broadcastable = [img.type.broadcastable[0], kern.type.broadcastable[0],
False, False]
out = GpuArrayType(img.dtype, broadcastable)()
out = GpuArrayType(img.dtype, broadcastable, context_name=ctx_name)()
return gof.Apply(self, [img, kern], [out])
def get_context(self, node):
    """Return the context carried by the type of the node's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
def flops(self, inputs, outputs):
"""
Useful with the hack in profilemode to print the MFlops.
......@@ -202,22 +163,8 @@ class GpuConv(GpuKernelBase, gof.Op):
def make_thunk(self, node, storage_map, compute_map, no_recycling):
node_ = copy.copy(node)
assert node.op is node_.op
if config.gpuarray.sync:
raise NotImplementedError("GpuConv do not implement gpuarray.sync Theano flag")
if node_.op.max_threads_dim0 is None:
cuda = theano.sandbox.cuda
device_id = cuda.use.device_number
if device_id is None:
cuda.use("gpu",
force=False,
default_to_move_computation_to_gpu=False,
move_shared_float32_to_gpu=False,
enable_cuda=False,
test_driver=True)
device_id = cuda.use.device_number
cuda_ndarray = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray
prop = cuda_ndarray.device_properties(device_id)
node_.op.max_threads_dim0 = prop['maxThreadsDim0']
node_.op.max_threads_dim0 = node._inputs[0].type.context.maxlsize
return super(GpuConv, node_.op).make_thunk(node_, storage_map,
compute_map, no_recycling)
......@@ -235,6 +182,8 @@ class GpuConv(GpuKernelBase, gof.Op):
return (0, 22)
def c_code(self, node, nodename, inp, out_, sub):
if node.inputs[0].type.context.kind != "cuda":
raise NotImplementedError("GpuConv only works for cuda devices")
img, kern = inp
out, = out_
dx = self.subsample[0]
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论