Add connection_pattern and output variable to spatial transformer Op

上级 ccbd7937
...@@ -2901,18 +2901,16 @@ class GpuDnnTransformer(DnnBase): ...@@ -2901,18 +2901,16 @@ class GpuDnnTransformer(DnnBase):
DnnBase.__init__(self, ["c_code/dnn_sptf.c"], "dnn_sptf") DnnBase.__init__(self, ["c_code/dnn_sptf.c"], "dnn_sptf")
self.dtype = dtype self.dtype = dtype
def make_node(self, img, theta, grid_dims, desc, alpha=None, beta=None): def make_node(self, img, theta, output, grid_dims, desc, alpha=None, beta=None):
assert theta.dtype in ('float16', 'float32', 'float64') assert theta.dtype in ('float16', 'float32', 'float64')
context_name = infer_context_name(img) context_name = infer_context_name(img)
theta = gpu_contiguous(as_gpuarray_variable(theta, context_name)) theta = as_gpuarray_variable(theta, context_name)
img = as_gpuarray_variable(img, context_name) img = as_gpuarray_variable(img, context_name)
grid_dims = as_tensor_variable(grid_dims) grid_dims = as_tensor_variable(grid_dims)
output = as_gpuarray_variable(output, context_name)
output = GpuArrayType(dtype=self.dtype,
broadcastable=img.type.ndim * (False,),
context_name=context_name)()
grid = GpuArrayType(dtype=self.dtype, grid = GpuArrayType(dtype=self.dtype,
broadcastable=img.type.ndim * (False,), broadcastable=img.type.ndim * (False,),
context_name=context_name)() context_name=context_name)()
...@@ -2920,6 +2918,9 @@ class GpuDnnTransformer(DnnBase): ...@@ -2920,6 +2918,9 @@ class GpuDnnTransformer(DnnBase):
if img.type.ndim != 4: if img.type.ndim != 4:
raise TypeError('img must be a 4D tensor') raise TypeError('img must be a 4D tensor')
if output.type.ndim != 4:
raise TypeError('output must be a 4D tensor')
if (not isinstance(desc.type, CDataType) or if (not isinstance(desc.type, CDataType) or
desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'): desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'):
raise ValueError('desc must be cudnnSpatialTransformerDescriptor_t') raise ValueError('desc must be cudnnSpatialTransformerDescriptor_t')
...@@ -2928,7 +2929,7 @@ class GpuDnnTransformer(DnnBase): ...@@ -2928,7 +2929,7 @@ class GpuDnnTransformer(DnnBase):
beta = ensure_dt(beta, _zero, 'beta', img.dtype) beta = ensure_dt(beta, _zero, 'beta', img.dtype)
inputs = [img, theta, grid_dims, desc, alpha, beta] inputs = [img, theta, grid_dims, desc, alpha, beta]
outputs = [output, grid] outputs = [output.type(), grid]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def L_op(self, inputs, outputs, grads): def L_op(self, inputs, outputs, grads):
...@@ -2943,10 +2944,14 @@ class GpuDnnTransformer(DnnBase): ...@@ -2943,10 +2944,14 @@ class GpuDnnTransformer(DnnBase):
return [dimg, dtheta, return [dimg, dtheta,
theano.gradient.grad_undefined(self, 2, grid_dims), theano.gradient.grad_undefined(self, 2, grid_dims),
theano.gradient.grad_undefined(self, 3, desc), DisconnectedType()(),
theano.gradient.grad_undefined(self, 4, alpha), theano.gradient.grad_undefined(self, 4, alpha),
theano.gradient.grad_undefined(self, 5, beta)] theano.gradient.grad_undefined(self, 5, beta)]
# Connection pattern of the spatial transformer op: a nested list with one
# row per input and one entry per output within each row.
def connection_pattern(self, node):
# Rows follow the order of the `inputs` list built in make_node
# (img, theta, grid_dims, desc, alpha, beta); the two entries in each row
# correspond to the two outputs (output, grid).
# Only the row at index 3 (desc) is all zeros: neither output is connected
# to the descriptor, which matches L_op returning DisconnectedType()() for
# that input.
return [[1, 1], [1, 1], [1, 1], [0, 0], [1, 1], [1, 1]]
class GpuDnnTransformerGradI(DnnBase): class GpuDnnTransformerGradI(DnnBase):
""" """
...@@ -2987,6 +2992,10 @@ class GpuDnnTransformerGradI(DnnBase): ...@@ -2987,6 +2992,10 @@ class GpuDnnTransformerGradI(DnnBase):
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
# Connection pattern of the image-gradient op: eight single-entry rows.
def connection_pattern(self, node):
# Only the row at index 5 is 0; every other row is marked as connected.
# The original note states the op is not connected to desc.
# NOTE(review): presumably index 5 is the position of desc in this op's
# input list; that list is not visible in this hunk, so confirm against
# make_node.
return [[1], [1], [1], [1], [1], [0], [1], [1]]
class GpuDnnTransformerGradT(DnnBase): class GpuDnnTransformerGradT(DnnBase):
""" """
...@@ -3011,6 +3020,10 @@ class GpuDnnTransformerGradT(DnnBase): ...@@ -3011,6 +3020,10 @@ class GpuDnnTransformerGradT(DnnBase):
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
# Connection pattern of the theta-gradient op: two single-entry rows.
def connection_pattern(self, node):
# The first row is marked as connected and the second row is 0.
# The original note states the op is not connected to desc.
# NOTE(review): presumably the second input is desc; this op's input list
# is not visible in this hunk, so confirm against make_node.
return [[1], [0]]
def dnn_spatialtf(inp, theta, scale_width=1, scale_height=1, alpha=None, beta=None, def dnn_spatialtf(inp, theta, scale_width=1, scale_height=1, alpha=None, beta=None,
dtype=theano.config.floatX): dtype=theano.config.floatX):
...@@ -3053,20 +3066,22 @@ def dnn_spatialtf(inp, theta, scale_width=1, scale_height=1, alpha=None, beta=No ...@@ -3053,20 +3066,22 @@ def dnn_spatialtf(inp, theta, scale_width=1, scale_height=1, alpha=None, beta=No
# Theta is an array of transformation matrices and must have shape: (num_images, 2, 3) # Theta is an array of transformation matrices and must have shape: (num_images, 2, 3)
assert theta.ndim == 3 assert theta.ndim == 3
grid_dims = (as_scalar(inp.shape[0]).astype('int32'), grid_dims = (inp.shape[0], inp.shape[1],
as_scalar(inp.shape[1]).astype('int32'), inp.shape[2] * scale_height,
as_scalar(inp.shape[2] * scale_height).astype('int32'), inp.shape[3] * scale_width)
as_scalar(inp.shape[3] * scale_width).astype('int32')) grid_dims = tuple(map(lambda v: as_scalar(v).astype('int32'), list(grid_dims)))
inp = gpu_contiguous(inp) inp = gpu_contiguous(inp)
theta = gpu_contiguous(theta) theta = gpu_contiguous(theta)
output = GpuAllocEmpty(inp.dtype, infer_context_name(inp))(*grid_dims)
# Create spatial transformer descriptor # Create spatial transformer descriptor
desc = GpuDnnTransformerDescriptor(dtype)(grid_dims) desc = GpuDnnTransformerDescriptor(dtype)(grid_dims)
# Create grid dimensions variable # Create grid dimensions variable
grid_dims_var = as_tensor_variable(grid_dims) grid_dims_var = as_tensor_variable(grid_dims)
# Setup spatial transformer # Setup spatial transformer
transformer = GpuDnnTransformer(dtype)(inp, theta, grid_dims_var, desc, alpha, beta) transformer = GpuDnnTransformer(dtype)(inp, theta, output, grid_dims_var, desc, alpha, beta)
return transformer return transformer
......
...@@ -2457,9 +2457,12 @@ def test_dnn_spatialtf(): ...@@ -2457,9 +2457,12 @@ def test_dnn_spatialtf():
st_dnn = dnn.dnn_spatialtf(t_img, t_theta, scale_height=scale_height, st_dnn = dnn.dnn_spatialtf(t_img, t_theta, scale_height=scale_height,
scale_width=scale_width) scale_width=scale_width)
st_dnn_func = theano.function([t_img, t_theta], [st_dnn]) st_dnn_func = theano.function([t_img, t_theta], st_dnn)
# Check if function graph contains the spatial transformer Op
topo = st_dnn_func.maker.fgraph.toposort()
assert len([n for n in topo if isinstance(n.op, dnn.GpuDnnTransformer)]) == 1
img_out_gpu, = st_dnn_func(img, transform) img_out_gpu = st_dnn_func(img, transform)
img_out = np.asarray(img_out_gpu) img_out = np.asarray(img_out_gpu)
t_dy = T.tensor4('dy') t_dy = T.tensor4('dy')
...@@ -2467,18 +2470,11 @@ def test_dnn_spatialtf(): ...@@ -2467,18 +2470,11 @@ def test_dnn_spatialtf():
grad_fn = theano.function([t_img, t_theta, t_dy], img_grad) grad_fn = theano.function([t_img, t_theta, t_dy], img_grad)
# dy contains the gradients of the subsequent layer in a neural net,
# which receives the transformed inputs, so dy must have the same shape
# as the transformed inputs.
dy_shp = (img.shape[0], img.shape[1], int(img.shape[2] * scale_height), dy_shp = (img.shape[0], img.shape[1], int(img.shape[2] * scale_height),
int(img.shape[3] * scale_width)) int(img.shape[3] * scale_width))
dy = -1 + 2 * np.random.randn(*dy_shp).astype(theano.config.floatX) dy = -1 + 2 * np.random.randn(*dy_shp).astype(theano.config.floatX)
grad_fn(img, transform, dy) grad_fn(img, transform, dy)
# Check if function graph contains the spatial transformer Ops
topo = st_dnn_func.maker.fgraph.toposort()
assert len([n for n in topo if isinstance(n.op, dnn.GpuDnnTransformer)]) == 1
# Setup CPU Op # Setup CPU Op
st_cpu = spatialtf_cpu(t_theta, t_img, scale_height, scale_width, 'nearest') st_cpu = spatialtf_cpu(t_theta, t_img, scale_height, scale_width, 'nearest')
st_cpu_func = theano.function([t_theta, t_img], [st_cpu], mode=mode_without_gpu) st_cpu_func = theano.function([t_theta, t_img], [st_cpu], mode=mode_without_gpu)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论