提交 14539bc2 作者: Arnaud Bergeron

Fix test_elemwise and make it pass.

上级 3372bb22
...@@ -37,7 +37,7 @@ def make_argument(v, name): ...@@ -37,7 +37,7 @@ def make_argument(v, name):
return ArrayArg(numpy.dtype(v.type.dtype), name) return ArrayArg(numpy.dtype(v.type.dtype), name)
def ensure_allocated(storage, shape, dtype): def ensure_allocated(storage, shape, dtype, ctx):
odat = storage[0] odat = storage[0]
if odat is not None: if odat is not None:
if odat.shape != shape: if odat.shape != shape:
...@@ -45,7 +45,7 @@ def ensure_allocated(storage, shape, dtype): ...@@ -45,7 +45,7 @@ def ensure_allocated(storage, shape, dtype):
# we have to allocate output storage. # we have to allocate output storage.
odat = None odat = None
if odat is None: if odat is None:
odat = pygpu.empty(shape, dtype=dtype) odat = pygpu.empty(shape, dtype=dtype, context=ctx)
storage[0] = odat storage[0] = odat
return odat return odat
...@@ -401,7 +401,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise): ...@@ -401,7 +401,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
""" % locals() """ % locals()
return str(code) return str(code)
def perform(self, node, inputs, output_storage): def perform(self, node, inputs, output_storage, ctx):
# Try to reuse the kernel from a previous call to hopefully # Try to reuse the kernel from a previous call to hopefully
# avoid recompiling # avoid recompiling
if not hasattr(node, '_cache_elemwise_k'): if not hasattr(node, '_cache_elemwise_k'):
...@@ -422,7 +422,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise): ...@@ -422,7 +422,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
if n in self.inplace_pattern: if n in self.inplace_pattern:
stor[0] = inputs[self.inplace_pattern[n]] stor[0] = inputs[self.inplace_pattern[n]]
else: else:
args.append(ensure_allocated(stor, out_shape, out.type.dtype)) args.append(ensure_allocated(stor, out_shape, out.type.dtype, ctx))
node._cache_elemwise_k(*args, broadcast=True) node._cache_elemwise_k(*args, broadcast=True)
if config.gpuarray.sync: if config.gpuarray.sync:
...@@ -2633,7 +2633,6 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2633,7 +2633,6 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
Too slow for now as it only have a python interface. Too slow for now as it only have a python interface.
""" """
def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None): def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None):
if not hasattr(scalar_op, 'identity'): if not hasattr(scalar_op, 'identity'):
raise ValueError("No identity on scalar op") raise ValueError("No identity on scalar op")
...@@ -2647,10 +2646,12 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2647,10 +2646,12 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
return "GpuReduce{%s}%s" % (self.scalar_op, ax) return "GpuReduce{%s}%s" % (self.scalar_op, ax)
def make_node(self, input): def make_node(self, input):
ctx_name = infer_context_name(input)
res = CAReduceDtype.make_node(self, input) res = CAReduceDtype.make_node(self, input)
input = as_gpuarray_variable(input) input = as_gpuarray_variable(input, ctx_name)
otype = GpuArrayType(dtype=res.outputs[0].dtype, otype = GpuArrayType(dtype=res.outputs[0].dtype,
broadcastable=res.outputs[0].broadcastable) broadcastable=res.outputs[0].broadcastable,
context_name=ctx_name)
if res.op.axis is not None: if res.op.axis is not None:
redux = [] redux = []
...@@ -2662,6 +2663,9 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2662,6 +2663,9 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
return Apply(res.op, [input], [otype()]) return Apply(res.op, [input], [otype()])
def get_context(self, node):
return node.outputs[0].type.context
def make_thunk(self, node, storage_map, compute_map, no_recycling): def make_thunk(self, node, storage_map, compute_map, no_recycling):
# cache the kernel object # cache the kernel object
self.get_kernel_cache(node) self.get_kernel_cache(node)
...@@ -2887,7 +2891,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2887,7 +2891,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
arguments=[make_argument(node.inputs[0], 'a')], arguments=[make_argument(node.inputs[0], 'a')],
init_nd=node.inputs[0].ndim) init_nd=node.inputs[0].ndim)
def perform(self, node, inp, out): def perform(self, node, inp, out, ctx):
input, = inp input, = inp
output, = out output, = out
...@@ -2901,6 +2905,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2901,6 +2905,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
copy=False, dtype=node.outputs[0].type.dtype) copy=False, dtype=node.outputs[0].type.dtype)
else: else:
output[0] = pygpu.gpuarray.array(input, copy=True, output[0] = pygpu.gpuarray.array(input, copy=True,
dtype=node.outputs[0].type.dtype) dtype=node.outputs[0].type.dtype,
context=ctx)
# To allow reloading old pickled files # To allow reloading old pickled files
GpuCAReduce = GpuCAReduceCPY GpuCAReduce = GpuCAReduceCPY
...@@ -4,20 +4,19 @@ import theano ...@@ -4,20 +4,19 @@ import theano
from theano import scalar, gof from theano import scalar, gof
from theano.tests.unittest_tools import SkipTest, assert_allclose from theano.tests.unittest_tools import SkipTest, assert_allclose
from theano.tensor.tests.test_elemwise import (test_Broadcast, test_DimShuffle, from theano.tensor.tests import test_elemwise
test_CAReduce, T_reduce_dtype)
from .config import mode_with_gpu from .config import mode_with_gpu, test_ctx_name
from .test_basic_ops import rand_gpuarray from .test_basic_ops import rand_gpuarray
from ..elemwise import (GpuElemwise, GpuDimShuffle, from ..elemwise import (GpuElemwise, GpuDimShuffle,
GpuCAReduceCuda, GpuCAReduceCPY) GpuCAReduceCuda, GpuCAReduceCPY)
from ..type import GpuArrayType from ..type import GpuArrayType, get_context
from pygpu import ndgpuarray as gpuarray from pygpu import ndgpuarray as gpuarray
# This is actually a test for GpuElemwise # This is actually a test for GpuElemwise
class test_gpu_Broadcast(test_Broadcast): class test_gpu_Broadcast(test_elemwise.test_Broadcast):
op = GpuElemwise op = GpuElemwise
type = GpuArrayType type = GpuArrayType
cop = GpuElemwise cop = GpuElemwise
...@@ -26,8 +25,7 @@ class test_gpu_Broadcast(test_Broadcast): ...@@ -26,8 +25,7 @@ class test_gpu_Broadcast(test_Broadcast):
linkers = [gof.PerformLinker, gof.CLinker] linkers = [gof.PerformLinker, gof.CLinker]
def setUp(self): def setUp(self):
dev = theano.sandbox.gpuarray.init_dev.device if get_context(test_ctx_name).kind != 'cuda':
if not dev.startswith('cuda'):
self.linkers = [gof.PerformLinker] self.linkers = [gof.PerformLinker]
def rand_val(self, shp): def rand_val(self, shp):
...@@ -37,14 +35,12 @@ class test_gpu_Broadcast(test_Broadcast): ...@@ -37,14 +35,12 @@ class test_gpu_Broadcast(test_Broadcast):
return rand_gpuarray(*shp, **dict(cls=gpuarray)) return rand_gpuarray(*shp, **dict(cls=gpuarray))
def test_c(self): def test_c(self):
dev = theano.sandbox.gpuarray.init_dev.device if get_context(test_ctx_name).kind != 'cuda':
if not dev.startswith('cuda'):
raise SkipTest("Cuda specific tests") raise SkipTest("Cuda specific tests")
super(test_gpu_Broadcast, self).test_c() super(test_gpu_Broadcast, self).test_c()
def test_c_inplace(self): def test_c_inplace(self):
dev = theano.sandbox.gpuarray.init_dev.device if get_context(test_ctx_name).kind != 'cuda':
if not dev.startswith('cuda'):
raise SkipTest("Cuda specific tests") raise SkipTest("Cuda specific tests")
super(test_gpu_Broadcast, self).test_c_inplace() super(test_gpu_Broadcast, self).test_c_inplace()
...@@ -52,8 +48,7 @@ class test_gpu_Broadcast(test_Broadcast): ...@@ -52,8 +48,7 @@ class test_gpu_Broadcast(test_Broadcast):
def test_elemwise_pow(): def test_elemwise_pow():
# Test that GpuElemwise(pow) can compile with any combination of integer # Test that GpuElemwise(pow) can compile with any combination of integer
# or float input dtype. # or float input dtype.
dev = theano.sandbox.gpuarray.init_dev.device if get_context(test_ctx_name).kind != 'cuda':
if not dev.startswith('cuda'):
raise SkipTest("Cuda specific tests") raise SkipTest("Cuda specific tests")
dtypes = ["uint8", "uint16", "uint32", "uint64", dtypes = ["uint8", "uint16", "uint32", "uint64",
...@@ -78,11 +73,11 @@ def test_elemwise_pow(): ...@@ -78,11 +73,11 @@ def test_elemwise_pow():
assert_allclose(out, expected_out) assert_allclose(out, expected_out)
class test_GpuDimShuffle(test_DimShuffle): class test_GpuDimShuffle(test_elemwise.test_DimShuffle):
op = GpuDimShuffle op = GpuDimShuffle
class test_GpuCAReduceCPY(test_CAReduce): class test_GpuCAReduceCPY(test_elemwise.test_CAReduce):
dtypes = ["float32"] dtypes = ["float32"]
bin_dtypes = ["uint8", "int8"] bin_dtypes = ["uint8", "int8"]
op = GpuCAReduceCPY op = GpuCAReduceCPY
...@@ -210,12 +205,11 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY): ...@@ -210,12 +205,11 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
def setUp(self): def setUp(self):
super(test_GpuCAReduceCuda, self).setUp() super(test_GpuCAReduceCuda, self).setUp()
dev = theano.sandbox.gpuarray.init_dev.device if get_context(test_ctx_name).kind != 'cuda':
if not dev.startswith('cuda'):
raise SkipTest("Cuda specific tests") raise SkipTest("Cuda specific tests")
class T_gpureduce_dtype(T_reduce_dtype): class T_gpureduce_dtype(test_elemwise.T_reduce_dtype):
mode = mode_with_gpu.excluding('local_cut_useless_reduce') mode = mode_with_gpu.excluding('local_cut_useless_reduce')
op = GpuCAReduceCuda op = GpuCAReduceCuda
# Currently we don't support reduction on 0 axis # Currently we don't support reduction on 0 axis
...@@ -226,8 +220,7 @@ class T_gpureduce_dtype(T_reduce_dtype): ...@@ -226,8 +220,7 @@ class T_gpureduce_dtype(T_reduce_dtype):
'float32', 'float64'] 'float32', 'float64']
def setUp(self): def setUp(self):
dev = theano.sandbox.gpuarray.init_dev.device if get_context(test_ctx_name).kind != 'cuda':
if not dev.startswith('cuda'):
raise SkipTest("Cuda specific tests") raise SkipTest("Cuda specific tests")
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论