提交 36ebd8c9 作者: Frederic

Don't try to generate the kernel in the make_node.

上级 32c89663
...@@ -4,12 +4,14 @@ from itertools import izip ...@@ -4,12 +4,14 @@ from itertools import izip
import numpy import numpy
from theano import Op, Apply, scalar, config from theano import Op, Apply, scalar, config
from theano.tensor.elemwise import Elemwise, DimShuffle, CAReduceDtype from theano.tensor.elemwise import Elemwise, DimShuffle, CAReduceDtype
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
try: try:
import pygpu import pygpu
from pygpu.tools import ScalarArg, ArrayArg from pygpu.tools import ScalarArg, ArrayArg
from pygpu.elemwise import ElemwiseKernel from pygpu.elemwise import ElemwiseKernel
from pygpu.reduction import ReductionKernel from pygpu.reduction import ReductionKernel
from pygpu.gpuarray import dtype_to_typecode
except ImportError: except ImportError:
pass pass
...@@ -63,12 +65,10 @@ class GpuElemwise(HideC, Elemwise): ...@@ -63,12 +65,10 @@ class GpuElemwise(HideC, Elemwise):
outputs = [GpuArrayType(broadcastable=o.type.broadcastable, outputs = [GpuArrayType(broadcastable=o.type.broadcastable,
dtype=o.type.dtype)() for o in res.outputs] dtype=o.type.dtype)() for o in res.outputs]
inputs = [as_gpuarray_variable(i) for i in inputs] inputs = [as_gpuarray_variable(i) for i in inputs]
res = Apply(self, inputs, outputs) node = Apply(self, inputs, outputs)
# Try to generate the kernel to catch SupportCodeErrors
k = self.generate_kernel(res, 'test')
return res
def generate_kernel(self, node, nodename): # Try to generate the kernel to catch SupportCodeErrors
try:
inps = [make_argument(i, 'i%d' % (n,)) for n, i in inps = [make_argument(i, 'i%d' % (n,)) for n, i in
enumerate(node.inputs)] enumerate(node.inputs)]
scal_ins = [scalar.Scalar(i.dtype) for i in node.inputs] scal_ins = [scalar.Scalar(i.dtype) for i in node.inputs]
...@@ -79,28 +79,37 @@ class GpuElemwise(HideC, Elemwise): ...@@ -79,28 +79,37 @@ class GpuElemwise(HideC, Elemwise):
fake_node = Apply(self.scalar_op, [i() for i in scal_ins], fake_node = Apply(self.scalar_op, [i() for i in scal_ins],
[o() for o in scal_out]) [o() for o in scal_out])
code = self.scalar_op.c_support_code_apply(fake_node, "test")
try:
code = self.scalar_op.c_support_code_apply(fake_node, nodename)
if code: if code:
raise SupportCodeError(code) raise SupportCodeError(code)
except MethodNotDefined: except MethodNotDefined:
pass pass
support_code = ""
try: try:
support_code = self.scalar_op.c_support_code() support_code = self.scalar_op.c_support_code()
except MethodNotDefined:
pass
if (support_code.strip() != "#define THEANO_MACRO_MOD(x,y) (x % y)" and if (support_code.strip() != "#define THEANO_MACRO_MOD(x,y) (x % y)" and
support_code.strip() != ""): support_code.strip() != ""):
# The macro is fine, the C++ struct is not. # The macro is fine, the C++ struct is not.
raise SupportCodeError(support_code) raise SupportCodeError(support_code)
except MethodNotDefined:
pass
return node
def generate_kernel(self, node, nodename):
inps = [make_argument(i, 'i%d' % (n,)) for n, i in
enumerate(node.inputs)]
scal_ins = [scalar.Scalar(i.dtype) for i in node.inputs]
outs = [make_argument(o, 'o%d' % (n,)) for n, o in
enumerate(node.outputs) if not n in self.inplace_pattern]
scal_out = [scalar.Scalar(o.dtype) for o in node.outputs]
fake_node = Apply(self.scalar_op, [i() for i in scal_ins],
[o() for o in scal_out])
scal_out = [] scal_out = []
oi = 0 oi = 0
for n in range(len(fake_node.outputs)): for n in range(len(node.outputs)):
if n in self.inplace_pattern: if n in self.inplace_pattern:
scal_out.append(inps[self.inplace_pattern[n]].name+'[i]') scal_out.append(inps[self.inplace_pattern[n]].name+'[i]')
else: else:
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论