提交 345d9024 authored 作者: sentient07's avatar sentient07

Corrected a few import statements in the scan module and fixed errors with elemwise, fft and nerv

上级 3bd237f5
...@@ -374,11 +374,11 @@ if scikits_cuda_available: ...@@ -374,11 +374,11 @@ if scikits_cuda_available:
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([theano.tensor.fft.RFFTOp]) @op_lifter([theano.tensor.fft.RFFTOp])
@register_opt2([theano.tensor.fft.RFFTOp], 'fast_compile') @register_opt2([theano.tensor.fft.RFFTOp], 'fast_compile')
def local_gpua_curfft_op(node, context_name): def local_gpua_curfft_op(op, ctx_name, inputs, outputs):
return curfft_op return curfft_op
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([theano.tensor.fft.IRFFTOp]) @op_lifter([theano.tensor.fft.IRFFTOp])
@register_opt2([theano.tensor.fft.IRFFTOp], 'fast_compile') @register_opt2([theano.tensor.fft.IRFFTOp], 'fast_compile')
def local_gpua_cuirfft_op(node, context_name): def local_gpua_cuirfft_op(op, ctx_name, inputs, outputs):
return cuirfft_op return cuirfft_op
...@@ -471,6 +471,6 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -471,6 +471,6 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([Images2Neibs]) @op_lifter([Images2Neibs])
@register_opt2([Images2Neibs], 'fast_compile') @register_opt2([Images2Neibs], 'fast_compile')
def local_gpua_images2neibs(op, context_name, inputs): def local_gpua_images2neibs(op, context_name, inputs, outputs):
if op.mode in ['valid', 'ignore_borders', 'wrap_centered']: if op.mode in ['valid', 'ignore_borders', 'wrap_centered']:
return GpuImages2Neibs(op.mode) return GpuImages2Neibs(op.mode)
...@@ -451,7 +451,7 @@ def test_local_assert_no_cpu_op(): ...@@ -451,7 +451,7 @@ def test_local_assert_no_cpu_op():
out = theano.tensor.tanh(ms).dot(ms.T) out = theano.tensor.tanh(ms).dot(ms.T)
mode_local_assert = mode_with_gpu.including("assert_no_cpu_op") mode_local_assert = mode_with_gpu.including("assert_no_cpu_op")
mode_local_assert = mode_local_assert.excluding("local_gpu_elemwise") mode_local_assert = mode_local_assert.excluding("local_gpua_elemwise")
old = theano.config.assert_no_cpu_op old = theano.config.assert_no_cpu_op
old2 = theano.config.on_opt_error old2 = theano.config.on_opt_error
......
...@@ -152,12 +152,14 @@ def traverse(out, x, x_copy, d, visited=None): ...@@ -152,12 +152,14 @@ def traverse(out, x, x_copy, d, visited=None):
return d return d
visited.add(out) visited.add(out)
from theano.sandbox import cuda from theano.sandbox import cuda
from theano.gpuarray.basic_ops import gpu_from_host from theano.gpuarray.basic_ops import gpu_from_host, host_from_gpu
from theano.gpuarray import pygpu_activated
from theano.gpuarray.type import GpuArrayType
if out == x: if out == x:
if isinstance(x.type, cuda.CudaNdarrayType): if isinstance(x.type, cuda.CudaNdarrayType):
d[out] = cuda.gpu_from_host(x_copy) d[out] = cuda.gpu_from_host(x_copy)
else: else:
assert isinstance(x.type, gpuarray.GpuArrayType) assert isinstance(x.type, GpuArrayType)
d[out] = gpu_from_host(x.type.context_name)(x_copy) d[out] = gpu_from_host(x.type.context_name)(x_copy)
return d return d
elif out.owner is None: elif out.owner is None:
...@@ -167,8 +169,8 @@ def traverse(out, x, x_copy, d, visited=None): ...@@ -167,8 +169,8 @@ def traverse(out, x, x_copy, d, visited=None):
out.owner.inputs == [x]): out.owner.inputs == [x]):
d[out] = tensor.as_tensor_variable(x_copy) d[out] = tensor.as_tensor_variable(x_copy)
return d return d
elif (gpuarray.pygpu_activated and elif (pygpu_activated and
out.owner.op == gpuarray.host_from_gpu and out.owner.op == host_from_gpu and
out.owner.inputs == [x]): out.owner.inputs == [x]):
d[out] = tensor.as_tensor_variable(x_copy) d[out] = tensor.as_tensor_variable(x_copy)
return d return d
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论