提交 c28d13a0 authored 作者: Razvan Pascanu's avatar Razvan Pascanu

Added new mechanism to construct output types that avoids importing Cuda.

The way it works: __init__ gets a lambda function that constructs a Tensor Type. By default it constructs Tensor Types, but moving to the GPU will replace it with a CudaNdarray constructor.
上级 54e1d21e
...@@ -28,7 +28,7 @@ from theano import gof ...@@ -28,7 +28,7 @@ from theano import gof
from theano.tensor import TensorType from theano.tensor import TensorType
from theano import tensor from theano import tensor
from theano.tensor.opt import Shape_i from theano.tensor.opt import Shape_i
from theano.sandbox import cuda #from theano.sandbox import cuda
from theano.compile.profiling import ScanProfileStats from theano.compile.profiling import ScanProfileStats
import scan_utils import scan_utils
...@@ -46,7 +46,9 @@ class Scan(Op): ...@@ -46,7 +46,9 @@ class Scan(Op):
def __init__( self def __init__( self
, inputs , inputs
, outputs , outputs
, info ): , info
, typeConstructor = None
):
""" """
:param inputs: inputs of the inner function of scan :param inputs: inputs of the inner function of scan
:param outputs: outputs of the inner function of scan :param outputs: outputs of the inner function of scan
...@@ -66,39 +68,16 @@ class Scan(Op): ...@@ -66,39 +68,16 @@ class Scan(Op):
self.output_types = [] self.output_types = []
idx = 0 idx = 0
jdx = 0 jdx = 0
if self.gpu: if typeConstructor is None:
# mit_mot typeConstructor = lambda broadcastable, dtype: TensorType(
while idx < self.n_mit_mot_outs: broadcastable = broadcastable, dtype = dtype)
# Not that for mit_mot there are several output slices per
# output sequence
o = outputs[idx]
self.output_types.append(
cuda.CudaNdarrayType(
broadcastable = (False,) + o.type.broadcastable))
idx += len(self.mit_mot_out_slices[jdx])
jdx += 1
# mit_sot / sit_sot / nit_sot
end = idx + self.n_mit_sot + self.n_sit_sot + self.n_nit_sot
for o in outputs[idx:end]:
self.output_types.append(
cuda.CudaNdarrayType( broadcastable = (False,) +
o.type.broadcastable))
# shared outputs
for o in outputs[end:]:
if isinstance(o.type, TensorType):
self.output_types.append(cuda.CudaNdarrayType(
broadcastable = o.type.broadcastable))
else:
self.output_types.append( o.type )
else:
while idx < self.n_mit_mot_outs: while idx < self.n_mit_mot_outs:
# Not that for mit_mot there are several output slices per # Not that for mit_mot there are several output slices per
# output sequence # output sequence
o = outputs[idx] o = outputs[idx]
self.output_types.append( self.output_types.append(
TensorType( typeConstructor( broadcastable = (False,) + o.type.broadcastable
broadcastable = (False,) + o.type.broadcastable
, dtype = o.type.dtype) , dtype = o.type.dtype)
) )
idx += len(self.mit_mot_out_slices[jdx]) idx += len(self.mit_mot_out_slices[jdx])
...@@ -108,17 +87,11 @@ class Scan(Op): ...@@ -108,17 +87,11 @@ class Scan(Op):
end = idx + self.n_mit_sot + self.n_sit_sot + self.n_nit_sot end = idx + self.n_mit_sot + self.n_sit_sot + self.n_nit_sot
for o in outputs[idx:end]: for o in outputs[idx:end]:
self.output_types.append( self.output_types.append(
TensorType( typeConstructor(
broadcastable = (False,) + o.type.broadcastable broadcastable = (False,) + o.type.broadcastable
, dtype = o.type.dtype )) , dtype = o.type.dtype ))
# shared outputs + possibly the ending condition # shared outputs + possibly the ending condition
for o in outputs[end:]: for o in outputs[end:]:
if cuda.cuda_available and isinstance(o.type,
cuda.CudaNdarrayType):
self.output_types.append( TensorType(
broadcastable = o.type.broadcastable
, dtype = theano.config.floatX) )
else:
self.output_types.append( o.type ) self.output_types.append( o.type )
if self.as_while: if self.as_while:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论