提交 c28d13a0 authored 作者: Razvan Pascanu's avatar Razvan Pascanu

Added a new mechanism for constructing output types that avoids importing Cuda.

The way it works: __init__ receives a lambda function that constructs a Tensor Type. By default it constructs TensorTypes, but moving the op to the GPU will replace it with a CudaNdarray constructor.
上级 54e1d21e
...@@ -28,7 +28,7 @@ from theano import gof ...@@ -28,7 +28,7 @@ from theano import gof
from theano.tensor import TensorType from theano.tensor import TensorType
from theano import tensor from theano import tensor
from theano.tensor.opt import Shape_i from theano.tensor.opt import Shape_i
from theano.sandbox import cuda #from theano.sandbox import cuda
from theano.compile.profiling import ScanProfileStats from theano.compile.profiling import ScanProfileStats
import scan_utils import scan_utils
...@@ -46,7 +46,9 @@ class Scan(Op): ...@@ -46,7 +46,9 @@ class Scan(Op):
def __init__( self def __init__( self
, inputs , inputs
, outputs , outputs
, info ): , info
, typeConstructor = None
):
""" """
:param inputs: inputs of the inner function of scan :param inputs: inputs of the inner function of scan
:param outputs: outputs of the inner function of scan :param outputs: outputs of the inner function of scan
...@@ -66,60 +68,31 @@ class Scan(Op): ...@@ -66,60 +68,31 @@ class Scan(Op):
self.output_types = [] self.output_types = []
idx = 0 idx = 0
jdx = 0 jdx = 0
if self.gpu: if typeConstructor is None:
# mit_mot typeConstructor = lambda broadcastable, dtype: TensorType(
while idx < self.n_mit_mot_outs: broadcastable = broadcastable, dtype = dtype)
# Not that for mit_mot there are several output slices per
# output sequence while idx < self.n_mit_mot_outs:
o = outputs[idx] # Not that for mit_mot there are several output slices per
self.output_types.append( # output sequence
cuda.CudaNdarrayType( o = outputs[idx]
broadcastable = (False,) + o.type.broadcastable)) self.output_types.append(
idx += len(self.mit_mot_out_slices[jdx]) typeConstructor( broadcastable = (False,) + o.type.broadcastable
jdx += 1 , dtype = o.type.dtype)
)
# mit_sot / sit_sot / nit_sot idx += len(self.mit_mot_out_slices[jdx])
end = idx + self.n_mit_sot + self.n_sit_sot + self.n_nit_sot jdx += 1
for o in outputs[idx:end]:
self.output_types.append( # mit_sot / sit_sot / nit_sot
cuda.CudaNdarrayType( broadcastable = (False,) + end = idx + self.n_mit_sot + self.n_sit_sot + self.n_nit_sot
o.type.broadcastable)) for o in outputs[idx:end]:
# shared outputs self.output_types.append(
for o in outputs[end:]: typeConstructor(
if isinstance(o.type, TensorType): broadcastable = (False,) + o.type.broadcastable
self.output_types.append(cuda.CudaNdarrayType( , dtype = o.type.dtype ))
broadcastable = o.type.broadcastable)) # shared outputs + possibly the ending condition
else: for o in outputs[end:]:
self.output_types.append( o.type ) self.output_types.append( o.type )
else:
while idx < self.n_mit_mot_outs:
# Not that for mit_mot there are several output slices per
# output sequence
o = outputs[idx]
self.output_types.append(
TensorType(
broadcastable = (False,) + o.type.broadcastable
, dtype = o.type.dtype)
)
idx += len(self.mit_mot_out_slices[jdx])
jdx += 1
# mit_sot / sit_sot / nit_sot
end = idx + self.n_mit_sot + self.n_sit_sot + self.n_nit_sot
for o in outputs[idx:end]:
self.output_types.append(
TensorType(
broadcastable = (False,) + o.type.broadcastable
, dtype = o.type.dtype ))
# shared outputs + possibly the ending condition
for o in outputs[end:]:
if cuda.cuda_available and isinstance(o.type,
cuda.CudaNdarrayType):
self.output_types.append( TensorType(
broadcastable = o.type.broadcastable
, dtype = theano.config.floatX) )
else:
self.output_types.append( o.type )
if self.as_while: if self.as_while:
self.output_types = self.output_types[:-1] self.output_types = self.output_types[:-1]
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论