提交 168c502e authored 作者: Pascal Lamblin's avatar Pascal Lamblin

merge

......@@ -182,14 +182,14 @@ class ProfileMode(Mode):
print 'local_time %fs (Time spent running thunks)'% local_time
if print_apply:
print 'Apply-wise summary: <% of local_time spent at this position> <total of local_time spent at this position> <nb_call> <Apply position> <Apply Op name>'
print 'Apply-wise summary: <% of local_time spent at this position> <cumulative seconds> <apply time> <time per call> <nb_call> <Apply position> <Apply Op name>'
atimes = [(t/local_time, t, (a[0], str(a[1])), apply_call[a]) for a, t in apply_time.items()]
atimes.sort()
atimes.reverse()
tot=0
for f,t,a,nb_call in atimes[:n_apply_to_print]:
tot+=t
print ' %4.1f%% %.3fs %.3fs %i %i %s' % (f*100, tot, t, nb_call, a[0], a[1])
print ' %4.1f%% %.3fs %.3fs %.2es %i %i %s' % (f*100, tot, t, t/nb_call,nb_call, a[0], a[1])
print ' ... (remaining %i Apply instances account for %.2f%%(%.2fs) of the runtime)'\
%(max(0, len(atimes)-n_apply_to_print),
sum(f for f, t, a, nb_call in atimes[n_apply_to_print:])*100,
......@@ -199,7 +199,7 @@ class ProfileMode(Mode):
if op_flops:
flops_msg=' <MFlops/s>'
print '\nHACK WARNING: we print the flops for some OP, but the logic don\' always work. You need to know the internal of Theano to make it work correctly. Otherwise don\'t use!'
print '\nOp-wise summary: < of local_time spent on this kind of Op> <cumulative seconds> <self seconds>%s <nb_call> <Op name>'%(flops_msg)
print '\nOp-wise summary: <%% of local_time spent on this kind of Op> <cumulative seconds> <self seconds> <time per call> %s <nb_call> <Op name>'%(flops_msg)
otimes = [(t/local_time, t, a, op_cimpl[a], op_call[a]) for a, t in op_time.items()]
otimes.sort()
......@@ -212,9 +212,9 @@ class ProfileMode(Mode):
else:
msg = ' '
if op_flops:
print ' %4.1f%% %.3fs %.3fs %s %7.1f %d %s' % (f*100, tot, t, msg, op_flops.get(a,-1), nb_call, a)
print ' %4.1f%% %.3fs %.3fs %.2es %s %7.1f %d %s' % (f*100, tot, t, t/nb_call, msg, op_flops.get(a,-1), nb_call, a)
else:
print ' %4.1f%% %.3fs %.3fs %s %d %s' % (f*100, tot, t, msg, nb_call, a)
print ' %4.1f%% %.3fs %.3fs %.2es %s %d %s' % (f*100, tot, t, t/nb_call, msg, nb_call, a)
print ' ... (remaining %i Ops account for %6.2f%%(%.2fs) of the runtime)'\
%(max(0, len(otimes)-n_ops_to_print),
sum(f for f, t, a, ci, nb_call in otimes[n_ops_to_print:])*100,
......@@ -231,7 +231,7 @@ class ProfileMode(Mode):
sop_c.setdefault(type(a),True)
sop_c[type(a)]=sop_c[type(a)] and op_cimpl[a]
sop_call[type(a)]=sop_call.get(type(a),0)+op_call[a]
print '\nSingle Op-wise summary: <% of local_time spent on this kind of Op> <cumulative seconds> <self seconds> <nb_call> <Op name>'
print '\nSingle Op-wise summary: <% of local_time spent on this kind of Op> <cumulative seconds> <self seconds> <time per call> <nb_call> <Op name>'
sotimes = [(t/local_time, t, a, sop_c[a], sop_call[a]) for a, t in sop_time.items()]
sotimes.sort()
sotimes.reverse()
......@@ -242,7 +242,7 @@ class ProfileMode(Mode):
msg = '*'
else:
msg = ' '
print ' %4.1f%% %.3fs %.3fs %s %d %s' % (f*100, tot, t, msg, nb_call, a)
print ' %4.1f%% %.3fs %.3fs %.2es %s %d %s' % (f*100, tot, t, t/nb_call, msg, nb_call, a)
print ' ... (remaining %i Ops account for %.2f%%(%.2fs) of the runtime)'\
%(max(0, len(sotimes)-n_ops_to_print),
sum(f for f, t, a, ci, nb_call in sotimes[n_ops_to_print:])*100,
......
......@@ -8,6 +8,10 @@ default_={
'ProfileMode.n_ops_to_print':20,
'tensor_opt.local_elemwise_fusion':False,
'lib.amdlibm':False,
'op.set_flops':False,#currently used only in ConvOp. The profile mode will print the flops/s for the op.
'nvcc.fastmath':False,
'scalar.floatX':'float64',
'gpuelemwise.sync':True, #when true, wait that the gpu fct finished and check it error code.
}
#default value taken from env variable
......@@ -38,6 +42,8 @@ THEANO_DEBUGMODE_CHECK_PY = bool(int(os.getenv('THEANO_DEBUGMODE_CHECK_PY', 1)))
THEANO_DEBUGMODE_CHECK_FINITE = bool(int(os.getenv('THEANO_DEBUGMODE_CHECK_FINITE', 1)))
THEANO_DEBUGMODE_CHECK_STRIDES = bool(int(os.getenv('THEANO_DEBUGMODE_CHECK_STRIDES', 1)))
THEANO_FLAGS=os.getenv("THEANO_FLAGS","")
class TheanoConfig(object):
"""Return the value for a key after parsing ~/.theano.cfg and
the THEANO_FLAGS environment variable.
......@@ -72,7 +78,7 @@ class TheanoConfig(object):
#user config file override the default value
self.config.read(['theano.cfg', os.path.expanduser('~/.theano.cfg')])
self.env_flags=os.getenv("THEANO_FLAGS","")
self.env_flags=THEANO_FLAGS
#The value in the env variable THEANO_FLAGS override the previous value
for flag in self.env_flags.split(','):
if not flag:
......@@ -88,16 +94,17 @@ class TheanoConfig(object):
self.config.set(sp[0],sp[1],val)
else:
found=0
sp=sp[0].lower()#the ConfigParser seam to use only lower letter.
for sec in self.config.sections():
for opt in self.config.options(sec):
if opt == sp[0]:
if opt == sp:
found+=1
section=sec
option=opt
if found==1:
self.config.set(section,option,val)
elif found>1:
raise Exception("Ambiguous option (%s) in THEANO_FLAGS"%(sp[0]))
raise Exception("Ambiguous option (%s) in THEANO_FLAGS"%(sp))
def __getitem__(self, key):
""":returns: a str with the value associated to the key"""
......@@ -142,3 +149,5 @@ class TheanoConfig(object):
config = TheanoConfig()
if config.get('scalar.floatX') not in ['float32', 'float64']:
raise Exception("the configuration scalar.floatX must have value float32 or float64")
import numpy as N
import theano
import theano.tensor as T
from theano import gof, Op, tensor
from theano import gof, Op, tensor, config
from theano.printing import Print
def getFilterOutShp(inshp, kshp, (dx,dy)=(1,1), mode='valid'):
......@@ -131,6 +131,8 @@ class ConvOp(Op):
"'valid' mode)")%(self.imshp_logical,self.kshp_logical))
self._rehash()
if config.config.getboolean('op.set_flops'):
self.set_flops()
def __eq__(self, other):
if type(self) != type(other):
......@@ -177,11 +179,12 @@ class ConvOp(Op):
col=-img_col
img_col+=col
while col < max_col: #loop over kern col
self.flops+=1
self.flops+=2
col+=1
self.flops*=self.imshp[0]*self.nkern*self.bsize#for all outputs images#n_stack==self.imshp[0]
assert self.flops==self.bsize * self.nkern * self.imshp[0] * self.kshp[0] * self.kshp[1] * self.imshp[1] * self.imshp[2] * 2
def make_node(self, inputs, kerns):
# TODO: find a way to make ConvOp work for N-D (after NIPS09)
......
......@@ -2,7 +2,7 @@ import StringIO, sys
import numpy
from theano import Op, Type, Apply, Variable, Constant
from theano import tensor, scalar
from theano import tensor, scalar, config
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.type_support import filter as type_support_filter
......@@ -67,7 +67,7 @@ class GpuElemwise(Op):
nin = property(lambda self: self.scalar_op.nin)
nout = property(lambda self: self.scalar_op.nout)
def __init__(self, scalar_op, inplace_pattern):
def __init__(self, scalar_op, inplace_pattern, sync=None):
##
# TODO: implement inplace operations.
# It's ok that we set the DestroyMap to something but then don't actually destroy
......@@ -77,6 +77,7 @@ class GpuElemwise(Op):
# the amount of loading and storing to global memory that we would have to do.
# That's why it isn't implemented yet.
#
sync = config.config.getboolean('gpuelemwise.sync',sync)
self.scalar_op = scalar_op
self.inplace_pattern = inplace_pattern
self.destroy_map = dict((o, [i]) for o, i in inplace_pattern.items())
......@@ -86,7 +87,8 @@ class GpuElemwise(Op):
self.ufunc = None
self._rehash()
self.src_generator = NaiveAlgo(self.scalar_op)
self.src_generator = NaiveAlgo(self.scalar_op, sync=sync)
self.sync = sync
def __getstate__(self):
d = copy.copy(self.__dict__)
......
......@@ -51,7 +51,6 @@ def local_gpu_elemwise_0(node):
if numpy.any([hasattr(i.owner, 'op') and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]):
if numpy.any([o.type.dtype == 'float64' for o in node.outputs]):
print 'WARNING: THERE ARE STILL float64s in your graph local_gpu_elemwise_0', node
import pdb; pdb.set_trace()
else:
# move the add to a GpuAdd
new_op = GpuElemwise(node.op.scalar_op, node.op.inplace_pattern)
......
......@@ -172,7 +172,7 @@ def speed_elemwise_collapse2():
t2=time.time()
def test_elemwise_collapse():
""" used to test if the case where all inputs are broadcast """
""" Test when all inputs have one(and the same) broadcastable dimension """
shape = (4,5,60)
a = cuda_ndarray.CudaNdarray(numpy.asarray(numpy.random.rand(*shape),dtype='float32'))
......@@ -186,14 +186,16 @@ def test_elemwise_collapse():
v = numpy.asarray(numpy.random.rand(shape[0],1,*shape[1:]),dtype='float32')
v=cuda_ndarray.CudaNdarray(v)
for id,n in enumerate(f.maker.env.toposort()):
print id, n
if False:
for id,n in enumerate(f.maker.env.toposort()):
print id, n
#let debugmode catch errors
f(v)
out=f(v)[0]
assert numpy.allclose(out,a.reshape(shape[0],1,*shape[1:])+v)
print "Expected collapse of all dimensions"
def test_elemwise_collapse2():
""" used to test if the case where one inputs have a broadcast """
""" Test when only one inputs have one broadcastable dimension """
shape = (4,5,60)
a = cuda_ndarray.CudaNdarray(numpy.asarray(numpy.random.rand(*shape),dtype='float32'))
......@@ -207,14 +209,16 @@ def test_elemwise_collapse2():
v = numpy.asarray(numpy.random.rand(shape[0],5,*shape[1:]),dtype='float32')
v=cuda_ndarray.CudaNdarray(v)
for id,n in enumerate(f.maker.env.toposort()):
print id, n
if False:
for id,n in enumerate(f.maker.env.toposort()):
print id, n
#let debugmode catch errors
f(v)
out=f(v)[0]
assert numpy.allclose(out,a.reshape(shape[0],1,*shape[1:])+v)
print "Expected collapse to 3 dimensions"
def test_elemwise_collapse3():
""" used to test if the case where one inputs have 2 broadcast dimensions at each ends."""
""" Test when only one inputs have two broadcastable dimension at each ends """
shape = (4,5)
a = cuda_ndarray.CudaNdarray(numpy.asarray(numpy.random.rand(*shape),dtype='float32'))
......@@ -228,14 +232,16 @@ def test_elemwise_collapse3():
v = numpy.asarray(numpy.random.rand(5,shape[0],shape[1],4),dtype='float32')
v=cuda_ndarray.CudaNdarray(v)
for id,n in enumerate(f.maker.env.toposort()):
print id, n
if False:
for id,n in enumerate(f.maker.env.toposort()):
print id, n
#let debugmode catch errors
f(v)
out=f(v)[0]
assert numpy.allclose(out,a.reshape(1,shape[0],shape[1],1)+v)
print "Expected collapse to 3 dimensions"
def test_elemwise_collapse4():
""" used to test if the case where one inputs have 2 broadcast dimensions at each ends and a scalar"""
""" Test when only one inputs have two broadcastable dimension at each ends and we add a scalar"""
shape = (4,5)
a = cuda_ndarray.CudaNdarray(numpy.asarray(numpy.random.rand(*shape),dtype='float32'))
......@@ -249,8 +255,74 @@ def test_elemwise_collapse4():
v = numpy.asarray(numpy.random.rand(5,shape[0],shape[1],4),dtype='float32')
v=cuda_ndarray.CudaNdarray(v)
for id,n in enumerate(f.maker.env.toposort()):
print id, n
if False:
for id,n in enumerate(f.maker.env.toposort()):
print id, n
#let debugmode catch errors
f(v)
out=f(v)[0]
assert numpy.allclose(out,a.reshape(1,shape[0],shape[1],1)+v+2)
print "Expected collapse to 3 dimensions"
def test_elemwise_collapse5():
    """Test when only one input has two broadcastable dimensions at the beginning and we add a scalar.

    A shared float32 value of shape (4, 5) is dimshuffled to
    ('x', 'x', 0, 1) and added to a 4-d CudaNdarray input and the
    constant 2.  The test only checks the numerical result against the
    same expression evaluated on the host values; the dimension collapse
    named in the final message is not asserted.
    """
    shape = (4, 5)
    # Host-side reference values.  The original also built a CudaNdarray
    # here and immediately overwrote it; that dead allocation is gone.
    a = numpy.asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    # Prepend two broadcastable axes in front of the two real ones.
    a3 = a2.dimshuffle('x', 'x', 0, 1)
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = (a3 + b + 2)
    f = pfunc([b], [c])

    v = numpy.asarray(numpy.random.rand(5, 4, shape[0], shape[1]), dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)

    # Debugging aid: switch to True to list the nodes of the compiled graph.
    if False:
        for idx, node in enumerate(f.maker.env.toposort()):
            print("%s %s" % (idx, node))

    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, 1, shape[0], shape[1]) + v + 2)
    print("Expected collapse to 2 dimensions")
def test_elemwise_collapse6():
    """Test when all inputs have two broadcastable dimensions at the beginning.

    A shared float32 value of shape (4, 5) is dimshuffled to
    ('x', 'x', 0, 1) and added to an input whose type is broadcastable
    on its first two axes.  The test only checks the numerical result;
    the collapse named in the final message is not asserted.
    """
    shape = (4, 5)
    # Host-side reference values.  The original also built a CudaNdarray
    # here and immediately overwrote it; that dead allocation is gone.
    a = numpy.asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    # Prepend two broadcastable axes in front of the two real ones.
    a3 = a2.dimshuffle('x', 'x', 0, 1)
    b = tcn.CudaNdarrayType((True, True, False, False))()
    f = pfunc([b], [a3 + b])

    v = numpy.asarray(numpy.random.rand(1, 1, shape[0], shape[1]), dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)

    # Debugging aid: switch to True to list the nodes of the compiled graph.
    if False:
        for idx, node in enumerate(f.maker.env.toposort()):
            print("%s %s" % (idx, node))

    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, 1, shape[0], shape[1]) + v)
    print("Expected collapse to c contiguous")
def test_elemwise_collapse7(atol=1e-6):
    """Test when one input has one broadcastable dimension and the other is a scalar.

    A shared float32 value of shape (5, 4, 1) is dimshuffled to
    (0, 'x', 1, 2) and the constant 2 is added.  The compiled function
    takes no inputs.  The test only checks the numerical result; the
    collapse named in the final message is not asserted.

    :param atol: absolute tolerance passed to numpy.allclose.
    """
    shape = (5, 4, 1)
    # Host-side reference values.  The original also built a CudaNdarray
    # here and immediately overwrote it; that dead allocation is gone.
    a = numpy.asarray(numpy.random.rand(*shape), dtype='float32')
    # The shared variable gets its own copy; `a` is reused below to build
    # the expected answer.
    a2 = tcn.shared_constructor(a.copy(), 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    f = pfunc([], [a3 + 2])

    # Debugging aid: switch to True to list the nodes of the compiled graph.
    if False:
        for idx, node in enumerate(f.maker.env.toposort()):
            print("%s %s" % (idx, node))

    # let debugmode catch errors
    out = f()[0]
    ans = (a + 2).reshape(shape[0], 1, shape[1], shape[2])
    assert numpy.allclose(out, ans, atol=atol)
    print("Expected collapse to c contiguous")
......@@ -228,6 +228,11 @@ class Kouh2008(object):
class Config(object):
use_gpu = True
dtype='float32'
dtype2=dtype
if dtype2=='floatX':
import theano.config as c
dtype2 = c.config.get('scalar.floatX')
rng_seed = 23498
n_hid = 300
......@@ -296,7 +301,7 @@ def test_bench_elemwise(n_iter=1000, **kwargs):
xval = numpy.asarray(
rng.uniform(size=(conf.ft_batchsize, x.type.shape[1])),
dtype=conf.dtype,
dtype=conf.dtype2,
)
yval = numpy.arange(conf.ft_batchsize)
for i in xrange(n_iter):
......
......@@ -261,11 +261,17 @@ class CudaNdarrayType(Type):
def c_code_cache_version(self):
    """Return the version tuple for the C code generated for this type."""
    #return ()
    # nvcc.fastmath does not need to appear in this tuple: the
    # c_compile_args are already put in the key.
    version = (2,)  # with assertion about refcounts
    return version
def c_compiler(self):
    """Return the callable used to compile C code for this type (nvcc_module_compile_str)."""
    compiler = nvcc_module_compile_str
    return compiler
def c_compile_args(self):
    """Return the list of extra compiler arguments for this type.

    The list holds '-use_fast_math' when the 'nvcc.fastmath'
    configuration option is true, and is empty otherwise.
    """
    if config.config.getboolean('nvcc.fastmath'):
        return ['-use_fast_math']
    return []
# THIS WORKS
# But CudaNdarray instances don't compare equal to one another, and what about __hash__ ?
......
......@@ -56,6 +56,8 @@ def constant(x):
class Scalar(Type):
def __init__(self, dtype):
    """Create a scalar type for the given dtype.

    :param dtype: name of the dtype; the special name 'floatX' is
        replaced by the value of the 'scalar.floatX' configuration
        option.
    """
    if dtype == 'floatX':
        # Use the fully qualified 'section.option' key, as every other
        # floatX lookup in the code base does; the bare 'floatX' key
        # used before named no section.
        dtype = config.config.get('scalar.floatX')
    self.dtype = dtype
    self.dtype_specs()  # error checking
......@@ -238,8 +240,8 @@ class Scalar(Type):
def c_code_cache_version(self):
    """Return the version tuple for the C code generated for this type."""
    #return ()
    # lib.amdlibm does not need to appear here: c_compile_args() are
    # already put in the key.
    # Version history:
    #   4: explicit T given in specialization of operator= lines.
    #      This makes it compile with open64
    #   2,
    version = (4,)
    return version
int8 = Scalar('int8')
......@@ -252,6 +254,7 @@ uint32 = Scalar('uint32')
uint64 = Scalar('uint64')
float32 = Scalar('float32')
float64 = Scalar('float64')
floatX = Scalar(config.config.get('scalar.floatX'))
complex64 = Scalar('complex64')
complex128 = Scalar('complex128')
......@@ -934,6 +937,7 @@ convert_to_uint32 = Cast(uint32, name='convert_to_uint32')
convert_to_uint64 = Cast(uint64, name='convert_to_uint64')
convert_to_float32 = Cast(float32, name='convert_to_float32')
convert_to_float64 = Cast(float64, name='convert_to_float64')
convert_to_floatX = Cast(floatX, name='convert_to_floatX')
convert_to_complex64 = Cast(complex64, name='convert_to_complex64')
convert_to_complex128 = Cast(complex128, name='convert_to_complex128')
......@@ -948,10 +952,13 @@ _cast_mapping = {
'uint64': convert_to_uint64,
'float32': convert_to_float32,
'float64': convert_to_float64,
'floatX': convert_to_floatX,
'complex64': convert_to_complex64,
'complex128': convert_to_complex128}
def cast(x, dtype):
"""Symbolically cast `x` to a Scalar of given `dtype`."""
if dtype=='floatX': dtype = config.config.get('scalar.floatX')
_x = as_scalar(x)
if _x.type.dtype == dtype:
return _x
......
......@@ -217,10 +217,17 @@ def _wrap_tensor_into_member(x):
return compile.module.Member(constant(x))
compile.module.register_wrapper(_obj_is_wrappable_as_tensor, _wrap_tensor_into_member)
if int(config.THEANO_CMP_SLOPPY):
if int(config.THEANO_CMP_SLOPPY)>1:
# This environment variable is a quick-and-dirty way to get low-precision comparisons.
# For a more precise setting of these tolerances set them explicitly in your user code by
# assigning, for example, "theano.tensor.basic.float32_atol = ..."
#when THEANO_CMP_SLOPPY>1 we are even more sloppy. This is useful to test the gpu, as it doesn't use extended precision and this causes some differences bigger than the normal sloppy tolerances allow.
float32_atol = 5e-4
float32_rtol = 1e-3
float64_rtol = 1e-4
float64_atol = 1e-3
elif int(config.THEANO_CMP_SLOPPY):
float32_atol = 1e-4
float32_rtol = 1e-3
float64_rtol = 1e-4
......@@ -275,6 +282,8 @@ class TensorType(Type):
Optional name for this type.
"""
self.dtype = str(dtype)
if self.dtype=='floatX':
self.dtype=config.config.get('scalar.floatX')
self.broadcastable = tuple(broadcastable)
self.dtype_specs() # error checking is done there
self.name = name
......@@ -601,6 +610,7 @@ cscalar = TensorType('complex64', ())
zscalar = TensorType('complex128', ())
fscalar = TensorType('float32', ())
dscalar = TensorType('float64', ())
xscalar = TensorType('floatX',())
bscalar = TensorType('int8', ())
wscalar = TensorType('int16', ())
iscalar = TensorType('int32', ())
......@@ -621,6 +631,7 @@ cvector = TensorType('complex64', (False, ))
zvector = TensorType('complex128', (False, ))
fvector = TensorType('float32', (False, ))
dvector = TensorType('float64', (False, ))
xvector = TensorType('floatX', (False, ))
bvector = TensorType('int8', (False,))
wvector = TensorType('int16', (False,))
ivector = TensorType('int32', (False, ))
......@@ -638,6 +649,7 @@ cmatrix = TensorType('complex64', (False, False))
zmatrix = TensorType('complex128', (False, False))
fmatrix = TensorType('float32', (False, False))
dmatrix = TensorType('float64', (False, False))
xmatrix = TensorType('floatX', (False, False))
bmatrix = TensorType('int8', (False, False))
wmatrix = TensorType('int16', (False, False))
imatrix = TensorType('int32', (False, False))
......@@ -655,6 +667,7 @@ crow = TensorType('complex64', (True, False))
zrow = TensorType('complex128', (True, False))
frow = TensorType('float32', (True, False))
drow = TensorType('float64', (True, False))
xrow = TensorType('floatX', (True, False))
brow = TensorType('int8', (True, False))
wrow = TensorType('int16', (True, False))
irow = TensorType('int32', (True, False))
......@@ -668,6 +681,7 @@ ccol = TensorType('complex64', (False, True))
zcol = TensorType('complex128', (False, True))
fcol = TensorType('float32', (False, True))
dcol = TensorType('float64', (False, True))
xcol = TensorType('floatX', (False, True))
bcol = TensorType('int8', (False, True))
wcol = TensorType('int16', (False, True))
icol = TensorType('int32', (False, True))
......@@ -681,6 +695,7 @@ ctensor3 = TensorType('complex64', (False,)*3)
ztensor3 = TensorType('complex128', (False,)*3)
ftensor3 = TensorType('float32', (False,)*3)
dtensor3 = TensorType('float64', (False,)*3)
xtensor3 = TensorType('floatX', (False,)*3)
btensor3 = TensorType('int8', (False,)*3)
wtensor3 = TensorType('int16', (False,)*3)
itensor3 = TensorType('int32', (False,)*3)
......@@ -690,6 +705,7 @@ ctensor4 = TensorType('complex64', (False,)*4)
ztensor4 = TensorType('complex128', (False,)*4)
ftensor4 = TensorType('float32', (False,)*4)
dtensor4 = TensorType('float64', (False,)*4)
xtensor4 = TensorType('floatX', (False,)*4)
btensor4 = TensorType('int8', (False,)*4)
wtensor4 = TensorType('int16', (False,)*4)
itensor4 = TensorType('int32', (False,)*4)
......@@ -1086,6 +1102,9 @@ _convert_to_float32 = _conversion(elemwise.Elemwise(scal.convert_to_float32), 'f
_convert_to_float64 = _conversion(elemwise.Elemwise(scal.convert_to_float64), 'float64')
"""Cast to double-precision floating point"""
_convert_to_floatX = _conversion(elemwise.Elemwise(scal.convert_to_floatX), 'floatX')
"""Cast to floatX floating point"""
_convert_to_complex64 = _conversion(elemwise.Elemwise(scal.convert_to_complex64), 'complex64')
"""Cast to single-precision complex"""
......@@ -1103,11 +1122,14 @@ _cast_mapping = {
'uint64': _convert_to_uint64,
'float32': _convert_to_float32,
'float64': _convert_to_float64,
'floatX': _convert_to_floatX,
'complex64': _convert_to_complex64,
'complex128': _convert_to_complex128}
@constructor
def cast(x, dtype):
"""Symbolically cast `x` to a Tensor of type `dtype`."""
if dtype=='floatX': dtype = config.config.get('scalar.floatX')
_x = as_tensor_variable(x)
if _x.type.dtype == dtype:
return _x
......@@ -2462,7 +2484,7 @@ def get_vector_length(v):
return join.vec_length(v)
except ValueError:
pass
if v.owner and v.owner.op == _shape:
if v.owner and isinstance(v.owner.op, Shape):
return v.owner.inputs[0].type.ndim
raise ValueError("length not known")
......
......@@ -1929,6 +1929,63 @@ def test_default_state():
assert f(1) == 4.8
assert f(2.2) == 7
def test_cast_floatX():
    """Check that cast() only adds a node to the graph when the dtype really changes.

    Every case compiles cast(x, dtype) for a fresh vector and counts the
    nodes in the compiled graph: 0 when source and target dtypes are the
    same (after 'floatX' is resolved from the 'scalar.floatX'
    configuration option), 1 otherwise.
    """
    floatx = config.config.get('scalar.floatX')

    def graph_size(make_vector, target_dtype):
        # Compile the cast of a fresh vector and count the nodes that
        # remain in the compiled graph.
        x = make_vector('x')
        f = function([x], [cast(x, target_dtype)])
        # print f.maker.env.toposort()
        return len(f.maker.env.toposort())

    # Node count expected when floatX meets a fixed dtype: none if they
    # are the same dtype, one otherwise.
    against_float64 = int(floatx != 'float64')
    against_float32 = int(floatx != 'float32')

    #float64 cast to float64 should not generate an op
    assert graph_size(dvector, 'float64') == 0

    #float32 cast to float32 should not generate an op
    assert graph_size(fvector, 'float32') == 0

    #floatX cast to float64
    assert graph_size(xvector, 'float64') == against_float64

    #floatX cast to float32
    assert graph_size(xvector, 'float32') == against_float32

    #float64 cast to floatX
    assert graph_size(dvector, 'floatX') == against_float64

    #float32 cast to floatX
    assert graph_size(fvector, 'floatX') == against_float32

    #floatX cast to floatX
    assert graph_size(xvector, 'floatX') == 0
if __name__ == '__main__':
if len(sys.argv) >= 2 and sys.argv[1] == 'OPT':
default_mode = compile.Mode(linker = 'c&py',
......
......@@ -1059,34 +1059,42 @@ class test_fusion(unittest.TestCase):
print "time", self.do(mode, shared, shp=(1000,1000),gpu=False, assert_len_topo=False,slice=s, nb_repeat=100)
def tes_memory_leak(self, mode=compile.mode.predefined_modes['FAST_RUN'], shared_fn=shared, shp=(3000,3000), gpu=False, nb_repeat=30, assert_len_topo=True, slice=None):
def tes_memory_leak(self, mode=compile.mode.Mode('c', 'merge'), shared_fn=shared, shp=(3000,3000), gpu=False, nb_repeat=30, assert_len_topo=True, slice=None):
"""
param shared_fn: if None, will use compile.function
verify that the elemwise fusion work
Test with and without DimShuffle
"""
#TODO: disable the canonizer?
fx, fy = fmatrices('xy')
fx = fmatrices('x')
fxv = numpy.zeros(shp, dtype='float32')+ 2
fyv = numpy.zeros(shp, dtype='float32')+ 3
cases = [
(fx+fy,(fx,fy),(fxv,fyv),1,fxv+fyv,'float32'),#1
(fx,(fx),(fxv),'float32'),#1
]
import gc, pdb, objgraph, weakref
d={}
dl=[]
v1=None
for id, [g, sym_inputs, val_inputs, nb_elemwise, answer, out_dtype] in enumerate(cases):
mode=compile.mode.Mode('c', 'merge')
for id, [g, sym_inputs, val_inputs, out_dtype] in enumerate(cases):
for zzzz in range(nb_repeat):
v=numpy.zeros(shp, dtype=out_dtype)
gc.collect();gc.collect();gc.collect()
print 'v1',v1
v1=weakref.ref(v)
# print 'v1',v1
# v1=weakref.ref(v)
out=shared_fn(v,'out')
f = pfunc(sym_inputs,[],updates=[(out,out+g)],mode=mode)
pdb.set_trace()
# f = pfunc(sym_inputs,[],updates=[(out,out+g)],mode=mode)
# f = pfunc([fx],[],updates=[(out,out+fx)],mode=mode)
# f = pfunc([fx],out+fx,mode=mode)
# f = compile.function([fx,out],[out+fx],mode=mode)#no memory leak.
f = compile.function([fx,compile.In(variable=out, value=out.container, mutable=None)],
[out+fx],mode=mode)#if mutable is True or False, their is a memory leak
del v
gc.collect();gc.collect();gc.collect()
pdb.set_trace()
if True:
if False:
gc.collect();gc.collect();gc.collect()
nd=objgraph.typestats()
print 'key, old val, new val, diff'
......@@ -1097,7 +1105,7 @@ class test_fusion(unittest.TestCase):
d=nd
# pdb.set_trace()
if True:
if False:
gc.collect();gc.collect();gc.collect()
ndl=objgraph.by_type('list')
ll=[]
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论