# Introductory message printed ahead of the individual speed-up tips.
print(
    "We guess some tips to make your code faster. "
    "If you think of new one, suggest them on the mailing list. "
    "Test them before use as they are not guaranted to always give a speed up."
)
# Scalar op classes grouped under the amdlibm "speed up" name; presumably the
# ops that amdlibm accelerates -- confirm against the code that reads this list.
# Abs, Mod in float{32,64} only.
scalar_op_amdlibm_speed_up = [
    scal.Mod,
    scal.Pow,
    scal.Ceil,
    scal.Floor,
    scal.RoundHalfToEven,
    scal.RoundHalfAwayFromZero,
    scal.Log,
    scal.Log2,
    scal.Log10,
    scal.Log1p,
    scal.Exp,
    scal.Sqrt,
    scal.Abs,
    scal.Cos,
    scal.Sin,
    scal.Tan,
    scal.Tanh,
    scal.Cosh,
    scal.Sinh,
    T.nnet.sigm.ScalarSigmoid,
    T.nnet.sigm.ScalarSoftplus,
]
# Tip text: float32 exp with the default gcc libm; suggests floatX=float64 or
# enabling amdlibm. The condition guarding this tip is outside the visible lines.
print(
    " - With the default gcc libm, exp in float32 is slower then in float64! "
    "Try Theano flags floatX=float64 or install amdlibm and set the theano flags lib.amdlibm=True"
)
# Tip text for a dot that was not optimized to dot22, followed by the type of
# each input of `node`. `node` is bound outside the visible lines.
# Kept as a Python 2 print *statement*: wrapping both items in parentheses
# would print a tuple instead of the message followed by the list.
print " - You have a dot operation that was not optimized to dot22 that is faster. Make sure the inputs are float32 or 64 and are the same for both input. Currently they are:", [i.type for i in node.inputs]
# Tip 5: scan the entries of apply_time; element 1 of each key is taken as the
# node. On the first node whose op is a RandomFunction, print the
# MRG_RandomStreams suggestion (plus a GPU note when config.device starts with
# "gpu") and stop, so the tip is printed at most once.
for a, t in apply_time.iteritems():
    node = a[1]
    if not isinstance(node.op, RandomFunction):
        continue
    print(
        " - Replace the default random number generator by "
        "'from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams' "
        "as this is is faster. It is still experimental, but seam to work correctly."
    )
    if config.device.startswith("gpu"):
        print(" - MRG_RandomStreams is the only random number supported on the GPU.")
    break
# Pass a freshly constructed ProfileMode instance to register_mode under the
# name 'PROFILE_MODE'. NOTE(review): register_mode is defined outside this
# chunk; presumably it makes the mode selectable by that name -- confirm.
register_mode('PROFILE_MODE', ProfileMode())
#needed to print the profile at the end automatically