提交 af5457e1 authored 作者: Frederic Bastien's avatar Frederic Bastien

Added missing example file and a few fix to the tutorial slide.

上级 7b3aa573
......@@ -6,7 +6,7 @@ rng = numpy.random
N = 400
feats = 784
D = (rng.randn(N, feats).astype(theano.config.floatX), rng.randint(size=N,low=0, high=2).astype(theano.config.floatX))
training_steps = 10
training_steps = 100
# Declare Theano symbolic variables
x = T.matrix("x")
......@@ -30,11 +30,21 @@ gw,gb = T.grad(cost, [w,b])
train = theano.function(
inputs=[x,y],
outputs=[prediction, xent],
updates={w:w-0.1*gw, b:b-0.1*gb},
updates={w:w-0.01*gw, b:b-0.01*gb},
name = "train")
predict = theano.function(inputs=[x], outputs=prediction,
name = "predict")
if any( [x.op.__class__.__name__=='Gemv' for x in train.maker.env.toposort()]):
print 'Used the cpu'
elif any( [x.op.__class__.__name__=='GpuGemm' for x in train.maker.env.toposort()]):
print 'Used the gpu'
else:
print 'ERROR, not able to tell if theano used the cpu or the gpu'
print train.maker.env.toposort()
for i in range(training_steps):
pred, err = train(D[0], D[1])
print "Final model:"
......
......@@ -522,7 +522,7 @@ rng = numpy.random
N = 400
feats = 784
D = (rng.randn(N, feats), rng.randint(size=N,low=0, high=2))
training_steps = 10
training_steps = 100
\end{Verbatim}
\end{frame}
......@@ -752,7 +752,7 @@ Computers in the class
\end{itemize}
\vfill
\begin{itemize}
\item Broadcastability must be specified when creating the variable.
\item Broadcastability must be specified when creating the variable
\item The only shortcuts with broadcastable dimensions are: {\bf T.row} and {\bf T.col}
\item For all others: T.tensor(dtype, broadcastable={\bf ([False or True])*nd})
\end{itemize}
......@@ -1337,7 +1337,7 @@ multiply_them(
\begin{frame}
\frametitle{PyCUDA Exercises}
\begin{itemize}
\item Run the example
\item Run the example in the file pycuda_simple.py
\item Modify and execute it to work for a matrix of 20 $\times$ 10
\end{itemize}
\end{frame}
......
# pycuda_simple.py -- minimal PyCUDA example: element-wise product of two
# float32 vectors computed on the GPU, then checked against numpy on the CPU.
# (Python 2 script: note the `print` statement at the bottom.)
import pycuda.autoinit
import pycuda.driver as drv
import numpy
from pycuda.compiler import SourceModule
# Compile the embedded CUDA C kernel at runtime. Each GPU thread i writes
# dest[i] = a[i] * b[i]; indexing uses only threadIdx.x, so the launch below
# must use a single block whose thread count equals the vector length.
mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
const int i = threadIdx.x;
dest[i] = a[i] * b[i];
}
""")
# Python-callable wrapper around the compiled kernel.
multiply_them = mod.get_function("multiply_them")
# Inputs must be float32 to match the kernel's `float *` parameters.
a = numpy.random.randn(400).astype(numpy.float32)
b = numpy.random.randn(400).astype(numpy.float32)
# Output buffer with the same shape and dtype as the inputs.
dest = numpy.zeros_like(a)
# Launch 1 block of 400 threads (one thread per element). drv.In/drv.Out
# handle the host<->device copies around the kernel call.
multiply_them(
drv.Out(dest), drv.In(a), drv.In(b),
block=(400,1,1), grid=(1,1))
# Sanity check: GPU result must match the CPU computation.
assert numpy.allclose(dest, a*b)
print dest
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论