Added the missing example file and a few fixes to the tutorial slides.

af5457e1 · Frederic Bastien · 7b3aa573 · af5457e1 · af5457e1 · af5457e1
--- a/doc/hpcs2011_tutorial/logreg_example.py
+++ b/doc/hpcs2011_tutorial/logreg_example.py
@@ -6,7 +6,7 @@ rng = numpy.random
 N = 400
 feats = 784
 D = (rng.randn(N, feats).astype(theano.config.floatX), rng.randint(size=N,low=0, high=2).astype(theano.config.floatX))
-training_steps = 10
+training_steps = 100
 # Declare Theano symbolic variables
 x = T.matrix("x")
@@ -30,11 +30,21 @@ gw,gb = T.grad(cost, [w,b])
 train = theano.function(
            inputs=[x,y],
            outputs=[prediction, xent],
-            updates={w:w-0.1*gw, b:b-0.1*gb},
+            updates={w:w-0.01*gw, b:b-0.01*gb},
            name = "train")
 predict = theano.function(inputs=[x], outputs=prediction,
            name = "predict")
+if any( [x.op.__class__.__name__=='Gemv' for x in train.maker.env.toposort()]):
+    print 'Used the cpu'
+elif any( [x.op.__class__.__name__=='GpuGemm' for x in train.maker.env.toposort()]):
+    print 'Used the gpu'
+else:
+    print 'ERROR, not able to tell if theano used the cpu or the gpu'
+    print train.maker.env.toposort()
 for i in range(training_steps):
    pred, err = train(D[0], D[1])
 print "Final model:"

--- a/doc/hpcs2011_tutorial/presentation.tex
+++ b/doc/hpcs2011_tutorial/presentation.tex
@@ -522,7 +522,7 @@ rng = numpy.random
 N = 400
 feats = 784
 D = (rng.randn(N, feats), rng.randint(size=N,low=0, high=2))
-training_steps = 10
+training_steps = 100
 \end{Verbatim}
 \end{frame}
@@ -752,7 +752,7 @@ Computers in the class
  \end{itemize}
  \vfill
  \begin{itemize}
-  \item Broadcastability must be specified when creating the variable.
+  \item Broadcastability must be specified when creating the variable
  \item The only shortcuts with broadcastable dimensions are: {\bf T.row} and {\bf T.col}
  \item For all others: T.tensor(dtype, broadcastable={\bf ([False or True])*nd})
  \end{itemize}
@@ -1337,7 +1337,7 @@ multiply_them(
 \begin{frame}
 \frametitle{PyCUDA Exercises}
 \begin{itemize}
-\item Run the example
+\item Run the example in the file pycuda_simple.py
 \item Modify and execute it to work for a matrix of 20 $\times$ 10
 \end{itemize}
 \end{frame}

--- a/doc/hpcs2011_tutorial/pycuda_simple.py
+++ b/doc/hpcs2011_tutorial/pycuda_simple.py
+import pycuda.autoinit
+import pycuda.driver as drv
+import numpy
+from pycuda.compiler import SourceModule
+mod = SourceModule("""
+__global__ void multiply_them(float *dest, float *a, float *b)
+{
+  const int i = threadIdx.x;
+  dest[i] = a[i] * b[i];
+}
+""")
+multiply_them = mod.get_function("multiply_them")
+a = numpy.random.randn(400).astype(numpy.float32)
+b = numpy.random.randn(400).astype(numpy.float32)
+dest = numpy.zeros_like(a)
+multiply_them(
+        drv.Out(dest), drv.In(a), drv.In(b),
+        block=(400,1,1), grid=(1,1))
+assert numpy.allclose(dest, a*b)
+print dest