Merge pull request #2100 from nouiz/omlw

Open Machine Learning Workshop 2014 presentation

Merge pull request #2100 from nouiz/omlw
fa1d9c4a · Frédéric Bastien · d12a03be · 1a42c65f · fa1d9c4a · fa1d9c4a
--- a/.gitignore
+++ b/.gitignore
@@ -10,7 +10,6 @@
 *.log
 *.nav
 *.out
-*.pdf
 *.snm
 *.toc
 *.vrb

--- a/doc/index.txt
+++ b/doc/index.txt
@@ -21,6 +21,8 @@ Montreal).
 News
 ====

+* Open Machine Learning Workshop 2014 `presentation <omlw2014/omlw_presentation.pdf>`_.
+
 * Colin Raffel `tutorial on Theano <http://nbviewer.ipython.org/github/craffel/theano-tutorial/blob/master/Theano%20Tutorial.ipynb>`_.

 * Ian Goodfellow did a `12h class with exercises on Theano <https://github.com/goodfeli/theano_exercises>`_.

--- a/doc/omlw2014/Makefile
+++ b/doc/omlw2014/Makefile
+all: presentation.pdf sharing.pdf
+
+clean:
+	rm -f pygpu_ndarray.so core.* *.o *~
+
+cleantmp:
+	rm -f core.* *.o *~
+
+presentation.pdf: presentation.tex
+	pdflatex presentation
+	pdflatex presentation
+
+sharing.pdf: sharing.tex
+	pdflatex sharing
+	pdflatex sharing
--- a/doc/omlw2014/logreg.py
+++ b/doc/omlw2014/logreg.py
+import numpy
+import theano
+import theano.tensor as tt
+rng = numpy.random
+
+N = 400
+feats = 784
+D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
+training_steps = 10000
+
+# Declare Theano symbolic variables
+x = tt.matrix("x")
+y = tt.vector("y")
+w = theano.shared(rng.randn(feats), name="w")
+b = theano.shared(0., name="b")
+print "Initial model:"
+print w.get_value(), b.get_value()
+
+# Construct Theano expression graph
+p_1 = 1 / (1 + tt.exp(-tt.dot(x, w) - b))   # Probability that target = 1
+prediction = p_1 > 0.5                      # The prediction thresholded
+xent = -y * tt.log(p_1) - (1 - y) * tt.log(1 - p_1)  # Cross-entropy loss
+cost = xent.mean() + 0.01 * (w ** 2).sum()  # The cost to minimize
+gw, gb = tt.grad(cost, [w, b])
+
+# Compile
+train = theano.function(
+    inputs=[x, y],
+    outputs=[prediction, xent],
+    updates=[(w, w - 0.1 * gw),
+             (b, b - 0.1 * gb)],
+    name='train')
+
+predict = theano.function(inputs=[x], outputs=prediction,
+                          name='predict')
+
+# Train
+for i in range(training_steps):
+    pred, err = train(D[0], D[1])
+
+print "Final model:"
+print w.get_value(), b.get_value()
+print "target values for D:", D[1]
+print "prediction on D:", predict(D[0])
--- a/doc/omlw2014/omlw_presentation.pdf
+++ b/doc/omlw2014/omlw_presentation.pdf
--- a/doc/omlw2014/pr_conv_gemm_profile.png
+++ b/doc/omlw2014/pr_conv_gemm_profile.png
--- a/doc/omlw2014/presentation.tex
+++ b/doc/omlw2014/presentation.tex
+\documentclass[utf8x,xcolor=pdftex,dvipsnames,table]{beamer}
+\usetheme{Malmoe}  % Now it's a beamer presentation with the lisa theme!
+\setbeamertemplate{footline}[page number]
+\usecolortheme{beaver}
+\usepackage[T1]{fontenc}
+\usepackage{amsmath}
+\usepackage[utf8x]{inputenc}
+%\logo{\includegraphics[width=.8in]{UdeM_NoirBleu_logo_Marie_crop}}
+\usepackage{listings}
+
+\newcommand{\superscript}[1]{\ensuremath{^{\textrm{#1}}}}
+
+\mode<presentation>
+
+\title{Theano, Pylearn2, libgpuarray Presentation}
+
+\author{%
+\footnotesize
+Frédéric Bastien, Bart van Merriënboer \newline
+Département d'Informatique et de Recherche Opérationnelle \newline
+Université de Montréal \newline
+Montréal, Canada \newline
+\texttt{\{bastienf, vanmerb\}@iro.umontreal.ca} \newline \newline
+}
+
+\date{OML Workshop 2014}
+
+\setbeamertemplate{navigation symbols}{}
+
+\begin{document}
+
+\begin{frame}[plain]
+ \titlepage
+ \vspace{-5em}
+ \includegraphics[width=1in]{../hpcs2011_tutorial/pics/lisabook_logo_text_3.png}
+ \hfill
+ \includegraphics[width=.8in]{../hpcs2011_tutorial/pics/UdeM_NoirBleu_logo_Marie_crop}
+\end{frame}
+
+\section{Introduction}
+\begin{frame}{High level}\setcounter{page}{1}
+  Python <- \{NumPy/SciPy/libgpuarray\} <- Theano <- Pylearn2
+  \begin{itemize}
+  \item Python: OO coding language
+  \item Numpy: $n$-dimensional array object and scientific computing toolbox
+  \item SciPy: sparse matrix objects and more scientific computing functionality
+  \item libgpuarray: GPU $n$-dimensional array object in C for CUDA and OpenCL
+  \item Theano: compiler/symbolic graph manipulation
+  \item Pylearn2: machine learning framework
+  \end{itemize}
+\end{frame}
+
+
+%% \begin{frame}{Others}
+%%   \begin{itemize}
+%%   \item matplotlib: one of the many plotting library
+%%   \item IPython: Advanced python shell
+%%   \item IPython notebook: web-based interactive computational environment where you can combine code execution, text, mathematics, plots and rich media into a single document
+%%   \end{itemize}
+%% \end{frame}
+
+\begin{frame}{Python}
+  \begin{itemize}
+  \item General-purpose high-level OO interpreted language
+  \item Emphasizes code readability
+  \item Comprehensive standard library
+  \item Dynamic type and memory management
+  \item Slow execution
+  \item Easily extensible with C
+  \item Popular in {\em web development}\ and {\em scientific communities}
+  \end{itemize}
+\end{frame}
+
+\begin{frame}{NumPy/SciPy}
+  \begin{itemize}
+  \item Python floats are full-fledged objects on the heap
+      \begin{itemize}
+      \item Not suitable for high-performance computing!
+      \end{itemize}
+
+  \item NumPy provides an $n$-dimensional numeric array in Python
+      \begin{itemize}
+      \item Perfect for high-performance computing
+      \item Slices of arrays are views (no copying)
+      \end{itemize}
+
+  \item NumPy provides
+      \begin{itemize}
+      \item Elementwise computations
+      \item Linear algebra, Fourier transforms
+      \item Pseudorandom number generators (many distributions)
+      \end{itemize}
+
+  \item SciPy provides lots more, including
+      \begin{itemize}
+      \item Sparse matrices
+      \item More linear algebra
+      \item Solvers and optimization algorithms
+      \item Matlab-compatible I/O
+      \item I/O and signal processing for images and audio
+      \end{itemize}
+  \end{itemize}
+\end{frame}
+
+\begin{frame}{What's missing?}
+  \begin{itemize}
+    \item Non-lazy evaluation (required by Python) hurts performance
+    \item Bound to the CPU
+    \item Lacks symbolic or automatic differentiation
+    \item No automatic speed and stability optimization
+  \end{itemize}
+
+\end{frame}
+
+%% \begin{frame}{Why scripting for GPUs?}
+%%   \begin{bf}They complement each other\end{bf}
+
+%%   GPUs are everything that high level languages are not
+
+%%   \begin{itemize}
+%%     \item Highly parallel
+%%     \item Very architecture-sensitive
+%%     \item Built for maximum FP/memory throughput
+%%     \item So hard to program that meta-programming is easier
+%%   \end{itemize}
+
+%%   \begin{bf}Best of both worlds:\end{bf} easily scripted code which invokes high-performance GPU kernels.
+
+%%   \begin{bf}Theano C code generation removes overhead\end{bf} of
+%%   function calls between Python and C by launching many C functions at once.
+
+%% \end{frame}
+
+\begin{frame}{Theano}
+
+  High-level domain-specific language tailored to numeric computation.
+
+  \begin{itemize}
+    \item Syntax as close to NumPy as possible
+    \item Compiles most common expressions to C for CPU and/or GPU
+    \item Limited expressivity means more opportunities optimizations
+    \begin{itemize}
+      \item No subroutines -> global optimization
+      \item Strongly typed -> compiles to C
+      \item Array oriented -> easy parallelism
+      \item Support for looping and branching in expressions
+    \end{itemize}
+    \item Automatic speed and stability optimizations
+    \item Can reuse other technologies for best performance.
+    \begin{itemize}
+      \item BLAS, SciPy, Cython, Numba, PyCUDA, CUDA
+    \end{itemize}
+    \item Automatic differentiation and R op
+    \item Sparse matrices
+  \end{itemize}
+\end{frame}
+
+
+\begin{frame}{Pylearn2}
+
+  Machine Learning library aimed at researchers
+
+  \begin{itemize}
+    \item Built on top of Theano, for fast execution and use of GPU
+    \item Easy to try variants of implemented algorithms, and to extend them (using Theano)
+    \item Very modular, each component of the library can be used in isolation
+    \item Experiments can be specified through a YAML config file, or by a Python script
+    \item Scripts for visualizing weights, plot monitored values
+  \end{itemize}
+\end{frame}
+
+
+\begin{frame}{libgpuarray}
+  Goal: A common GPU $n$-dimensional array that can be reused by all projects, support for both CUDA and OpenCL.
+  \newline \newline
+  Motivation:
+  \begin{itemize}
+  \item Currently there are at least 6 different GPU arrays in Python
+    \begin{itemize}
+    \item CudaNdarray (Theano), GPUArray (pycuda), CUDAMatrix (cudamat), GPUArray (pyopencl), Clyther, Copperhead, ...
+    \item There are even more if we include other languages.
+    \end{itemize}
+  \item They are incompatible
+    \begin{itemize}
+    \item None have the same properties and interface
+    \end{itemize}
+  \item All of them implement a subset of numpy.ndarray properties
+  \item This is the new GPU backend on Theano
+  \end{itemize}
+\end{frame}
+
+
+\begin{frame}{Goal of the stack}
+\begin{center}
+\begin{bf}Fast to develop\end{bf}\newline \bigskip
+\begin{bf}Fast to run\end{bf}\newline \bigskip
+\hspace{-2.5cm}
+\includegraphics[width=0.35\textwidth]{road-runner-1.jpg}
+\end{center}
+\end{frame}
+
+
+\section{Theano}
+% I think it is a good idea to make explicit the change into a new section -- PL
+\begin{frame}
+  \tableofcontents[currentsection]
+\end{frame}
+
+\begin{frame}{Description}
+  \begin{itemize}
+    \item Mathematical symbolic expression compiler
+    \item Expressions mimic NumPy's syntax and semantics
+    \item Dynamic C/CUDA code generation
+    \begin{itemize}
+      \item C/C++, CUDA, OpenCL, PyCUDA, Cython, Numba, \ldots
+    \end{itemize}
+    \item Efficient symbolic differentiation
+    %\begin{itemize}
+    %  \item Derivatives of functions with one or many inputs.
+    %  \item Computation of the Jacobian, Hessian, R and L op.
+    %\end{itemize}
+    \item Speed and stability optimizations
+    \begin{itemize}
+      \item Gives the right answer for ``$\log (1 + x)$'' even if $x$ is really tiny.
+    \end{itemize}
+    \item Extensive unit-testing and self-verification
+    %\begin{itemize}
+    %  \item Detects and diagnoses many types of errors
+    %\end{itemize}
+    \item Works on Linux, OS X and Windows
+    \item Transparent use of a GPU
+    \begin{itemize}
+      \item {\tt float32} only for now (libgpuarray provides much more)
+      \item Limited support on Windows
+    \end{itemize}
+
+%    \item Statically typed and purely functional
+    \item Sparse operations (CPU only)
+  \end{itemize}
+\end{frame}
+
+% The following does not work with lstset, for some reason
+%\begin{frame}{Simple example}
+\begin{frame}[fragile]
+  \frametitle{Simple example}
+
+\lstset{language=Python,
+        commentstyle=\itshape\color{blue},
+        stringstyle=\color{violet},
+        }
+\begin{lstlisting}
+import theano
+# declare symbolic variable
+a = theano.tensor.vector("a")
+# build symbolic expression
+b = a + a ** 10
+# compile function
+f = theano.function([a], b)
+print f([0, 1, 2])
+# prints `array([0, 2, 1026])`
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}{Simple example: graph optimization}
+\center
+\includegraphics[width=0.35\textwidth]{../hpcs2011_tutorial/pics/f_unoptimized.png}
+\hspace{0.1\textwidth}
+\includegraphics[width=0.35\textwidth]{../hpcs2011_tutorial/pics/f_optimized.png}
+%Symbolic programming = *Paradigm shift*: people need to use it to understand it.
+
+\end{frame}
+
+
+\begin{frame}{Project status?}
+  \begin{itemize}
+    \item Mature: Theano has been developed and used since January 2008 (6.5 yrs old)
+    \item Driven over 100 research papers
+    \item Good user documentation
+    \item Active mailing list with participants from outside our lab
+    \item Core technology for a few Silicon-Valley start-ups
+    \item Many contributors (some from outside our lab)
+    \item Used to teach many university classes
+    \item Has been used for research at Google and Yahoo.
+  \end{itemize}
+  Theano: \url{deeplearning.net/software/theano/}
+
+  Deep Learning Tutorials: \url{deeplearning.net/tutorial/}
+\end{frame}
+
+
+\section{Pylearn2}
+\begin{frame}
+  \tableofcontents[currentsection]
+\end{frame}
+
+\begin{frame}{Pylearn2 details}
+    The core library contains a collection of:
+    \begin{itemize}
+      \item Training algorithms (e.g. Stochastic and Batch GD, model-specific rules)
+      \begin{itemize}
+        \item Costs, supervised/unsupervised and exact/estimated (e.g. NLL, Score matching, NCE)
+        \item Monitor, history of (functions of) parameters and hyperparameters on different data sets (training, validation, test)
+        \item Termination criteria, determine when to stop training
+      \end{itemize}
+      \item Training extensions, perform actions throughout the training process (e.g., early stopping)
+      \item Models (e.g. NNets, ConvNets, RBMs, k-means, PCA, SVMs)
+      \item Datasets (e.g. MNIST, CIFAR-10) and preprocessors (LCN, ZCA)
+    \end{itemize}
+\end{frame}
+
+\begin{frame}{Pylearn2 details, continued}
+\begin{itemize}
+  \item Data specifications which give semantics to data
+  \begin{itemize}
+    \item IndexSpace, 1D integer array e.g.\ for labels
+    \item VectorSpace, 1D float array e.g.\ for softmax output
+    \item Conv2DSpace, 3D float32 arrays e.g.\ for color image input
+  \end{itemize}
+  \item Allows for automatic conversion when needed e.g.\ labels to one-hot vectors, images to flattened vectors
+  \item YAML file allows experiments to be conducted without writing code
+\end{itemize}
+\end{frame}
+
+\begin{frame}{Project status}
+  \begin{itemize}
+    \item Has been used for scientific publications, Kaggle competitions, used by many researchers at LISA
+    \item Still under rapid development, however the API shouldn't break without warning
+    \item Documentation is incomplete, but quickly improving
+    \item Active mailing list with participants from outside our lab
+    \item Core technology for a least one Silicon-Valley start-up
+    \item Features currently in development:
+    \begin{itemize}
+      \item Recurrent neural networks (RNNs), based on the GroundHog framework developed at LISA
+      \item Better hyperparameter search support, using e.g. Hyperopt
+    \end{itemize}
+  \end{itemize}
+\end{frame}
+
+%% \begin{frame}[fragile]
+%%   \frametitle{Simple example}
+
+%% % I know it is not Python, but YAML is not supported by listings
+%% % close enough? -- PL
+%% \lstset{language=python,
+%%         commentstyle=\slshape\color{blue},
+%%         stringstyle=\color{violet},
+%%         basicstyle=\tiny\ttfamily}
+%% \begin{lstlisting}
+%% !obj:pylearn2.train.Train {
+%%     "dataset": !obj:pylearn2.datasets.dense_design_matrix.DenseDesignMatrix &dataset {
+%%         "X" : !obj:numpy.random.normal { 'size': [5,3] },
+%%     },
+%%     "model": !obj:pylearn2.models.autoencoder.DenoisingAutoencoder {
+%%         "nvis" : 3,
+%%         "nhid" : 4,
+%%         "irange" : 0.05,  # Interval from which to sample weights
+%%         "corruptor": !obj:pylearn2.corruption.BinomialCorruptor {
+%%             "corruption_level": 0.5,
+%%         },
+%%         "act_enc": "tanh",
+%%         "act_dec": null,    # Linear activation on the decoder side.
+%%     },
+%%     "algorithm": !obj:pylearn2.training_algorithms.sgd.SGD {
+%%         "learning_rate" : 1e-3,
+%%         "batch_size" : 5,
+%%         "monitoring_dataset" : *dataset,
+%%         "cost" : !obj:pylearn2.costs.autoencoder.MeanSquaredReconstructionError {},
+%%         "termination_criterion" : !obj:pylearn2.termination_criteria.EpochCounter {
+%%             "max_epochs": 10,
+%%         },
+%%     }
+%% }
+%% \end{lstlisting}
+%% \end{frame}
+
+%% \begin{frame}[fragile]
+%%   \frametitle{Simple example}
+
+%% \lstset{language=python,
+%%         commentstyle=\itshape\color{blue},
+%%         stringstyle=\color{violet},
+%%         basicstyle=\small
+%%         }
+%% \begin{lstlisting}
+%% # Use Pylearn2 to perform a linear transformation
+%% # followed by a softmax
+%% x = theano.tensor.vector("x")
+%% softmax = pylearn2.models.mlp.Softmax(
+%%     n_classes=2, layer_name="softmax", irange=0.05
+%% )
+%% softmax.set_input_space(
+%%   pylearn2.space.VectorSpace(dim=5)
+%% )
+%% y = softmax.fprop(x)
+%% f = theano.function([x], y)
+%% print f([0.12, 0.12, 0.43, 0.32, 0.96])
+%% # prints [0.43, 0.54]
+%% \end{lstlisting}
+%% \end{frame}
+
+\section{libgpuarray}
+\begin{frame}
+  \tableofcontents[currentsection]
+\end{frame}
+
+\begin{frame}{libgpuarray: Design Goals}
+  \begin{itemize}
+  \item Have the base object in C to allow collaboration with more projects.
+    \begin{itemize}
+    \item We want people from C, C++, ruby, R, \ldots all use the same base GPU ndarray.
+    \end{itemize}
+  \item Be compatible with CUDA and OpenCL.
+  \item Not too simple, (don’t support just matrix).
+  \item Support all dtype.
+  \item Allow strided views.
+  \item But still easy to develop new code that support only a few memory layout.
+    \begin{itemize}
+    \item This ease the development of new code.
+    \end{itemize}
+  \end{itemize}
+\end{frame}
+
+\begin{frame}{Project status?}
+  \begin{itemize}
+  \item Usable directly, but not all implementation available.
+  \item Multiple GPUs works.
+  \item Is the next GPU array container for Theano and is working.
+    \begin{itemize}
+    \item Not all Theano implementations available now.
+    \item OpenCL misses more implementations.
+    \item Multiple GPUs on the way.
+    \end{itemize}
+  \item Web site: \url{http://deeplearning.net/software/libgpuarray/}
+  \end{itemize}
+\end{frame}
+
+\section{Conclusion}
+\begin{frame}
+  \tableofcontents[currentsection]
+\end{frame}
+
+\begin{frame}{Conclusion}
+Theano/Pylearn2/libgpuarry provide an environment for machine learning that is:
+\begin{bf}Fast to develop\end{bf}\newline
+\begin{bf}Fast to run\end{bf}\newline
+\end{frame}
+
+\begin{frame}{Acknowledgments}
+\begin{itemize}
+\item All people working or having worked at the LISA lab.
+\item All Theano/Pylearn 2 users/contributors
+\item Compute Canada, RQCHP, NSERC, and Canada Research Chairs for providing funds or access to compute resources.
+\end{itemize}
+\end{frame}
+
+\begin{frame}
+\begin{center}
+\Huge
+Questions?
+\end{center}
+\end{frame}
+
+
+\end{document}
--- a/doc/omlw2014/road-runner-1.jpg
+++ b/doc/omlw2014/road-runner-1.jpg
--- a/doc/omlw2014/sharing.tex
+++ b/doc/omlw2014/sharing.tex
+\documentclass[utf8x,xcolor=pdftex,dvipsnames,table]{beamer}
+\usetheme{Malmoe}  % Now it's a beamer presentation with the lisa theme!
+\setbeamertemplate{footline}[page number]
+\usecolortheme{beaver}
+\usepackage[T1]{fontenc}
+\usepackage{amsmath}
+\usepackage[utf8x]{inputenc}
+%\logo{\includegraphics[width=.8in]{UdeM_NoirBleu_logo_Marie_crop}}
+\usepackage{listings}
+
+\newcommand{\superscript}[1]{\ensuremath{^{\textrm{#1}}}}
+
+\mode<presentation>
+
+\title{Theano, Pylearn2, libgpuarray: Sharing and Future}
+
+\author{%
+\footnotesize
+Frédéric Bastien, Bart van Merriënboer \newline
+Département d'Informatique et de Recherche Opérationnelle \newline
+Université de Montréal \newline
+Montréal, Canada \newline
+\texttt{\{bastienf, vanmerb\}@iro.umontreal.ca} \newline \newline
+}
+
+\date{OML Workshop 2014}
+
+\setbeamertemplate{navigation symbols}{}
+
+\begin{document}
+
+\begin{frame}[plain]
+ \titlepage
+ \vspace{-5em}
+ \includegraphics[width=1in]{../hpcs2011_tutorial/pics/lisabook_logo_text_3.png}
+ \hfill
+ \includegraphics[width=.8in]{../hpcs2011_tutorial/pics/UdeM_NoirBleu_logo_Marie_crop}
+\end{frame}
+
+\section{Future}
+\begin{frame}
+  \tableofcontents[currentsection]
+\end{frame}
+
+\begin{frame}{Theano}\setcounter{page}{1}
+\begin{itemize}
+\item Easier C code development and better documentation of that
+\item Faster compilation
+\item Multi-GPU
+\item Better looping (update to scan)
+\item Allow checkpoint with GPU to reload without GPU
+\item Less memory allocation(lower Theano overhead)
+\item Faster convolution
+\end{itemize}
+\end{frame}
+
+\begin{frame}{libgpuarray}
+\begin{itemize}
+\item Find other projects to use it?
+\item More functionality as NumPy
+\item Move some of the functionality from Python/Theano to the C level
+\item Optimize the kernel selection and parametrization based on the GPU
+\end{itemize}
+\end{frame}
+
+\begin{frame}{Pylearn2}
+\begin{itemize}
+\item RNN
+\item Better hyperparameter search support, using e.g. Hyperopt
+\item Documentation
+\item Checkpoint
+\item Better support for sparse dataset
+\item Machine translation examples
+\item Gated activations for conditional computation
+\item Variational Auto-Encoders
+\end{itemize}
+\end{frame}
+
+\begin{frame}
+\end{frame}
+
+\begin{frame}{Simplifying code sharing between}
+\begin{enumerate}
+  \item<1-> License: \begin{bf}Suggest BSD\end{bf} as it is used by many software in our field.
+    \begin{itemize}
+    \item Common license help share code.
+    \item When reusing code, don't forget to keep the license and the copyright notice
+    \end{itemize}
+  \item<2-> Common base object! \begin{bf}libgpuarray\end{bf}
+  \item<3-> Otherwise: put important implementation(e.g. convolution) in \begin{bf}separate file\end{bf} and \begin{bf}use raw ptr/shape/strides\end{bf} as inputs. Document that interface.
+  \item<4-> Acknowledge reuse \begin{bf}in section on web site\end{bf} AND \begin{bf}in papers\end{bf} about the software we reuse! (and use too)
+\end{enumerate}
+\end{frame}
+
+
+\end{document}