提交 e9c1d577 作者: Pascal Lamblin

Merge pull request #2214 from nouiz/blocksparse

Tests fix, pydotprint and Better error message.
...@@ -22,6 +22,7 @@ install: ...@@ -22,6 +22,7 @@ install:
# So we test with 0.11. Our internal buildbot have 0.7.2. # So we test with 0.11. Our internal buildbot have 0.7.2.
- conda create --yes -q -n py26 python=2.6 numpy=1.6 scipy=0.11 nose=1.1 pip - conda create --yes -q -n py26 python=2.6 numpy=1.6 scipy=0.11 nose=1.1 pip
- source activate py26 - source activate py26
- pip install pydot
- pip install . --no-deps --use-mirrors - pip install . --no-deps --use-mirrors
# command to run tests # command to run tests
......
...@@ -7,6 +7,7 @@ from copy import copy ...@@ -7,6 +7,7 @@ from copy import copy
import logging import logging
import os import os
import sys import sys
import warnings
# Not available on all platforms # Not available on all platforms
hashlib = None hashlib = None
...@@ -27,7 +28,6 @@ from theano import gof ...@@ -27,7 +28,6 @@ from theano import gof
from theano import config from theano import config
from theano.compat.six import StringIO from theano.compat.six import StringIO
from theano.gof import Op, Apply from theano.gof import Op, Apply
from theano.gof.python25 import any
from theano.compile import Function, debugmode from theano.compile import Function, debugmode
from theano.compile.profilemode import ProfileMode from theano.compile.profilemode import ProfileMode
...@@ -523,12 +523,14 @@ def pydotprint(fct, outfile=None, ...@@ -523,12 +523,14 @@ def pydotprint(fct, outfile=None,
max_label_size=70, scan_graphs=False, max_label_size=70, scan_graphs=False,
var_with_name_simple=False, var_with_name_simple=False,
print_output_file=True, print_output_file=True,
assert_nb_all_strings=-1 assert_nb_all_strings=-1,
return_image=False,
): ):
""" """
Print to a file (png format) the graph of a compiled theano function's ops. Print to a file (png format) the graph of a compiled theano function's ops.
:param fct: the theano fct returned by theano.function. :param fct: a compiled Theano function, a Variable, an Apply or
a list of Variable.
:param outfile: the output file where to put the graph. :param outfile: the output file where to put the graph.
:param compact: if True, will remove intermediate var that don't have name. :param compact: if True, will remove intermediate var that don't have name.
:param format: the file format of the output. :param format: the file format of the output.
...@@ -557,6 +559,16 @@ def pydotprint(fct, outfile=None, ...@@ -557,6 +559,16 @@ def pydotprint(fct, outfile=None,
the number of unique string nodes in the dot graph is equal to the number of unique string nodes in the dot graph is equal to
this number. This is used in tests to verify that dot won't this number. This is used in tests to verify that dot won't
merge Theano nodes. merge Theano nodes.
:param return_image: If True, it will create the image and return it.
Useful to display the image in ipython notebook.
.. code-block:: python
import theano
v = theano.tensor.vector()
from IPython.display import SVG
SVG(theano.printing.pydotprint(v*2, return_image=True,
format='svg'))
In the graph, ellipses are Apply Nodes (the execution of an op) In the graph, ellipses are Apply Nodes (the execution of an op)
and boxes are variables. If variables have names they are used as and boxes are variables. If variables have names they are used as
...@@ -589,27 +601,39 @@ def pydotprint(fct, outfile=None, ...@@ -589,27 +601,39 @@ def pydotprint(fct, outfile=None,
if (not isinstance(mode, ProfileMode) if (not isinstance(mode, ProfileMode)
or not fct in mode.profile_stats): or not fct in mode.profile_stats):
mode = None mode = None
fct_fgraph = fct.maker.fgraph outputs = fct.maker.fgraph.outputs
topo = fct.maker.fgraph.toposort()
elif isinstance(fct, gof.FunctionGraph): elif isinstance(fct, gof.FunctionGraph):
mode = None mode = None
profile = None profile = None
fct_fgraph = fct outputs = fct.outputs
topo = fct.toposort()
else: else:
raise ValueError(('pydotprint expects as input a theano.function or ' if isinstance(fct, gof.Variable):
'the FunctionGraph of a function!'), fct) fct = [fct]
elif isinstance(fct, gof.Apply):
fct = fct.outputs
assert isinstance(fct, (list, tuple))
assert all(isinstance(v, gof.Variable) for v in fct)
fct = gof.FunctionGraph(inputs=gof.graph.inputs(fct),
outputs=fct)
mode = None
profile = None
outputs = fct.outputs
topo = fct.toposort()
if not pydot_imported: if not pydot_imported:
raise RuntimeError("Failed to import pydot. You must install pydot" raise RuntimeError("Failed to import pydot. You must install pydot"
" for `pydotprint` to work.") " for `pydotprint` to work.")
return return
g = pd.Dot() g = pd.Dot()
if cond_highlight is not None: if cond_highlight is not None:
c1 = pd.Cluster('Left') c1 = pd.Cluster('Left')
c2 = pd.Cluster('Right') c2 = pd.Cluster('Right')
c3 = pd.Cluster('Middle') c3 = pd.Cluster('Middle')
cond = None cond = None
for node in fct_fgraph.toposort(): for node in topo:
if (node.op.__class__.__name__ == 'IfElse' if (node.op.__class__.__name__ == 'IfElse'
and node.op.name == cond_highlight): and node.op.name == cond_highlight):
cond = node cond = node
...@@ -684,7 +708,6 @@ def pydotprint(fct, outfile=None, ...@@ -684,7 +708,6 @@ def pydotprint(fct, outfile=None,
all_strings.add(varstr) all_strings.add(varstr)
return varstr return varstr
topo = fct_fgraph.toposort()
apply_name_cache = {} apply_name_cache = {}
def apply_name(node): def apply_name(node):
...@@ -736,7 +759,6 @@ def pydotprint(fct, outfile=None, ...@@ -736,7 +759,6 @@ def pydotprint(fct, outfile=None,
# Update the inputs that have an update function # Update the inputs that have an update function
input_update = {} input_update = {}
outputs = list(fct_fgraph.outputs)
if isinstance(fct, Function): if isinstance(fct, Function):
for i in reversed(fct.maker.expanded_inputs): for i in reversed(fct.maker.expanded_inputs):
if i.update is not None: if i.update is not None:
...@@ -792,7 +814,7 @@ def pydotprint(fct, outfile=None, ...@@ -792,7 +814,7 @@ def pydotprint(fct, outfile=None,
for id, var in enumerate(node.outputs): for id, var in enumerate(node.outputs):
varstr = var_name(var) varstr = var_name(var)
out = any([x[0] == 'output' for x in var.clients]) out = var in outputs
label = str(var.type) label = str(var.type)
if len(node.outputs) > 1: if len(node.outputs) > 1:
label = str(id) + ' ' + label label = str(id) + ' ' + label
...@@ -825,15 +847,11 @@ def pydotprint(fct, outfile=None, ...@@ -825,15 +847,11 @@ def pydotprint(fct, outfile=None,
if not outfile.endswith('.' + format): if not outfile.endswith('.' + format):
outfile += '.' + format outfile += '.' + format
g.write(outfile, prog='dot', format=format)
if print_output_file:
print 'The output file is available at', outfile
if assert_nb_all_strings != -1: if assert_nb_all_strings != -1:
assert len(all_strings) == assert_nb_all_strings assert len(all_strings) == assert_nb_all_strings, len(all_strings)
if scan_graphs: if scan_graphs:
scan_ops = [(idx, x) for idx, x in enumerate(fct_fgraph.toposort()) scan_ops = [(idx, x) for idx, x in enumerate(topo)
if isinstance(x.op, theano.scan_module.scan_op.Scan)] if isinstance(x.op, theano.scan_module.scan_op.Scan)]
path, fn = os.path.split(outfile) path, fn = os.path.split(outfile)
basename = '.'.join(fn.split('.')[:-1]) basename = '.'.join(fn.split('.')[:-1])
...@@ -851,6 +869,13 @@ def pydotprint(fct, outfile=None, ...@@ -851,6 +869,13 @@ def pydotprint(fct, outfile=None,
high_contrast, cond_highlight, colorCodes, high_contrast, cond_highlight, colorCodes,
max_label_size, scan_graphs) max_label_size, scan_graphs)
if return_image:
return g.create(prog='dot', format=format)
else:
g.write(outfile, prog='dot', format=format)
if print_output_file:
print 'The output file is available at', outfile
def pydotprint_variables(vars, def pydotprint_variables(vars,
outfile=None, outfile=None,
...@@ -859,8 +884,15 @@ def pydotprint_variables(vars, ...@@ -859,8 +884,15 @@ def pydotprint_variables(vars,
high_contrast=True, colorCodes=None, high_contrast=True, colorCodes=None,
max_label_size=50, max_label_size=50,
var_with_name_simple=False): var_with_name_simple=False):
''' Identical to pydotprint just that it starts from a variable instead '''DEPRECATED: use pydotprint() instead.
of a compiled function. Could be useful ? '''
Identical to pydotprint just that it starts from a variable
instead of a compiled function. Could be useful ?
'''
warnings.warn("pydotprint_variables() is deprecated."
" Use pydotprint() instead.")
if colorCodes is None: if colorCodes is None:
colorCodes = default_colorCodes colorCodes = default_colorCodes
...@@ -949,7 +981,7 @@ def pydotprint_variables(vars, ...@@ -949,7 +981,7 @@ def pydotprint_variables(vars,
g.add_node(pd.Node(varastr, color='green')) g.add_node(pd.Node(varastr, color='green'))
else: else:
varastr = my_list[nd] varastr = my_list[nd]
label = '' label = None
if len(app.inputs) > 1: if len(app.inputs) > 1:
label = str(i) label = str(i)
g.add_edge(pd.Edge(varastr, astr, label=label)) g.add_edge(pd.Edge(varastr, astr, label=label))
...@@ -974,7 +1006,7 @@ def pydotprint_variables(vars, ...@@ -974,7 +1006,7 @@ def pydotprint_variables(vars,
g.add_node(pd.Node(varastr, color=color)) g.add_node(pd.Node(varastr, color=color))
else: else:
varastr = my_list[nd] varastr = my_list[nd]
label = '' label = None
if len(app.outputs) > 1: if len(app.outputs) > 1:
label = str(i) label = str(i)
g.add_edge(pd.Edge(astr, varastr, label=label)) g.add_edge(pd.Edge(astr, varastr, label=label))
......
...@@ -327,10 +327,19 @@ class GpuDimShuffle(GpuOp): ...@@ -327,10 +327,19 @@ class GpuDimShuffle(GpuOp):
def make_node(self, input): def make_node(self, input):
ib = tuple(input.type.broadcastable) ib = tuple(input.type.broadcastable)
if not ib == self.input_broadcastable: if not ib == self.input_broadcastable:
raise TypeError( if len(ib) != len(self.input_broadcastable):
"The number of dimensions and/or broadcastable pattern of the" raise TypeError((
" input is incorrect for this op. Expected %s, got %s." % "The number of dimensions of the "
(self.input_broadcastable, ib)) "input is incorrect for this op. Expected %s, got %s."
% (self.input_broadcastable, ib)))
for expected, b in zip(self.input_broadcastable, ib):
if expected is True and b is False:
raise TypeError((
"The broadcastable pattern of the "
"input is incorrect for this op. Expected %s, got %s."
% (self.input_broadcastable, ib)))
#else, expected == b or expected is False and b is True
# Both case are good.
ob = [] ob = []
if not isinstance(input.type, CudaNdarrayType): if not isinstance(input.type, CudaNdarrayType):
raise TypeError("The input of a GpuDimshuffle must" raise TypeError("The input of a GpuDimshuffle must"
......
...@@ -1362,7 +1362,8 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM): ...@@ -1362,7 +1362,8 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
"""Gradient wrt. filters for `GpuCorr3dMM`. """Gradient wrt. filters for `GpuCorr3dMM`.
:note: You will not want to use this directly, but rely on Theano's :note: You will not want to use this directly, but rely on Theano's
automatic differentiation or graph optimization to use it as needed.""" automatic differentiation or graph optimization to use it as needed.
"""
def __init__(self, border_mode="valid", def __init__(self, border_mode="valid",
subsample=(1, 1, 1), subsample=(1, 1, 1),
...@@ -1417,7 +1418,8 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM): ...@@ -1417,7 +1418,8 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
"""Gradient wrt. inputs for `GpuCorr3dMM`. """Gradient wrt. inputs for `GpuCorr3dMM`.
:note: You will not want to use this directly, but rely on Theano's :note: You will not want to use this directly, but rely on Theano's
automatic differentiation or graph optimization to use it as needed.""" automatic differentiation or graph optimization to use it as needed.
"""
def __init__(self, border_mode="valid", def __init__(self, border_mode="valid",
subsample=(1, 1, 1), subsample=(1, 1, 1),
......
...@@ -3,16 +3,17 @@ import theano ...@@ -3,16 +3,17 @@ import theano
from theano import Apply, tensor, scalar, Constant from theano import Apply, tensor, scalar, Constant
from theano.tensor import DimShuffle, discrete_dtypes from theano.tensor import DimShuffle, discrete_dtypes
from theano.gradient import grad_undefined, grad_not_implemented from theano.gradient import grad_undefined
from theano.sandbox.cuda import cuda_available, GpuOp, GpuElemwise from theano.sandbox.cuda import cuda_available, GpuOp, GpuElemwise
if cuda_available: if cuda_available:
from theano.sandbox.cuda import (basic_ops, CudaNdarrayType, from theano.sandbox.cuda import (basic_ops,
CudaNdarray, opt, GpuFromHost, opt, GpuFromHost,
HostFromGpu, host_from_gpu, HostFromGpu, host_from_gpu,
GpuDimShuffle) GpuDimShuffle)
class SparseBlockGemvSS(GpuOp): class SparseBlockGemvSS(GpuOp):
""" """
This op computes the dot product of specified pieces of vectors This op computes the dot product of specified pieces of vectors
...@@ -183,7 +184,8 @@ static int SparseBlockGemv_copy(PyArrayObject *a, npy_intp *b) { ...@@ -183,7 +184,8 @@ static int SparseBlockGemv_copy(PyArrayObject *a, npy_intp *b) {
cudaMemcpyHostToDevice); cudaMemcpyHostToDevice);
Py_DECREF(aa); Py_DECREF(aa);
if (err != cudaSuccess) { if (err != cudaSuccess) {
PyErr_SetString(PyExc_RuntimeError, "Cannot copy index data to GPU"); PyErr_Format(PyExc_RuntimeError, "Cannot copy index data to GPU (%s)",
cudaGetErrorString(err));
return -1; return -1;
} }
return 0; return 0;
...@@ -241,11 +243,11 @@ Py_INCREF(%(out)s); ...@@ -241,11 +243,11 @@ Py_INCREF(%(out)s);
res = """ res = """
if (CudaNdarray_prep_output(&%(out)s, 3, CudaNdarray_HOST_DIMS(%(o)s))) if (CudaNdarray_prep_output(&%(out)s, 3, CudaNdarray_HOST_DIMS(%(o)s)))
{ {
PyErr_SetString(PyExc_RuntimeError, "Cannot allocate output"); // Error already set
%(fail)s %(fail)s
} }
if (CudaNdarray_CopyFromCudaNdarray(%(out)s, %(o)s)) { if (CudaNdarray_CopyFromCudaNdarray(%(out)s, %(o)s)) {
PyErr_SetString(PyExc_RuntimeError, "Cannot copy data to output"); // Error already set
%(fail)s %(fail)s
} }
""" % dict(out=out, o=o, fail=sub['fail']) """ % dict(out=out, o=o, fail=sub['fail'])
...@@ -313,7 +315,8 @@ CudaNdarray_HOST_STRIDES(%(out)s)[0], CudaNdarray_HOST_STRIDES(%(out)s)[1], ...@@ -313,7 +315,8 @@ CudaNdarray_HOST_STRIDES(%(out)s)[0], CudaNdarray_HOST_STRIDES(%(out)s)[1],
CudaNdarray_HOST_DIMS(%(h)s)[1] * CudaNdarray_HOST_DIMS(%(h)s)[1] *
CudaNdarray_HOST_DIMS(%(o)s)[0]); CudaNdarray_HOST_DIMS(%(o)s)[0]);
if (err != CUBLAS_STATUS_SUCCESS) { if (err != CUBLAS_STATUS_SUCCESS) {
PyErr_SetString(PyExc_RuntimeError, "SgemvBatched failed"); PyErr_Format(PyExc_RuntimeError, "SgemvBatched failed(%%s)",
cublasGetErrorString(err));
%(fail)s %(fail)s
} }
} }
...@@ -322,7 +325,7 @@ CudaNdarray_HOST_STRIDES(%(out)s)[0], CudaNdarray_HOST_STRIDES(%(out)s)[1], ...@@ -322,7 +325,7 @@ CudaNdarray_HOST_STRIDES(%(out)s)[0], CudaNdarray_HOST_STRIDES(%(out)s)[1],
W=W, fail=sub['fail'], name=nodename) W=W, fail=sub['fail'], name=nodename)
def c_code_cache_version(self): def c_code_cache_version(self):
return (10,) return (11,)
def grad(self, inputs, grads): def grad(self, inputs, grads):
o, W, h, inputIdx, outputIdx = inputs o, W, h, inputIdx, outputIdx = inputs
...@@ -482,7 +485,8 @@ static int SparseBlockOuter_copy(PyArrayObject *a, npy_intp *b) { ...@@ -482,7 +485,8 @@ static int SparseBlockOuter_copy(PyArrayObject *a, npy_intp *b) {
cudaMemcpyHostToDevice); cudaMemcpyHostToDevice);
Py_DECREF(aa); Py_DECREF(aa);
if (err != cudaSuccess) { if (err != cudaSuccess) {
PyErr_SetString(PyExc_RuntimeError, "Cannot copy index data to GPU"); PyErr_Format(PyExc_RuntimeError, "Cannot copy index data to GPU(%s)",
cudaGetErrorString(err));
return -1; return -1;
} }
return 0; return 0;
...@@ -541,11 +545,11 @@ Py_INCREF(%(out)s); ...@@ -541,11 +545,11 @@ Py_INCREF(%(out)s);
res = """ res = """
if (CudaNdarray_prep_output(&%(out)s, 4, CudaNdarray_HOST_DIMS(%(o)s))) if (CudaNdarray_prep_output(&%(out)s, 4, CudaNdarray_HOST_DIMS(%(o)s)))
{ {
PyErr_SetString(PyExc_RuntimeError, "Cannot allocate output"); // Python error already set
%(fail)s %(fail)s
} }
if (CudaNdarray_CopyFromCudaNdarray(%(out)s, %(o)s)) { if (CudaNdarray_CopyFromCudaNdarray(%(out)s, %(o)s)) {
PyErr_SetString(PyExc_RuntimeError, "Cannot copy data to output"); //Error message already set
%(fail)s %(fail)s
} }
""" % dict(out=out, o=o, fail=sub['fail']) """ % dict(out=out, o=o, fail=sub['fail'])
...@@ -612,7 +616,8 @@ CudaNdarray_HOST_STRIDES(%(out)s)[0], CudaNdarray_HOST_STRIDES(%(out)s)[1], ...@@ -612,7 +616,8 @@ CudaNdarray_HOST_STRIDES(%(out)s)[0], CudaNdarray_HOST_STRIDES(%(out)s)[1],
"block size too big. The current limit is 65535 for " "block size too big. The current limit is 65535 for "
"iSize * oSize."); "iSize * oSize.");
} else { } else {
PyErr_SetString(PyExc_RuntimeError, "SgerBatched failed"); PyErr_Format(PyExc_RuntimeError, "SgerBatched failed(%%s)",
cublasGetErrorString(err));
} }
%(fail)s %(fail)s
} }
...@@ -620,7 +625,7 @@ CudaNdarray_HOST_STRIDES(%(out)s)[0], CudaNdarray_HOST_STRIDES(%(out)s)[1], ...@@ -620,7 +625,7 @@ CudaNdarray_HOST_STRIDES(%(out)s)[0], CudaNdarray_HOST_STRIDES(%(out)s)[1],
alpha=alpha, fail=sub['fail']) alpha=alpha, fail=sub['fail'])
def c_code_cache_version(self): def c_code_cache_version(self):
return (9,) return (10,)
sparse_block_outer_ss = SparseBlockOuterSS(False) sparse_block_outer_ss = SparseBlockOuterSS(False)
......
...@@ -91,6 +91,8 @@ extern DllExport cublasHandle_t handle; ...@@ -91,6 +91,8 @@ extern DllExport cublasHandle_t handle;
* *
* device_malloc will set the Python error message before returning None. * device_malloc will set the Python error message before returning None.
* device_free will return nonzero on failure (after setting the python error message) * device_free will return nonzero on failure (after setting the python error message)
*
* Set the Python error
*/ */
DllExport void * device_malloc(size_t size); DllExport void * device_malloc(size_t size);
DllExport void * device_malloc(size_t size, int verbose); DllExport void * device_malloc(size_t size, int verbose);
...@@ -148,6 +150,8 @@ enum operator_t ...@@ -148,6 +150,8 @@ enum operator_t
/* /*
* Return a CudaNdarray whose 'nd' dimensions are all 0. * Return a CudaNdarray whose 'nd' dimensions are all 0.
* if nd==-1, it is not initialized. * if nd==-1, it is not initialized.
*
* Set the Python error
*/ */
DllExport PyObject * DllExport PyObject *
CudaNdarray_New(int nd=-1); CudaNdarray_New(int nd=-1);
...@@ -286,6 +290,8 @@ static PyObject *CudaNdarray_SIZE_Object(const CudaNdarray *self, void *closure) ...@@ -286,6 +290,8 @@ static PyObject *CudaNdarray_SIZE_Object(const CudaNdarray *self, void *closure)
* Allocate a new CudaNdarray with room for given number of dimensions * Allocate a new CudaNdarray with room for given number of dimensions
* *
* No Storage space is allocated (and all dimensions are 0) * No Storage space is allocated (and all dimensions are 0)
*
* Set the Python error
*/ */
DllExport PyObject * CudaNdarray_new_nd(const int nd); DllExport PyObject * CudaNdarray_new_nd(const int nd);
...@@ -294,6 +300,8 @@ DllExport PyObject * CudaNdarray_new_nd(const int nd); ...@@ -294,6 +300,8 @@ DllExport PyObject * CudaNdarray_new_nd(const int nd);
* *
* Note: This does not allocate storage for data, or free * Note: This does not allocate storage for data, or free
* pre-existing storage. * pre-existing storage.
*
* Set the Python error
*/ */
DllExport inline int ALWAYS_INLINE DllExport inline int ALWAYS_INLINE
CudaNdarray_set_nd(CudaNdarray * self, const int nd) CudaNdarray_set_nd(CudaNdarray * self, const int nd)
...@@ -505,6 +513,8 @@ DllExport int CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj); ...@@ -505,6 +513,8 @@ DllExport int CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj);
* e.g. suppose self and other are 2D matrices and other * e.g. suppose self and other are 2D matrices and other
* has only one row. Then we need to copy this row several * has only one row. Then we need to copy this row several
* times when copying to self. * times when copying to self.
*
* Set the Python error
*/ */
DllExport int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, DllExport int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self,
const CudaNdarray * other, bool unbroadcast = false); const CudaNdarray * other, bool unbroadcast = false);
...@@ -575,6 +585,7 @@ DllExport int CudaNdarray_dimshuffle(CudaNdarray * self, unsigned int len, const ...@@ -575,6 +585,7 @@ DllExport int CudaNdarray_dimshuffle(CudaNdarray * self, unsigned int len, const
DllExport PyObject* DllExport PyObject*
CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args); CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args);
// Set the Python error
int fprint_CudaNdarray(FILE * fd, const CudaNdarray *self); int fprint_CudaNdarray(FILE * fd, const CudaNdarray *self);
...@@ -589,6 +600,8 @@ DllExport int CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_othe ...@@ -589,6 +600,8 @@ DllExport int CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_othe
// or a pointer to an ndarray of the right size. In the last case it will // or a pointer to an ndarray of the right size. In the last case it will
// not change. // not change.
// If fortran is non-zero, a fortran order is expected/created // If fortran is non-zero, a fortran order is expected/created
//
// Set the Python error
DllExport int CudaNdarray_prep_output(CudaNdarray ** arr, int nd, DllExport int CudaNdarray_prep_output(CudaNdarray ** arr, int nd,
const int * dims, int fortran = 0); const int * dims, int fortran = 0);
......
...@@ -1846,7 +1846,8 @@ def local_assert(node): ...@@ -1846,7 +1846,8 @@ def local_assert(node):
node.inputs[0].owner and node.inputs[0].owner and
isinstance(node.inputs[0].owner.op, isinstance(node.inputs[0].owner.op,
HostFromGpu)): HostFromGpu)):
return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0]))] return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0],
*node.inputs[1:]))]
@register_opt() @register_opt()
......
...@@ -80,7 +80,8 @@ def test_gpualloc(): ...@@ -80,7 +80,8 @@ def test_gpualloc():
x = theano.shared(numpy.ones(3, dtype='float32'), 'x') x = theano.shared(numpy.ones(3, dtype='float32'), 'x')
m = (x).dimshuffle(['x', 0]) m = (x).dimshuffle(['x', 0])
v = tensor.alloc(1., *m.shape) v = tensor.alloc(1., *m.shape)
f = theano.function([], v + x, mode=mode_with_gpu) f = theano.function([], v + x,
mode=mode_with_gpu.excluding("local_alloc_elemwise"))
l = f.maker.fgraph.toposort() l = f.maker.fgraph.toposort()
assert numpy.any([isinstance(x.op, cuda.GpuAlloc) for x in l]) assert numpy.any([isinstance(x.op, cuda.GpuAlloc) for x in l])
......
...@@ -514,7 +514,8 @@ def local_gpua_softmaxwithbias(node): ...@@ -514,7 +514,8 @@ def local_gpua_softmaxwithbias(node):
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([theano.tensor.opt.Assert]) @op_lifter([theano.tensor.opt.Assert])
def local_assert(node): def local_assert(node):
return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0]))] return [host_from_gpu(node.op(node.inputs[0].owner.inputs[0],
*node.inputs[1:]))]
@register_opt('fast_compile') @register_opt('fast_compile')
......
...@@ -5198,7 +5198,17 @@ class Choose(Op): ...@@ -5198,7 +5198,17 @@ class Choose(Op):
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
if isinstance(node.inputs[1], TensorVariable): if isinstance(node.inputs[1], TensorVariable):
return[(shapes[0])] # We have padded node.inputs[0] to the right number of
# dimensions for the output
l = []
for sh1, sh2, b1 in zip(shapes[0],
shapes[1][1:],
node.inputs[0].broadcastable):
if b1:
l.append(sh2)
else:
l.append(sh1)
return [tuple(l)]
else: else:
import theano.typed_list import theano.typed_list
assert isinstance(node.inputs[1], assert isinstance(node.inputs[1],
...@@ -5214,11 +5224,47 @@ class Choose(Op): ...@@ -5214,11 +5224,47 @@ class Choose(Op):
# import at the top as it would cause circular import. # import at the top as it would cause circular import.
import theano.typed_list import theano.typed_list
a = as_tensor_variable(a) a = as_tensor_variable(a)
if isinstance(choices, (tuple, list)): if a.dtype not in theano.tensor.discrete_dtypes:
raise TypeError(
'choose first argument must have an [u]int* dtype. Got %s.'
% a.dtype)
if isinstance(choices, (tuple, list,
theano.typed_list.TypedListVariable)):
choice = theano.typed_list.make_list(choices) choice = theano.typed_list.make_list(choices)
choice_ndim = choice.ttype.ndim
choice_bcast = choice.ttype.broadcastable
else: else:
choice = as_tensor_variable(choices) choice = as_tensor_variable(choices)
return Apply(self, [a, choice], [a.type()]) choice_ndim = choice.ndim - 1
choice_bcast = choice.broadcastable[1:]
out_ndim = numpy.max([a.ndim, choice_ndim])
# Make explicit all added broadcastable dimensions.
a = shape_padleft(a, out_ndim - a.ndim)
if len(choice_bcast) != out_ndim:
if isinstance(choice.type, TensorType):
choice = choice.dimshuffle(0,
*(('x',) *(out_ndim - choice_ndim) +
tuple(range(1, choice.ndim))))
choice_ndim = choice.ndim - 1
choice_bcast = choice.broadcastable[1:]
else:
raise NotImplementedError(
"We currently didn't implemented that case. "
"To make it work, explicitly add dimensions "
"of size one for dimensions that will be broadcasted")
assert isinstance(node.inputs[1],
theano.typed_list.TypedListVariable)
bcast = [False] * out_ndim
for idx, (b1, b2) in enumerate(
zip(a.broadcastable,
(True,) * (out_ndim - choice_ndim) + choice_bcast)):
if b1 and b2:
bcast[idx] = True
o = TensorType(choice.dtype, bcast)
return Apply(self, [a, choice], [o()])
def perform(self, node, inputs, (z, )): def perform(self, node, inputs, (z, )):
a = inputs[0] a = inputs[0]
......
...@@ -7045,74 +7045,119 @@ class T_Power(unittest.TestCase): ...@@ -7045,74 +7045,119 @@ class T_Power(unittest.TestCase):
class T_Choose(utt.InferShapeTester): class T_Choose(utt.InferShapeTester):
op = staticmethod(choose) op = staticmethod(choose)
op_class = Choose op_class = Choose
modes = ['raise', 'wrap', 'clip']
def test_numpy_compare(self): def test_numpy_compare(self):
a = tensor.vector(dtype='int64') a = tensor.vector(dtype='int32')
b = tensor.matrix(dtype='int64') b = tensor.matrix(dtype='float32')
A = numpy.asarray(numpy.random.rand(4), dtype='int64')
B = numpy.asarray(numpy.random.rand(4, 4), dtype='int64')
modes = ['raise', 'wrap', 'clip'] A = numpy.asarray(numpy.random.random_integers(0, 3, 4),
dtype='int32')
B = numpy.asarray(numpy.random.rand(4, 4), dtype='float32')
for m in modes: for m in self.modes:
f = function([a, b], choose(a, b, mode=m)) f = function([a, b], choose(a, b, mode=m))
t_c = f(A, B) t_c = f(A, B)
n_c = numpy.choose(A, B, mode=m) n_c = numpy.choose(A, B, mode=m)
assert numpy.allclose(t_c, n_c) assert numpy.allclose(t_c, n_c)
def test_numpy_compare_tuple(self): def test_broadcasted(self):
a = tensor.scalar(dtype='int32')
b = tensor.matrix(dtype='float32')
a = tensor.tensor3(dtype='int64') # Test when a is broadcastable
b = tensor.tensor3(dtype='int64') A = 3
c = tensor.tensor3(dtype='int64') B = numpy.asarray(numpy.random.rand(4, 4), dtype='float32')
A = numpy.asarray(numpy.random.rand(2, 1, 1), dtype='int64') for m in self.modes:
B = numpy.asarray(numpy.random.rand(1, 6, 1), dtype='int64') f = function([a, b], choose(a, b, mode=m))
C = numpy.asarray(numpy.random.rand(1, 1, 5), dtype='int64') t_c = f(A, B)
n_c = numpy.choose(A, B, mode=m)
assert numpy.allclose(t_c, n_c)
f = function([a, b, c], choose(a, (b, c))) # Test when the result should be broadcastable
t_c = f(A, B, C) b = theano.tensor.col(dtype='float32')
n_c = numpy.choose(A, (B, C)) B = numpy.asarray(numpy.random.rand(4, 1), dtype='float32')
for m in self.modes:
f = function([a, b], choose(a, b, mode=m))
assert choose(a, b, mode=m).broadcastable[0]
t_c = f(A, B)
n_c = numpy.choose(A, B, mode=m)
assert numpy.allclose(t_c, n_c) assert numpy.allclose(t_c, n_c)
def test_infer_shape(self): def test_dtype_error(self):
a = tensor.scalar(dtype='float32')
b = tensor.matrix(dtype='float32')
A = 3
B = numpy.asarray(numpy.random.rand(4, 4), dtype='float32')
self.assertRaises(TypeError, choose, a, b)
def test_numpy_compare_tuple(self):
a = tensor.matrix(dtype='int64') a = tensor.tensor3(dtype='int32')
b = tensor.vector(dtype='int64') b = tensor.tensor3(dtype='float32')
c = tensor.matrix(dtype='int64') c = tensor.tensor3(dtype='float32')
d = tensor.vector(dtype='int64')
A = numpy.asarray(numpy.random.rand(5, 4), dtype='int64') A = numpy.asarray(numpy.random.random_integers(0, 1, (2, 1, 1)),
B = numpy.asarray(numpy.random.rand(4), dtype='int64') dtype='int32')
C = numpy.asarray(numpy.random.rand(7, 4), dtype='int64') B = numpy.asarray(numpy.random.rand(1, 6, 1), dtype='float32')
D = numpy.asarray(numpy.random.rand(4), dtype='int64') C = numpy.asarray(numpy.random.rand(1, 1, 5), dtype='float32')
var1 = [a, b, a, b] for m in self.modes:
var2 = [c, d, b, a] f = function([a, b, c], choose(a, (b, c), mode=m))
mat1 = [A, B, A, B] t_c = f(A, B, C)
mat2 = [C, D, B, A] n_c = numpy.choose(A, (B, C), mode=m)
assert numpy.allclose(t_c, n_c)
for v, m, w, n in zip(var1, mat1, var2, mat2): def test_infer_shape(self):
self._compile_and_check([v, w], # theano.function inputs for shp1, shp2 in [
[self.op(v, w)], # theano.function outputs ((5, 4), (7, 4)),
((1, 4), (7, 4)),
((5, 1), (7, 4)),
((5, 4), (1, 4)),
((5, 4), (7, 1)),
((5, 4), (4,)),
((1, 4), (4,)),
((5, 1), (4,)),
((5, 4), (1,)),
((4,), (5, 4)),
((1,), (5, 4)),
((4,), (1, 4)),
((4,), (3, 1)),
((4,), (4,)),
((1,), (4,)),
((4,), (1,)),
((1,), (1,)),
]:
a = tensor.tensor(dtype='int32',
broadcastable=[n == 1 for n in shp1])
c = tensor.tensor(dtype='float32',
broadcastable=[n == 1 for n in shp2])
A = numpy.asarray(numpy.random.rand(*shp1) * shp2[0], dtype='int32')
C = numpy.asarray(numpy.random.rand(*shp2) * shp2[0], dtype='float32')
self._compile_and_check([a, c], # theano.function inputs
[self.op(a, c)], # theano.function outputs
# Always use not square matrix! # Always use not square matrix!
# inputs data # inputs data
[m, n], [A, C],
# Op that should be removed from the graph. # Op that should be removed from the graph.
self.op_class) self.op_class)
# Disabled as it isn't implemented. # Disabled as it isn't implemented.
def ___test_infer_shape_tuple(self): def ___test_infer_shape_tuple(self):
a = tensor.tensor3(dtype='int64') a = tensor.tensor3(dtype='int32')
b = tensor.tensor3(dtype='int64') b = tensor.tensor3(dtype='int32')
c = tensor.tensor3(dtype='int64') c = tensor.tensor3(dtype='int32')
A = numpy.asarray([1, 0], dtype='int64').reshape((2, 1, 1)) A = numpy.asarray([1, 0], dtype='int32').reshape((2, 1, 1))
B = numpy.asarray(numpy.random.rand(1, 4, 1), dtype='int64') B = numpy.asarray(numpy.random.rand(1, 4, 1), dtype='int32')
C = numpy.asarray(numpy.random.rand(1, 1, 7), dtype='int64') C = numpy.asarray(numpy.random.rand(1, 1, 7), dtype='int32')
f = function([a, b, c], choose(a, (b, c))) f = function([a, b, c], choose(a, (b, c)))
shape = (2, 4, 7) shape = (2, 4, 7)
......
...@@ -2491,6 +2491,7 @@ def test_local_IncSubtensor_serialize(): ...@@ -2491,6 +2491,7 @@ def test_local_IncSubtensor_serialize():
cost = T.sqr(t - y) cost = T.sqr(t - y)
dW = theano.grad(cost, W) dW = theano.grad(cost, W)
mode = theano.compile.mode.get_default_mode().excluding('fusion') mode = theano.compile.mode.get_default_mode().excluding('fusion')
mode = mode.including("local_IncSubtensor_serialize")
f = theano.function([i, j, t], updates=[(W, W - 0.01 * dW)], mode=mode) f = theano.function([i, j, t], updates=[(W, W - 0.01 * dW)], mode=mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
adds = [n for n in topo if isinstance(n.op, T.Elemwise) and adds = [n for n in topo if isinstance(n.op, T.Elemwise) and
......
...@@ -45,6 +45,16 @@ def test_pydotprint_cond_highlight(): ...@@ -45,6 +45,16 @@ def test_pydotprint_cond_highlight():
' is no IfElse node in the graph\n') ' is no IfElse node in the graph\n')
def test_pydotprint_return_image():
# Skip test if pydot is not available.
if not theano.printing.pydot_imported:
raise SkipTest('pydot not available')
x = tensor.dvector()
ret = theano.printing.pydotprint(x * 2, return_image=True)
assert isinstance(ret, str)
def test_pydotprint_variables(): def test_pydotprint_variables():
""" """
This is a REALLY PARTIAL TEST. This is a REALLY PARTIAL TEST.
...@@ -65,11 +75,10 @@ def test_pydotprint_variables(): ...@@ -65,11 +75,10 @@ def test_pydotprint_variables():
new_handler.setLevel(logging.DEBUG) new_handler.setLevel(logging.DEBUG)
orig_handler = theano.logging_default_handler orig_handler = theano.logging_default_handler
theano.theano_logger.removeHandler(orig_handler)
theano.theano_logger.addHandler(new_handler)
theano.theano_logger.removeHandler(orig_handler) theano.theano_logger.removeHandler(orig_handler)
theano.theano_logger.addHandler(new_handler) theano.theano_logger.addHandler(new_handler)
try: try:
theano.printing.pydotprint(x * 2)
theano.printing.pydotprint_variables(x * 2) theano.printing.pydotprint_variables(x * 2)
finally: finally:
theano.theano_logger.addHandler(orig_handler) theano.theano_logger.addHandler(orig_handler)
...@@ -94,14 +103,13 @@ def test_pydotprint_long_name(): ...@@ -94,14 +103,13 @@ def test_pydotprint_long_name():
f = theano.function([x], [x * 2, x + x], mode=mode) f = theano.function([x], [x * 2, x + x], mode=mode)
f([1, 2, 3, 4]) f([1, 2, 3, 4])
s = StringIO()
new_handler = logging.StreamHandler(s)
new_handler.setLevel(logging.DEBUG)
orig_handler = theano.logging_default_handler
theano.printing.pydotprint(f, max_label_size=5, theano.printing.pydotprint(f, max_label_size=5,
print_output_file=False, print_output_file=False,
assert_nb_all_strings=6) assert_nb_all_strings=6)
theano.printing.pydotprint([x * 2, x + x],
max_label_size=5,
print_output_file=False,
assert_nb_all_strings=8)
def test_pydotprint_profile(): def test_pydotprint_profile():
......
...@@ -41,6 +41,7 @@ class _typed_list_py_operators: ...@@ -41,6 +41,7 @@ class _typed_list_py_operators:
return index_(self, elem) return index_(self, elem)
ttype = property(lambda self: self.type.ttype) ttype = property(lambda self: self.type.ttype)
dtype = property(lambda self: self.type.ttype.dtype)
class TypedListVariable(_typed_list_py_operators, Variable): class TypedListVariable(_typed_list_py_operators, Variable):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论