Commit d0184177 authored by Dumitru Erhan

branch merge

......@@ -41,12 +41,27 @@ precise inspection of what's being computed where, when, and how, see the
:ref:`faq_wraplinker`.
How do I print a graph before or after compilation?
----------------------------------------------------------
Theano provides a function to print a graph before and after compilation:
>>> x = T.dscalar('x')
>>> y = x**2
>>> gy = T.grad(y, x)
>>> pp(gy) # print out the gradient prior to optimization
'((fill((x ** 2), 1.0) * 2) * (x ** (2 - 1)))'
>>> f = function([x], gy)
>>> pp(f.maker.env.outputs[0])
'(2.0 * x)'
The argument to ``T.dscalar('x')`` on the first line is the name of the variable in the graph (not in Python). This name is reused when the graph is printed. Without it, the variable ``x`` would be printed as its type, ``<TensorType(float64, scalar)>``, which is much harder to read. The string ``'x'`` can be anything, but to keep the code readable, use the same name as (or a name derived from) the corresponding Python variable.
The function I compiled is too slow, what's up?
-----------------------------------------------
First, make sure you're running in FAST_RUN mode, by passing
``mode='FAST_RUN'`` to ``theano.function`` or ``theano.make``,
or by setting the flag :attr:`config.mode` accordingly. Some
operations have excruciatingly slow Python implementations and that
can negatively affect the performance of FAST_COMPILE.
......
......@@ -1211,7 +1211,7 @@ class GpuSum(Op):
class GpuReshape(tensor.Reshape):
# __hash__, __eq__, __str__ come from tensor.Subtensor
def make_node(self, x, shp):
    """Build the Apply node that reshapes GPU array `x` to shape `shp`.

    A host copy of `x` is reshaped only to infer the broadcastable
    pattern of the result; the GPU output type is then built from that
    pattern.
    """
    # `ndim` must be passed explicitly so the inferred pattern has the
    # rank this Op was constructed for.  (The stacked duplicate line
    # without `ndim=` left here by the merge was dead code and is
    # removed.)
    host_reshaped = host_from_gpu(x).reshape(shp, ndim=self.ndim)
    return Apply(self, [x, shp], [CudaNdarrayType(host_reshaped.broadcastable)()])
def perform(self, node, (x, shp), (out,)):
if (len(shp) != self.ndim):
......
......@@ -198,7 +198,7 @@ class GpuConv(Op):
return ['cuda_ndarray.cuh','<stdio.h>']
def c_code_cache_version(self):
    """Version tag for the compiled-C-code cache of this Op.

    Bumped to (0, 3) so modules compiled against the previous kernel
    sources are invalidated.  The stale ``return (0, 2)`` that the diff
    collapse left above this line was unreachable dead code and has
    been removed.
    """
    return (0, 3)
def c_support_code_apply(self, node, nodename):
return open(os.path.join(os.path.split(__file__)[0],'conv_kernel.cu')).read()+\
......
......@@ -626,7 +626,7 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
}
else
{
PyErr_Format(PyExc_RuntimeError, "ERROR: all implementations failed! (%s)",
PyErr_Format(PyExc_RuntimeError, "ERROR: all implementations failed for CudaNdarray_conv_valid! (%s)",
cudaGetErrorString(sts));
return -1;
}
......@@ -673,7 +673,7 @@ CudaNdarray_conv_full(const CudaNdarray *img, const CudaNdarray * kern, CudaNdar
const int nkern=CudaNdarray_HOST_DIMS(kern)[0];
const int img_wid=CudaNdarray_HOST_DIMS(img)[3];
const int img_len=CudaNdarray_HOST_DIMS(img)[2];
const int kern_wid=CudaNdarray_HOST_DIMS(img)[3];
const int kern_wid=CudaNdarray_HOST_DIMS(kern)[3];
const int kern_len=CudaNdarray_HOST_DIMS(kern)[2];
const int out_wid=CudaNdarray_HOST_DIMS(out)[3];
const int out_len=CudaNdarray_HOST_DIMS(out)[2];
......@@ -821,13 +821,13 @@ CudaNdarray_conv_full(const CudaNdarray *img, const CudaNdarray * kern, CudaNdar
cudaError_t sts = cudaGetLastError();
if (cudaSuccess == sts)
{
if (verbose>1) printf("threads.x=%i, threads.y=%i, grid.x=%i, grid.y=%i,shared_size=%i, nb_threads=%i, out_len=%i, nb_split=%i, version=%i\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y, out_len, nb_split, version);
if (verbose>1) printf("threads.x=%i, threads.y=%i, threads.z=%i, grid.x=%i, grid.y=%i,shared_size=%i, nb_threads=%i, out_len=%i, nb_split=%i, version=%i\n", threads.x, threads.y, threads.z, grid.x, grid.y, shared_size, threads.x * threads.y * threads.z, out_len, nb_split, version);
if (verbose) printf("INFO: used 'conv_full_patch_stack_padded' nb_split=%d low_mem=%s\n",nb_split,(version==5?"true":"false"));
work_complete = true;
}
else
{
if (verbose) printf("threads.x=%i, threads.y=%i, grid.x=%i, grid.y=%i,shared_size=%i, nb_threads=%i, out_len=%i, nb_split=%i, version=%i\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y, out_len, nb_split, version);
if (verbose) printf("threads.x=%i, threads.y=%i, threads.z=%i, grid.x=%i, grid.y=%i,shared_size=%i, nb_threads=%i, out_len=%i, nb_split=%i, version=%i\n", threads.x, threads.y, threads.z, grid.x, grid.y, shared_size, threads.x * threads.y * threads.z, out_len, nb_split, version);
if (verbose) printf("INFO: impl 'conv_full_patch_stack_padded' %s %s failed (%s), trying next implementation\n",
version==3?"no split": "split",(version==5?"low_mem":"not_low_mem"),
cudaGetErrorString(sts));
......@@ -1013,7 +1013,7 @@ CudaNdarray_conv_full(const CudaNdarray *img, const CudaNdarray * kern, CudaNdar
if (verbose) printf("threads.x=%i, threads.y=%i, grid.x=%i, grid.y=%i, shared_size=%i, nb_threads=%i\n", n_threads, 1, n_blocks, 1, 0, n_threads);
if (verbose) printf("INFO: impl 'conv_reference_full' failed (%s), trying next implementation\n",
cudaGetErrorString(sts));
PyErr_Format(PyExc_RuntimeError, "ERROR: all implementations failed! (%s)",
PyErr_Format(PyExc_RuntimeError, "ERROR: all implementations failed for CudaNdarray_conv_full! (%s)",
cudaGetErrorString(sts));
return -1;
}
......
......@@ -5,9 +5,6 @@
#include <iostream>
#include "cuda_ndarray.cuh"
#ifndef DONT_UNROLL
#define UNROLL_LOOP
#endif
/////////////////////////
// Static helper methods
......
......@@ -1168,6 +1168,8 @@ class ScalarFromTensor(Op):
out[0] = s.flatten()[0]
def grad(self, (s,), (dt,)):
    """Gradient of ScalarFromTensor: lift the scalar gradient `dt` back to a tensor."""
    # NOTE(review): Python 2 tuple-unpacking parameters -- `(s,)` is the
    # single input, `(dt,)` the single output gradient; this signature
    # syntax is invalid in Python 3.
    # NOTE(review): this passes `dt` to the TensorFromScalar *constructor*;
    # applying the Op, i.e. ``TensorFromScalar()(dt)`` (or the
    # ``tensor_from_scalar`` singleton), looks like what was intended --
    # confirm against the rest of the file.
    return [TensorFromScalar(dt)]
def __str__(self):
    """Render this Op as its bare class name in printed graphs."""
    label = self.__class__.__name__
    return label
scalar_from_tensor = ScalarFromTensor()
......
......@@ -71,6 +71,8 @@ class GemmRelated(Op):
return (type(self) == type(other))
def __hash__(self):
    """Hash by concrete Op class, consistent with the type-based __eq__ above."""
    op_cls = type(self)
    return hash(op_cls)
def __str__(self):
    """Printed-graph label for this Op: just the concrete class name."""
    cls = self.__class__
    return cls.__name__
def c_support_code(self):
#return cblas_header_text()
mod_str = """
......
......@@ -1516,7 +1516,7 @@ if(mode != VALID && mode != FULL){
if(dim_zz[0]<=0 || dim_zz[1]<=0){
PyErr_Format(PyExc_ValueError,
"Output dimensions are not valid %%dx%%d",dim_zz[0],dim_zz[1]);
"Output dimensions are not valid %%ldx%%ld",(long int)dim_zz[0],(long int)dim_zz[1]);
%(fail)s;
}
......
......@@ -495,7 +495,8 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
return type(self) == type(other)
def __hash__(self):
    """Delegate to tensor.hashtype so hashing matches the type-based equality."""
    h = tensor.hashtype(self)
    return h
def __str__(self):
    """Use the bare class name when this Op appears in printed graphs."""
    name = self.__class__.__name__
    return name
def make_node(self, x, b, y_idx):
x = tensor.as_tensor_variable(x)
b = tensor.as_tensor_variable(b)
......@@ -673,6 +674,8 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
return type(self) == type(other)
def __hash__(self):
    """Hash via tensor.hashtype, matching the type-based __eq__ above."""
    hashed = tensor.hashtype(self)
    return hashed
def __str__(self):
    """String form of this Op: the name of its concrete class."""
    klass = type(self)
    return klass.__name__
def make_node(self, dy, sm, y_idx,**kwargs):
dy = tensor.as_tensor_variable(dy)
sm = tensor.as_tensor_variable(sm)
......@@ -720,14 +723,14 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
}
if (%(dnll)s->dimensions[0] != %(sm)s->dimensions[0])
{
PyErr_Format(PyExc_ValueError, "dnll.shape[0] (%%d) != sm.shape[0] (%%d)",
%(dnll)s->dimensions[0], %(sm)s->dimensions[0]);
//PyErr_SetString(PyExc_ValueError, "dnll.shape[0] != sm.shape[0]");
PyErr_Format(PyExc_ValueError, "dnll.shape[0] (%%ld) != sm.shape[0] (%%ld)",
(long int)%(dnll)s->dimensions[0], (long int)%(sm)s->dimensions[0]);
%(fail)s;
}
if (%(dnll)s->dimensions[0] != %(y_idx)s->dimensions[0])
{
PyErr_SetString(PyExc_ValueError, "dnll.shape[0] != y_idx.shape[0]");
PyErr_Format(PyExc_ValueError, "dnll.shape[0] (%%ld) != y_idx.shape[0] (%%ld)",
(long int)%(dnll)s->dimensions[0], (long int)%(y_idx)s->dimensions[0]);
%(fail)s;
}
if ((NULL == %(dx)s)
......
......@@ -170,10 +170,7 @@ class T_RandomStreams(unittest.TestCase):
# ndim specified, inconsistent with shape, should raise ValueError
m3 = Module()
m3.random = RandomStreams(234)
m3.fn = Method([], m3.random.uniform((2,2), ndim=1))
made3 = m3.make()
made3.random.initialize()
self.assertRaises(ValueError, made3.fn)
self.assertRaises(ValueError, m3.random.uniform, (2,2), ndim=1)
def test_uniform(self):
"""Test that RandomStreams.uniform generates the same results as numpy"""
......
Markdown is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Finish editing this comment first!
Register or sign in to comment