提交 d0184177 authored 作者: Dumitru Erhan's avatar Dumitru Erhan

branch merge

...@@ -41,12 +41,27 @@ precise inspection of what's being computed where, when, and how, see the ...@@ -41,12 +41,27 @@ precise inspection of what's being computed where, when, and how, see the
:ref:`faq_wraplinker`. :ref:`faq_wraplinker`.
How do I print a graph before or after compilation?
----------------------------------------------------------
Theano provides a function to print a graph before and after compilation:
>>> x = T.dscalar('x')
>>> y = x**2
>>> gy = T.grad(y, x)
>>> pp(gy) # print out the gradient prior to optimization
'((fill((x ** 2), 1.0) * 2) * (x ** (2 - 1)))'
>>> f = function([x], gy)
>>> pp(f.maker.env.outputs[0])
'(2.0 * x)'
The parameter in T.dscalar('x') in the first line is the name of this variable (in the graph, not in Python). This name is reused when printing the graph, making it more readable. If no name is given, the variable x is printed as its type, i.e. <TensorType(float64, scalar)>, which is less comprehensible. The string 'x' can be any string, but to keep the code readable, try to use the same name as (or a name derived from) the Python variable.
The function I compiled is too slow, what's up? The function I compiled is too slow, what's up?
----------------------------------------------- -----------------------------------------------
First, make sure you're running in FAST_RUN mode, by passing ``mode='FAST_RUN'``
First, make sure you're running in FAST_RUN mode, by passing to ``theano.function`` or ``theano.make`` or by setting to ``PROFILE_MODE``
``mode='FAST_RUN'`` to ``theano.function`` or ``theano.make``. Some the flags :attr:`config.mode`. Some
operations have excruciatingly slow Python implementations and that operations have excruciatingly slow Python implementations and that
can negatively effect the performance of FAST_COMPILE. can negatively effect the performance of FAST_COMPILE.
......
...@@ -1211,7 +1211,7 @@ class GpuSum(Op): ...@@ -1211,7 +1211,7 @@ class GpuSum(Op):
class GpuReshape(tensor.Reshape): class GpuReshape(tensor.Reshape):
# __hash__, __eq__, __str__ come from tensor.Subtensor # __hash__, __eq__, __str__ come from tensor.Subtensor
def make_node(self, x, shp): def make_node(self, x, shp):
host_reshaped = host_from_gpu(x).reshape(shp) host_reshaped = host_from_gpu(x).reshape(shp,ndim=self.ndim)
return Apply(self, [x, shp], [CudaNdarrayType(host_reshaped.broadcastable)()]) return Apply(self, [x, shp], [CudaNdarrayType(host_reshaped.broadcastable)()])
def perform(self, node, (x, shp), (out,)): def perform(self, node, (x, shp), (out,)):
if (len(shp) != self.ndim): if (len(shp) != self.ndim):
......
...@@ -198,7 +198,7 @@ class GpuConv(Op): ...@@ -198,7 +198,7 @@ class GpuConv(Op):
return ['cuda_ndarray.cuh','<stdio.h>'] return ['cuda_ndarray.cuh','<stdio.h>']
def c_code_cache_version(self): def c_code_cache_version(self):
return (0,2) return (0,3)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
return open(os.path.join(os.path.split(__file__)[0],'conv_kernel.cu')).read()+\ return open(os.path.join(os.path.split(__file__)[0],'conv_kernel.cu')).read()+\
......
...@@ -626,7 +626,7 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern, ...@@ -626,7 +626,7 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
} }
else else
{ {
PyErr_Format(PyExc_RuntimeError, "ERROR: all implementations failed! (%s)", PyErr_Format(PyExc_RuntimeError, "ERROR: all implementations failed for CudaNdarray_conv_valid! (%s)",
cudaGetErrorString(sts)); cudaGetErrorString(sts));
return -1; return -1;
} }
...@@ -673,7 +673,7 @@ CudaNdarray_conv_full(const CudaNdarray *img, const CudaNdarray * kern, CudaNdar ...@@ -673,7 +673,7 @@ CudaNdarray_conv_full(const CudaNdarray *img, const CudaNdarray * kern, CudaNdar
const int nkern=CudaNdarray_HOST_DIMS(kern)[0]; const int nkern=CudaNdarray_HOST_DIMS(kern)[0];
const int img_wid=CudaNdarray_HOST_DIMS(img)[3]; const int img_wid=CudaNdarray_HOST_DIMS(img)[3];
const int img_len=CudaNdarray_HOST_DIMS(img)[2]; const int img_len=CudaNdarray_HOST_DIMS(img)[2];
const int kern_wid=CudaNdarray_HOST_DIMS(img)[3]; const int kern_wid=CudaNdarray_HOST_DIMS(kern)[3];
const int kern_len=CudaNdarray_HOST_DIMS(kern)[2]; const int kern_len=CudaNdarray_HOST_DIMS(kern)[2];
const int out_wid=CudaNdarray_HOST_DIMS(out)[3]; const int out_wid=CudaNdarray_HOST_DIMS(out)[3];
const int out_len=CudaNdarray_HOST_DIMS(out)[2]; const int out_len=CudaNdarray_HOST_DIMS(out)[2];
...@@ -821,13 +821,13 @@ CudaNdarray_conv_full(const CudaNdarray *img, const CudaNdarray * kern, CudaNdar ...@@ -821,13 +821,13 @@ CudaNdarray_conv_full(const CudaNdarray *img, const CudaNdarray * kern, CudaNdar
cudaError_t sts = cudaGetLastError(); cudaError_t sts = cudaGetLastError();
if (cudaSuccess == sts) if (cudaSuccess == sts)
{ {
if (verbose>1) printf("threads.x=%i, threads.y=%i, grid.x=%i, grid.y=%i,shared_size=%i, nb_threads=%i, out_len=%i, nb_split=%i, version=%i\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y, out_len, nb_split, version); if (verbose>1) printf("threads.x=%i, threads.y=%i, threads.z=%i, grid.x=%i, grid.y=%i,shared_size=%i, nb_threads=%i, out_len=%i, nb_split=%i, version=%i\n", threads.x, threads.y, threads.z, grid.x, grid.y, shared_size, threads.x * threads.y * threads.z, out_len, nb_split, version);
if (verbose) printf("INFO: used 'conv_full_patch_stack_padded' nb_split=%d low_mem=%s\n",nb_split,(version==5?"true":"false")); if (verbose) printf("INFO: used 'conv_full_patch_stack_padded' nb_split=%d low_mem=%s\n",nb_split,(version==5?"true":"false"));
work_complete = true; work_complete = true;
} }
else else
{ {
if (verbose) printf("threads.x=%i, threads.y=%i, grid.x=%i, grid.y=%i,shared_size=%i, nb_threads=%i, out_len=%i, nb_split=%i, version=%i\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y, out_len, nb_split, version); if (verbose) printf("threads.x=%i, threads.y=%i, threads.z=%i, grid.x=%i, grid.y=%i,shared_size=%i, nb_threads=%i, out_len=%i, nb_split=%i, version=%i\n", threads.x, threads.y, threads.z, grid.x, grid.y, shared_size, threads.x * threads.y * threads.z, out_len, nb_split, version);
if (verbose) printf("INFO: impl 'conv_full_patch_stack_padded' %s %s failed (%s), trying next implementation\n", if (verbose) printf("INFO: impl 'conv_full_patch_stack_padded' %s %s failed (%s), trying next implementation\n",
version==3?"no split": "split",(version==5?"low_mem":"not_low_mem"), version==3?"no split": "split",(version==5?"low_mem":"not_low_mem"),
cudaGetErrorString(sts)); cudaGetErrorString(sts));
...@@ -1013,7 +1013,7 @@ CudaNdarray_conv_full(const CudaNdarray *img, const CudaNdarray * kern, CudaNdar ...@@ -1013,7 +1013,7 @@ CudaNdarray_conv_full(const CudaNdarray *img, const CudaNdarray * kern, CudaNdar
if (verbose) printf("threads.x=%i, threads.y=%i, grid.x=%i, grid.y=%i, shared_size=%i, nb_threads=%i\n", n_threads, 1, n_blocks, 1, 0, n_threads); if (verbose) printf("threads.x=%i, threads.y=%i, grid.x=%i, grid.y=%i, shared_size=%i, nb_threads=%i\n", n_threads, 1, n_blocks, 1, 0, n_threads);
if (verbose) printf("INFO: impl 'conv_reference_full' failed (%s), trying next implementation\n", if (verbose) printf("INFO: impl 'conv_reference_full' failed (%s), trying next implementation\n",
cudaGetErrorString(sts)); cudaGetErrorString(sts));
PyErr_Format(PyExc_RuntimeError, "ERROR: all implementations failed! (%s)", PyErr_Format(PyExc_RuntimeError, "ERROR: all implementations failed for CudaNdarray_conv_full! (%s)",
cudaGetErrorString(sts)); cudaGetErrorString(sts));
return -1; return -1;
} }
......
...@@ -5,9 +5,6 @@ ...@@ -5,9 +5,6 @@
#include <iostream> #include <iostream>
#include "cuda_ndarray.cuh" #include "cuda_ndarray.cuh"
#ifndef DONT_UNROLL
#define UNROLL_LOOP
#endif
///////////////////////// /////////////////////////
// Static helper methods // Static helper methods
......
...@@ -1168,6 +1168,8 @@ class ScalarFromTensor(Op): ...@@ -1168,6 +1168,8 @@ class ScalarFromTensor(Op):
out[0] = s.flatten()[0] out[0] = s.flatten()[0]
def grad(self, (s,), (dt,)): def grad(self, (s,), (dt,)):
return [TensorFromScalar(dt)] return [TensorFromScalar(dt)]
def __str__(self):
return self.__class__.__name__
scalar_from_tensor = ScalarFromTensor() scalar_from_tensor = ScalarFromTensor()
......
...@@ -71,6 +71,8 @@ class GemmRelated(Op): ...@@ -71,6 +71,8 @@ class GemmRelated(Op):
return (type(self) == type(other)) return (type(self) == type(other))
def __hash__(self): def __hash__(self):
return hash(type(self)) return hash(type(self))
def __str__(self):
return self.__class__.__name__
def c_support_code(self): def c_support_code(self):
#return cblas_header_text() #return cblas_header_text()
mod_str = """ mod_str = """
......
...@@ -1516,7 +1516,7 @@ if(mode != VALID && mode != FULL){ ...@@ -1516,7 +1516,7 @@ if(mode != VALID && mode != FULL){
if(dim_zz[0]<=0 || dim_zz[1]<=0){ if(dim_zz[0]<=0 || dim_zz[1]<=0){
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"Output dimensions are not valid %%dx%%d",dim_zz[0],dim_zz[1]); "Output dimensions are not valid %%ldx%%ld",(long int)dim_zz[0],(long int)dim_zz[1]);
%(fail)s; %(fail)s;
} }
......
...@@ -495,7 +495,8 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -495,7 +495,8 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
return type(self) == type(other) return type(self) == type(other)
def __hash__(self): def __hash__(self):
return tensor.hashtype(self) return tensor.hashtype(self)
def __str__(self):
return self.__class__.__name__
def make_node(self, x, b, y_idx): def make_node(self, x, b, y_idx):
x = tensor.as_tensor_variable(x) x = tensor.as_tensor_variable(x)
b = tensor.as_tensor_variable(b) b = tensor.as_tensor_variable(b)
...@@ -673,6 +674,8 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -673,6 +674,8 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
return type(self) == type(other) return type(self) == type(other)
def __hash__(self): def __hash__(self):
return tensor.hashtype(self) return tensor.hashtype(self)
def __str__(self):
return self.__class__.__name__
def make_node(self, dy, sm, y_idx,**kwargs): def make_node(self, dy, sm, y_idx,**kwargs):
dy = tensor.as_tensor_variable(dy) dy = tensor.as_tensor_variable(dy)
sm = tensor.as_tensor_variable(sm) sm = tensor.as_tensor_variable(sm)
...@@ -720,14 +723,14 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -720,14 +723,14 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
} }
if (%(dnll)s->dimensions[0] != %(sm)s->dimensions[0]) if (%(dnll)s->dimensions[0] != %(sm)s->dimensions[0])
{ {
PyErr_Format(PyExc_ValueError, "dnll.shape[0] (%%d) != sm.shape[0] (%%d)", PyErr_Format(PyExc_ValueError, "dnll.shape[0] (%%ld) != sm.shape[0] (%%ld)",
%(dnll)s->dimensions[0], %(sm)s->dimensions[0]); (long int)%(dnll)s->dimensions[0], (long int)%(sm)s->dimensions[0]);
//PyErr_SetString(PyExc_ValueError, "dnll.shape[0] != sm.shape[0]");
%(fail)s; %(fail)s;
} }
if (%(dnll)s->dimensions[0] != %(y_idx)s->dimensions[0]) if (%(dnll)s->dimensions[0] != %(y_idx)s->dimensions[0])
{ {
PyErr_SetString(PyExc_ValueError, "dnll.shape[0] != y_idx.shape[0]"); PyErr_Format(PyExc_ValueError, "dnll.shape[0] (%%ld) != y_idx.shape[0] (%%ld)",
(long int)%(dnll)s->dimensions[0], (long int)%(y_idx)s->dimensions[0]);
%(fail)s; %(fail)s;
} }
if ((NULL == %(dx)s) if ((NULL == %(dx)s)
......
...@@ -170,10 +170,7 @@ class T_RandomStreams(unittest.TestCase): ...@@ -170,10 +170,7 @@ class T_RandomStreams(unittest.TestCase):
# ndim specified, inconsistent with shape, should raise ValueError # ndim specified, inconsistent with shape, should raise ValueError
m3 = Module() m3 = Module()
m3.random = RandomStreams(234) m3.random = RandomStreams(234)
m3.fn = Method([], m3.random.uniform((2,2), ndim=1)) self.assertRaises(ValueError, m3.random.uniform, (2,2), ndim=1)
made3 = m3.make()
made3.random.initialize()
self.assertRaises(ValueError, made3.fn)
def test_uniform(self): def test_uniform(self):
"""Test that RandomStreams.uniform generates the same results as numpy""" """Test that RandomStreams.uniform generates the same results as numpy"""
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论