提交 fb168351 authored 作者: lamblin's avatar lamblin

Merge pull request #812 from nouiz/mixed

Mixed
language: python
python:
- "2.6"
# - "2.7"
# - "3.2"
# command to install dependencies
install: "pip install . --use-mirrors"
# command to run tests
script: theano-nose
\ No newline at end of file
...@@ -44,9 +44,10 @@ file and run it. ...@@ -44,9 +44,10 @@ file and run it.
t0 = time.time() t0 = time.time()
for i in xrange(iters): for i in xrange(iters):
r = f() r = f()
print 'Looping %d times took'%iters, time.time() - t0, 'seconds' t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds'
print 'Result is', r print 'Result is', r
print 'Used the','cpu' if numpy.any( [isinstance(x.op,T.Elemwise) for x in f.maker.fgraph.toposort()]) else 'gpu' print 'Used the', 'cpu' if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]) else 'gpu'
The program just computes the exp() of a bunch of random numbers. The program just computes the exp() of a bunch of random numbers.
Note that we use the `shared` function to Note that we use the `shared` function to
...@@ -100,10 +101,11 @@ after the T.exp(x) is replaced by a GPU version of exp(). ...@@ -100,10 +101,11 @@ after the T.exp(x) is replaced by a GPU version of exp().
t0 = time.time() t0 = time.time()
for i in xrange(iters): for i in xrange(iters):
r = f() r = f()
print 'Looping %d times took'%iters, time.time() - t0, 'seconds' t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds'
print 'Result is', r print 'Result is', r
print 'Numpy result is', numpy.asarray(r) print 'Numpy result is', numpy.asarray(r)
print 'Used the','cpu' if numpy.any( [isinstance(x.op,T.Elemwise) for x in f.maker.fgraph.toposort()]) else 'gpu' print 'Used the', 'cpu' if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]) else 'gpu'
The output from this program is The output from this program is
...@@ -155,10 +157,11 @@ that it has the un-wanted side-effect of really slowing things down. ...@@ -155,10 +157,11 @@ that it has the un-wanted side-effect of really slowing things down.
t0 = time.time() t0 = time.time()
for i in xrange(iters): for i in xrange(iters):
r = f() r = f()
print 'Looping %d times took'%iters, time.time() - t0, 'seconds' t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds'
print 'Result is', r print 'Result is', r
print 'Numpy result is', numpy.asarray(r) print 'Numpy result is', numpy.asarray(r)
print 'Used the','cpu' if numpy.any( [isinstance(x.op,T.Elemwise) for x in f.maker.fgraph.toposort()]) else 'gpu' print 'Used the', 'cpu' if numpy.any([isinstance(x.op,T.Elemwise) for x in f.maker.fgraph.toposort()]) else 'gpu'
Running this version of the code takes just under 0.05 seconds, over 140x faster than Running this version of the code takes just under 0.05 seconds, over 140x faster than
the CPU implementation! the CPU implementation!
......
...@@ -7,6 +7,7 @@ This is needed as we need to have parsed the previous ...@@ -7,6 +7,7 @@ This is needed as we need to have parsed the previous
import os import os
import logging import logging
import subprocess import subprocess
import tempfile
import theano import theano
from theano.configparser import ( from theano.configparser import (
...@@ -63,16 +64,19 @@ int main( int argc, const char* argv[] ) ...@@ -63,16 +64,19 @@ int main( int argc, const char* argv[] )
} }
} }
""" """
p = os.path.join(config.compiledir, 'test_omp.c') fd, path = tempfile.mkstemp(suffix='.c', prefix='test_omp_')
f = open(p, 'w') try:
f.write(code) os.write(fd, code)
f.close() os.close(fd)
p = subprocess.Popen(['g++', '-fopenmp', p], stdout=subprocess.PIPE, proc = subprocess.Popen(['g++', '-fopenmp', path],
stderr=subprocess.PIPE, stdout=subprocess.PIPE,
stdin=dummy_stdin.fileno()) stderr=subprocess.PIPE,
p.wait() stdin=dummy_stdin.fileno())
if p.returncode != 0: proc.wait()
default_openmp = False if proc.returncode != 0:
default_openmp = False
finally:
os.remove(path)
except OSError, e: except OSError, e:
default_openmp = False default_openmp = False
......
...@@ -14,7 +14,7 @@ files=["/tmp/do_nightly_build_theano", "/tmp/do_nightly_build_pylearn", ...@@ -14,7 +14,7 @@ files=["/tmp/do_nightly_build_theano", "/tmp/do_nightly_build_pylearn",
"/tmp/do_nightly_build_deeplearning", "/tmp/do_nightly_build_pylearn2", "/tmp/do_nightly_build_deeplearning", "/tmp/do_nightly_build_pylearn2",
"/tmp/do_nightly_build_theano_python2.4"] "/tmp/do_nightly_build_theano_python2.4"]
msgs=['Theano buildbot', 'Pylearn buildbot', 'Deep Learning Tutorial buildbot', msgs=['Theano buildbot', 'Pylearn buildbot', 'Deep Learning Tutorial buildbot',
'Pylearn2 buildbot', 'Theano Python2.4'] 'Pylearn2 buildbot', 'Theano Python2.4 buildbot']
print files print files
print msgs print msgs
......
...@@ -79,6 +79,19 @@ class HostFromGpu(GpuOp): ...@@ -79,6 +79,19 @@ class HostFromGpu(GpuOp):
def infer_shape(self, node, xshp): def infer_shape(self, node, xshp):
return xshp return xshp
def c_code(self, node, name, inputs, outputs, sub):
inp = inputs[0]
out = outputs[0]
fail = sub['fail']
return """
%(out)s = (PyArrayObject *) CudaNdarray_CreateArrayObj(%(inp)s);
if(!%(out)s)
%(fail)s;
""" % locals()
def c_code_cache_version(self):
return (1,)
host_from_gpu = HostFromGpu() host_from_gpu = HostFromGpu()
......
...@@ -300,10 +300,6 @@ class NVCC_compiler(object): ...@@ -300,10 +300,6 @@ class NVCC_compiler(object):
finally: finally:
os.chdir(orig_dir) os.chdir(orig_dir)
if nvcc_stdout:
# this doesn't happen to my knowledge
print >> sys.stderr, "DEBUG: nvcc STDOUT", nvcc_stdout
for eline in nvcc_stderr.split('\n'): for eline in nvcc_stderr.split('\n'):
if not eline: if not eline:
continue continue
...@@ -334,11 +330,16 @@ class NVCC_compiler(object): ...@@ -334,11 +330,16 @@ class NVCC_compiler(object):
except Exception: except Exception:
pass pass
print >> sys.stderr, l print >> sys.stderr, l
print nvcc_stdout
raise Exception('nvcc return status', p.returncode, raise Exception('nvcc return status', p.returncode,
'for cmd', ' '.join(cmd)) 'for cmd', ' '.join(cmd))
elif config.cmodule.compilation_warning and nvcc_stdout: elif config.cmodule.compilation_warning and nvcc_stdout:
print nvcc_stdout print nvcc_stdout
if nvcc_stdout:
# this doesn't happen to my knowledge
print >> sys.stderr, "DEBUG: nvcc STDOUT", nvcc_stdout
#touch the __init__ file #touch the __init__ file
file(os.path.join(location, "__init__.py"), 'w').close() file(os.path.join(location, "__init__.py"), 'w').close()
return dlimport(lib_filename) return dlimport(lib_filename)
......
...@@ -2183,6 +2183,11 @@ class MaxAndArgmax(Op): ...@@ -2183,6 +2183,11 @@ class MaxAndArgmax(Op):
def make_node(self, x, axis=None): def make_node(self, x, axis=None):
x = _as_tensor_variable(x) x = _as_tensor_variable(x)
if isinstance(axis, Variable):
if not isinstance(axis, Constant):
raise TypeError("MaxAndArgmax need a constant axis")
axis = [axis.data]
if isinstance(axis, int): if isinstance(axis, int):
axis = [axis] axis = [axis]
elif isinstance(axis, (tuple, list)): elif isinstance(axis, (tuple, list)):
...@@ -2192,6 +2197,7 @@ class MaxAndArgmax(Op): ...@@ -2192,6 +2197,7 @@ class MaxAndArgmax(Op):
assert axis == range(x.type.ndim), ( assert axis == range(x.type.ndim), (
"MaxAndArgmax does not support multiple" "MaxAndArgmax does not support multiple"
" axes. the max fct supports it.") " axes. the max fct supports it.")
# we make the axis all positive to make the infer_shape work # we make the axis all positive to make the infer_shape work
# with negative axis # with negative axis
if x.type.ndim > 0 and axis is not None: if x.type.ndim > 0 and axis is not None:
......
...@@ -1790,7 +1790,8 @@ class T_max_and_argmax(unittest.TestCase): ...@@ -1790,7 +1790,8 @@ class T_max_and_argmax(unittest.TestCase):
data = rand(2, 3) data = rand(2, 3)
n = as_tensor_variable(data) n = as_tensor_variable(data)
for (axis, np_axis) in [(-1, -1), (0, 0), (1, 1), (None, None), for (axis, np_axis) in [(-1, -1), (0, 0), (1, 1), (None, None),
([0, 1], None), ([1, 0], None)]: ([0, 1], None), ([1, 0], None),
(constant(0), 0)]:
v, i = eval_outputs(max_and_argmax(n, axis)) v, i = eval_outputs(max_and_argmax(n, axis))
assert i.dtype == 'int64' assert i.dtype == 'int64'
self.assertTrue(numpy.all(v == numpy.max(data, np_axis))) self.assertTrue(numpy.all(v == numpy.max(data, np_axis)))
......
...@@ -800,16 +800,17 @@ class T_using_gpu(unittest.TestCase): ...@@ -800,16 +800,17 @@ class T_using_gpu(unittest.TestCase):
t0 = time.time() t0 = time.time()
for i in xrange(iters): for i in xrange(iters):
r = f() r = f()
print 'Looping %d times took'%iters, time.time() - t0, 'seconds' t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds'
print 'Result is', r print 'Result is', r
if numpy.any( [isinstance(x.op,T.Elemwise) for x in f.maker.fgraph.toposort()]): if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
print 'Used the cpu' print 'Used the cpu'
else: else:
print 'Used the gpu' print 'Used the gpu'
if theano.config.device.find('gpu') > -1: if theano.config.device.find('gpu') > -1:
assert not numpy.any( [isinstance(x.op,T.Elemwise) for x in f.maker.fgraph.toposort()]) assert not numpy.any( [isinstance(x.op,T.Elemwise) for x in f.maker.fgraph.toposort()])
else: else:
assert numpy.any( [isinstance(x.op,T.Elemwise) for x in f.maker.fgraph.toposort()]) assert numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()])
...@@ -831,15 +832,16 @@ class T_using_gpu(unittest.TestCase): ...@@ -831,15 +832,16 @@ class T_using_gpu(unittest.TestCase):
t0 = time.time() t0 = time.time()
for i in xrange(iters): for i in xrange(iters):
r = f() r = f()
print 'Looping %d times took'%iters, time.time() - t0, 'seconds' t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds'
print 'Result is', r print 'Result is', r
print 'Numpy result is', numpy.asarray(r) print 'Numpy result is', numpy.asarray(r)
if numpy.any( [isinstance(x.op,T.Elemwise) for x in f.maker.fgraph.toposort()]): if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
print 'Used the cpu' print 'Used the cpu'
else: else:
print 'Used the gpu' print 'Used the gpu'
assert not numpy.any( [isinstance(x.op,T.Elemwise) for x in f.maker.fgraph.toposort()]) assert not numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()])
...@@ -865,15 +867,16 @@ class T_using_gpu(unittest.TestCase): ...@@ -865,15 +867,16 @@ class T_using_gpu(unittest.TestCase):
t0 = time.time() t0 = time.time()
for i in xrange(iters): for i in xrange(iters):
r = f() r = f()
print 'Looping %d times took'%iters, time.time() - t0, 'seconds' t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds'
print 'Result is', r print 'Result is', r
print 'Numpy result is', numpy.asarray(r) print 'Numpy result is', numpy.asarray(r)
if numpy.any( [isinstance(x.op,T.Elemwise) for x in f.maker.fgraph.toposort()]): if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
print 'Used the cpu' print 'Used the cpu'
else: else:
print 'Used the gpu' print 'Used the gpu'
assert not numpy.any( [isinstance(x.op,T.Elemwise) for x in f.maker.fgraph.toposort()]) assert not numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()])
class T_fibby(unittest.TestCase): class T_fibby(unittest.TestCase):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论