提交 2dfec943 作者: Frédéric Bastien

Merge pull request #3125 from harlouci/flake8_v7

Flake8 misc
...@@ -7,13 +7,6 @@ ...@@ -7,13 +7,6 @@
# a,b scalar # a,b scalar
from __future__ import print_function from __future__ import print_function
s = """
result for shapes=(2000,2000) and iters=100
GTX 470 7.22s
GTX 285, 6.84s
GTX 480 5.83s
"""
import os import os
import sys import sys
import time import time
...@@ -52,14 +45,14 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000, ...@@ -52,14 +45,14 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
print(' OMP_NUM_THREADS=', os.getenv('OMP_NUM_THREADS')) print(' OMP_NUM_THREADS=', os.getenv('OMP_NUM_THREADS'))
print(' GOTO_NUM_THREADS=', os.getenv('GOTO_NUM_THREADS')) print(' GOTO_NUM_THREADS=', os.getenv('GOTO_NUM_THREADS'))
print() print()
print ('Numpy config: (used when the Theano flag' print('Numpy config: (used when the Theano flag'
' "blas.ldflags" is empty)') ' "blas.ldflags" is empty)')
numpy.show_config() numpy.show_config()
print('Numpy dot module:', numpy.dot.__module__) print('Numpy dot module:', numpy.dot.__module__)
print('Numpy location:', numpy.__file__) print('Numpy location:', numpy.__file__)
print('Numpy version:', numpy.__version__) print('Numpy version:', numpy.__version__)
if (theano.config.device.startswith("gpu") or if (theano.config.device.startswith("gpu") or
theano.config.init_gpu_device.startswith("gpu")): theano.config.init_gpu_device.startswith("gpu")):
print('nvcc version:') print('nvcc version:')
subprocess.call((theano.sandbox.cuda.nvcc_compiler.nvcc_path, subprocess.call((theano.sandbox.cuda.nvcc_compiler.nvcc_path,
"--version")) "--version"))
...@@ -116,8 +109,8 @@ def test(): ...@@ -116,8 +109,8 @@ def test():
parser = OptionParser( parser = OptionParser(
usage='%prog <options>\nCompute time needed to perform BLAS gemm ' usage='%prog <options>\nCompute time needed to perform BLAS gemm '
'computations between matrices of size (M, N) and (N, K).') 'computations between matrices of size (M, N) and (N, K).')
parser.add_option('-q', '--quiet', action='store_true', dest='quiet', parser.add_option('-q', '--quiet', action='store_true', dest='quiet',
default=False, default=False,
...@@ -255,25 +248,25 @@ if __name__ == "__main__": ...@@ -255,25 +248,25 @@ if __name__ == "__main__":
K40 0.88s K40 0.88s
K20m/ECC K20m/ECC
K20/NOECC K20/NOECC
M2090 M2090
C2075 C2075
M2075 M2075
M2070 M2070
M2070-Q M2070-Q
M2050(Amazon) M2050(Amazon)
C1060 C1060
K600 K600
GTX Titan Black GTX Titan Black
GTX Titan(D15U-50) GTX Titan(D15U-50)
GTX 780 GTX 780
GTX 980 GTX 980
GTX 970 GTX 970
GTX 680 GTX 680
GRID K520 GRID K520
GTX 580 GTX 580
GTX 480 GTX 480
GTX 750 Ti GTX 750 Ti
""") """)
if options.M == 0: if options.M == 0:
...@@ -306,7 +299,6 @@ if __name__ == "__main__": ...@@ -306,7 +299,6 @@ if __name__ == "__main__":
print() print()
print('Total execution time: %.2fs on %s.' % (t, impl)) print('Total execution time: %.2fs on %s.' % (t, impl))
print() print()
print ('Try to run this script a few times. Experience shows that' print('Try to run this script a few times. Experience shows that'
' the first time is not as fast as followings calls. The' ' the first time is not as fast as followings calls. The'
' difference is not big, but consistent.') ' difference is not big, but consistent.')
from __future__ import print_function from __future__ import print_function
import six.moves.cPickle as pickle import six.moves.cPickle as pickle
import os, sys import os
import sys
import theano import theano
from six import iteritems, itervalues from six import iteritems, itervalues
...@@ -90,6 +92,6 @@ useless = total - uniq ...@@ -90,6 +92,6 @@ useless = total - uniq
print("mod.{cpp,cu} total:", total) print("mod.{cpp,cu} total:", total)
print("mod.{cpp,cu} uniq:", uniq) print("mod.{cpp,cu} uniq:", uniq)
print("mod.{cpp,cu} with more than 1 copy:", more_than_one) print("mod.{cpp,cu} with more than 1 copy:", more_than_one)
print("mod.{cpp,cu} useless:", useless, float(useless)/total*100, "%") print("mod.{cpp,cu} useless:", useless, float(useless) / total * 100, "%")
print("nb directory", len(dirs)) print("nb directory", len(dirs))
...@@ -7,15 +7,20 @@ WARNING: In the test of this file there is a transpose that is used... ...@@ -7,15 +7,20 @@ WARNING: In the test of this file there is a transpose that is used...
So there can be problem with shape and stride order... So there can be problem with shape and stride order...
""" """
import six
try: try:
import cudamat import cudamat
cudamat_available = True cudamat_available = True
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
if cuda.cuda_available == False: if cuda.cuda_available is False:
raise ImportError('Optional theano package cuda disabled') raise ImportError('Optional theano package cuda disabled')
if six.PY3:
long = int
def cudandarray_to_cudamat(x, copyif=False): def cudandarray_to_cudamat(x, copyif=False):
""" take a CudaNdarray and return a cudamat.CUDAMatrix object. """ take a CudaNdarray and return a cudamat.CUDAMatrix object.
...@@ -43,7 +48,7 @@ try: ...@@ -43,7 +48,7 @@ try:
# Check if it is c contiguous # Check if it is c contiguous
size = 1 size = 1
c_contiguous = True c_contiguous = True
for i in range(x.ndim-1, -1, -1): for i in range(x.ndim - 1, -1, -1):
if x.shape[i] == 1: if x.shape[i] == 1:
continue continue
if x._strides[i] != size: if x._strides[i] != size:
...@@ -73,11 +78,10 @@ try: ...@@ -73,11 +78,10 @@ try:
cm_mat.data_device = ctypes.cast(x.gpudata, ctypes.POINTER(ctypes.c_float)) cm_mat.data_device = ctypes.cast(x.gpudata, ctypes.POINTER(ctypes.c_float))
px = cudamat.CUDAMatrix(cm_mat) px = cudamat.CUDAMatrix(cm_mat)
px._base = x # x won't be __del__'ed as long as px is around. px._base = x # x won't be __del__'ed as long as px is around.
px.mat_on_host = False # let cudamat know that we don't have a numpy # let cudamat know that we don't have a numpy array attached.
# array attached. px.mat_on_host = False
return px return px
def cudamat_to_cudandarray(x): def cudamat_to_cudandarray(x):
...@@ -86,12 +90,12 @@ try: ...@@ -86,12 +90,12 @@ try:
if not isinstance(x, cudamat.CUDAMatrix): if not isinstance(x, cudamat.CUDAMatrix):
raise ValueError("We can transfer only cudamat.CUDAMatrix to CudaNdarray") raise ValueError("We can transfer only cudamat.CUDAMatrix to CudaNdarray")
# elif x.dtype != "float32": # elif x.dtype != "float32":
# raise ValueError("CudaNdarray support only float32") # raise ValueError("CudaNdarray support only float32")
# We don't need this, because cudamat is always float32. # We don't need this, because cudamat is always float32.
else: else:
strides = [1] strides = [1]
for i in x.shape[::-1][:-1]: for i in x.shape[::-1][:-1]:
strides.append(strides[-1]*i) strides.append(strides[-1] * i)
strides = tuple(strides[::-1]) strides = tuple(strides[::-1])
import ctypes import ctypes
......
...@@ -51,5 +51,5 @@ if __name__ == '__main__': ...@@ -51,5 +51,5 @@ if __name__ == '__main__':
costlySpeedstring = "slowdown" costlySpeedstring = "slowdown"
print("Fast op time without openmp %fs with openmp %fs %s %2.2f" % (cheapTime, cheapTimeOpenmp, cheapSpeedstring, cheapSpeed)) print("Fast op time without openmp %fs with openmp %fs %s %2.2f" % (cheapTime, cheapTimeOpenmp, cheapSpeedstring, cheapSpeed))
print("Slow op time without openmp %fs with openmp %fs %s %2.2f" % (costlyTime, costlyTimeOpenmp, costlySpeedstring, costlySpeed)) print("Slow op time without openmp %fs with openmp %fs %s %2.2f" % (costlyTime, costlyTimeOpenmp, costlySpeedstring, costlySpeed))
...@@ -19,31 +19,31 @@ token = None ...@@ -19,31 +19,31 @@ token = None
def get_auth_token(): def get_auth_token():
global token global token
if token is not None: if token is not None:
return token return token
import keyring import keyring
token = keyring.get_password('github', fake_username) token = keyring.get_password('github', fake_username)
if token is not None: if token is not None:
return token return token
print("Please enter your github username and password. These are not " print("Please enter your github username and password. These are not "
"stored, only used to get an oAuth token. You can revoke this at " "stored, only used to get an oAuth token. You can revoke this at "
"any time on Github.") "any time on Github.")
user = input("Username: ") user = input("Username: ")
pw = getpass.getpass("Password: ") pw = getpass.getpass("Password: ")
auth_request = { auth_request = {
"scopes": [ "scopes": [
"public_repo", "public_repo",
"gist" "gist"
], ],
"note": "IPython tools", "note": "IPython tools",
"note_url": "https://github.com/ipython/ipython/tree/master/tools", "note_url": "https://github.com/ipython/ipython/tree/master/tools",
} }
response = requests.post('https://api.github.com/authorizations', response = requests.post('https://api.github.com/authorizations',
auth=(user, pw), data=json.dumps(auth_request)) auth=(user, pw), data=json.dumps(auth_request))
response.raise_for_status() response.raise_for_status()
token = json.loads(response.text)['token'] token = json.loads(response.text)['token']
keyring.set_password('github', fake_username, token) keyring.set_password('github', fake_username, token)
...@@ -57,40 +57,40 @@ def make_auth_header(): ...@@ -57,40 +57,40 @@ def make_auth_header():
def post_issue_comment(project, num, body): def post_issue_comment(project, num, body):
url = 'https://api.github.com/repos/{project}/issues/{num}/comments'.format(project=project, num=num) url = 'https://api.github.com/repos/{project}/issues/{num}/comments'.format(project=project, num=num)
payload = json.dumps({'body': body}) payload = json.dumps({'body': body})
r = requests.post(url, data=payload, headers=make_auth_header()) requests.post(url, data=payload, headers=make_auth_header())
def post_gist(content, description='', filename='file', auth=False): def post_gist(content, description='', filename='file', auth=False):
"""Post some text to a Gist, and return the URL.""" """Post some text to a Gist, and return the URL."""
post_data = json.dumps({ post_data = json.dumps({
"description": description, "description": description,
"public": True, "public": True,
"files": { "files": {
filename: { filename: {
"content": content "content": content
}
} }
}
}).encode('utf-8') }).encode('utf-8')
headers = make_auth_header() if auth else {} headers = make_auth_header() if auth else {}
response = requests.post("https://api.github.com/gists", data=post_data, headers=headers) response = requests.post("https://api.github.com/gists", data=post_data, headers=headers)
response.raise_for_status() response.raise_for_status()
response_data = json.loads(response.text) response_data = json.loads(response.text)
return response_data['html_url'] return response_data['html_url']
def get_pull_request(project, num, github_api=3): def get_pull_request(project, num, github_api=3):
"""get pull request info by number """get pull request info by number
github_api : version of github api to use github_api : version of github api to use
""" """
if github_api == 2 : if github_api == 2:
url = "http://github.com/api/v2/json/pulls/{project}/{num}".format(project=project, num=num) url = "http://github.com/api/v2/json/pulls/{project}/{num}".format(project=project, num=num)
elif github_api == 3: elif github_api == 3:
url = "https://api.github.com/repos/{project}/pulls/{num}".format(project=project, num=num) url = "https://api.github.com/repos/{project}/pulls/{num}".format(project=project, num=num)
response = requests.get(url) response = requests.get(url)
response.raise_for_status() response.raise_for_status()
if github_api == 2 : if github_api == 2:
return json.loads(response.text)['pull'] return json.loads(response.text)['pull']
return json.loads(response.text) return json.loads(response.text)
...@@ -100,12 +100,12 @@ def get_pulls_list(project, github_api=3): ...@@ -100,12 +100,12 @@ def get_pulls_list(project, github_api=3):
github_api : version of github api to use github_api : version of github api to use
""" """
if github_api == 3 : if github_api == 3:
url = "https://api.github.com/repos/{project}/pulls".format(project=project) url = "https://api.github.com/repos/{project}/pulls".format(project=project)
else : else:
url = "http://github.com/api/v2/json/pulls/{project}".format(project=project) url = "http://github.com/api/v2/json/pulls/{project}".format(project=project)
response = requests.get(url) response = requests.get(url)
response.raise_for_status() response.raise_for_status()
if github_api == 2 : if github_api == 2:
return json.loads(response.text)['pulls'] return json.loads(response.text)['pulls']
return json.loads(response.text) return json.loads(response.text)
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
This code can only work if gnumpy and theano are initialized on the This code can only work if gnumpy and theano are initialized on the
same gpu as theano. same gpu as theano.
""" """
import six
from six.moves import reduce from six.moves import reduce
try: try:
...@@ -12,9 +14,12 @@ try: ...@@ -12,9 +14,12 @@ try:
___const_garray = gnumpy.rand(1) ___const_garray = gnumpy.rand(1)
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
if cuda.cuda_available == False: if cuda.cuda_available is False:
raise ImportError('Optional theano package cuda disabled') raise ImportError('Optional theano package cuda disabled')
if six.PY3:
long = int
def cudandarray_to_garray(x, copyif=False): def cudandarray_to_garray(x, copyif=False):
""" take a CudaNdarray and return a gnumpy.garray object. """ take a CudaNdarray and return a gnumpy.garray object.
...@@ -38,7 +43,7 @@ try: ...@@ -38,7 +43,7 @@ try:
# Check if it is c contiguous # Check if it is c contiguous
size = 1 size = 1
c_contiguous = True c_contiguous = True
for i in range(x.ndim-1, -1, -1): for i in range(x.ndim - 1, -1, -1):
if x.shape[i] == 1: if x.shape[i] == 1:
continue continue
if x._strides[i] != size: if x._strides[i] != size:
...@@ -57,7 +62,7 @@ try: ...@@ -57,7 +62,7 @@ try:
# a cudamat object with no data_host. # a cudamat object with no data_host.
cm_mat = cudamat.cudamat() cm_mat = cudamat.cudamat()
cm_mat.size[0] = reduce(lambda x, y: x*y, x.shape, 1) cm_mat.size[0] = reduce(lambda x, y: x * y, x.shape, 1)
cm_mat.size[1] = 1 cm_mat.size[1] = 1
cm_mat.on_host = 0 cm_mat.on_host = 0
cm_mat.on_device = 1 cm_mat.on_device = 1
...@@ -72,8 +77,8 @@ try: ...@@ -72,8 +77,8 @@ try:
px._base = x # x won't be freed if the cudamat object isn't freed. px._base = x # x won't be freed if the cudamat object isn't freed.
px.mat_on_host = False # let cudamat know that we don't have a numpy # let cudamat know that we don't have a numpy array attached.
# array attached. px.mat_on_host = False
# Note how gnumpy tracks its cudamat objects: it moves things to the # Note how gnumpy tracks its cudamat objects: it moves things to the
# _cmsReuseCache when the gnumpy array is deleted, thus the arrays # _cmsReuseCache when the gnumpy array is deleted, thus the arrays
...@@ -100,7 +105,7 @@ try: ...@@ -100,7 +105,7 @@ try:
else: else:
strides = [1] strides = [1]
for i in x.shape[::-1][:-1]: for i in x.shape[::-1][:-1]:
strides.append(strides[-1]*i) strides.append(strides[-1] * i)
strides = strides[::-1] strides = strides[::-1]
for i in range(len(strides)): for i in range(len(strides)):
if x.shape[i] == 1: if x.shape[i] == 1:
......
from __future__ import print_function from __future__ import print_function
import time import time
import numpy import numpy
...@@ -13,8 +14,12 @@ print(f1.maker.fgraph.toposort()) ...@@ -13,8 +14,12 @@ print(f1.maker.fgraph.toposort())
print(f2.maker.fgraph.toposort()) print(f2.maker.fgraph.toposort())
for i in [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000]: for i in [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000]:
o = numpy.zeros(i, dtype='float32') o = numpy.zeros(i, dtype='float32')
t0 = time.time(); f1(o); t1 = time.time(); t0 = time.time()
tf1 = t1-t0 f1(o)
t0 = time.time(); f2(); t1 = time.time(); t1 = time.time()
tf1 = t1 - t0
t0 = time.time()
f2()
t1 = time.time()
print("%8i %6.1f ns %7.1f ns"%(i, tf1*1e6, (t1-t0)*1e6)) print("%8i %6.1f ns %7.1f ns" % (i, tf1 * 1e6, (t1 - t0) * 1e6))
...@@ -3,8 +3,6 @@ Function to detect memory sharing for ndarray AND sparse type AND CudaNdarray. ...@@ -3,8 +3,6 @@ Function to detect memory sharing for ndarray AND sparse type AND CudaNdarray.
numpy version support only ndarray. numpy version support only ndarray.
""" """
__docformat__ = "restructuredtext en"
import numpy import numpy
from theano.tensor.basic import TensorType from theano.tensor.basic import TensorType
...@@ -20,6 +18,8 @@ except ImportError: ...@@ -20,6 +18,8 @@ except ImportError:
return False return False
from theano.sandbox import cuda from theano.sandbox import cuda
from theano.sandbox import gpuarray
if cuda.cuda_available: if cuda.cuda_available:
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
...@@ -29,8 +29,9 @@ else: ...@@ -29,8 +29,9 @@ else:
def _is_cuda(a): def _is_cuda(a):
return False return False
__docformat__ = "restructuredtext en"
from theano.sandbox import gpuarray
if gpuarray.pygpu: if gpuarray.pygpu:
def _is_gpua(a): def _is_gpua(a):
return isinstance(a, gpuarray.pygpu.gpuarray.GpuArray) return isinstance(a, gpuarray.pygpu.gpuarray.GpuArray)
...@@ -56,7 +57,7 @@ def may_share_memory(a, b, raise_other_type=True): ...@@ -56,7 +57,7 @@ def may_share_memory(a, b, raise_other_type=True):
a_sparse = _is_sparse(a) a_sparse = _is_sparse(a)
b_sparse = _is_sparse(b) b_sparse = _is_sparse(b)
if (not(a_ndarray or a_sparse or a_cuda or a_gpua) or if (not(a_ndarray or a_sparse or a_cuda or a_gpua) or
not(b_ndarray or b_sparse or b_cuda or b_gpua)): not(b_ndarray or b_sparse or b_cuda or b_gpua)):
if raise_other_type: if raise_other_type:
raise TypeError("may_share_memory support only ndarray" raise TypeError("may_share_memory support only ndarray"
" and scipy.sparse, CudaNdarray or GpuArray type") " and scipy.sparse, CudaNdarray or GpuArray type")
......
from __future__ import print_function from __future__ import print_function
from collections import MutableSet from collections import MutableSet
from theano.compat import OrderedDict
import types import types
import weakref
from six import string_types from six import string_types
...@@ -37,7 +38,7 @@ def check_deterministic(iterable): ...@@ -37,7 +38,7 @@ def check_deterministic(iterable):
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# {{{ http://code.activestate.com/recipes/576696/ (r5) # {{{ http://code.activestate.com/recipes/576696/ (r5)
import weakref
class Link(object): class Link(object):
# This make that we need to use a different pickle protocol # This make that we need to use a different pickle protocol
...@@ -61,6 +62,7 @@ class Link(object): ...@@ -61,6 +62,7 @@ class Link(object):
if len(state) == 3: if len(state) == 3:
self.key = state[2] self.key = state[2]
class OrderedSet(MutableSet): class OrderedSet(MutableSet):
'Set the remembers the order elements were added' 'Set the remembers the order elements were added'
# Big-O running times for all methods are the same as for regular sets. # Big-O running times for all methods are the same as for regular sets.
...@@ -179,10 +181,10 @@ class OrderedSet(MutableSet): ...@@ -179,10 +181,10 @@ class OrderedSet(MutableSet):
elif isinstance(other, set): elif isinstance(other, set):
# Raise exception to avoid confusion. # Raise exception to avoid confusion.
raise TypeError( raise TypeError(
'Cannot compare an `OrderedSet` to a `set` because ' 'Cannot compare an `OrderedSet` to a `set` because '
'this comparison cannot be made symmetric: please ' 'this comparison cannot be made symmetric: please '
'manually cast your `OrderedSet` into `set` before ' 'manually cast your `OrderedSet` into `set` before '
'performing this comparison.') 'performing this comparison.')
else: else:
return NotImplemented return NotImplemented
......
...@@ -33,16 +33,14 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable, ...@@ -33,16 +33,14 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
from theano.sandbox.cuda.opt import gpu_seqopt from theano.sandbox.cuda.opt import gpu_seqopt
from theano.tensor.utils import hash_from_dict from theano.tensor.utils import hash_from_dict
from . import pycuda_init
if not pycuda_init.pycuda_available:
raise Exception("No pycuda available. You can't load pycuda_example.py")
import pycuda import pycuda
from pycuda.elementwise import ElementwiseKernel
from pycuda.compiler import SourceModule from pycuda.compiler import SourceModule
from pycuda.tools import VectorArg
import pycuda.gpuarray import pycuda.gpuarray
from . import pycuda_init
if not pycuda_init.pycuda_available:
raise Exception("No pycuda available. You can't load pycuda_example.py")
def _replace_npy_types(c_arg): def _replace_npy_types(c_arg):
c_arg = c_arg.replace('npy_float32', 'float') c_arg = c_arg.replace('npy_float32', 'float')
...@@ -235,12 +233,11 @@ class PycudaElemwiseSourceModuleOp(GpuOp): ...@@ -235,12 +233,11 @@ class PycudaElemwiseSourceModuleOp(GpuOp):
c_code = self.scalar_op.c_code(out_node, "some_name", c_code = self.scalar_op.c_code(out_node, "some_name",
tuple([n + "[i]" for n in in_name]), tuple([n + "[i]" for n in in_name]),
tuple(n + "[i]" for n in out_name), {}) tuple(n + "[i]" for n in out_name), {})
c_code_param = ", ".join([ c_code_param = ", ".join(
_replace_npy_types(var.type.dtype_specs()[1]) + " *" + name [_replace_npy_types(var.type.dtype_specs()[1]) + " *" + name
for var, name in chain(izip(inputs, in_name), for var, name in chain(izip(inputs, in_name),
izip(out_node.outputs, izip(out_node.outputs, out_name))] +
out_name)) ["int size"])
] + ["int size"])
mod = SourceModule(""" mod = SourceModule("""
__global__ void %s(%s) __global__ void %s(%s)
{ {
...@@ -259,8 +256,8 @@ class PycudaElemwiseSourceModuleOp(GpuOp): ...@@ -259,8 +256,8 @@ class PycudaElemwiseSourceModuleOp(GpuOp):
# TODO assert all input have the same shape # TODO assert all input have the same shape
z, = out z, = out
if (z[0] is None or if (z[0] is None or
z[0].shape != inputs[0].shape or z[0].shape != inputs[0].shape or
not z[0].is_c_contiguous()): not z[0].is_c_contiguous()):
z[0] = theano.sandbox.cuda.CudaNdarray.zeros(inputs[0].shape) z[0] = theano.sandbox.cuda.CudaNdarray.zeros(inputs[0].shape)
if inputs[0].shape != inputs[1].shape: if inputs[0].shape != inputs[1].shape:
raise TypeError("PycudaElemwiseSourceModuleOp:" raise TypeError("PycudaElemwiseSourceModuleOp:"
...@@ -329,8 +326,8 @@ class PycudaElemwiseSourceModuleMakeThunkOp(Op): ...@@ -329,8 +326,8 @@ class PycudaElemwiseSourceModuleMakeThunkOp(Op):
c_code_param = ", ".join( c_code_param = ", ".join(
[_replace_npy_types(var.type.dtype_specs()[1]) + " *" + name [_replace_npy_types(var.type.dtype_specs()[1]) + " *" + name
for var, name in chain(izip(node.inputs, in_name), for var, name in chain(izip(node.inputs, in_name),
izip(node.outputs, out_name))] izip(node.outputs, out_name))] +
+ ["int size"]) ["int size"])
mod = SourceModule(""" mod = SourceModule("""
__global__ void %s(%s) __global__ void %s(%s)
...@@ -349,8 +346,8 @@ class PycudaElemwiseSourceModuleMakeThunkOp(Op): ...@@ -349,8 +346,8 @@ class PycudaElemwiseSourceModuleMakeThunkOp(Op):
def thunk(): def thunk():
z = outputs[0] z = outputs[0]
if (z[0] is None or if (z[0] is None or
z[0].shape != inputs[0][0].shape or z[0].shape != inputs[0][0].shape or
not z[0].is_c_contiguous()): not z[0].is_c_contiguous()):
z[0] = theano.sandbox.cuda.CudaNdarray.zeros( z[0] = theano.sandbox.cuda.CudaNdarray.zeros(
inputs[0][0].shape) inputs[0][0].shape)
if inputs[0][0].shape != inputs[1][0].shape: if inputs[0][0].shape != inputs[1][0].shape:
...@@ -363,9 +360,9 @@ class PycudaElemwiseSourceModuleMakeThunkOp(Op): ...@@ -363,9 +360,9 @@ class PycudaElemwiseSourceModuleMakeThunkOp(Op):
else: else:
grid = (1, 1) grid = (1, 1)
block = (inputs[0][0].shape[0], inputs[0][0].shape[1], 1) block = (inputs[0][0].shape[0], inputs[0][0].shape[1], 1)
out = pycuda_fct(inputs[0][0], inputs[1][0], z[0], pycuda_fct(inputs[0][0], inputs[1][0], z[0],
numpy.intc(inputs[1][0].size), block=block, numpy.intc(inputs[1][0].size), block=block,
grid=grid) grid=grid)
thunk.inputs = inputs thunk.inputs = inputs
thunk.outputs = outputs thunk.outputs = outputs
thunk.lazy = False thunk.lazy = False
...@@ -384,7 +381,7 @@ def local_pycuda_gpu_elemwise(node): ...@@ -384,7 +381,7 @@ def local_pycuda_gpu_elemwise(node):
""" """
if isinstance(node.op, GpuElemwise): if isinstance(node.op, GpuElemwise):
if (not any([any(i.type.broadcastable) for i in node.inputs]) and if (not any([any(i.type.broadcastable) for i in node.inputs]) and
all([i.ndim <= 2 for i in node.inputs])): all([i.ndim <= 2 for i in node.inputs])):
new_op = PycudaElemwiseSourceModuleOp(node.op.scalar_op, new_op = PycudaElemwiseSourceModuleOp(node.op.scalar_op,
node.op.inplace_pattern)( node.op.inplace_pattern)(
*node.inputs) *node.inputs)
...@@ -393,12 +390,12 @@ def local_pycuda_gpu_elemwise(node): ...@@ -393,12 +390,12 @@ def local_pycuda_gpu_elemwise(node):
pycuda_optimizer.register("local_pycuda_gpu_elemwise", pycuda_optimizer.register("local_pycuda_gpu_elemwise",
local_pycuda_gpu_elemwise) local_pycuda_gpu_elemwise)
"""
@local_optimizer([GpuElemwise]) @local_optimizer([GpuElemwise])
def local_pycuda_gpu_elemwise_kernel(node): def local_pycuda_gpu_elemwise_kernel(node):
""" ""
GpuElemwise -> PycudaElemwiseKernelOp GpuElemwise -> PycudaElemwiseKernelOp
""" ""
if isinstance(node.op, GpuElemwise): if isinstance(node.op, GpuElemwise):
if not any([any(i.type.broadcastable) for i in node.inputs]): if not any([any(i.type.broadcastable) for i in node.inputs]):
new_op = PycudaElemwiseKernelOp(node.op.scalar_op, new_op = PycudaElemwiseKernelOp(node.op.scalar_op,
...@@ -408,3 +405,4 @@ def local_pycuda_gpu_elemwise_kernel(node): ...@@ -408,3 +405,4 @@ def local_pycuda_gpu_elemwise_kernel(node):
pycuda_optimizer.register("local_pycuda_gpu_elemwise_kernel", pycuda_optimizer.register("local_pycuda_gpu_elemwise_kernel",
local_pycuda_gpu_elemwise_kernel, 1.5) local_pycuda_gpu_elemwise_kernel, 1.5)
"""
...@@ -23,7 +23,7 @@ pycuda_available = False ...@@ -23,7 +23,7 @@ pycuda_available = False
# If theano.sandbox.cuda don't exist, it is because we are importing # If theano.sandbox.cuda don't exist, it is because we are importing
# it and it try to import this file! This mean we must init the device. # it and it try to import this file! This mean we must init the device.
if (not hasattr(theano.sandbox, 'cuda') or if (not hasattr(theano.sandbox, 'cuda') or
theano.sandbox.cuda.use.device_number is None): theano.sandbox.cuda.use.device_number is None):
try: try:
import pycuda import pycuda
import pycuda.autoinit import pycuda.autoinit
......
import numpy
import pycuda.gpuarray import pycuda.gpuarray
from theano.sandbox import cuda from theano.sandbox import cuda
if cuda.cuda_available == False: if cuda.cuda_available is False:
raise ImportError('Optional theano package cuda disabled') raise ImportError('Optional theano package cuda disabled')
...@@ -29,7 +28,7 @@ def to_gpuarray(x, copyif=False): ...@@ -29,7 +28,7 @@ def to_gpuarray(x, copyif=False):
# Check if it is c contiguous # Check if it is c contiguous
size = 1 size = 1
c_contiguous = True c_contiguous = True
for i in range(x.ndim-1, -1, -1): for i in range(x.ndim - 1, -1, -1):
if x.shape[i] == 1: if x.shape[i] == 1:
continue continue
if x._strides[i] != size: if x._strides[i] != size:
...@@ -59,7 +58,7 @@ def to_cudandarray(x): ...@@ -59,7 +58,7 @@ def to_cudandarray(x):
else: else:
strides = [1] strides = [1]
for i in x.shape[::-1][:-1]: for i in x.shape[::-1][:-1]:
strides.append(strides[-1]*i) strides.append(strides[-1] * i)
strides = tuple(strides[::-1]) strides = tuple(strides[::-1])
ptr = int(x.gpudata) # in pycuda trunk, y.ptr also works, which is a little cleaner ptr = int(x.gpudata) # in pycuda trunk, y.ptr also works, which is a little cleaner
z = cuda.from_gpu_pointer(ptr, x.shape, strides, x) z = cuda.from_gpu_pointer(ptr, x.shape, strides, x)
......
...@@ -2,12 +2,12 @@ ...@@ -2,12 +2,12 @@
Helper function to safely convert an array to a new data type. Helper function to safely convert an array to a new data type.
""" """
__docformat__ = "restructuredtext en"
import numpy import numpy
import theano import theano
__docformat__ = "restructuredtext en"
def _asarray(a, dtype, order=None): def _asarray(a, dtype, order=None):
"""Convert the input to a Numpy array. """Convert the input to a Numpy array.
...@@ -45,11 +45,13 @@ def _asarray(a, dtype, order=None): ...@@ -45,11 +45,13 @@ def _asarray(a, dtype, order=None):
return rval.view(dtype=dtype) return rval.view(dtype=dtype)
else: else:
# Unexpected mismatch: better know what is going on! # Unexpected mismatch: better know what is going on!
raise TypeError('numpy.array did not return the data type we ' raise TypeError(
'asked for (%s %s #%s), instead it returned type ' 'numpy.array did not return the data type we '
'%s %s #%s: function ' 'asked for (%s %s #%s), instead it returned type '
'theano._asarray may need to be modified to handle this ' '%s %s #%s: function '
'data type.' % 'theano._asarray may need to be modified to handle this '
(dtype, dtype.str, dtype.num, rval.dtype, rval.str, rval.dtype.num)) 'data type.' %
(dtype, dtype.str, dtype.num, rval.dtype, rval.str,
rval.dtype.num))
else: else:
return rval return rval
import warnings
from six.moves import xrange from six.moves import xrange
...@@ -26,7 +25,8 @@ def render_string(string, sub): ...@@ -26,7 +25,8 @@ def render_string(string, sub):
finalCode = string[0:i] % sub finalCode = string[0:i] % sub
except Exception as F: except Exception as F:
if str(F) == str(E): if str(F) == str(E):
raise Exception(string[0:i]+"<<<< caused exception "+str(F)) raise Exception(
string[0:i] + "<<<< caused exception " + str(F))
i += 1 i += 1
assert False assert False
return finalCode return finalCode
...@@ -35,7 +35,7 @@ def render_string(string, sub): ...@@ -35,7 +35,7 @@ def render_string(string, sub):
def pretty_format(string): def pretty_format(string):
lines = string.split('\n') lines = string.split('\n')
lines = [ strip_leading_white_space(line) for line in lines ] lines = [strip_leading_white_space(line) for line in lines]
indent = 0 indent = 0
for i in xrange(len(lines)): for i in xrange(len(lines)):
...@@ -43,7 +43,7 @@ def pretty_format(string): ...@@ -43,7 +43,7 @@ def pretty_format(string):
if indent < 0: if indent < 0:
indent = 0 indent = 0
# #
lines[i] = (' '*indent) + lines[i] lines[i] = (' ' * indent) + lines[i]
indent += lines[i].count('{') indent += lines[i].count('{')
# #
......
...@@ -184,20 +184,6 @@ whitelist_flake8 = [ ...@@ -184,20 +184,6 @@ whitelist_flake8 = [
"scan_module/scan_opt.py", "scan_module/scan_opt.py",
"scan_module/tests/test_scan.py", "scan_module/tests/test_scan.py",
"scan_module/tests/test_scan_opt.py", "scan_module/tests/test_scan_opt.py",
"misc/elemwise_openmp_speedup.py",
"misc/gh_api.py",
"misc/check_blas.py",
"misc/latence_gpu_transfert.py",
"misc/cudamat_utils.py",
"misc/pycuda_utils.py",
"misc/pycuda_example.py",
"misc/ordered_set.py",
"misc/strutil.py",
"misc/gnumpy_utils.py",
"misc/may_share_memory.py",
"misc/safe_asarray.py",
"misc/pycuda_init.py",
"misc/check_duplicate_key.py",
"misc/tests/test_may_share_memory.py", "misc/tests/test_may_share_memory.py",
"misc/tests/test_pycuda_theano_simple.py", "misc/tests/test_pycuda_theano_simple.py",
"misc/tests/test_gnumpy_utils.py", "misc/tests/test_gnumpy_utils.py",
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论