提交 5d229740 作者: Dumitru Erhan

merge

...@@ -8,9 +8,11 @@ AddConfigVar('floatX', ...@@ -8,9 +8,11 @@ AddConfigVar('floatX',
EnumStr('float64', 'float32'), EnumStr('float64', 'float32'),
) )
# 'gpu' means: let the driver select the GPU. Needed when the GPUs are in exclusive mode.
# 'gpuX' means: use GPU number X.
AddConfigVar('device', AddConfigVar('device',
"Default device for computations", "Default device for computations",
EnumStr('cpu', *['gpu%i'%i for i in range(4)]) EnumStr('cpu', 'gpu',*['gpu%i'%i for i in range(4)])
) )
# keep the default mode.optimizer==config.optimizer and mode.linker==config.linker! # keep the default mode.optimizer==config.optimizer and mode.linker==config.linker!
......
...@@ -112,7 +112,9 @@ if cuda_available: ...@@ -112,7 +112,9 @@ if cuda_available:
def use(device): def use(device):
global cuda_enabled, enabled_cuda global cuda_enabled, enabled_cuda
if device.startswith('gpu'): if device == 'gpu':
pass
elif device.startswith('gpu'):
device = int(device[3:]) device = int(device[3:])
elif device == 'cpu': elif device == 'cpu':
device = -1 device = -1
...@@ -120,13 +122,17 @@ def use(device): ...@@ -120,13 +122,17 @@ def use(device):
raise ValueError("Invalid device identifier", device) raise ValueError("Invalid device identifier", device)
if use.device_number is None: if use.device_number is None:
# No successful call to use() has been made yet # No successful call to use() has been made yet
if device<0: if device != 'gpu' and device<0:
return return
if device in [None,""]: if device in [None,""]:
device=0 device=0
device=int(device)
try: try:
gpu_init(device) if device !='gpu':
gpu_init(device)
else:
# Emit a warning so that people can see that the GPU will be used.
_logger.warn("We let the driver select the gpu device to use")
handle_shared_float32(True) handle_shared_float32(True)
use.device_number = device use.device_number = device
cuda_enabled = True cuda_enabled = True
......
...@@ -162,16 +162,19 @@ class GpuConv(Op): ...@@ -162,16 +162,19 @@ class GpuConv(Op):
and self.logical_img_hw == other.logical_img_hw \ and self.logical_img_hw == other.logical_img_hw \
and self.logical_kern_hw == other.logical_kern_hw \ and self.logical_kern_hw == other.logical_kern_hw \
and self.logical_kern_align_top == other.logical_kern_align_top \ and self.logical_kern_align_top == other.logical_kern_align_top \
and self.version == other.version and self.version == other.version \
and self.verbose == other.verbose
def __hash__(self): def __hash__(self):
# don't use hash(self.version) as hash(-1)==-2 and hash(-2)==-2 in python!
return hash(type(self)) \ return hash(type(self)) \
^ hash(self.border_mode) \ ^ hash(self.border_mode) \
^ hash(self.subsample) \ ^ hash(self.subsample) \
^ hash(self.logical_img_hw) \ ^ hash(self.logical_img_hw) \
^ hash(self.logical_kern_hw) \ ^ hash(self.logical_kern_hw) \
^ hash(self.logical_kern_align_top) \ ^ hash(self.logical_kern_align_top) \
^ self.version# don't use hash as hash(-1)==-2 and hash(-2)==-2 in python! ^ self.version \
^ self.verbose
def __str__(self): def __str__(self):
return '%s{%s, %s, %s, %s, %s}' %(self.__class__.__name__, return '%s{%s, %s, %s, %s, %s}' %(self.__class__.__name__,
...@@ -200,7 +203,7 @@ class GpuConv(Op): ...@@ -200,7 +203,7 @@ class GpuConv(Op):
return ['cuda_ndarray.cuh','<stdio.h>'] return ['cuda_ndarray.cuh','<stdio.h>']
def c_code_cache_version(self): def c_code_cache_version(self):
return (0,4) return (0,5)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
return open(os.path.join(os.path.split(__file__)[0],'conv_kernel.cu')).read()+\ return open(os.path.join(os.path.split(__file__)[0],'conv_kernel.cu')).read()+\
......
...@@ -307,7 +307,7 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern, ...@@ -307,7 +307,7 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
#define CONV_ROWS_STACK_SPECIAL(kern_wid) \ #define CONV_ROWS_STACK_SPECIAL(kern_wid) \
if(!img_contiguous_2d || !kern_contiguous_2d) f = conv_rows_stack<kern_wid, false>;\ if(!img_contiguous_2d || !kern_contiguous_2d) f = conv_rows_stack<kern_wid, false>;\
else f = conv_rows_stack<kern_wid, true>;\ else f = conv_rows_stack<kern_wid, true>;
CONV_ROWS_STACK_SPECIAL(THEANO_KERN_WID); CONV_ROWS_STACK_SPECIAL(THEANO_KERN_WID);
f<<< grid, threads, shared_size >>> f<<< grid, threads, shared_size >>>
...@@ -379,7 +379,8 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern, ...@@ -379,7 +379,8 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
if((!img_contiguous_2d || !kern_contiguous_2d)&&version==9) f = conv_rows_stack2<kern_wid, false,true>;\ if((!img_contiguous_2d || !kern_contiguous_2d)&&version==9) f = conv_rows_stack2<kern_wid, false,true>;\
else if(version==9) f = conv_rows_stack2<kern_wid, true,true>;\ else if(version==9) f = conv_rows_stack2<kern_wid, true,true>;\
else if(!img_contiguous_2d || !kern_contiguous_2d) f = conv_rows_stack2<kern_wid, false, false>;\ else if(!img_contiguous_2d || !kern_contiguous_2d) f = conv_rows_stack2<kern_wid, false, false>;\
else f = conv_rows_stack2<kern_wid, true, false>;\ else f = conv_rows_stack2<kern_wid, true, false>;
CONV_ROWS_STACK2_SPECIAL(THEANO_KERN_WID); CONV_ROWS_STACK2_SPECIAL(THEANO_KERN_WID);
f<<< grid, threads, shared_size >>> f<<< grid, threads, shared_size >>>
......
...@@ -225,7 +225,6 @@ def get_shapes2(scales_img=(1,1), scales_kern=(1,1), subsample=(1,1), img_stride ...@@ -225,7 +225,6 @@ def get_shapes2(scales_img=(1,1), scales_kern=(1,1), subsample=(1,1), img_stride
return shapes return shapes
def test_valid(): def test_valid():
raise Exception('One of the modes here causes a segmentation fault!')
# img shape, kern shape, subsample shape # img shape, kern shape, subsample shape
shapes = get_basic_shapes() shapes = get_basic_shapes()
......
...@@ -14,7 +14,7 @@ import numpy ...@@ -14,7 +14,7 @@ import numpy
# Skip test if cuda_ndarray is not available. # Skip test if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
import theano.sandbox.cuda as cuda_ndarray import theano.sandbox.cuda as cuda_ndarray
if cuda_ndarray.cuda_enabled == False: if cuda_ndarray.cuda_available == False:
raise SkipTest('Optional package cuda disabled') raise SkipTest('Optional package cuda disabled')
import theano.sandbox.cuda as tcn import theano.sandbox.cuda as tcn
...@@ -23,6 +23,13 @@ import logging ...@@ -23,6 +23,13 @@ import logging
logging.getLogger('theano.sandbox.cuda.tests.test_nnet').setLevel(logging.INFO) logging.getLogger('theano.sandbox.cuda.tests.test_nnet').setLevel(logging.INFO)
def my_rand(*shape):
    """Return ``numpy.random.rand(*shape)`` converted to float32.

    :param shape: dimensions, passed as separate positional integers
        (e.g. ``my_rand(3, 4)``), forwarded unchanged to ``numpy.random.rand``.
    :return: the result of ``theano._asarray(..., dtype='float32')``.
    """
    samples = numpy.random.rand(*shape)
    return theano._asarray(samples, dtype='float32')
def my_randn(*shape):
    """Return ``numpy.random.randn(*shape)`` converted to float32.

    :param shape: dimensions, passed as separate positional integers
        (e.g. ``my_randn(3, 4)``), forwarded unchanged to ``numpy.random.randn``.
    :return: the result of ``theano._asarray(..., dtype='float32')``.
    """
    samples = numpy.random.randn(*shape)
    return theano._asarray(samples, dtype='float32')
def my_zeros(*shape):
    """Return a zero-filled array of the given shape converted to float32.

    The shape may be given either as a single argument (an int or a tuple,
    e.g. ``my_zeros(5)`` or ``my_zeros((3, 4))``) or as separate dimensions
    (``my_zeros(3, 4)``), mirroring how ``my_rand``/``my_randn`` are called.

    :param shape: one int, one shape tuple, or several ints.
    :return: the result of ``theano._asarray(numpy.zeros(...), dtype='float32')``.
    """
    # numpy.zeros takes the shape as ONE argument; its second positional
    # argument is the dtype.  Unpacking ``*shape`` straight into the call
    # would therefore misinterpret my_zeros(3, 4) as shape=3, dtype=4.
    if len(shape) == 1:
        shape = shape[0]
    return theano._asarray(numpy.zeros(shape), dtype='float32')
def get_mode(use_gpu): def get_mode(use_gpu):
ret = theano.compile.get_default_mode() ret = theano.compile.get_default_mode()
if isinstance(ret, theano.compile.ProfileMode): if isinstance(ret, theano.compile.ProfileMode):
...@@ -44,15 +51,15 @@ def print_diff_mode(a,b): ...@@ -44,15 +51,15 @@ def print_diff_mode(a,b):
def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10, n_iter=100): def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10, n_iter=100):
if use_gpu: if use_gpu:
w = tcn.shared_constructor(0.01*(numpy.random.rand(n_in,n_hid)-0.5), 'w') w = tcn.shared_constructor(0.01*(my_rand(n_in,n_hid)-0.5), 'w')
b = tcn.shared_constructor(numpy.zeros(n_hid), 'b') b = tcn.shared_constructor(my_zeros(n_hid), 'b')
v = tcn.shared_constructor(numpy.zeros((n_hid, n_out)), 'c') v = tcn.shared_constructor(my_zeros((n_hid, n_out)), 'c')
c = tcn.shared_constructor(numpy.zeros(n_out), 'c') c = tcn.shared_constructor(my_zeros(n_out), 'c')
else: else:
w = shared(theano._asarray(0.01*(numpy.random.rand(n_in,n_hid)-0.5), dtype='float32'), 'w') w = shared(0.01*(my_rand(n_in,n_hid)-0.5), 'w')
b = shared(theano._asarray(numpy.zeros(n_hid), dtype='float32'), 'b') b = shared(my_zeros(n_hid), 'b')
v = shared(theano._asarray(numpy.zeros((n_hid, n_out)), dtype='float32'), 'c') v = shared(my_zeros((n_hid, n_out)), 'c')
c = shared(theano._asarray(numpy.zeros(n_out), dtype='float32'), 'c') c = shared(my_zeros(n_out), 'c')
x = tensor.fmatrix('x') x = tensor.fmatrix('x')
y = tensor.fmatrix('y') y = tensor.fmatrix('y')
...@@ -75,8 +82,8 @@ def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10, n_iter=100): ...@@ -75,8 +82,8 @@ def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10, n_iter=100):
for i, n in enumerate(train.maker.env.toposort()): for i, n in enumerate(train.maker.env.toposort()):
print i, n print i, n
xval = theano._asarray(numpy.random.rand(n_batch, n_in), dtype='float32') xval = my_rand(n_batch, n_in)
yval = theano._asarray(numpy.random.rand(n_batch, n_out), dtype='float32') yval = my_rand(n_batch, n_out)
lr = theano._asarray(0.01, dtype='float32') lr = theano._asarray(0.01, dtype='float32')
t0 = time.time() t0 = time.time()
...@@ -123,10 +130,10 @@ def run_conv_nnet1(use_gpu): ...@@ -123,10 +130,10 @@ def run_conv_nnet1(use_gpu):
n_hid = n_kern * logical_hid_shape[0] * logical_hid_shape[1] n_hid = n_kern * logical_hid_shape[0] * logical_hid_shape[1]
n_out = 10 n_out = 10
w = shared_fn(theano._asarray(0.01*(numpy.random.rand(*shape_kern)-0.5), dtype='float32'), 'w') w = shared_fn(0.01*(my_rand(*shape_kern)-0.5), 'w')
b = shared_fn(theano._asarray(numpy.zeros((n_kern,)), dtype='float32'), 'b') b = shared_fn(my_zeros((n_kern,)), 'b')
v = shared_fn(theano._asarray(numpy.zeros((n_hid, n_out)), dtype='float32'), 'c') v = shared_fn(my_zeros((n_hid, n_out)), 'c')
c = shared_fn(theano._asarray(numpy.zeros(n_out), dtype='float32'), 'c') c = shared_fn(my_zeros(n_out), 'c')
x = tensor.Tensor(dtype='float32', broadcastable=(0,1,0,0))('x') x = tensor.Tensor(dtype='float32', broadcastable=(0,1,0,0))('x')
y = tensor.fmatrix('y') y = tensor.fmatrix('y')
...@@ -152,8 +159,8 @@ def run_conv_nnet1(use_gpu): ...@@ -152,8 +159,8 @@ def run_conv_nnet1(use_gpu):
# for i, n in enumerate(train.maker.env.toposort()): # for i, n in enumerate(train.maker.env.toposort()):
# print i, n # print i, n
xval = theano._asarray(numpy.random.rand(*shape_img), dtype='float32') xval = my_rand(*shape_img)
yval = theano._asarray(numpy.random.rand(n_batch, n_out), dtype='float32') yval = my_rand(n_batch, n_out)
lr = theano._asarray(0.01, dtype='float32') lr = theano._asarray(0.01, dtype='float32')
for i in xrange(10): for i in xrange(10):
...@@ -204,12 +211,12 @@ def run_conv_nnet2(use_gpu): # pretend we are training LeNet for MNIST ...@@ -204,12 +211,12 @@ def run_conv_nnet2(use_gpu): # pretend we are training LeNet for MNIST
n_hid = n_kern1 * logical_hid_shape1[0] * logical_hid_shape1[1] n_hid = n_kern1 * logical_hid_shape1[0] * logical_hid_shape1[1]
n_out = 10 n_out = 10
w0 = shared_fn(theano._asarray(0.01*(numpy.random.rand(*shape_kern)-0.5), dtype='float32'), 'w0') w0 = shared_fn(0.01*(my_rand(*shape_kern)-0.5), 'w0')
b0 = shared_fn(theano._asarray(numpy.zeros((n_kern,)), dtype='float32'), 'b0') b0 = shared_fn(my_zeros((n_kern,)), 'b0')
w1 = shared_fn(theano._asarray(0.01*(numpy.random.rand(*shape_kern1)-0.5), dtype='float32'), 'w1') w1 = shared_fn(0.01*(my_rand(*shape_kern1)-0.5), 'w1')
b1 = shared_fn(theano._asarray(numpy.zeros((n_kern1,)), dtype='float32'), 'b1') b1 = shared_fn(my_zeros((n_kern1,)), 'b1')
v = shared_fn(theano._asarray(numpy.zeros((n_hid, n_out)), dtype='float32'), 'c') v = shared_fn(my_zeros((n_hid, n_out)), 'c')
c = shared_fn(theano._asarray(numpy.zeros(n_out), dtype='float32'), 'c') c = shared_fn(my_zeros(n_out), 'c')
x = tensor.Tensor(dtype='float32', broadcastable=(0,1,0,0))('x') x = tensor.Tensor(dtype='float32', broadcastable=(0,1,0,0))('x')
y = tensor.fmatrix('y') y = tensor.fmatrix('y')
...@@ -238,8 +245,8 @@ def run_conv_nnet2(use_gpu): # pretend we are training LeNet for MNIST ...@@ -238,8 +245,8 @@ def run_conv_nnet2(use_gpu): # pretend we are training LeNet for MNIST
# for i, n in enumerate(train.maker.env.toposort()): # for i, n in enumerate(train.maker.env.toposort()):
# print i, n # print i, n
xval = theano._asarray(numpy.random.rand(*shape_img), dtype='float32') xval = my_rand(*shape_img)
yval = theano._asarray(numpy.random.rand(n_batch,n_out), dtype='float32')#int32 make all 0... yval = my_rand(n_batch,n_out)#int32 make all 0...
lr = theano._asarray(0.01, dtype='float32') lr = theano._asarray(0.01, dtype='float32')
for i in xrange(n_train): for i in xrange(n_train):
rval = train(xval, yval, lr) rval = train(xval, yval, lr)
...@@ -284,12 +291,12 @@ def run_conv_nnet2_classif(use_gpu, isize, ksize, n_batch, n_iter, ...@@ -284,12 +291,12 @@ def run_conv_nnet2_classif(use_gpu, isize, ksize, n_batch, n_iter,
n_out = 10 n_out = 10
w0 = shared_fn(theano._asarray(0.01*(numpy.random.rand(*shape_kern)-0.5), dtype='float32'), 'w0') w0 = shared_fn(0.01*(my_rand(*shape_kern)-0.5), 'w0')
b0 = shared_fn(theano._asarray(numpy.zeros((n_kern,)), dtype='float32'), 'b0') b0 = shared_fn(my_zeros((n_kern,)), 'b0')
w1 = shared_fn(theano._asarray(0.01*(numpy.random.rand(*shape_kern1)-0.5), dtype='float32'), 'w1') w1 = shared_fn(0.01*(my_rand(*shape_kern1)-0.5), 'w1')
b1 = shared_fn(theano._asarray(numpy.zeros((n_kern1,)), dtype='float32'), 'b1') b1 = shared_fn(my_zeros((n_kern1,)), 'b1')
v = shared_fn(theano._asarray(0.01*numpy.random.randn(n_hid, n_out), dtype='float32'), 'v') v = shared_fn(0.01*my_randn(n_hid, n_out), 'v')
c = shared_fn(theano._asarray(numpy.zeros(n_out), dtype='float32'), 'c') c = shared_fn(my_zeros(n_out), 'c')
print 'ALLOCATING ARCH: w0 shape', w0.value.shape print 'ALLOCATING ARCH: w0 shape', w0.value.shape
print 'ALLOCATING ARCH: w1 shape', w1.value.shape print 'ALLOCATING ARCH: w1 shape', w1.value.shape
...@@ -330,11 +337,11 @@ def run_conv_nnet2_classif(use_gpu, isize, ksize, n_batch, n_iter, ...@@ -330,11 +337,11 @@ def run_conv_nnet2_classif(use_gpu, isize, ksize, n_batch, n_iter,
for i, n in enumerate(train.maker.env.toposort()): for i, n in enumerate(train.maker.env.toposort()):
print i, n print i, n
xval = theano._asarray(numpy.random.rand(*shape_img), dtype='float32') xval = my_rand(*shape_img)
yval = theano._asarray(numpy.random.rand(n_batch,n_out), dtype='float32') yval = my_rand(n_batch,n_out)
lr = theano._asarray(0.01, dtype='float32') lr = theano._asarray(0.01, dtype='float32')
rvals=numpy.zeros(n_iter) rvals=my_zeros(n_iter)
t0 = time.time() t0 = time.time()
for i in xrange(n_iter): for i in xrange(n_iter):
rvals[i] = train(xval, yval, lr)[0] rvals[i] = train(xval, yval, lr)[0]
......
...@@ -1064,7 +1064,7 @@ class TensorValue(Value, _tensor_py_operators): ...@@ -1064,7 +1064,7 @@ class TensorValue(Value, _tensor_py_operators):
Tensor = TensorType Tensor = TensorType
#QUESTION: why are we doing this!? #QUESTION: why are we doing this!?
elemwise.as_tensor_variable = as_tensor_variable elemwise.as_tensor_variable = as_tensor_variable
elemwise.TensorType = TensorType elemwise.TensorType = TensorType
elemwise.TensorVariable = TensorVariable elemwise.TensorVariable = TensorVariable
elemwise.TensorConstant = TensorConstant elemwise.TensorConstant = TensorConstant
......
...@@ -24,6 +24,8 @@ from theano import compile #to register the optimizer built by this file ...@@ -24,6 +24,8 @@ from theano import compile #to register the optimizer built by this file
from theano.gof.python25 import any, all from theano.gof.python25 import any, all
from theano.gof.opt import Optimizer from theano.gof.opt import Optimizer
from theano.gof import toolbox, DestroyHandler from theano.gof import toolbox, DestroyHandler
# Utilities # Utilities
def out2in(*local_opts): def out2in(*local_opts):
...@@ -395,6 +397,13 @@ class ShapeFeature(object): ...@@ -395,6 +397,13 @@ class ShapeFeature(object):
else: else:
self.shape_of[r] = tuple([self.unpack(s_i) for s_i in s]) self.shape_of[r] = tuple([self.unpack(s_i) for s_i in s])
def init_r(self,r):
    """Make sure ``self.shape_of`` has an entry for variable ``r``.

    Does nothing when ``r`` is already a key of ``self.shape_of``.  Otherwise
    the shape is registered through ``self.set_shape`` using
    ``self.shape_tuple(r)``; if that raises ``AttributeError`` the shape is
    registered as ``None`` instead.
    """
    if r in self.shape_of:
        return
    try:
        self.set_shape(r, self.shape_tuple(r))
    except AttributeError:
        # presumably r is not a TensorType variable, so no shape is available
        self.set_shape(r, None)
def make_vector_shape(self, r): def make_vector_shape(self, r):
return make_vector(*self.shape_of[r]) return make_vector(*self.shape_of[r])
# #
...@@ -421,11 +430,7 @@ class ShapeFeature(object): ...@@ -421,11 +430,7 @@ class ShapeFeature(object):
for i, r in enumerate(node.inputs): for i, r in enumerate(node.inputs):
# make sure we have shapes for the inputs # make sure we have shapes for the inputs
if r not in self.shape_of: self.init_r(r)
try:
self.set_shape(r, self.shape_tuple(r))
except AttributeError:
self.set_shape(r, None ) # not a TensorType variable
try: try:
shape_infer = node.op.infer_shape shape_infer = node.op.infer_shape
...@@ -453,7 +458,7 @@ class ShapeFeature(object): ...@@ -453,7 +458,7 @@ class ShapeFeature(object):
# TODO: # TODO:
# This tells us that r and new_r must have the same shape # This tells us that r and new_r must have the same shape
# if we didn't know that the shapes are related, now we do. # if we didn't know that the shapes are related, now we do.
self.init_r(new_r)
# change_input happens in two cases: # change_input happens in two cases:
# 1) we are trying to get rid of r, or # 1) we are trying to get rid of r, or
# 2) we are putting things back after a failed transaction. # 2) we are putting things back after a failed transaction.
......
...@@ -212,7 +212,7 @@ class DownsampleFactorMax(Op): ...@@ -212,7 +212,7 @@ class DownsampleFactorMax(Op):
""" % locals() """ % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return () return (0,1)
class DownsampleFactorMaxGrad(Op): class DownsampleFactorMaxGrad(Op):
...@@ -349,4 +349,4 @@ class DownsampleFactorMaxGrad(Op): ...@@ -349,4 +349,4 @@ class DownsampleFactorMaxGrad(Op):
""" %locals() """ %locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return () return (0,1)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论