Commit 618a8e25 authored by Pascal Lamblin

merge

import sys
sys.path.insert(0, '..')
import theano
from theano import tensor as T
from theano.tensor import nnet
from theano.compile import module
from theano import printing, pprint
from theano import compile
import numpy as N
class LogisticRegressionN(module.FancyModule):
class InstanceType(module.FancyModuleInstance):
def initialize(self, n_in, n_out, seed = None):
#self.component is the LogisticRegressionTemplate instance that built this instance.
rng = N.random.RandomState(seed)
self.w = rng.randn(n_in, n_out)
self.b = rng.randn(n_out)
self.lr = 0.01
self.__hide__ = ['params']
def __eq__(self, other):
if not isinstance(other.component, LogisticRegressionN) and not isinstance(other.component, LogisticRegression2):
raise NotImplementedError
#we compare the members.
if (N.abs(self.w-other.w)<1e-8).all() and (N.abs(self.b-other.b)<1e-8).all() and self.lr == other.lr:
return True
return False
def __hash__(self):
raise NotImplementedError
def __init__(self, x = None, targ = None):
super(LogisticRegressionN, self).__init__() #boilerplate
self.x = x if x is not None else T.matrix()
self.targ = targ if targ is not None else T.lvector()
self.w = module.Member(T.matrix()) #automatically names
self.b = module.Member(T.vector()) #automatically names
self.lr = module.Member(T.dscalar()) #provides an external interface to change it
#and makes it an implicit input to any Method you build.
self.params = [self.w, self.b]
xent, y = nnet.crossentropy_softmax_1hot(
T.dot(self.x, self.w) + self.b, self.targ)
xent = T.sum(xent)
self.y = y
self.xent = xent
gparams = T.grad(xent, self.params)
self.update = module.Method([self.x, self.targ], xent,
updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams)))
self.apply = module.Method([self.x], T.argmax(T.dot(self.x, self.w) + self.b, axis=1))
class LogisticRegression2(module.FancyModule):
class InstanceType(module.FancyModuleInstance):
def initialize(self, n_in, seed = 1827):
#self.component is the LogisticRegressionTemplate instance that built this instance.
rng = N.random.RandomState(seed)
self.w = rng.randn(n_in, 1)
self.b = rng.randn(1)
self.lr = 0.01
self.__hide__ = ['params']
def __eq__(self, other):
if not isinstance(other.component, LogisticRegressionN) and not isinstance(other.component, LogisticRegression2):
raise NotImplementedError
#we compare the members.
if (N.abs(self.w-other.w)<1e-8).all() and (N.abs(self.b-other.b)<1e-8).all() and self.lr == other.lr:
return True
return False
def __hash__(self):
raise NotImplementedError
def __init__(self, x = None, targ = None):
super(LogisticRegression2, self).__init__() #boilerplate
self.x = x if x is not None else T.matrix()
self.targ = targ if targ is not None else T.lcol()
self.w = module.Member(T.dmatrix()) #automatically names
self.b = module.Member(T.dvector()) #automatically names
self.lr = module.Member(T.dscalar()) #provides an external interface to change it
#and makes it an implicit input to any Method you build.
self.params = [self.w, self.b]
y = nnet.sigmoid(T.dot(self.x, self.w))
xent_elem = -self.targ * T.log(y) - (1.0 - self.targ) * T.log(1.0 - y)
xent = T.sum(xent_elem)
self.y = y
self.xent_elem = xent_elem
self.xent = xent
gparams = T.grad(xent, self.params)
self.update = module.Method([self.x, self.targ], xent,
updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams)))
self.apply = module.Method([self.x], T.argmax(T.dot(self.x, self.w) + self.b, axis=1))
def main():
pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx, printing.FunctionPrinter('xsoftmaxdx'))
pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias, printing.FunctionPrinter('nll', 'softmax', 'argmax'))
if 1:
lrc = LogisticRegressionN()
print '================'
print lrc.update.pretty()
print '================'
print lrc.update.pretty(mode = theano.Mode('py', 'fast_run'))
print '================'
# print lrc.update.pretty(mode = compile.FAST_RUN.excluding('inplace'))
# print '================'
# sys.exit(0)
lr = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run'))
#lr = lrc.make(10, 2, mode=compile.FAST_RUN.excluding('fast_run'))
#lr = lrc.make(10, 2, mode=theano.Mode('py', 'merge')) #'FAST_RUN')
data_x = N.random.randn(5, 10)
data_y = (N.random.randn(5) > 0)
for i in xrange(10000):
lr.lr = 0.02
xe = lr.update(data_x, data_y)
if i % 100 == 0:
print i, xe
print
print 'TRAINED MODEL:'
print lr
if 0:
lrc = LogisticRegression2()
lr = lrc.make(10, mode=theano.Mode('c|py', 'merge')) #'FAST_RUN')
data_x = N.random.randn(5, 10)
data_y = (N.random.randn(5, 1) > 0)
for i in xrange(10000):
xe = lr.update(data_x, data_y)
if i % 100 == 0:
print i, xe
print
print 'TRAINED MODEL:'
print lr
if __name__ == '__main__':
main()
#!/usr/bin/env python
#
# UNIT TEST
#
import unittest
import numpy
from theano import gof
from theano.gradient import *
from theano import gradient
import theano
import sys
from theano import tensor as T
from theano.tensor import nnet
from theano.compile import module
from theano import printing, pprint
from theano import compile
import numpy as N
class test_logistic_regression_example(unittest.TestCase):
def test_example_main(self):
"""Test that the file execute without trouble"""
import os
sys.path.append(os.path.realpath(".."))
import logistic_regression
logistic_regression.main()
def test_example_moduleN(self):
"""Test that the LogisticRegressionN module execute the same with different mode"""
import os
sys.path.append(os.path.realpath(".."))
import logistic_regression
pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx, printing.FunctionPrinter('xsoftmaxdx'))
pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias, printing.FunctionPrinter('nll', 'softmax', 'argmax'))
lrc = logistic_regression.LogisticRegressionN()
lr0 = lrc.make(10, 2, seed=1827)
lr1 = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run'), seed=1827)
lr2 = lrc.make(10, 2, mode=theano.Mode('py', 'fast_run'), seed=1827)
lr3 = lrc.make(10, 2, mode=theano.Mode('py', 'merge'), seed=1827) #'FAST_RUN')
lr4 = lrc.make(10, 2, mode=compile.FAST_RUN.excluding('fast_run'), seed=1827)
#FAST_RUN, FAST_COMPILE,
data_x = N.random.randn(5, 10)
data_y = (N.random.randn(5) > 0)
def train(lr):
for i in xrange(1000):
lr.lr = 0.02
xe = lr.update(data_x, data_y)
train(lr0)
train(lr1)
train(lr2)
train(lr3)
train(lr4)
assert lr0==lr1
assert lr0==lr2
assert lr0==lr3
assert lr0==lr4
def test_example_module2(self):
"""Test that the LogisticRegression2 module execute the same with different mode"""
import os
sys.path.append(os.path.realpath(".."))
import logistic_regression
lrc = logistic_regression.LogisticRegression2() #TODO: test 2==N
lr0 = lrc.make(10, seed=1827)
lr1 = lrc.make(10, mode=theano.Mode('c|py', 'fast_run'), seed=1827)
lr2 = lrc.make(10, mode=theano.Mode('py', 'fast_run'), seed=1827)
lr3 = lrc.make(10, mode=theano.Mode('py', 'merge'), seed=1827) #'FAST_RUN'
lr4 = lrc.make(10, mode=compile.FAST_RUN.excluding('fast_run'), seed=1827)
#FAST_RUN, FAST_COMPILE,
data_x = N.random.randn(5, 10)
data_y = (N.random.randn(5) > 0)
data_y = data_y.reshape((data_y.shape[0],1))#needs to be a column
def train(lr):
for i in xrange(1000):
lr.lr = 0.02
xe = lr.update(data_x, data_y)
train(lr0)
train(lr1)
train(lr2)
train(lr3)
train(lr4)
assert lr0==lr1
assert lr0==lr2
assert lr0==lr3
assert lr0==lr4
# self.fail("NotImplementedError")
if __name__ == '__main__':
from theano.tests import main
main(__file__)
import unittest
import theano
import numpy as N
from theano import tensor as T
from theano.tensor import nnet as NN
from theano.compile import module as M
class Blah(M.ModuleInstance):
# self.component #refers to the Module
# def __init__(self, input = None, target = None, regularize = True):
# super(Blah, self)
def initialize(self,input_size = None, target_size = None, seed = 1827,
**init):
if input_size and target_size:
# initialize w and b in a special way using input_size and target_size
sz = (input_size, target_size)
rng = N.random.RandomState(seed)
self.w = rng.uniform(size = sz, low = -0.5, high = 0.5)
self.b = N.zeros(target_size)
self.stepsize = 0.01
#we call default_initialize afterwards, as we want the passed parameters to supersede the default values.
M.default_initialize(self,**init)
def __eq__(self, other):
if not isinstance(other.component, SoftmaxXERegression1) and not isinstance(other.component, SoftmaxXERegression2):
raise NotImplementedError
#we compare the members.
if (self.w==other.w).all() and (self.b==other.b).all() and self.stepsize == other.stepsize:
return True
return False
def __hash__(self):
raise NotImplementedError
def fit(self, train, test):
pass
class RegressionLayer1(M.Module):
InstanceType=Blah
def __init__(self, input = None, target = None, regularize = True):
super(RegressionLayer1, self).__init__() #boilerplate
# MODEL CONFIGURATION
self.regularize = regularize
# ACQUIRE/MAKE INPUT AND TARGET
if not input:
input = T.matrix('input')
if not target:
target = T.matrix('target')
# HYPER-PARAMETERS
self.stepsize = M.Member(T.scalar()) # a stepsize for gradient descent
# PARAMETERS
self.w = M.Member(T.matrix()) #the linear transform to apply to our input points
self.b = M.Member(T.vector()) #a vector of biases, which make our transform affine instead of linear
# REGRESSION MODEL
self.activation = T.dot(input, self.w) + self.b
self.prediction = self.build_prediction()
# CLASSIFICATION COST
self.classification_cost = self.build_classification_cost(target)
# REGULARIZATION COST
self.regularization = self.build_regularization()
# TOTAL COST
self.cost = self.classification_cost
if self.regularize:
self.cost = self.cost + self.regularization
# GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
self.grad_w, self.grad_b = T.grad(self.cost, [self.w, self.b])
# INTERFACE METHODS
self.update = M.Method([input, target],
self.cost,
w = self.w - self.stepsize * self.grad_w,
b = self.b - self.stepsize * self.grad_b)
self.apply = M.Method(input, self.prediction)
def params(self):
return self.w, self.b
def build_regularization(self):
return T.zero() # no regularization!
class RegressionLayer2(M.Module):
def __init__(self, input = None, target = None, regularize = True):
super(RegressionLayer2, self).__init__() #boilerplate
# MODEL CONFIGURATION
self.regularize = regularize
# ACQUIRE/MAKE INPUT AND TARGET
if not input:
input = T.matrix('input')
if not target:
target = T.matrix('target')
# HYPER-PARAMETERS
self.stepsize = M.Member(T.scalar()) # a stepsize for gradient descent
# PARAMETERS
self.w = M.Member(T.matrix()) #the linear transform to apply to our input points
self.b = M.Member(T.vector()) #a vector of biases, which make our transform affine instead of linear
# REGRESSION MODEL
self.activation = T.dot(input, self.w) + self.b
self.prediction = self.build_prediction()
# CLASSIFICATION COST
self.classification_cost = self.build_classification_cost(target)
# REGULARIZATION COST
self.regularization = self.build_regularization()
# TOTAL COST
self.cost = self.classification_cost
if self.regularize:
self.cost = self.cost + self.regularization
# GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
self.grad_w, self.grad_b = T.grad(self.cost, [self.w, self.b])
# INTERFACE METHODS
self.update = M.Method([input, target],
self.cost,
w = self.w - self.stepsize * self.grad_w,
b = self.b - self.stepsize * self.grad_b)
self.apply = M.Method(input, self.prediction)
def params(self):
return self.w, self.b
def _instance_initialize(self, obj, input_size = None, target_size = None,
seed = 1827, **init):
# obj is an "instance" of this module holding values for each member and
# functions for each method
if input_size and target_size:
# initialize w and b in a special way using input_size and target_size
sz = (input_size, target_size)
rng = N.random.RandomState(seed)
obj.w = rng.uniform(size = sz, low = -0.5, high = 0.5)
obj.b = N.zeros(target_size)
obj.stepsize = 0.01
# here we call the default_initialize method, which takes all the name: value
# pairs in init and sets the property with that name to the provided value
# this covers setting stepsize, l2_coef; w and b can be set that way too
# we call it afterwards, as we want the passed parameters to supersede the default values.
M.default_initialize(obj,**init)
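# A hypothetical make() call illustrating the flow above (a sketch mirroring
# the test further down, not part of the original code):
#   inst = SoftmaxXERegression2().make(input_size = 10, target_size = 1,
#                                      stepsize = 0.1)
# w and b are initialized from input_size/target_size, then
# default_initialize overrides the default stepsize of 0.01 with 0.1.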
def build_regularization(self):
return T.zero() # no regularization!
class SoftmaxXERegression1(RegressionLayer1):
""" XE mean cross entropy"""
def build_prediction(self):
return NN.softmax(self.activation)
def build_classification_cost(self, target):
#self.classification_cost_matrix = target * T.log(self.prediction) + (1 - target) * T.log(1 - self.prediction)
self.classification_cost_matrix = (target - self.prediction)**2
self.classification_costs = -T.sum(self.classification_cost_matrix, axis=1)
return T.sum(self.classification_costs)
def build_regularization(self):
self.l2_coef = M.Member(T.scalar()) # we can add a hyper parameter if we need to
return self.l2_coef * T.sum(self.w * self.w)
class SoftmaxXERegression2(RegressionLayer2):
""" XE mean cross entropy"""
def build_prediction(self):
return NN.softmax(self.activation)
def build_classification_cost(self, target):
#self.classification_cost_matrix = target * T.log(self.prediction) + (1 - target) * T.log(1 - self.prediction)
self.classification_cost_matrix = (target - self.prediction)**2
self.classification_costs = -T.sum(self.classification_cost_matrix, axis=1)
return T.sum(self.classification_costs)
def build_regularization(self):
self.l2_coef = M.Member(T.scalar()) # we can add a hyper parameter if we need to
return self.l2_coef * T.sum(self.w * self.w)
class T_test_wiki_module(unittest.TestCase):
def test_Module_basic_example1(self):
n, c = T.scalars('nc')
inc = theano.function([n, ((c, c + n), 0)], [])
dec = theano.function([n, ((c, c - n), inc.container[c])], []) # we need to pass inc's container in order to share
plus10 = theano.function([(c, inc.container[c])], c + 10)
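# Note: in this (old) theano.function interface, ((c, c + n), 0) declares c
# as an implicit state input with initial value 0 that is updated to c + n
# on each call; passing inc.container[c] instead of a value makes the other
# functions share inc's storage for c, as the asserts below verify.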
assert inc[c] == 0
inc(2)
assert inc[c] == 2 and dec[c] == inc[c]
dec(3)
assert inc[c] == -1 and dec[c] == inc[c]
assert plus10() == 9
def test_Module_basic_example2(self):
m = M.Module()
n = T.scalar('n')
m.c = M.Member(T.scalar()) # state variables must be wrapped with ModuleMember
m.inc = M.Method(n, [], c = m.c + n) # m.c <= m.c + n
m.dec = M.Method(n, [], c = m.c - n) # m.c <= m.c - n
m.dec = M.Method(n, [], updates = {m.c: m.c - n})
#m.dec = M.Method(n, [], updates = {c: m.c - n})#global c doesn't exist
#m.dec = M.Method(n, [], m.c = m.c - n) #python doesn't support this syntax
m.plus10 = M.Method([], m.c + 10) # m.c is always accessible since it is a member of this class
inst = m.make(c = 0) # here, we make an "instance" of the module with c initialized to 0
assert inst.c == 0
inst.inc(2)
assert inst.c == 2
inst.dec(3)
assert inst.c == -1
assert inst.plus10() == 9
inst = m.make(c = 5) # here, we make an "instance" of the module with c initialized to 5
assert inst.c == 5
inst.inc(2)
assert inst.c == 7
inst.dec(3)
assert inst.c == 4
assert inst.plus10() == 14
def test_Module_nesting_example1(self):
def make_incdec_function():
n, c = T.scalars('nc')
inc = theano.function([n, ((c, c + n), 0)], [])
dec = theano.function([n, ((c, c - n), inc.container[c])], [])
return inc,dec
inc1, dec1 = make_incdec_function()
inc2, dec2 = make_incdec_function()
a, b = T.scalars('ab')
sum = theano.function([(a, inc1.container['c']), (b, inc2.container['c'])], a + b)
inc1(2)
dec1(4)
inc2(6)
assert inc1['c'] == -2 and inc2['c'] == 6
assert sum() == 4 # -2 + 6
def test_Module_nesting_example2(self):
def make_incdec_module():
m = M.Module()
n = T.scalar('n')
m.c = M.Member(T.scalar()) # state variables must be wrapped with ModuleMember
m.inc = M.Method(n, [], c = m.c + n) # m.c <= m.c + n
m.dec = M.Method(n, [], c = m.c - n) # m.c <= m.c - n
return m
m = M.Module()
m.incdec1 = make_incdec_module()
m.incdec2 = make_incdec_module()
m.sum = M.Method([], m.incdec1.c + m.incdec2.c)
inst = m.make(incdec1 = dict(c=0), incdec2 = dict(c=0))
assert inst.incdec1.c==0 and inst.incdec2.c==0
inst.incdec1.inc(2)
inst.incdec1.dec(4)
inst.incdec2.inc(6)
assert inst.incdec1.c == -2 and inst.incdec2.c == 6
assert inst.sum() == 4 # -2 + 6
def test_Module_Advanced_example(self):
data_x = N.random.randn(4, 10)
data_y = [ [int(x)] for x in N.random.randn(4) > 0]
def test(model):
model = model.make(input_size = 10,
target_size = 1,
stepsize = 0.1)
print model.stepsize
self.failUnless( model.w.shape == (10,1) and model.b.shape == (1,))
assert model.stepsize == 0.1
for i in xrange(1000):
xe = model.update(data_x, data_y)
if i % 100 == 0:
print i, xe
pass
#for inputs, targets in my_training_set():
#print "cost:", model.update(inputs, targets)
print "final weights:", model.w
print "final biases:", model.b
#Print "some prediction:", model.prediction(some_inputs)
return model
m1=test(SoftmaxXERegression1(regularize = False))
m2=test(SoftmaxXERegression2(regularize = False))
print "m1",m1
print "m2",m2
print m2==m1
print m1==m2
assert m2==m1 and m1==m2
def test_Module_extending_module_methods(self):
model_module = SoftmaxXERegression1(regularize = False)
model_module.sum = M.Member(T.scalar()) # we add a module member to hold the sum
model_module.update.updates.update(sum = model_module.sum + model_module.cost) # now update will also update sum!
model = model_module.make(input_size = 4,
target_size = 2,
stepsize = 0.1,
sum = 0) # we mustn't forget to initialize the sum
print model.stepsize
self.failUnless( model.w.shape == (4,2) and model.b.shape == (2,))
assert model.stepsize == 0.1
test = model.update([[0,0,1,0]], [[0,1]])
test += model.update([[0,1,0,0]], [[1,0]])
assert model.sum == test
def test_Module_basic_example2_more(self):
m = M.Module()
m2 = M.Module()
m2.name="m2" # for better error
#top level don't have name, but other have auto name.
n = T.scalar('n')
m.c = M.Member(T.scalar()) # state variables must be wrapped with ModuleMember
m2.c = M.Member(T.scalar()) # state variables must be wrapped with ModuleMember
m.dec = M.Method(n, [], c = m.c - n)
m.inc = M.Method(n, [], c = m.c + n) # m.c <= m.c + n
# m.inc = M.Method(n, [], c = c + n)#fail c not defined
#syntax error
# m.inc = M.Method(n, [], m.c = m.c + n)#fail
m.inc = M.Method(n, [], updates={m.c: m.c + n})
# m.inc = M.Method(n, [], updates={c: m.c + n})#fail with NameError
# m.inc = M.Method(n, [], updates={m.c: c + n})#fail with NameError
# m.inc = M.Method(n, [], updates={c: c + n})#fail with NameError
m.inc = M.Method(n, [], updates={m.c: m2.c + n})#works! should this be allowed?
a = M.Module()
a.m1 = m
a.m2 = m2
a.make()#should work.
# self.assertRaises(m.make(c = 0), Error)
m.inc = M.Method(n, [], updates={m2.c: m.c + n})#works! should this be allowed?
# self.assertRaises(m.make(c = 0), Error)
# m.inc = M.Method(n, [], updates={m.c: m2.c + m.c+ n})#works! should this be allowed?
m2.inc = M.Method(n, [], updates={m2.c: m2.c + 2*m.c+ n})#works! should this be allowed?
# self.assertRaises(m.make(c = 0), Error)
if __name__ == '__main__':
from theano.tests import main
main("test_wiki")
@@ -4,6 +4,7 @@ from theano.gof.link import WrapLinkerMany
from theano.gof.cutils import run_cthunk
from theano.compile.mode import Mode, register_mode, predefined_modes, predefined_linkers, predefined_optimizers, default_linker, default_optimizer
from theano.gof.cc import OpWiseCLinker
from theano.gof.python25 import any
from theano import gof
import theano.config as config
@@ -13,46 +13,58 @@ def getFilterOutShp(inshp, kshp, (dx,dy)=(1,1), mode='valid'):
return N.int64(N.ceil((inshp[1:] + s*kshp - s*1)/\
N.array([dx,dy], dtype='float')))
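# A worked example of the formula above (assuming s=-1 for 'valid' and s=+1
# for 'full', as in standard convolution): with inshp=(1,28,28), kshp=(5,5)
# and (dx,dy)=(1,1), 'valid' gives ceil((28-5+1)/1) = 24 rows and cols,
# while 'full' gives ceil((28+5-1)/1) = 32.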
def conv(border_mode, subsample=(1,1), imshp=None, kshp=None, **kargs):
"""
This function returns an instantiated ConvOp, but gives better names to some parameters.
We do this instead of changing the ConvOp interface so that existing code
does not have to change.
:type border_mode: string
:param border_mode: 'valid' (only apply the kernel over complete patches of the image)
or 'full' (pad the image with 0s and apply the kernel over every full and partial patch of the image)
:type subsample: tuple of len 2
:param subsample: how many pixels we move in the (row, col) directions of the image when moving from one patch to the next
:type imshp: tuple of len 4
:param imshp: (batch size, stack size, nb row, nb col)
:type kshp: tuple of len 4
:param kshp: (nb kernel, stack size, nb row, nb col)
"""
if imshp is not None and kshp is not None:
assert imshp[1]==kshp[1]
nkern = kshp[0]
bsize = imshp[0]
kshp = kshp[:2]
imshp = imshp[1:]
else:
nkern, bsize = None, None
return ConvOp(output_mode=border_mode, dx=subsample[0], dy=subsample[1],
imshp=imshp, kshp=kshp, nkern=nkern, bsize=bsize,**kargs)
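# A minimal usage sketch of the wrapper above (hypothetical shapes; assumes
# 4d image/kernel variables such as theano.tensor.tensor4):
#   img = T.tensor4()   # (batch size, stack size, nb row, nb col)
#   kern = T.tensor4()  # (nb kernel, stack size, nb row, nb col)
#   out = conv('valid', subsample=(1,1),
#              imshp=(8, 1, 28, 28), kshp=(4, 1, 5, 5))(img, kern)
#   # out has shape (8, 4, 24, 24): 8 images, 4 kernels, 24x24 'valid' maps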
class ConvOp(Op):
"""
A convolution op that should mimic scipy.signal.convolve2d, but faster!
In development.
A convolution op that should extend scipy.signal.convolve2d, but much faster!
"""
__attrnames = ['imshp', 'kshp', 'nkern', 'bsize', 'dx', 'dy', 'out_mode',
'unroll_batch', 'unroll_kern',
'unroll_batch', 'unroll_kern', 'unroll_patch',
'imshp_logical', 'kshp_logical', 'kshp_logical_top_aligned']
#FRED: I added both unroll attributes, as we don't want ops with different values to be merged. Otherwise, the unroll tests don't work correctly.
"""These attributes uniquely identify the behaviour of this op for given inputs"""
#TODO: make the stacksize its own parameter, and make imshp a pair
def __init__(self, imshp=None, kshp=None, nkern=None, bsize=None, dx=None, dy=None, output_mode='valid',
unroll_batch=0,
unroll_kern=0,
unroll_patch=False,
unroll_patch=True,
imshp_logical=None,
kshp_logical=None,
kshp_logical_top_aligned=True,
verbose=0,
version=-1):
"""
imshp - image shape tuple of 2 or 3: 2 for a 2d image, 3 for a stack of 2d images.
kshp - kernel shape 2
nkern - # kernels
bsize - batch size
dx - patch stride rows
dy - patch stride cols
out_mode - 'valid', 'full'
unroll_patch - c code generation option
unroll_batch - c code generation option
unroll_kern - c code generation option
verbose - passed to GpuConv
version - passed to GpuConv
This Op implements the convolution of a stack of kernels (a 4d tensor, (nkern, stacksize, nb row, nb col)) on a stack of images (a 4d tensor, (batchsize, stacksize, nb row, nb col)). The batch size is the number of images over which we want to apply the same kernels. nkern is the number of kernels that we want to apply to each image. The stack size is mostly used when there are multiple layers in the network; the output is the sum of the convolutions of the multiple 2d images and kernels in the stack.
The reason that this op does the summation over convolutions within the 'stack' is that
it allows us to be memory-efficient about how gradients are calculated. If, for
@@ -62,24 +74,67 @@ class ConvOp(Op):
gradient on the filters.
unroll_patch. If True, will use a version that unrolls the patch loop, which is faster than the version without unrolling.
unroll_batch. If >0, will use a version that unrolls the batch loop by the value of the option. By default this version of the code is not used.
unroll_kern. Same as unroll_batch, but unrolls the kernel loop.
If imshp, kshp, nkern and bsize are provided, we can generate more optimized code. This makes a significant difference for the full mode with the unroll_patch version.
The most frequently fastest code currently available on x86_64 computers uses unroll_batch=4, unroll_kern=4, unroll_patch=False, and this requires that all the optional shape information be given. Those numbers were tested empirically and are backed up by the article: Anatomy of High-Performance Matrix Multiplication by Kazushige Goto and Robert A. Van De Geijn, ACM Transactions on Mathematical Software, vol 34, No. 3, article 12, May 2008. In figure 12, it gives the value mr x nr; those values are the optimum to use for unroll_batch and unroll_kern. For x86_64 computers it is 4x4. Other architectures can have different values (2x4 for x86, 8x8 for Itanium, ...).
The fastest version uses unroll_batch=4 and unroll_kern=4 when possible (currently this doesn't support logical shape != physical shape), as this is what gives the best performance in practice. This also means that for the best performance, you should have a batch size and a number of kernels that are multiples of 4. In the article:
Anatomy of High-Performance Matrix Multiplication by Kazushige Goto and Robert A. Van De Geijn, ACM Transactions on Mathematical Software, vol 34, No. 3, article 12, May 2008.
In figure 12, it gives the value mr x nr; those values are the optimum to use for unroll_batch and unroll_kern. For x86_64 computers it is 4x4. Other architectures can have different values (2x4 for x86, 8x8 for Itanium, ...).
"""
imshp = tuple(imshp)
if len(imshp)==2:
self.imshp = (1,)+imshp
elif len(imshp)==3:
self.imshp = imshp
else:
raise Exception("bad len for imshp")
del imshp
:type out_mode: string
:param out_mode: 'valid' (gives an output smaller than the image) or 'full' (gives an output bigger than the image)
self.kshp = tuple(kshp)
optional parameters (if provided, they will be used to generate more optimized c code):
:type imshp: tuple of len 2 or 3: 2 for 2d image, 3 for a stack of 2d images.
:param imshp: (stacksize, nb image row, nb image col)
:type kshp: tuple of len 2
:param kshp: (nb kernel row, nb kernel col)
:type nkern: int
:param nkern: the number of kernels
:type bsize: int
:param bsize: the size of the minibatch
:type dx: int
:param dx: patch stride rows
:type dy: int
:param dy: patch stride cols
parameters to select the version of the code used:
:type unroll_patch: bool
:param unroll_patch: use a version of c_code that unrolls the patch loop. It does not require all the shape information in order to work, but if all the shape information is present, it will be used to hardcode the values in the generated code, making it faster.
:type unroll_batch: int
:param unroll_batch: use a version of c_code that unrolls the batch loop (by unroll_batch) and the kernel loop (by unroll_kern). bsize and nkern must be multiples of unroll_batch and unroll_kern respectively.
:type unroll_kern: int
:param unroll_kern: use a version of c_code that unrolls the batch loop (by unroll_batch) and the kernel loop (by unroll_kern). bsize and nkern must be multiples of unroll_batch and unroll_kern respectively.
:type verbose: int
:param verbose: passed to GpuConv
:type version: int
:param version: passed to GpuConv
:param imshp_logical: used internally when we generate the gradient when dx!=1 or dy!=1
:param kshp_logical: idem
:param kshp_logical_top_aligned: idem
"""
all_shape = imshp is not None and kshp is not None and nkern is not None and bsize is not None
if (unroll_batch>0 or unroll_kern>0) and not all_shape:
raise Exception("In ConvOp, when using unroll_batch and unroll_nkern, all shape are needed")
if not all_shape and (imshp is not None or kshp is not None or nkern is not None or bsize is not None):
print "OPTIMISATION WARNING: passing only a few shape to ConvOp for faster code is useless. We use all of them or none."
if not all_shape:
unroll_patch = True
if imshp is not None:
imshp = tuple(imshp)
if len(imshp)==2:
imshp = (1,)+imshp
elif len(imshp)==3:
imshp = imshp
else:
raise Exception("bad len for imshp")
self.imshp = imshp
if kshp is not None:
kshp = tuple(kshp)
self.kshp = kshp
self.nkern = nkern
self.bsize=bsize
self.dx=dx
@@ -89,7 +144,7 @@ class ConvOp(Op):
# a triple
self.imshp_logical = self.imshp
if imshp_logical is not None: self.imshp_logical = tuple(imshp_logical)
assert len(self.imshp) == len(self.imshp_logical)
assert (self.imshp is None and self.imshp_logical is None) or (len(self.imshp) == len(self.imshp_logical))
# a pair
self.kshp_logical = self.kshp
@@ -123,13 +178,17 @@ class ConvOp(Op):
new-=1
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a divisor of nkern(%s)We revert it to %d. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern),new)
self.unroll_kern=new
self.outshp = getFilterOutShp(self.imshp_logical, self.kshp_logical, (dx,dy), output_mode)
self.fulloutshp = getFilterOutShp(self.imshp_logical, self.kshp_logical, (1,1), output_mode)
if all_shape:
self.outshp = getFilterOutShp(self.imshp_logical, self.kshp_logical, (dx,dy), output_mode)
self.fulloutshp = getFilterOutShp(self.imshp_logical, self.kshp_logical, (1,1), output_mode)
else:
self.outshp = None
self.fulloutshp = None
self.out_mode = output_mode
if not self.out_mode in ["valid", "full"]:
raise Exception("Mode %s not implemented"%self.out_mode)
if not (self.outshp > 0).all():
if all_shape and not (self.outshp > 0).all():
raise Exception(("Bad size for the output shape. Verify that [post-supersampling] input shape (%s)"
"and kern shape(%s) are ok. (hint: kerns must fit inside image in"
"'valid' mode)")%(self.imshp_logical,self.kshp_logical))
@@ -220,47 +279,69 @@ class ConvOp(Op):
# TODO: move these back out to global scope when they no longer cause an atexit error
from scipy.signal.signaltools import _valfrommode, _bvalfromboundary
from scipy.signal.sigtools import _convolve2d
#print 'img2d (%s)'%str(self.imshp_logical), img2d
#print 'filtersflipped (%s)'%str(self.kshp_logical), filtersflipped
imshp = self.imshp
if imshp is None:
imshp = tuple(img2d.shape[1:])
kshp = self.kshp
if kshp is None:
kshp = tuple(filtersflipped.shape[2:])
bsize = self.bsize
if bsize is None:
bsize = img2d.shape[0]
nkern = self.nkern
if nkern is None:
nkern = filtersflipped.shape[0]
imshp_logical = self.imshp_logical
if imshp_logical is None:
imshp_logical = imshp
kshp_logical = self.kshp_logical
if kshp_logical is None:
kshp_logical = kshp
if self.fulloutshp is not None:
fulloutshp = tuple(self.fulloutshp)
else:
fulloutshp = tuple(getFilterOutShp(imshp_logical, kshp_logical, (1,1), self.out_mode))
if z[0] is None:
z[0] = N.zeros((self.bsize,)+(self.nkern,)+tuple(self.fulloutshp),
z[0] = N.zeros((bsize,)+(nkern,)+fulloutshp,
dtype=img2d.dtype)
zz=z[0]
val = _valfrommode(self.out_mode)
bval = _bvalfromboundary('fill')
batchsize = self.bsize
stacklen = self.imshp[0]
stacklen = imshp[0]
img2d = img2d.reshape((batchsize,)+ self.imshp)
filtersflipped = filtersflipped.reshape((self.nkern,stacklen)+self.kshp)
img2d = img2d.reshape((bsize,)+ imshp)
filtersflipped = filtersflipped.reshape((nkern,stacklen)+kshp)
if self.imshp != self.imshp_logical:
# assuming that to get from imshp to imshp logical we insert zeros in missing spots
rstride = int(N.ceil(self.imshp_logical[1] / float(self.imshp[1])))
cstride = int(N.ceil(self.imshp_logical[2] / float(self.imshp[2])))
buf = N.zeros((batchsize,)+ self.imshp_logical, dtype=img2d.dtype)
rstride = int(N.ceil(imshp_logical[1] / float(imshp[1])))
cstride = int(N.ceil(imshp_logical[2] / float(imshp[2])))
buf = N.zeros((bsize,)+ imshp_logical, dtype=img2d.dtype)
buf[:,:,::rstride, ::cstride] = img2d
img2d = buf
del buf, rstride, cstride
if self.kshp != self.kshp_logical:
rstride = int(N.ceil(self.kshp_logical[0] / float(self.kshp[0])))
cstride = int(N.ceil(self.kshp_logical[1] / float(self.kshp[1])))
buf = N.zeros((self.nkern,stacklen)+ self.kshp_logical, dtype=filtersflipped.dtype)
if self.kshp_logical_top_aligned:
if kshp != kshp_logical:
rstride = int(N.ceil(kshp_logical[0] / float(kshp[0])))
cstride = int(N.ceil(kshp_logical[1] / float(kshp[1])))
buf = N.zeros((nkern,stacklen)+ self.kshp_logical, dtype=filtersflipped.dtype)
if kshp_logical_top_aligned:
roffset=coffset=0
else:
roffset=(self.kshp_logical[0] - (self.kshp[0]*rstride) - 1+rstride) % rstride
coffset=(self.kshp_logical[1] - (self.kshp[1]*cstride) - 1+cstride) % cstride
roffset=(kshp_logical[0] - (kshp[0]*rstride) - 1+rstride) % rstride
coffset=(kshp_logical[1] - (kshp[1]*cstride) - 1+cstride) % cstride
assert roffset >= 0
assert coffset >= 0
buf[:,:,roffset::rstride, coffset::cstride] = filtersflipped
filtersflipped = buf
del buf, rstride, cstride
for b in range(batchsize):
for n in range(self.nkern):
for b in range(bsize):
for n in range(nkern):
zz[b,n,...].fill(0)
for im0 in range(stacklen):
zz[b,n,...] += _convolve2d(\
@@ -271,7 +352,7 @@ class ConvOp(Op):
#execute the c version which is much faster.
if self.dx>1 or self.dy>1:
zz = zz[:,:,0::self.dx,0::self.dy].copy()
#print 'zz (%s)'%str((self.dx, self.dy)), zz
z[0]=zz
@@ -286,6 +367,11 @@ class ConvOp(Op):
if self.imshp != self.imshp_logical or self.kshp != self.kshp_logical:
raise NotImplementedError('todo')
all_shape = self.imshp is not None and self.kshp is not None and self.nkern is not None and self.bsize is not None
if not all_shape and (self.dx!=1 or self.dy!=1):
raise Exception("ConvOp.grad when dx!=1 or dy!=1 we must have all the optional shape information")
grad_hack_necessary = False
if grad_hack_necessary:
if self.dx!=1 or self.dy!=1:
@@ -301,25 +387,31 @@ class ConvOp(Op):
newin = inputs.dimshuffle((1,0,2,3))
newgz = gz.dimshuffle((1,0,2,3))
(bsize, nkern) = None, None
imshp = None
kshp = None
un_p = self.unroll_patch
imshp_logical = None
if self.out_mode == 'valid':
(img, filters) = (newin, newgz)
imshp_logical = None
kshp_logical = self.fulloutshp
kshp_logical_top_aligned=False
(bsize, nkern) = (self.imshp[0], self.nkern)
imshp = (self.bsize, self.imshp[1], self.imshp[2])
if all_shape:
(bsize, nkern) = (self.imshp[0], self.nkern)
imshp = (self.bsize, self.imshp[1], self.imshp[2])
kshp = self.outshp
un_b = self.unroll_batch
un_k = self.unroll_kern
#print 'dw_valid', imshp, kshp, nkern, bsize
elif self.out_mode == 'full':
(img, filters) = (newgz, newin)
imshp_logical = (self.bsize, self.fulloutshp[0], self.fulloutshp[1])
kshp_logical = None
kshp_logical_top_aligned=True
(bsize, nkern) = (self.nkern, self.imshp[0])
imshp = (self.bsize, self.outshp[0], self.outshp[1])
kshp = self.imshp[1:]
if all_shape:
imshp_logical = (self.bsize, self.fulloutshp[0], self.fulloutshp[1])
(bsize, nkern) = (self.nkern, self.imshp[0])
imshp = (self.bsize, self.outshp[0], self.outshp[1])
kshp = self.imshp[1:]
un_b = self.unroll_kern
un_k = self.unroll_batch
#print 'dw_full', imshp, kshp, nkern, bsize
@@ -329,7 +421,7 @@ class ConvOp(Op):
filters = filters[:,:,::-1,::-1] #flip them
#find good value for the unroll
if un_b!=0 and bsize%un_b!=0:
if all_shape and un_b!=0 and bsize%un_b!=0:
if bsize<un_b:
un_b = bsize
else:
@@ -343,7 +435,7 @@ class ConvOp(Op):
print "OPTIMISATION WARNING: in ConvOp.grad() we can't determine a good unroll value for the kernel. Maybe you can optimize this!"
dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode='valid',
unroll_batch=un_b, unroll_kern=un_k,
unroll_batch=un_b, unroll_kern=un_k, unroll_patch=un_p,
imshp_logical=imshp_logical,
kshp_logical=kshp_logical,
kshp_logical_top_aligned=kshp_logical_top_aligned,
@@ -352,7 +444,8 @@ class ConvOp(Op):
if hasattr(self,'flops'):
dw.set_flops()
dw = dw(img,filters)
assert (dw.owner.op.outshp==self.kshp).all()
if all_shape:
assert (dw.owner.op.outshp==self.kshp).all()
if self.out_mode == 'valid':
# before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1]
dw = dw.dimshuffle((1,0,2,3))
@@ -363,26 +456,34 @@ class ConvOp(Op):
if not self.out_mode == 'full': mode = 'full'
filters = kerns.dimshuffle((1,0,2,3))
filters = filters[:,:,::-1,::-1]
nkern = self.imshp[0]
imshp = (self.nkern, self.outshp[0], self.outshp[1])
nkern = None
imshp = None
imshp_logical = None
kshp = None
if all_shape:
nkern = self.imshp[0]
imshp = (self.nkern, self.outshp[0], self.outshp[1])
imshp_logical=(self.nkern, self.fulloutshp[0], self.fulloutshp[1])
#print 'din', imshp, self.kshp, nkern
din = ConvOp(imshp, self.kshp, nkern, self.bsize,
1,1, output_mode=mode,
unroll_batch=un_b, unroll_kern=un_k,
imshp_logical=(self.nkern, self.fulloutshp[0], self.fulloutshp[1]),
unroll_batch=un_b, unroll_kern=un_k, unroll_patch=un_p,
imshp_logical=imshp_logical,
kshp_logical=None,
version=-1,#when we change the mode, we don't forward the version.
verbose=self.verbose)
if hasattr(self,'flops'):
din.set_flops()
din = din(gz,filters)
assert (din.owner.op.outshp==self.imshp[1:]).all()
assert (din.owner.op.outshp is None and self.imshp is None) or (din.owner.op.outshp==self.imshp[1:]).all()
return [din, dw]
#def c():
def c_headers(self):
return ['<numpy/noprefix.h>', '<iostream>', '<sstream>' ]
def c_code_cache_version(self):
return (1,)
def c_support_code(self):
return """
#define STRIDES(arr) ((arr)->strides)
@@ -400,24 +501,50 @@ using namespace std;
assert node.inputs[0].type.dtype == node.inputs[1].type.dtype
d=locals()
d.update(sub)
all_shape = self.imshp is not None and self.kshp is not None and self.nkern is not None and self.bsize is not None
d["self_out_mode"]=self.out_mode
d["self_bsize"]=self.bsize
d["self_nkern"]=self.nkern
d["self_dx"]=self.dx
d["self_dy"]=self.dy
d["mode"]=self.out_mode.upper()
d["self_outshp0"]=self.outshp[0]
d["self_outshp1"]=self.outshp[1]
d["self_imshp0"]=self.imshp[0]
d["self_imshp1"]=self.imshp[1]
d["self_imshp2"]=self.imshp[2]
d["mode"]=self.out_mode.upper()
d["self_kshp0"]=self.kshp[0]
d["self_kshp1"]=self.kshp[1]
d["self_kshp_logical_r"] = self.kshp_logical[0]
d["self_kshp_logical_c"] = self.kshp_logical[1]
d["self_kshp_logical_stride_r"] = int(N.ceil(self.kshp_logical[0] / float(self.kshp[0])))
d["self_kshp_logical_stride_c"] = int(N.ceil(self.kshp_logical[1] / float(self.kshp[1])))
d["affectation"]="="
if all_shape:
d["self_bsize"]=self.bsize
d["self_nkern"]=self.nkern
d["self_outshp0"]=self.outshp[0]
d["self_outshp1"]=self.outshp[1]
d["self_imshp0"]=self.imshp[0]
d["self_imshp1"]=self.imshp[1]
d["self_imshp2"]=self.imshp[2]
d["self_kshp0"]=self.kshp[0]
d["self_kshp1"]=self.kshp[1]
d["self_kshp_logical_r"] = self.kshp_logical[0]
d["self_kshp_logical_c"] = self.kshp_logical[1]
d["self_kshp_logical_stride_r"] = int(N.ceil(self.kshp_logical[0] / float(self.kshp[0])))
d["self_kshp_logical_stride_c"] = int(N.ceil(self.kshp_logical[1] / float(self.kshp[1])))
d["self_imshp_logical_r"] = self.imshp_logical[1] #N.B. 1 not 0
d["self_imshp_logical_c"] = self.imshp_logical[2]#N.B. 2 not 1
d["self_imshp_logical_stride_r"] = int(N.ceil(self.imshp_logical[1] / float(self.imshp[1])))
d["self_imshp_logical_stride_c"] = int(N.ceil(self.imshp_logical[2] / float(self.imshp[2])))
if not self.imshp[0]==1: d["affectation"]="+="
d["all_shape"]=1
d["dim_zz_const"]="const"
else:
d["self_bsize"]="%(img2d)s->dimensions[0]"%d
d["self_nkern"]="%(filtersflipped)s->dimensions[0]"%d
d["self_outshp0"]="-1"
d["self_outshp1"]="-1"
d["self_imshp0"]="%(img2d)s->dimensions[1]"%d
d["self_imshp1"]="%(img2d)s->dimensions[2]"%d
d["self_imshp2"]="%(img2d)s->dimensions[3]"%d
d["self_kshp0"]="%(filtersflipped)s->dimensions[2]"%d
d["self_kshp1"]="%(filtersflipped)s->dimensions[3]"%d
d["affectation"]="+="
d["all_shape"]=0
d["dim_zz_const"]=""
if self.kshp_logical_top_aligned:
d["self_kshp_logical_offset_r"] = 0
d["self_kshp_logical_offset_c"] = 0
@@ -427,42 +554,38 @@ using namespace std;
d["self_kshp_logical_offset_r"] = (self.kshp_logical[0] - (self.kshp[0]*rstride) - 1+rstride) % rstride
d["self_kshp_logical_offset_c"] = (self.kshp_logical[1] - (self.kshp[1]*cstride) - 1+cstride) % cstride
del rstride, cstride
d["self_imshp_logical_r"] = self.imshp_logical[1] #N.B. 1 not 0
d["self_imshp_logical_c"] = self.imshp_logical[2]#N.B. 2 not 1
d["self_imshp_logical_stride_r"] = int(N.ceil(self.imshp_logical[1] / float(self.imshp[1])))
d["self_imshp_logical_stride_c"] = int(N.ceil(self.imshp_logical[2] / float(self.imshp[2])))
d["affectation"]="="
if not self.imshp[0]==1: d["affectation"]="+="
if node.inputs[0].type.dtype=="float32": d["type"]="float"
elif node.inputs[0].type.dtype=="float64": d["type"]="double"
else: raise Exception("Type %s not implemented"%node.inputs[0].type.dtype)
d["gemm"]='dgemm_'
if not d["type"]=="double":d["gemm"]='sgemm_'
#print 'LOGICAL OFFSET', self.kshp_logical_top_aligned, d["self_kshp_logical_r"],
#print d["self_kshp0"], d["self_kshp_logical_offset_r"], d["self_kshp_logical_stride_r"],
#print self.out_mode, d["self_imshp_logical_stride_r"]
if self.imshp != self.imshp_logical or self.kshp != self.kshp_logical:
# print "return imshp!=imshp_logical or self.kshp != self.kshp_logical shape version"
if self.verbose:
print "return imshp!=imshp_logical or self.kshp != self.kshp_logical shape version"
return _conv_op_code_a % d
if self.unroll_patch:
# print "return unroll patch version",self.dx,self.dy
if self.verbose:
print "return unroll patch version",self.dx,self.dy
return _conv_op_code_unroll_patch%d
if self.unroll_batch>0 or self.unroll_kern>0:
if self.unroll_batch<=0: self.unroll_batch=1
if self.unroll_kern<=0: self.unroll_kern=1
# print "return unrolled batch and kern code by",self.unroll_batch, self.unroll_kern
if self.verbose:
print "return unrolled batch and kern code by",self.unroll_batch, self.unroll_kern
return gen_conv_code_unroll_batch_kern(d, self.unroll_batch,
self.unroll_kern)
#TODO: should we choose the unroll size automatically with the bigger divisor under 5?
if self.out_mode == 'valid' and self.dx==0 and self.dy==0:
# print "return gemm version"
if self.verbose:
print "return gemm version"
return _conv_op_code_valid_gemm % d
else:
# print "return no gemm version"
if self.verbose:
print "return no gemm version"
return _conv_op_code_a % d
def convolve2(kerns, kshp, nkern, images, imshp, bsize, step=(1,1),
@@ -626,9 +749,6 @@ if ((!%(z)s)
int Os[2];
Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s;
//I keep the formula to calculate Os in case we need it in the future.
//if (mode == FULL) {Os[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(%(self_dx)s)); Os[1] = ceil((dim_im[1]+dim_ker[1]-1)/float(%(self_dy)s));}
//else {Os[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(%(self_dx)s)); Os[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(%(self_dy)s));}
for(int b=0;b< %(self_bsize)s;b++){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern++){
@@ -1110,9 +1230,6 @@ if ((!%(z)s)
int Os[2];
Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s;
//I keep the formula to calculate Os in case we need it in the future.
//if (mode == FULL) {Os[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(%(self_dx)s)); Os[1] = ceil((dim_im[1]+dim_ker[1]-1)/float(%(self_dy)s));}
//else {Os[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(%(self_dx)s)); Os[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(%(self_dy)s));}
for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern+=%(unroll_ksize)s){
@@ -1226,15 +1343,23 @@ _conv_op_code_unroll_patch = """
const int mode=%(mode)s;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
const %(type)s fill_value = 0;
const %(type)s fill_value = 0;//only a value of 0 is currently tested and correctly implemented
int type_im=PyArray_TYPE(%(img2d)s);
int type_ker=PyArray_TYPE(%(filtersflipped)s);
npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s};
npy_intp dim_im[2]={%(self_imshp1)s,%(self_imshp2)s};
npy_intp dim_ker[2]={%(self_kshp0)s,%(self_kshp1)s};
const npy_intp dim_im[2]={%(self_imshp1)s,%(self_imshp2)s};
const npy_intp dim_ker[2]={%(self_kshp0)s,%(self_kshp1)s};
%(dim_zz_const)s npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s};
#if !%(all_shape)s
if (mode == FULL) {
dim_zz[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(%(self_dx)s));
dim_zz[1] = (int)ceil((dim_im[1]+dim_ker[1]-1)/float(%(self_dy)s));
} else {
dim_zz[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(%(self_dx)s));
dim_zz[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(%(self_dy)s));
}
#endif
PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0};
img2d_shape.ptr=img2d_dim;
@@ -1259,7 +1384,8 @@ if(%(img2d)s->nd==2){
img2d_dim[1]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0];
}else {
PyErr_SetString(PyExc_ValueError, "img don't have a good shape");
PyErr_Format(PyExc_ValueError,
"image doesn't have a valid number of dimensions: %%d. ", %(img2d)s->nd);
%(fail)s;
}
@@ -1273,11 +1399,8 @@ if(%(filtersflipped)s->nd==3){
kerns_dim[1]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else{
std:stringstream temp;
temp << "nddim="<<%(filtersflipped)s->nd;
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
("kernel don't have a good shape. " + param).c_str());
PyErr_Format(PyExc_ValueError,
"kernel doesn't have a valid number of dimensions: %%d. ", %(filtersflipped)s->nd);
%(fail)s;
}
@@ -1312,6 +1435,13 @@ filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported"); %(fail)s;
}
if(dim_zz[0]<=0 || dim_zz[1]<=0){
PyErr_Format(PyExc_ValueError,
"Output dimensions are not valid %%dx%%d",dim_zz[0],dim_zz[1]);
%(fail)s;
}
typenum = PyArray_ObjectType((PyObject*)%(img2d)s, 0);
typenum_f = PyArray_ObjectType((PyObject*)%(filtersflipped)s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type"); %(fail)s;}
@@ -1339,13 +1469,6 @@ if ((!%(z)s)
//PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
int Os[2];
Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s;
//I keep the formula to calculate Os in case we need it in the future.
//if (mode == FULL) {Os[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(%(self_dx)s)); Os[1] = ceil((dim_im[1]+dim_ker[1]-1)/float(%(self_dy)s));}
//else {Os[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(%(self_dx)s)); Os[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(%(self_dy)s));}
for(int b=0;b< %(self_bsize)s;b++){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern++){
@@ -1365,13 +1488,13 @@ for(int b=0;b< %(self_bsize)s;b++){
int new_m;
for (int iter_m=0; iter_m < Os[0]; iter_m++) {
for (int iter_m=0; iter_m < dim_zz[0]; iter_m++) {
// Reposition index into input image based on requested output size
int pos_m = iter_m*%(self_dx)s;//The position of the patch in the image
if (mode == FULL) new_m = pos_m ;
else new_m = (pos_m+dim_ker[0]-1);
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
for (int iter_n=0; iter_n < dim_zz[1]; iter_n++) { // loop over columns
int pos_n=iter_n*%(self_dy)s;
%(type)s sum=0;
%(type)s sum2=0;
@@ -1392,7 +1515,6 @@ for(int b=0;b< %(self_bsize)s;b++){
}
}else{
//do the part where kernel is to the right of the img
//TODO: implement unroll patch for fill_value!=0
int k=0,max_k=max((int)(pos_n-dim_im[1])+1,0);
if(fill_value!=0){
@@ -1405,34 +1527,25 @@ for(int b=0;b< %(self_bsize)s;b++){
max_k=min(pos_n+1,(int)dim_ker[1]);
const %(type)s * idx_in=&in[ind0*dim_im[1]];
if(iter_n + 4*%(self_dy)s < Os[1]
if(iter_n + 4*%(self_dy)s < dim_zz[1]
&& iter_n>dim_ker[1]-1+3
&& iter_n<dim_im[1]-dim_ker[1]+1-3){
nb_sum=4;
//cout<<4<<endl;
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
sum2+=idx_hvals[k]*idx_in[ind1+%(self_dy)s];
sum3+=idx_hvals[k]*idx_in[ind1+2*%(self_dy)s];
sum4+=idx_hvals[k]*idx_in[ind1+3*%(self_dy)s];
}
}else if(iter_n + 2*%(self_dy)s < Os[1]
}else if(iter_n + 2*%(self_dy)s < dim_zz[1]
&& iter_n>dim_ker[1]-1
&& iter_n<dim_im[1]-dim_ker[1]+1){
//cout<<2<<endl;
nb_sum=2;
// if(iter_n==dim_ker[1]-1){//k-1<min(pos_n+%(self_dy)s,(int)dim_ker[1])){
// sum2+=idx_hvals[k-1]*idx_in[pos_n-k-%(self_dy)s];
// }
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
sum2+=idx_hvals[k]*idx_in[ind1+%(self_dy)s];
}
// sum2+=idx_hvals[k]*idx_in[pos_n-k+%(self_dy)s];
// sum+=idx_hvals[k]*idx_in[pos_n-k];
// k++;
}else{
//cout<<1<<endl;
nb_sum=1;
/*
%(type)s sum_=0;
@@ -1456,7 +1569,7 @@ for(int b=0;b< %(self_bsize)s;b++){
}else{//valid mode
const %(type)s* idx_in=&in[ind0*dim_im[1]];
const %(type)s* idx_hvals=&hvals[j*dim_ker[1]];
if(iter_n + 4*%(self_dy)s < Os[1]){
if(iter_n + 4*%(self_dy)s < dim_zz[1]){
nb_sum=4;
for (int k=dim_ker[1]-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
@@ -1464,7 +1577,7 @@ for(int b=0;b< %(self_bsize)s;b++){
sum3+=idx_hvals[k]*idx_in[im_idx+2*%(self_dy)s];
sum4+=idx_hvals[k]*idx_in[im_idx+3*%(self_dy)s];
}
}else if(iter_n + 2*%(self_dy)s < Os[1]){
}else if(iter_n + 2*%(self_dy)s < dim_zz[1]){
nb_sum=2;
for (int k=dim_ker[1]-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
@@ -1485,29 +1598,9 @@ for(int b=0;b< %(self_bsize)s;b++){
case 1: out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum;
}
iter_n+=nb_sum-1;
/*
out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum;
if(nb_sum>=2){
iter_n++;
out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum2;
}
if(nb_sum>=3){
iter_n++;
out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum3;
}
if(nb_sum>=4){
iter_n++;
out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum4;
}
*/
}//for iter_n
}//for iter_m
}//for stack_size
if (0 && (mode==FULL)){
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i)
std::cout << " " << out[i];
std::cout << "\\n";
}
}//for n_kern
}//for b
Py_XDECREF(img2d);
import numpy
import theano
import theano.sandbox.scan
# generator network, only one output , type scalar ; no sequence or
# non sequence arguments
def test_1():
def f_pow2(x_tm1):
return (2*x_tm1, {})
s = theano.tensor.dvector()
n_steps = theano.tensor.dscalar()
Y = theano.sandbox.scan.scan(f_pow2, [],s, [],n_steps = n_steps)
f1 = theano.function([s,n_steps], Y)
assert( numpy.allclose(f1([1],3), [2,4,8]) )
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars
def test_2():
def f_rnn(u_t,x_tm1,W_in, W):
return (u_t*W_in+x_tm1*W, {})
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.tensor.dscalar()
W = theano.tensor.dscalar()
Y = theano.sandbox.scan.scan(f_rnn, u,x0,[W_in,W])
f2 = theano.function([u,x0,W_in,W], Y)
assert(numpy.allclose(f2([1,2,3,4],[1],.1,1), numpy.array([1.1,1.3,1.6,2.])))
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables
def test_3():
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w')
def f_rnn_shared(u_t,x_tm1):
return (u_t*W_in+x_tm1*W, {})
Y = theano.sandbox.scan.scan(f_rnn_shared, u,x0,[])
f3 = theano.function([u,x0], Y)
assert(numpy.allclose(f3([1,2,3,4],[1]), numpy.array([1.1,1.3,1.6,2.])))
# some rnn with multiple outputs and multiple inputs; other dimension
# instead of scalars/vectors
def test_4():
W_in2 = theano.shared(numpy.array([1.,2.]), name='win2')
W = theano.shared(numpy.array([[2.,1.],[1.,1.]]), name='w')
W_out = theano.shared(numpy.array([.5,1.]), name = 'wout')
W_in1 = theano.tensor.dmatrix('win')
u1 = theano.tensor.dmatrix('u1')
u2 = theano.tensor.dvector('u2')
x0 = theano.tensor.dmatrix('x0')
y0 = theano.tensor.dvector('y0')
## Why doesn't dot work with scalars!??
## Why * doesn't support SharedVariable and TensorVariable
def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
return ({}, [theano.dot(u1_t,W_in1) + u2_t* W_in2 + \
theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)])
Y = theano.sandbox.scan.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1)
f4 = theano.function([u1,u2,x0,y0,W_in1], Y)
(x,y) = f4( numpy.array([[1,2],[1,2],[1,2]]), \
numpy.array([1,2,3]), \
numpy.array([[0,0]]), \
numpy.array([1]), \
numpy.array([[1,1],[1,1]]))
assert( numpy.all(x == numpy.array([[4.,5.],[18.,16.],[58.,43.]])))
assert( numpy.all(y == numpy.array([0.,7.,25.])))
# basic ESN using updates
def test_5():
W_in = theano.shared(numpy.array([1.,1.]), name='win')
W = theano.shared(numpy.array([[.1,0.],[.0,.1]]),name='w')
W_out= theano.shared(numpy.array([.5,1.]), name='wout')
u = theano.tensor.dvector('u')
x = theano.shared(numpy.array([0.,0.]),'x')
y0 = theano.tensor.dvector('y0')
def f_ESN(u_t):
return ( theano.dot(x,W_out), \
{ x: W_in*u_t + theano.dot(x,W) } )
Y = theano.sandbox.scan.scan(f_ESN,u,y0,[],outputs_taps={0:[]})
f5 = theano.function([u,y0],Y)
assert( numpy.allclose( f5( numpy.array([1,2,3]), numpy.array([0])), \
numpy.array([0.,1.4,3.15])))
# basic ESN using updates ; moving backwards
def test_6():
W_in = theano.shared(numpy.array([1.,1.]), name='win')
W = theano.shared(numpy.array([[.1,0.],[.0,.1]]),name='w')
W_out= theano.shared(numpy.array([.5,1.]), name='wout')
u = theano.tensor.dvector('u')
x = theano.shared(numpy.array([0.,0.]),'x')
y0 = theano.tensor.dvector('y0')
def f_ESN(u_t):
return ( theano.dot(x,W_out), \
{ x: W_in*u_t + theano.dot(x,W) } )
Y = theano.sandbox.scan.scan(f_ESN,u,y0,[],outputs_taps={0:[]}, \
go_backwards = True)
f6 = theano.function([u,y0],Y)
assert( numpy.allclose( f6( numpy.array([1,2,3]), numpy.array([0])), \
numpy.array([0., 4.5, 3.45])))
'''
TO TEST:
- test taps (for sequences and outputs )
- test gradient (one output)
- test gradient (multiple outputs)
- test gradient (go_backwards)
- test gradient (multiple outputs / some uncomputable )
- test gradient (truncate_gradient)
- test gradient (force_gradient)
- test inplace map
'''
if __name__=='__main__':
test_1()
test_2()
test_3()
test_4()
test_5()
test_6()
@@ -13,29 +13,24 @@ def info(*msg):
_logger.info('INFO theano.scan: '+' '.join(msg))
# Hashing a list; lists used by scan are lists of numbers, therefore a list
# can be hashed by hashing all the elements in the list
def hash_list(list):
# Hashing a dictionary or a list or a tuple or any type that is hashable with
# the hash() function
def hash_listsDictsTuples(x):
hash_value = 0
for v in list:
hash_value ^= hash(v)
return hash_value
# Hashing a dictionary; the dictionaries used by scan have numbers as keys
# and either numbers or lists of numbers as values
def hash_dict(dictionary):
hash_value = 0
for k,v in dictionary.iteritems():
# hash key
hash_value ^= hash(k)
if type(v) in (list,tuple):
hash_value ^= hash_list(v)
else:
hash_value ^= hash(v)
if type(x) == dict :
for k,v in x.iteritems():
hash_value ^= hash_listsDictsTuples(k)
hash_value ^= hash_listsDictsTuples(v)
elif type(x) in (list,tuple):
for v in x:
hash_value ^= hash_listsDictsTuples(v)
else:
try:
hash_value ^= hash(x)
except:
pass
return hash_value
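# For example, hash_listsDictsTuples({0: [-2], 1: (3, 4)}) xors the hashes
# of the keys 0 and 1 with the hashes of -2, 3 and 4; unhashable leaves are
# silently skipped and contribute nothing to the result.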
def scan(fn, sequences, initial_states, non_sequences, inplace_map={},
sequences_taps={}, outputs_taps = {},
n_steps = theano.tensor.zero(), force_gradient = False,
@@ -174,7 +169,8 @@ class Scan(theano.Op):
self.destroy_map = {}
if inplace:
self.destroy_map = inplace_map
for i in inplace_map.keys():
self.destroy_map.update({i: [inplace_map[i]] } )
self.seqs_taps = seqs_taps
self.outs_taps = outs_taps
@@ -192,13 +188,25 @@ class Scan(theano.Op):
self.fn = theano.function(inputs,outputs, \
updates = updates, mode = mode)
g_y = [outputs[0].type()]
g_args = theano.tensor.grad(outputs[0],inputs, g_cost = g_y[-1])
def compute_gradient(y, g_y):
gmap = theano.gradient.grad_sources_inputs( \
[(y,g_y)], theano.gof.graph.inputs([y]), False)
def zero(p):
return theano.tensor.TensorConstant(theano.tensor.TensorType(\
dtype=p.type.dtype, broadcastable=[]),
numpy.asarray(0,dtype = p.type.dtype))
return [gmap.get(p, zero(p)) for p in inputs]
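# i.e. compute_gradient returns the gradient of y w.r.t. every input, with
# a zero scalar constant substituted for inputs that y does not depend on,
# so that the per-output gradients can always be summed below.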
g_args = compute_gradient( outputs[0], g_y[-1])
# for all outputs compute gradients and then sum them up
for y in outputs[1:]:
g_y += [y.type()]
g_args_y = theano.tensor.grad(y,inputs, g_cost=g_y[-1])
g_args_y = compute_gradient( y,g_y[-1])
for i in xrange(len(g_args)):
g_args[i] += g_args_y[i]
@@ -244,6 +252,7 @@ class Scan(theano.Op):
(self.n_outs == other.n_outs) and\
(self.n_args == other.n_args)
return rval
def __hash__(self):
return hash(type(self)) ^ \
@@ -254,13 +263,13 @@ class Scan(theano.Op):
hash(self.go_backwards) ^\
hash(self.truncate_gradient) ^\
hash(self.n_args) ^ \
hash_list(self.outputs) ^ \
hash_list(self.inputs) ^ \
hash_list(self.g_ins) ^ \
hash_list(self.g_outs) ^ \
hash_dict(self.seqs_taps) ^\
hash_dict(self.outs_taps) ^\
hash_dict(self.updates)
hash_listsDictsTuples(self.outputs) ^ \
hash_listsDictsTuples(self.inputs) ^ \
hash_listsDictsTuples(self.g_ins) ^ \
hash_listsDictsTuples(self.g_outs) ^ \
hash_listsDictsTuples(self.seqs_taps) ^\
hash_listsDictsTuples(self.outs_taps) ^\
hash_listsDictsTuples(self.updates)
@@ -121,7 +121,12 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
hidval1=outval.copy()
# ConvOp
conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern, unroll_patch=unroll_patch)(inputs4, kerns4)
if unroll_patch:
conv_op = ConvOp(dx=ss[0],dy=ss[1], output_mode=conv_mode,
unroll_patch=unroll_patch)(inputs4, kerns4)
else:
conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode,
unroll_batch=unroll_batch, unroll_kern=unroll_kern, unroll_patch=unroll_patch)(inputs4, kerns4)
l1shp=N.hstack((nkern,
getFilterOutShp(imshp, kshp, ss, conv_mode)))
propup2 = function([inputs4, kerns4], conv_op)
@@ -328,7 +333,7 @@ class TestConvOp(unittest.TestCase):
ssizess = [[(1,1),(1,2)],[(1,1),(2,2)]]
convmodes = ['valid','full']
do_convolve2=True
unroll = [(0,0,False),(0,0,True),(1,1,False),(2,2,False),(3,2,False)]#(batch,kern,patch)
unroll = [(0,0,True),(0,0,False),(1,1,False),(2,2,False),(3,2,False)]#(batch,kern,patch)
do_speed_test = False
# TODO: this version show a bug that was fixed
@@ -515,23 +520,32 @@ class TestConvOp(unittest.TestCase):
        for un_b, un_k, un_p in unroll:
            for ss in ssizes:
                print 'test_ConvOpGrad'
                print 'mode type:', mode, typ
                print 'imshp:', imshp
                print 'kshp:', kshp
                print 'un_b:', un_b
                print 'un_k:', un_k
                print 'ss:', ss
                print 'bsize:', bsize
                print 'nkern:', 4
                # print 'mode:', mode, 'type:', typ
                # print 'imshp:', imshp,
                # print 'kshp:', kshp
                # print 'un_b:', un_b,
                # print 'un_k:', un_k,
                # print 'un_p:', un_p
                # print 'ss:', ss,
                # print 'bsize:', bsize,
                # print 'nkern:', nkern

                def test_i(imgs):
                    if un_p and ss[0] == 1 and ss[1] == 1:
                        convop = ConvOp(dx=ss[0], dy=ss[1],
                                        output_mode=mode, unroll_patch=un_p)
                    else:
                        convop = ConvOp(imshp, kshp, nkern, bsize,
                                        ss[0], ss[1], output_mode=mode,
                                        unroll_batch=un_b, unroll_kern=un_k,
                                        unroll_patch=un_p)
                    return convop(imgs, kernvals)

                def test_k(kerns):
                    if un_p and ss[0] == 1 and ss[1] == 1:
                        convop = ConvOp(dx=ss[0], dy=ss[1],
                                        output_mode=mode, unroll_patch=un_p)
                    else:
                        convop = ConvOp(imshp, kshp, nkern, bsize,
                                        ss[0], ss[1], output_mode=mode,
                                        unroll_batch=un_b, unroll_kern=un_k,
                                        unroll_patch=un_p)
                    return convop(imgvals, kerns)

                print mode, imshp, kshp, un_b, un_k, ss
                # TODO: the tolerance needed to pass is very high for
                # float32 (0.17). Is this acceptable? Expected?
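
                # One hedged way to read the 0.17 figure: as a relative error
                # between analytic and numeric gradients (stand-in values,
                # not measured here):
                #     analytic, numeric = 1.0, 1.2
                #     abs(analytic - numeric) / max(abs(analytic),
                #                                   abs(numeric))  # ~0.17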
......
......@@ -91,7 +91,6 @@ class T_Scan(unittest.TestCase):
        utt.seed_rng()

    # generator network: only one output, type scalar; no sequence or
    # non-sequence arguments
    def test_1(self):
......@@ -243,9 +242,11 @@ class T_Scan(unittest.TestCase):
        Y = theano.sandbox.scan.scan(f_rnn_shared, u, x0, [], \
                                     sequences_taps={0: [-2]},
                                     outputs_taps={0: [-1, -2]})
        f7 = theano.function([u, x0], Y)
        #print f7([1,2,3,4],[1,2])
        v_u = numpy.asarray([1., 2., 3., 4.])
        v_x0 = numpy.asarray([1., 2.])
        out = numpy.asarray([3.1, 5.3])
        assert (compareArrays(out, f7(v_u, v_x0)))
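
        # compareArrays is a helper defined earlier in this test file; a
        # hypothetical stand-in with the presumed semantics would be:
        #     def compareArrays(a, b, rtol=1e-5, atol=1e-8):
        #         return numpy.allclose(numpy.asarray(a), numpy.asarray(b),
        #                               rtol=rtol, atol=atol)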
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
......@@ -263,16 +264,46 @@ class T_Scan(unittest.TestCase):
        Y = theano.sandbox.scan.scan(f_rnn_shared, u, x0, [], \
                                     sequences_taps={0: [-2, 2]},
                                     outputs_taps={0: [-1, -2]})
        f8 = theano.function([u, x0], Y)
        v_u = numpy.array([1., 2., 3., 4., 5., 6.])
        v_x0 = numpy.array([1., 2.])
        out = numpy.array([3.6, 6.4])
        assert (compareArrays(out, f8(v_u, v_x0)))
        #print f8([1,2,3,4,5,6],[1,2])
    '''
    # simple rnn ; compute inplace
    def test_9(self):
        u = theano.tensor.dvector()
        mu = theano.Param(u, mutable=True)
        x0 = theano.tensor.dvector()
        W_in = theano.shared(.1)
        W = theano.shared(1.)
        def f_rnn_shared(u_t, x_tm1):
            return (u_t*W_in + x_tm1*W, {})
        Y = theano.sandbox.scan.scan(f_rnn_shared, u, x0, [], \
                                     inplace_map={0: 0})
        f9 = theano.function([mu, x0], Y,  # mode = 'FAST_RUN'
                             mode='DEBUG_MODE')
        v_u = numpy.array([1., 2., 3.])
        v_x0 = numpy.array([1.])
        out = f9(v_u, v_x0)
        v_out = numpy.array([1.1, 1.3, 1.6])
        assert (compareArrays(out, v_out))
        print v_u
        assert (compareArrays(v_u, out))
    '''
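
    # What the disabled test above exercises: with inplace_map={0:0}, output
    # 0 reuses input 0's buffer, so v_u itself should hold the result. A
    # plain-numpy sketch of that aliasing (hypothetical, not the Op itself):
    #     v_u = numpy.array([1., 2., 3.]); out = v_u     # aliased buffers
    #     for t in xrange(len(v_u)):
    #         out[t] = v_u[t]*0.1 + (out[t-1] if t > 0 else 1.0)*1.0
    #     # v_u now holds [1.1, 1.3, 1.6] -- the scan output, in place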
    # test gradient simple network
    def test_10(self):
        pass

    '''
    TO TEST:
        - test taps (for sequences and outputs)
        - test gradient (one output)
        - test gradient (multiple outputs)
        - test gradient (go_backwards)
......@@ -280,7 +311,6 @@ class T_Scan(unittest.TestCase):
        - test gradient (truncate_gradient)
        - test gradient (force_gradient)
        - test gradient (taps past/future)
        - test inplace map
    '''
......
......@@ -343,12 +343,29 @@ class CSM(gof.Op):
"""
        data = tensor.as_tensor_variable(data)

        # Note that we use `view(numpy.int32)` in addition to providing the
        # 'int32' dtype to `numpy.asarray`. This is because on some computers
        # (e.g. a Windows 32 bits machine), we can have the following assert
        # fail:
        #   x = numpy.array([0], dtype=numpy.intc)
        #   y = numpy.asarray(x, dtype=numpy.int32)
        #   assert y.dtype.num == numpy.dtype(numpy.int32).num
        # while the assert does *not* fail when replacing the second line by:
        #   y = numpy.asarray(x, dtype='int32').view(numpy.int32)
        # This is a known defect in Numpy. For more information see ticket
        # http://projects.scipy.org/numpy/ticket/870
        # Note also that it is important to keep "dtype='int32'" when calling
        # `numpy.asarray`. This is because `view` is only some kind of cast to
        # the exact data type we want to use. If a conversion is required (e.g.
        # from int64 to int32), it must be done in the call to `numpy.asarray`.
        if not isinstance(indices, tensor.TensorVariable):
            indices = numpy.asarray(indices, dtype='int32')
            indices = numpy.asarray(indices, dtype='int32').view(numpy.int32)
        if not isinstance(indptr, tensor.TensorVariable):
            indptr = numpy.asarray(indptr, dtype='int32')
            indptr = numpy.asarray(indptr, dtype='int32').view(numpy.int32)
        if not isinstance(shape, tensor.TensorVariable):
            shape = numpy.asarray(shape, dtype='int32')
            shape = numpy.asarray(shape, dtype='int32').view(numpy.int32)
        indices = tensor.as_tensor_variable(indices)
        indptr = tensor.as_tensor_variable(indptr)
        shape = tensor.as_tensor_variable(shape)
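
        # The workaround is easy to check in isolation; a minimal sketch (on
        # unaffected platforms both forms agree, so the assert below only
        # discriminates on the 32-bit builds described above):
        #     x = numpy.array([0, 1, 2], dtype=numpy.intc)
        #     y = numpy.asarray(x, dtype='int32').view(numpy.int32)
        #     assert y.dtype.num == numpy.dtype(numpy.int32).num
        #     assert (y == x).all()   # values are unchanged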
......
......@@ -169,15 +169,17 @@ class test_structureddot(unittest.TestCase):
        # iterate for a few different random graph patterns
        for i in range(10):
            spmat = sp.csc_matrix((4, 6), dtype=sparse_dtype)
            for i in range(5):
            for k in range(5):
                # set non-zeros in random locations (row x, col y)
                x = numpy.floor(numpy.random.rand()*spmat.shape[0])
                y = numpy.floor(numpy.random.rand()*spmat.shape[1])
                spmat[x, y] = numpy.random.rand()*10
            spmat = sp.csc_matrix(spmat)

            kerns = tensor.Tensor(broadcastable=[False],
                                  dtype=sparse_dtype)('kerns')
            images = tensor.Tensor(broadcastable=[False, False],
                                   dtype=dense_dtype)('images')

            output_dtype = theano.scalar.upcast(sparse_dtype, dense_dtype)
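
            # upcast presumably follows numpy's promotion rules, e.g.:
            #     theano.scalar.upcast('float32', 'float64')  # -> 'float64'
            #     theano.scalar.upcast('int32', 'float32')    # -> 'float64'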
##
......@@ -186,7 +188,8 @@ class test_structureddot(unittest.TestCase):
        # build symbolic theano graph
        def buildgraphCSC(kerns, images):
            csc = CSC(kerns, spmat.indices[:spmat.size],
                      spmat.indptr, spmat.shape)
            assert csc.type.dtype == sparse_dtype
            rval = structured_dot(csc, images.T)
            assert rval.type.dtype == output_dtype
......@@ -197,8 +200,12 @@ class test_structureddot(unittest.TestCase):
        # compute theano outputs
        kernvals = spmat.data[:spmat.size]
        imvals = 1.0 + 1.0 * numpy.array(
                numpy.arange(bsize*spmat.shape[1]).reshape(
                    bsize, spmat.shape[1]), dtype=dense_dtype)
        #print('dense_dtype=%s' % dense_dtype)
        #print('sparse_dtype=%s' % sparse_dtype)
        #print('i=%s' % i)
        print 'kerntype', str(kernvals.dtype), kernvals.dtype.num
        outvals = f(kernvals, imvals)
        print 'YAY'
......@@ -210,9 +217,10 @@ class test_structureddot(unittest.TestCase):
        assert _is_dense(c)
        assert str(outvals.dtype) == output_dtype
        assert numpy.all(numpy.abs(outvals -
                numpy.array(c, dtype=output_dtype)) < 1e-4)

        if (sparse_dtype.startswith('float') and
                dense_dtype.startswith('float')):
            utt.verify_grad(buildgraphCSC,
                            [kernvals, imvals])
......
......@@ -12,6 +12,7 @@ import blas
import xlogx
import raw_random, randomstreams
import shared_randomstreams
from randomstreams import RandomStreams
......