提交 618a8e25 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

merge

import sys
sys.path.insert(0, '..')
import theano
from theano import tensor as T
from theano.tensor import nnet
from theano.compile import module
from theano import printing, pprint
from theano import compile
import numpy as N
class LogisticRegressionN(module.FancyModule):
    """Multi-class logistic regression expressed as a Theano Module.

    The symbolic graph (weights ``w``, bias ``b``, learning rate ``lr``)
    is built in ``__init__``; concrete numeric values are allocated by
    ``InstanceType.initialize`` when ``make`` is called.
    """

    class InstanceType(module.FancyModuleInstance):
        def initialize(self, n_in, n_out, seed=None):
            """Allocate numeric parameters for a made instance.

            self.component is the LogisticRegressionN instance that
            built this guy.
            """
            rng = N.random.RandomState(seed)
            self.w = rng.randn(n_in, n_out)
            self.b = rng.randn(n_out)
            self.lr = 0.01
            self.__hide__ = ['params']

        def __eq__(self, other):
            # NOTE(review): raising from __eq__ (instead of returning
            # NotImplemented) is unusual but relied on by the tests.
            if (not isinstance(other.component, LogisticRegressionN)
                    and not isinstance(other.component, LogisticRegression2)):
                raise NotImplementedError
            # Compare the numeric members element-wise with a tolerance.
            return bool((N.abs(self.w - other.w) < 1e-8).all()
                        and (N.abs(self.b - other.b) < 1e-8).all()
                        and self.lr == other.lr)

        def __hash__(self):
            # Instances are mutable; hashing is deliberately unsupported.
            raise NotImplementedError

    def __init__(self, x=None, targ=None):
        super(LogisticRegressionN, self).__init__()  # boilerplate
        self.x = x if x is not None else T.matrix()
        self.targ = targ if targ is not None else T.lvector()

        self.w = module.Member(T.matrix())  # automatically named
        self.b = module.Member(T.vector())  # automatically named
        # External interface to change the learning rate; also an
        # implicit input to any Method built from this module.
        self.lr = module.Member(T.dscalar())

        self.params = [self.w, self.b]

        xent, y = nnet.crossentropy_softmax_1hot(
            T.dot(self.x, self.w) + self.b, self.targ)
        xent = T.sum(xent)

        self.y = y
        self.xent = xent

        gparams = T.grad(xent, self.params)
        # One SGD step: returns the cost and updates every parameter.
        self.update = module.Method(
            [self.x, self.targ], xent,
            updates=dict((p, p - self.lr * g)
                         for p, g in zip(self.params, gparams)))
        # Prediction: index of the largest pre-softmax activation.
        self.apply = module.Method(
            [self.x], T.argmax(T.dot(self.x, self.w) + self.b, axis=1))
class LogisticRegression2(module.FancyModule):
    """Binary logistic regression (sigmoid output, column target) as a
    Theano Module; see LogisticRegressionN for the multi-class variant."""

    class InstanceType(module.FancyModuleInstance):
        def initialize(self, n_in, seed=1827):
            """Allocate numeric parameters for a made instance.

            self.component is the LogisticRegression2 instance that
            built this guy.
            """
            rng = N.random.RandomState(seed)
            self.w = rng.randn(n_in, 1)
            self.b = rng.randn(1)
            self.lr = 0.01
            self.__hide__ = ['params']

        def __eq__(self, other):
            # NOTE(review): raising from __eq__ (instead of returning
            # NotImplemented) is unusual but relied on by the tests.
            if (not isinstance(other.component, LogisticRegressionN)
                    and not isinstance(other.component, LogisticRegression2)):
                raise NotImplementedError
            # Compare the numeric members element-wise with a tolerance.
            return bool((N.abs(self.w - other.w) < 1e-8).all()
                        and (N.abs(self.b - other.b) < 1e-8).all()
                        and self.lr == other.lr)

        def __hash__(self):
            # Instances are mutable; hashing is deliberately unsupported.
            raise NotImplementedError

    def __init__(self, x=None, targ=None):
        super(LogisticRegression2, self).__init__()  # boilerplate
        self.x = x if x is not None else T.matrix()
        self.targ = targ if targ is not None else T.lcol()

        self.w = module.Member(T.dmatrix())  # automatically named
        self.b = module.Member(T.dvector())  # automatically named
        # External interface to change the learning rate; also an
        # implicit input to any Method built from this module.
        self.lr = module.Member(T.dscalar())

        self.params = [self.w, self.b]

        # NOTE(review): `b` is never used in the prediction `y`, yet it
        # is in `params` (so it gets a gradient) and `apply` adds it --
        # looks like `y` should be sigmoid(dot(x, w) + b); verify before
        # changing, since the tests compare trained members.
        y = nnet.sigmoid(T.dot(self.x, self.w))
        xent_elem = -self.targ * T.log(y) - (1.0 - self.targ) * T.log(1.0 - y)
        xent = T.sum(xent_elem)

        self.y = y
        self.xent_elem = xent_elem
        self.xent = xent

        gparams = T.grad(xent, self.params)
        # One SGD step: returns the cost and updates every parameter.
        self.update = module.Method(
            [self.x, self.targ], xent,
            updates=dict((p, p - self.lr * g)
                         for p, g in zip(self.params, gparams)))
        # Prediction: argmax over the single linear output column.
        self.apply = module.Method(
            [self.x], T.argmax(T.dot(self.x, self.w) + self.b, axis=1))
def main():
    """Train LogisticRegressionN on random data and print diagnostics.

    The `if 0:` branch exercises LogisticRegression2 and is disabled by
    default, mirroring the original demo script.
    """
    pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx,
                  printing.FunctionPrinter('xsoftmaxdx'))
    pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias,
                  printing.FunctionPrinter('nll', 'softmax', 'argmax'))

    if 1:
        lrc = LogisticRegressionN()
        print('================')
        print(lrc.update.pretty())
        print('================')
        print(lrc.update.pretty(mode=theano.Mode('py', 'fast_run')))
        print('================')

        lr = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run'))
        data_x = N.random.randn(5, 10)
        data_y = (N.random.randn(5) > 0)

        for i in range(10000):
            lr.lr = 0.02
            xe = lr.update(data_x, data_y)
            if i % 100 == 0:
                print(i, xe)

        print('')
        print('TRAINED MODEL:')
        print(lr)

    if 0:
        lrc = LogisticRegression2()
        lr = lrc.make(10, mode=theano.Mode('c|py', 'merge'))
        data_x = N.random.randn(5, 10)
        # Binary target must be a column for LogisticRegression2.
        data_y = (N.random.randn(5, 1) > 0)

        for i in range(10000):
            xe = lr.update(data_x, data_y)
            if i % 100 == 0:
                print(i, xe)

        print('')
        print('TRAINED MODEL:')
        print(lr)
# Run the demo when executed as a script.
if __name__ == '__main__':
    main()
#!/usr/bin/env python
#
# UNIT TEST
#
import unittest
import numpy
from theano import gof
from theano.gradient import *
from theano import gradient
import theano
import sys
from theano import tensor as T
from theano.tensor import nnet
from theano.compile import module
from theano import printing, pprint
from theano import compile
import numpy as N
class test_logistic_regression_example(unittest.TestCase):
    """Check that the logistic regression example runs, and that both
    modules produce identical trained parameters under several
    compilation modes (same seed everywhere)."""

    def test_example_main(self):
        """Test that the file executes without trouble."""
        import os
        sys.path.append(os.path.realpath(".."))
        import logistic_regression
        logistic_regression.main()

    def test_example_moduleN(self):
        """Test that the LogisticRegressionN module executes the same
        with different modes."""
        import os
        sys.path.append(os.path.realpath(".."))
        import logistic_regression

        pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx,
                      printing.FunctionPrinter('xsoftmaxdx'))
        pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias,
                      printing.FunctionPrinter('nll', 'softmax', 'argmax'))

        lrc = logistic_regression.LogisticRegressionN()
        # Same seed everywhere so the trained members can be compared.
        lr0 = lrc.make(10, 2, seed=1827)
        lr1 = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run'), seed=1827)
        lr2 = lrc.make(10, 2, mode=theano.Mode('py', 'fast_run'), seed=1827)
        lr3 = lrc.make(10, 2, mode=theano.Mode('py', 'merge'), seed=1827)
        lr4 = lrc.make(10, 2, mode=compile.FAST_RUN.excluding('fast_run'),
                       seed=1827)

        data_x = N.random.randn(5, 10)
        data_y = (N.random.randn(5) > 0)

        def train(lr):
            for i in range(1000):
                lr.lr = 0.02
                lr.update(data_x, data_y)

        for lr in (lr0, lr1, lr2, lr3, lr4):
            train(lr)

        assert lr0 == lr1
        assert lr0 == lr2
        assert lr0 == lr3
        assert lr0 == lr4

    def test_example_module2(self):
        """Test that the LogisticRegression2 module executes the same
        with different modes."""
        import os
        sys.path.append(os.path.realpath(".."))
        import logistic_regression

        lrc = logistic_regression.LogisticRegression2()  # TODO: test 2==N
        # NOTE(review): 1827 is passed positionally here but as seed=1827
        # below -- presumably make() forwards extras to
        # initialize(n_in, seed); verify and prefer the keyword form.
        lr0 = lrc.make(10, 1827)
        lr1 = lrc.make(10, mode=theano.Mode('c|py', 'fast_run'), seed=1827)
        lr2 = lrc.make(10, mode=theano.Mode('py', 'fast_run'), seed=1827)
        lr3 = lrc.make(10, mode=theano.Mode('py', 'merge'), seed=1827)
        lr4 = lrc.make(10, mode=compile.FAST_RUN.excluding('fast_run'),
                       seed=1827)

        data_x = N.random.randn(5, 10)
        data_y = (N.random.randn(5) > 0)
        data_y = data_y.reshape((data_y.shape[0], 1))  # needs to be a column

        def train(lr):
            for i in range(1000):
                lr.lr = 0.02
                lr.update(data_x, data_y)

        for lr in (lr0, lr1, lr2, lr3, lr4):
            train(lr)

        assert lr0 == lr1
        assert lr0 == lr2
        assert lr0 == lr3
        assert lr0 == lr4
if __name__ == '__main__':
    # Use Theano's test runner so this file can be run standalone.
    from theano.tests import main
    main(__file__)
差异被折叠。
...@@ -4,6 +4,7 @@ from theano.gof.link import WrapLinkerMany ...@@ -4,6 +4,7 @@ from theano.gof.link import WrapLinkerMany
from theano.gof.cutils import run_cthunk from theano.gof.cutils import run_cthunk
from theano.compile.mode import Mode, register_mode, predefined_modes, predefined_linkers, predefined_optimizers, default_linker, default_optimizer from theano.compile.mode import Mode, register_mode, predefined_modes, predefined_linkers, predefined_optimizers, default_linker, default_optimizer
from theano.gof.cc import OpWiseCLinker from theano.gof.cc import OpWiseCLinker
from theano.gof.python25 import any
from theano import gof from theano import gof
import theano.config as config import theano.config as config
......
差异被折叠。
import numpy
import theano
import theano.sandbox.scan
# generator network, only one output, type scalar; no sequence or
# non-sequence arguments
def test_1():
    """Scan as a pure generator: x_t = 2 * x_{t-1}, driven only by
    the initial state and a step count."""
    def f_pow2(x_tm1):
        return (2 * x_tm1, {})

    s = theano.tensor.dvector()
    n_steps = theano.tensor.dscalar()
    Y = theano.sandbox.scan.scan(f_pow2, [], s, [], n_steps=n_steps)
    f1 = theano.function([s, n_steps], Y)
    # NOTE(review): numpy.any passes if a *single* element matches;
    # numpy.all (or allclose) is probably what was intended here.
    assert numpy.any(f1([1], 3) == [2, 4, 8])
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars
def test_2():
    """Simple RNN with weights passed as explicit non-sequence args."""
    def f_rnn(u_t, x_tm1, W_in, W):
        return (u_t * W_in + x_tm1 * W, {})

    u = theano.tensor.dvector()
    x0 = theano.tensor.dvector()
    W_in = theano.tensor.dscalar()
    W = theano.tensor.dscalar()
    Y = theano.sandbox.scan.scan(f_rnn, u, x0, [W_in, W])
    f2 = theano.function([u, x0, W_in, W], Y)
    # NOTE(review): numpy.any is a weak check -- see test_1.
    assert numpy.any(f2([1, 2, 3, 4], [1], .1, 1)
                     == numpy.array([1.1, 1.3, 1.6, 2.]))
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables
def test_3():
    """Same RNN as test_2, but weights captured as shared variables."""
    u = theano.tensor.dvector()
    x0 = theano.tensor.dvector()
    W_in = theano.shared(.1, name='w_in')
    W = theano.shared(1., name='w')

    def f_rnn_shared(u_t, x_tm1):
        return (u_t * W_in + x_tm1 * W, {})

    Y = theano.sandbox.scan.scan(f_rnn_shared, u, x0, [])
    f3 = theano.function([u, x0], Y)
    # NOTE(review): numpy.any is a weak check -- see test_1.
    assert numpy.any(f3([1, 2, 3, 4], [1])
                     == numpy.array([1.1, 1.3, 1.6, 2.]))
# some rnn with multiple outputs and multiple inputs; other dimension
# instead of scalars/vectors
def test_4():
    """RNN with two sequences, two states and matrix/vector weights."""
    W_in2 = theano.shared(numpy.array([1., 2.]), name='win2')
    W = theano.shared(numpy.array([[2., 1.], [1., 1.]]), name='w')
    W_out = theano.shared(numpy.array([.5, 1.]), name='wout')
    W_in1 = theano.tensor.dmatrix('win')
    u1 = theano.tensor.dmatrix('u1')
    u2 = theano.tensor.dvector('u2')
    x0 = theano.tensor.dmatrix('x0')
    y0 = theano.tensor.dvector('y0')

    # Why dot doesn't work with scalars!??
    # Why * doesn't support SharedVariable and TensorVariable
    def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
        return ({}, [theano.dot(u1_t, W_in1) + u2_t * W_in2 +
                     theano.dot(x_tm1, W),
                     theano.dot(x_tm1, W_out)])

    Y = theano.sandbox.scan.scan(f_rnn_cmpl, [u1, u2], [x0, y0], W_in1)
    f4 = theano.function([u1, u2, x0, y0, W_in1], Y)
    (x, y) = f4(numpy.array([[1, 2], [1, 2], [1, 2]]),
                numpy.array([1, 2, 3]),
                numpy.array([[0, 0]]),
                numpy.array([1]),
                numpy.array([[1, 1], [1, 1]]))
    assert numpy.all(x == numpy.array([[4., 5.], [18., 16.], [58., 43.]]))
    assert numpy.all(y == numpy.array([0., 7., 25.]))
# basic ESN using updates
def test_5():
    """Echo-state network where the reservoir state lives in a shared
    variable driven through the updates dictionary."""
    W_in = theano.shared(numpy.array([1., 1.]), name='win')
    W = theano.shared(numpy.array([[.1, 0.], [.0, .1]]), name='w')
    W_out = theano.shared(numpy.array([.5, 1.]), name='wout')
    u = theano.tensor.dvector('u')
    x = theano.shared(numpy.array([0., 0.]), 'x')
    y0 = theano.tensor.dvector('y0')

    def f_ESN(u_t):
        return (theano.dot(x, W_out),
                {x: W_in * u_t + theano.dot(x, W)})

    Y = theano.sandbox.scan.scan(f_ESN, u, y0, [], outputs_taps={0: []})
    f5 = theano.function([u, y0], Y)
    # The original asserted on a raw multi-element `==` comparison,
    # which raises ValueError; compare the arrays explicitly instead.
    assert numpy.allclose(f5(numpy.array([1, 2, 3]), numpy.array([0])),
                          numpy.array([0., 1.4, 3.15]))
# basic ESN using updates; moving backwards
def test_6():
    """Same echo-state network as test_5, iterating the sequence in
    reverse via go_backwards."""
    W_in = theano.shared(numpy.array([1., 1.]), name='win')
    W = theano.shared(numpy.array([[.1, 0.], [.0, .1]]), name='w')
    W_out = theano.shared(numpy.array([.5, 1.]), name='wout')
    u = theano.tensor.dvector('u')
    x = theano.shared(numpy.array([0., 0.]), 'x')
    y0 = theano.tensor.dvector('y0')

    def f_ESN(u_t):
        return (theano.dot(x, W_out),
                {x: W_in * u_t + theano.dot(x, W)})

    Y = theano.sandbox.scan.scan(f_ESN, u, y0, [], outputs_taps={0: []},
                                 go_backwards=True)
    f6 = theano.function([u, y0], Y)
    # The original asserted on a raw multi-element `==` comparison,
    # which raises ValueError; compare the arrays explicitly instead.
    assert numpy.allclose(f6(numpy.array([1, 2, 3]), numpy.array([0])),
                          numpy.array([0., 4.5, 3.45]))
'''
TO TEST:
 - test taps (for sequences and outputs )
 - test gradient (one output)
 - test gradient (multiple outputs)
 - test gradient (go_bacwards)
 - test gradient (multiple outputs / some uncomputable )
 - test gradient (truncate_gradient)
 - test gradient (force_gradient)
 - test inplace map
'''

if __name__ == '__main__':
    # Run every scan test in order when executed as a script.
    test_1()
    test_2()
    test_3()
    test_4()
    test_5()
    test_6()
...@@ -13,29 +13,24 @@ def info(*msg): ...@@ -13,29 +13,24 @@ def info(*msg):
_logger.info('INFO theano.scan: '+' '.join(msg)) _logger.info('INFO theano.scan: '+' '.join(msg))
# Hashing a list; list used by scan are list of numbers, therefore a list # Hashing a dictionary or a list or a tuple or any type that is hashable with
# can be hashed by hashing all elements in the list # the hash() function
def hash_list(list): def hash_listsDictsTuples(x):
hash_value = 0 hash_value = 0
for v in list: if type(x) == dict :
hash_value ^= hash(v) for k,v in x.iteritems():
return hash_value hash_value ^= hash_listsDictsTuples(k)
hash_value ^= hash_listsDictsTuples(v)
elif type(x) in (list,tuple):
# Hashing a dictionary; the dictionary used by scan has as keys numbers and for v in x:
# as values either numbers or list of numbers hash_value ^= hash_listsDictsTuples(v)
def hash_dict(dictionary):
hash_value = 0
for k,v in dictionary.iteritems():
# hash key
hash_value ^= hash(k)
if type(v) in (list,tuple):
hash_value ^= hash_list(v)
else: else:
hash_value ^= hash(v) try:
hash_value ^= hash(x)
except:
pass
return hash_value return hash_value
def scan(fn, sequences, initial_states, non_sequences, inplace_map={}, def scan(fn, sequences, initial_states, non_sequences, inplace_map={},
sequences_taps={}, outputs_taps = {}, sequences_taps={}, outputs_taps = {},
n_steps = theano.tensor.zero(), force_gradient = False, n_steps = theano.tensor.zero(), force_gradient = False,
...@@ -174,7 +169,8 @@ class Scan(theano.Op): ...@@ -174,7 +169,8 @@ class Scan(theano.Op):
self.destroy_map = {} self.destroy_map = {}
if inplace: if inplace:
self.destroy_map = inplace_map for i in inplace_map.keys():
self.destroy_map.update({i: [inplace_map[i]] } )
self.seqs_taps = seqs_taps self.seqs_taps = seqs_taps
self.outs_taps = outs_taps self.outs_taps = outs_taps
...@@ -194,11 +190,23 @@ class Scan(theano.Op): ...@@ -194,11 +190,23 @@ class Scan(theano.Op):
updates = updates, mode = mode) updates = updates, mode = mode)
g_y = [outputs[0].type()] g_y = [outputs[0].type()]
g_args = theano.tensor.grad(outputs[0],inputs, g_cost = g_y[-1])
def compute_gradient(y, g_y):
gmap = theano.gradient.grad_sources_inputs( \
[(y,g_y)], theano.gof.graph.inputs([y]), False)
def zero(p):
return theano.tensor.TensorConstant(theano.tensor.TensorType(\
dtype=p.type.dtype, broadcastable=[]),
numpy.asarray(0,dtype = p.type.dtype))
return [gmap.get(p, zero(p)) for p in inputs]
g_args = compute_gradient( outputs[0], g_y[-1])
# for all outputs compute gradients and then sum them up # for all outputs compute gradients and then sum them up
for y in outputs[1:]: for y in outputs[1:]:
g_y += [y.type()] g_y += [y.type()]
g_args_y = theano.tensor.grad(y,inputs, g_cost=g_y[-1]) g_args_y = compute_gradient( y,g_y[-1])
for i in xrange(len(g_args)): for i in xrange(len(g_args)):
g_args[i] += g_args_y[i] g_args[i] += g_args_y[i]
...@@ -245,6 +253,7 @@ class Scan(theano.Op): ...@@ -245,6 +253,7 @@ class Scan(theano.Op):
(self.n_args == other.n_args) (self.n_args == other.n_args)
return rval return rval
def __hash__(self): def __hash__(self):
return hash(type(self)) ^ \ return hash(type(self)) ^ \
hash(self.n_seqs) ^ \ hash(self.n_seqs) ^ \
...@@ -254,13 +263,13 @@ class Scan(theano.Op): ...@@ -254,13 +263,13 @@ class Scan(theano.Op):
hash(self.go_backwards) ^\ hash(self.go_backwards) ^\
hash(self.truncate_gradient) ^\ hash(self.truncate_gradient) ^\
hash(self.n_args) ^ \ hash(self.n_args) ^ \
hash_list(self.outputs) ^ \ hash_listsDictsTuples(self.outputs) ^ \
hash_list(self.inputs) ^ \ hash_listsDictsTuples(self.inputs) ^ \
hash_list(self.g_ins) ^ \ hash_listsDictsTuples(self.g_ins) ^ \
hash_list(self.g_outs) ^ \ hash_listsDictsTuples(self.g_outs) ^ \
hash_dict(self.seqs_taps) ^\ hash_listsDictsTuples(self.seqs_taps) ^\
hash_dict(self.outs_taps) ^\ hash_listsDictsTuples(self.outs_taps) ^\
hash_dict(self.updates) hash_listsDictsTuples(self.updates)
......
...@@ -121,7 +121,12 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll ...@@ -121,7 +121,12 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
hidval1=outval.copy() hidval1=outval.copy()
# ConvOp # ConvOp
conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern, unroll_patch=unroll_patch)(inputs4, kerns4) if unroll_patch:
conv_op = ConvOp(dx=ss[0],dy=ss[1], output_mode=conv_mode,
unroll_patch=unroll_patch)(inputs4, kerns4)
else:
conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode,
unroll_batch=unroll_batch, unroll_kern=unroll_kern, unroll_patch=unroll_patch)(inputs4, kerns4)
l1shp=N.hstack((nkern, l1shp=N.hstack((nkern,
getFilterOutShp(imshp, kshp, ss, conv_mode))) getFilterOutShp(imshp, kshp, ss, conv_mode)))
propup2 = function([inputs4, kerns4], conv_op) propup2 = function([inputs4, kerns4], conv_op)
...@@ -328,7 +333,7 @@ class TestConvOp(unittest.TestCase): ...@@ -328,7 +333,7 @@ class TestConvOp(unittest.TestCase):
ssizess = [[(1,1),(1,2)],[(1,1),(2,2)]] ssizess = [[(1,1),(1,2)],[(1,1),(2,2)]]
convmodes = ['valid','full'] convmodes = ['valid','full']
do_convolve2=True do_convolve2=True
unroll = [(0,0,False),(0,0,True),(1,1,False),(2,2,False),(3,2,False)]#(batch,kern,patch) unroll = [(0,0,True),(0,0,False),(1,1,False),(2,2,False),(3,2,False)]#(batch,kern,patch)
do_speed_test = False do_speed_test = False
# TODO: this version show a bug that was fixed # TODO: this version show a bug that was fixed
...@@ -515,21 +520,30 @@ class TestConvOp(unittest.TestCase): ...@@ -515,21 +520,30 @@ class TestConvOp(unittest.TestCase):
for un_b,un_k, un_p in unroll: for un_b,un_k, un_p in unroll:
for ss in ssizes: for ss in ssizes:
print 'test_ConvOpGrad' print 'test_ConvOpGrad'
print 'mode type:', mode, typ # print 'mode:',mode,'type:', typ
print 'imshp:', imshp # print 'imshp:', imshp,
print 'kshp:', kshp # print 'kshp:', kshp
print 'un_b:', un_b # print 'un_b:', un_b,
print 'un_k:', un_k # print 'un_k:', un_k,
print 'ss:', ss # print 'un_p:', un_p
print 'bsize:', bsize # print 'ss:', ss,
print 'nkern:', 4 # print 'bsize:', bsize,
# print 'nkern:', nkern
def test_i(imgs): def test_i(imgs):
if un_p and ss[0]==1 and ss[1]==1:
convop = ConvOp(dx=ss[0], dy=ss[1],
output_mode=mode, unroll_patch=un_p)
else:
convop = ConvOp(imshp, kshp, nkern, bsize, ss[0], ss[1], convop = ConvOp(imshp, kshp, nkern, bsize, ss[0], ss[1],
output_mode=mode, unroll_batch=un_b, unroll_kern=un_k, unroll_patch=un_p) output_mode=mode, unroll_batch=un_b, unroll_kern=un_k, unroll_patch=un_p)
return convop(imgs, kernvals) return convop(imgs, kernvals)
def test_k(kerns): def test_k(kerns):
if un_p and ss[0]==1 and ss[1]==1:
convop = ConvOp(dx=ss[0], dy=ss[1],
output_mode=mode, unroll_patch=un_p)
else:
convop = ConvOp(imshp, kshp, nkern, bsize, ss[0], ss[1], convop = ConvOp(imshp, kshp, nkern, bsize, ss[0], ss[1],
output_mode=mode, unroll_batch=un_b, unroll_kern=un_k, unroll_patch=un_p) output_mode=mode, unroll_batch=un_b, unroll_kern=un_k, unroll_patch=un_p)
return convop(imgvals, kerns) return convop(imgvals, kerns)
......
...@@ -91,7 +91,6 @@ class T_Scan(unittest.TestCase): ...@@ -91,7 +91,6 @@ class T_Scan(unittest.TestCase):
utt.seed_rng() utt.seed_rng()
# generator network, only one output , type scalar ; no sequence or # generator network, only one output , type scalar ; no sequence or
# non sequence arguments # non sequence arguments
def test_1(self): def test_1(self):
...@@ -244,8 +243,10 @@ class T_Scan(unittest.TestCase): ...@@ -244,8 +243,10 @@ class T_Scan(unittest.TestCase):
sequences_taps = {0:[-2]}, outputs_taps = {0:[-1,-2]}) sequences_taps = {0:[-2]}, outputs_taps = {0:[-1,-2]})
f7 = theano.function([u,x0], Y) f7 = theano.function([u,x0], Y)
v_u = numpy.asarray([1.,2.,3.,4.])
#print f7([1,2,3,4],[1,2]) v_x0 = numpy.asarray([1.,2.])
out = numpy.asarray([3.1,5.3])
assert (compareArrays( out, f7(v_u, v_x0)))
# simple rnn, one input, one state, weights for each; input/state are # simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past # vectors, weights are scalars; using shared variables and past
...@@ -264,15 +265,45 @@ class T_Scan(unittest.TestCase): ...@@ -264,15 +265,45 @@ class T_Scan(unittest.TestCase):
sequences_taps = {0:[-2,2]}, outputs_taps = {0:[-1,-2]}) sequences_taps = {0:[-2,2]}, outputs_taps = {0:[-1,-2]})
f8 = theano.function([u,x0], Y) f8 = theano.function([u,x0], Y)
v_u = numpy.array([1.,2.,3.,4.,5.,6.])
v_x0 = numpy.array([1.,2.])
out = numpy.array([3.6, 6.4])
assert (compareArrays( out, f8(v_u, v_x0) ) )
'''
# simple rnn ; compute inplace
def test_9(self):
#print f8([1,2,3,4,5,6],[1,2]) u = theano.tensor.dvector()
mu = theano.Param( u, mutable = True)
x0 = theano.tensor.dvector()
W_in = theano.shared(.1)
W = theano.shared(1.)
def f_rnn_shared(u_t, x_tm1):
return (u_t*W_in + x_tm1*W, {})
Y = theano.sandbox.scan.scan(f_rnn_shared, u, x0,[], \
inplace_map={0:0} )
f9 = theano.function([mu,x0], Y , #mode = 'FAST_RUN')
mode = 'DEBUG_MODE')
v_u = numpy.array([1.,2.,3.])
v_x0 = numpy.array([1.])
out = f9(v_u, v_x0)
v_out = numpy.array([1.1,1.3,1.6])
assert (compareArrays(out, v_out))
print v_u
assert (compareArrays(v_u, out))
'''
# test gradient simple network
def test_10(self):
pass
''' '''
TO TEST: TO TEST:
- test taps (for sequences and outputs )
- test gradient (one output) - test gradient (one output)
- test gradient (multiple outputs) - test gradient (multiple outputs)
- test gradient (go_bacwards) - test gradient (go_bacwards)
...@@ -280,7 +311,6 @@ class T_Scan(unittest.TestCase): ...@@ -280,7 +311,6 @@ class T_Scan(unittest.TestCase):
- test gradient (truncate_gradient) - test gradient (truncate_gradient)
- test gradient (force_gradient) - test gradient (force_gradient)
- test_gradient (taps past/future) - test_gradient (taps past/future)
- test inplace map
''' '''
......
...@@ -343,12 +343,29 @@ class CSM(gof.Op): ...@@ -343,12 +343,29 @@ class CSM(gof.Op):
""" """
data = tensor.as_tensor_variable(data) data = tensor.as_tensor_variable(data)
# Note that we use `view(numpy.int32)` in addition to providing the
# 'int32' dtype to `numpy.asarray`. This is because on some computers
# (e.g. a Windows 32 bits machine), we can have the following assert
# fail:
# x = numpy.array([0], dtype=numpy.intc)
# y = numpy.asarray(x, dtype=numpy.int32)
# assert y.dtype.num == numpy.dtype(numpy.int32).num
# while the assert does *not* fail when replacing the second line by:
# y = numpy.asarray(x, dtype='int32').view(numpy.int32)
# This is a known defect in Numpy. For more information see ticket
# http://projects.scipy.org/numpy/ticket/870
# Note also that it is important to keep "dtype='int32'" when calling
# `numpy.asarray`. This is because `view` is only some kind of cast to
# the exact data type we want to use. If a conversion is required (e.g.
# from int64 to int32), it must be done in the call to `numpy.asarray`.
if not isinstance(indices, tensor.TensorVariable): if not isinstance(indices, tensor.TensorVariable):
indices = numpy.asarray(indices, dtype='int32') indices = numpy.asarray(indices, dtype='int32').view(numpy.int32)
if not isinstance(indptr, tensor.TensorVariable): if not isinstance(indptr, tensor.TensorVariable):
indptr = numpy.asarray(indptr, dtype='int32') indptr = numpy.asarray(indptr, dtype='int32').view(numpy.int32)
if not isinstance(shape, tensor.TensorVariable): if not isinstance(shape, tensor.TensorVariable):
shape = numpy.asarray(shape, dtype='int32') shape = numpy.asarray(shape, dtype='int32').view(numpy.int32)
indices = tensor.as_tensor_variable(indices) indices = tensor.as_tensor_variable(indices)
indptr = tensor.as_tensor_variable(indptr) indptr = tensor.as_tensor_variable(indptr)
shape = tensor.as_tensor_variable(shape) shape = tensor.as_tensor_variable(shape)
......
...@@ -169,15 +169,17 @@ class test_structureddot(unittest.TestCase): ...@@ -169,15 +169,17 @@ class test_structureddot(unittest.TestCase):
# iterate for a few different random graph patterns # iterate for a few different random graph patterns
for i in range(10): for i in range(10):
spmat = sp.csc_matrix((4,6), dtype=sparse_dtype) spmat = sp.csc_matrix((4,6), dtype=sparse_dtype)
for i in range(5): for k in range(5):
# set non-zeros in random locations (row x, col y) # set non-zeros in random locations (row x, col y)
x = numpy.floor(numpy.random.rand()*spmat.shape[0]) x = numpy.floor(numpy.random.rand()*spmat.shape[0])
y = numpy.floor(numpy.random.rand()*spmat.shape[1]) y = numpy.floor(numpy.random.rand()*spmat.shape[1])
spmat[x,y] = numpy.random.rand()*10 spmat[x,y] = numpy.random.rand()*10
spmat = sp.csc_matrix(spmat) spmat = sp.csc_matrix(spmat)
kerns = tensor.Tensor(broadcastable=[False], dtype=sparse_dtype)('kerns') kerns = tensor.Tensor(broadcastable=[False],
images = tensor.Tensor(broadcastable=[False, False], dtype=dense_dtype)('images') dtype=sparse_dtype)('kerns')
images = tensor.Tensor(broadcastable=[False, False],
dtype=dense_dtype)('images')
output_dtype = theano.scalar.upcast(sparse_dtype, dense_dtype) output_dtype = theano.scalar.upcast(sparse_dtype, dense_dtype)
## ##
...@@ -186,7 +188,8 @@ class test_structureddot(unittest.TestCase): ...@@ -186,7 +188,8 @@ class test_structureddot(unittest.TestCase):
# build symbolic theano graph # build symbolic theano graph
def buildgraphCSC(kerns,images): def buildgraphCSC(kerns,images):
csc = CSC(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape) csc = CSC(kerns, spmat.indices[:spmat.size],
spmat.indptr, spmat.shape)
assert csc.type.dtype == sparse_dtype assert csc.type.dtype == sparse_dtype
rval = structured_dot(csc, images.T) rval = structured_dot(csc, images.T)
assert rval.type.dtype == output_dtype assert rval.type.dtype == output_dtype
...@@ -197,8 +200,12 @@ class test_structureddot(unittest.TestCase): ...@@ -197,8 +200,12 @@ class test_structureddot(unittest.TestCase):
# compute theano outputs # compute theano outputs
kernvals = spmat.data[:spmat.size] kernvals = spmat.data[:spmat.size]
imvals = 1.0 + 1.0 * numpy.array(numpy.arange(bsize*spmat.shape[1]).\ imvals = 1.0 + 1.0 * numpy.array(
numpy.arange(bsize*spmat.shape[1]).\
reshape(bsize,spmat.shape[1]), dtype=dense_dtype) reshape(bsize,spmat.shape[1]), dtype=dense_dtype)
#print('dense_dtype=%s' % dense_dtype)
#print('sparse_dtype=%s' % sparse_dtype)
#print('i=%s' % i)
print 'kerntype', str(kernvals.dtype), kernvals.dtype.num print 'kerntype', str(kernvals.dtype), kernvals.dtype.num
outvals = f(kernvals,imvals) outvals = f(kernvals,imvals)
print 'YAY' print 'YAY'
...@@ -212,7 +219,8 @@ class test_structureddot(unittest.TestCase): ...@@ -212,7 +219,8 @@ class test_structureddot(unittest.TestCase):
assert numpy.all(numpy.abs(outvals - assert numpy.all(numpy.abs(outvals -
numpy.array(c, dtype=output_dtype)) < 1e-4) numpy.array(c, dtype=output_dtype)) < 1e-4)
if sparse_dtype.startswith('float') and dense_dtype.startswith('float'): if (sparse_dtype.startswith('float') and
dense_dtype.startswith('float')):
utt.verify_grad(buildgraphCSC, utt.verify_grad(buildgraphCSC,
[kernvals, imvals]) [kernvals, imvals])
......
...@@ -12,6 +12,7 @@ import blas ...@@ -12,6 +12,7 @@ import blas
import xlogx import xlogx
import raw_random, randomstreams import raw_random, randomstreams
import shared_randomstreams
from randomstreams import \ from randomstreams import \
RandomStreams RandomStreams
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论