# AttentionLayer.py
__author__ = 'jingyuan'
import numpy as np
import theano
import theano.tensor as T
import cPickle
#from theano import pp


def softmask(x, mask):
    # Masked, numerically stable softmax over axis 1: subtract the row max
    # before exponentiating, zero out padded positions, then normalise.
    y = T.exp(x - x.max(axis=1, keepdims=True))
    y = y * mask
    sumx = T.sum(y, axis=1, acc_dtype='float32')
    x = y / sumx.dimshuffle(0, 'x')
    return x


def softmask_content(x):
    # Numerically stable softmax over axis 2 (positions inside each sequence).
    y = T.exp(x - x.max(axis=2, keepdims=True))
    sumx = T.sum(y, axis=2)
    x = y / sumx.dimshuffle(0, 1, 'x')
    return x


class AttentionLayer_Relu(object):
    def __init__(self, rng, uemb_vec, new_Vemb, n_wordin, n_out, mask, name, prefix=None):
        self.inputu = uemb_vec
        self.name = name

        if prefix is None:
            # Fresh parameters: Glorot-style uniform init for the projection
            # matrices, small Gaussian init for the bias and context vectors.
            Wu_values = np.asarray(
                rng.uniform(
                    low=-np.sqrt(6. / (n_wordin + n_out)),
                    high=np.sqrt(6. / (n_wordin + n_out)),
                    size=(n_wordin, n_out)
                ),
                dtype=np.float32
            )
            Wu = theano.shared(value=Wu_values, name="Wu", borrow=True)

            Wv_values = np.asarray(
                rng.uniform(
                    low=-np.sqrt(6. / (n_wordin + n_out)),
                    high=np.sqrt(6. / (n_wordin + n_out)),
                    size=(n_wordin, n_out)
                ),
                dtype=np.float32
            )
            Wv = theano.shared(value=Wv_values, name="Wv", borrow=True)

            b_values = np.asarray(
                rng.normal(scale=0.1, size=(n_out,)),
                dtype=np.float32
            )
            b = theano.shared(value=b_values, name='b', borrow=True)

            c_values = np.asarray(
                rng.normal(scale=0.1, size=(n_out,)),
                dtype=np.float32
            )
            c = theano.shared(value=c_values, name="c", borrow=True)
        else:
            # Reload parameters previously written by save().
            f = open(prefix + name + '.save', 'rb')
            Wu = cPickle.load(f)
            Wv = cPickle.load(f)
            b = cPickle.load(f)
            c = cPickle.load(f)
            f.close()

        self.Wu = Wu
        self.Wv = Wv
        self.b = b
        self.c = c
        if self.name == 'attentionlayer_item':
            # Item-level attention: score each item embedding against the user
            # vector, normalise the scores over items with the padding mask,
            # and return the attention-weighted sum of the item embeddings.
            items_emb = new_Vemb
            attenu = T.dot(self.inputu, self.Wu).dimshuffle(0, 'x', 1)
            atten = T.nnet.relu(T.dot(items_emb, self.Wv) + attenu + self.b)
            atten = T.sum(atten * self.c, axis=2, acc_dtype='float32')
            atten = softmask(atten, mask)
            output = atten.dimshuffle(0, 1, 'x') * items_emb
            self.output = T.sum(output, axis=1, acc_dtype='float32')
        if self.name == 'attentionlayer_cont':
            # Content-level attention: the same scoring applied one axis
            # deeper, with the softmax taken over positions inside each item.
            items_emb = new_Vemb
            attenu = T.dot(self.inputu, self.Wu).dimshuffle(0, 'x', 'x', 1)
            atten = T.nnet.relu(T.dot(items_emb, self.Wv) + attenu + self.b)
            atten = T.sum(atten * self.c, axis=3, acc_dtype='float32')
            atten = softmask_content(atten)
            output = atten.dimshuffle(0, 1, 2, 'x') * items_emb
            self.output = T.sum(output, axis=2, acc_dtype='float32')

        self.params = [self.Wu, self.Wv, self.b, self.c]
        self.atten = atten
        self.name = name
        self.mask = mask
    def save(self, prefix):
        # Serialise the shared parameter variables; a later run can rebuild
        # the layer by passing the same prefix to __init__.
        f = open(prefix + self.name + '.save', 'wb')
        for obj in self.params:
            cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
        f.close()
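

if __name__ == '__main__':
    # Minimal usage sketch, not part of the original file. The shapes below
    # are assumptions read off the graph above: uemb_vec is (batch, n_wordin),
    # new_Vemb is (batch, n_items, n_wordin) for the item-level branch, and
    # mask is (batch, n_items) with 1.0 for real items and 0.0 for padding.
    rng = np.random.RandomState(1234)
    uemb_vec = T.fmatrix('uemb_vec')   # user vector, (batch, n_wordin)
    new_Vemb = T.ftensor3('new_Vemb')  # item embeddings, (batch, n_items, n_wordin)
    mask = T.fmatrix('mask')           # padding mask, (batch, n_items)

    layer = AttentionLayer_Relu(rng, uemb_vec, new_Vemb,
                                n_wordin=100, n_out=50, mask=mask,
                                name='attentionlayer_item')
    f = theano.function([uemb_vec, new_Vemb, mask],
                        [layer.output, layer.atten],
                        allow_input_downcast=True)

    out, weights = f(np.zeros((2, 100), dtype=np.float32),
                     np.zeros((2, 5, 100), dtype=np.float32),
                     np.ones((2, 5), dtype=np.float32))
    print(out.shape)      # (2, 100): attention-weighted sum of item embeddings
    print(weights.shape)  # (2, 5): one normalised weight per item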