-
Notifications
You must be signed in to change notification settings - Fork 7
/
CCA.py
97 lines (77 loc) · 3.5 KB
/
CCA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# -*- coding: utf-8 -*-
import torch
import scipy.linalg as linalg
class CCA:
    """
    Multi-view Canonical Correlation Analysis solved as a generalized
    eigenvalue problem ``S w = lambda (S_D + regularization * I) w``.

    dimension : dimension of the common CCA space (number of kept
        eigenvectors and eigenvalues)
    regularization : added to the diagonal of the block-diagonal covariance
        matrix in order to regularize the problem
    power : the power to which the kept eigenvalues are raised in the
        output D

    After ``solve``:
        Ws : tuple with one projection matrix per view, each of shape
            (view dimension, number of kept eigenvectors)
        D : diagonal matrix of the kept eigenvalues raised to ``power``
    """

    def __init__(self, dimension, regularization=1, power=-1):
        super().__init__()
        self.dimension = dimension
        self.regularization = regularization
        self.power = power
        self.features_list = []
        self.dims = []
        self.Ws = None
        self.D = None

    def solve(self, features_list):
        """
        Fit the CCA projections on the given views.

        # Inputs:
        features_list : sequence of 2-D tensors, one per view. Column counts
            may differ between views; the products formed in
            ``computeCovMatrix`` require matching row counts.
        # Outputs:
        None; the result is stored in ``self.Ws`` and ``self.D``.
        # Raises:
        ValueError if ``features_list`` is empty.
        """
        if len(features_list) == 0:
            raise ValueError("features_list must contain at least one view")
        self.features_list = features_list
        self.dims = [f.size(1) for f in self.features_list]
        S, S_D = self.computeCovMatrix()
        self.findMatrixAndEig(S, S_D)

    def computeCovMatrix(self):
        """
        Build the covariance matrices from ``self.features_list`` and
        ``self.dims``.

        # Outputs:
        S : the covariance matrix composed of all pairs of covariance
            matrices between the different views,
        S_D : the block-diagonal matrix composed of the self-covariance
            matrices for each view

        Both outputs use the dtype and device of the first view (the default
        floating dtype is used when the views are not floating point).
        """
        # Integer block boundaries: view i occupies [offsets[i], offsets[i+1]).
        offsets = [0]
        for width in self.dims:
            offsets.append(offsets[-1] + width)
        total = offsets[-1]

        # Follow the input's dtype/device so that float64 or non-CPU features
        # are neither silently downcast nor rejected on assignment.
        dtype, device = torch.get_default_dtype(), None
        if len(self.features_list) > 0:
            reference = self.features_list[0]
            device = reference.device
            if reference.is_floating_point():
                dtype = reference.dtype

        S = torch.zeros((total, total), dtype=dtype, device=device)
        S_D = torch.zeros_like(S)

        for i, view_i in enumerate(self.features_list):
            rows = slice(offsets[i], offsets[i + 1])
            # Only the strictly lower block triangle is filled here; the upper
            # one is obtained by transposition below.
            for j in range(i):
                cols = slice(offsets[j], offsets[j + 1])
                S[rows, cols] = view_i.t() @ self.features_list[j]
            S_D[rows, rows] = view_i.t() @ view_i

        S = S + S.t() + S_D
        return S, S_D

    def findMatrixAndEig(self, S, S_D):
        """
        Solve the regularized generalized eigenproblem and store the result
        in ``self.Ws`` and ``self.D``.

        # Inputs:
        S : the global covariance matrix between all pairs of "views"
        S_D : the block-diagonal matrix composed of the self-covariance
            matrices for each view
        # Outputs (stored on the instance):
        Ws : the matrix composed of the d eigenvectors as columns, split
            row-wise into one block per view according to ``self.dims``
        D : diagonal matrix given by the p-th power of the d corresponding
            eigenvalues
        """
        # REGULARIZE
        I_g = self.regularization * torch.eye(
            S.size(0), dtype=S_D.dtype, device=S_D.device
        )
        S_D = S_D + I_g

        # FIND EIGENVECTORS (SciPy works on CPU NumPy arrays)
        eigenValues, eigenVectors = linalg.eig(
            S.detach().cpu().numpy(), S_D.detach().cpu().numpy()
        )
        # Only the real parts of the solver output are kept.
        eigenValues = torch.from_numpy(eigenValues.real)
        eigenVectors = torch.from_numpy(eigenVectors.real)
        if S.is_floating_point():
            # Bring the results back to the dtype/device of the inputs so
            # they can be multiplied with the features later on.
            eigenValues = eigenValues.to(device=S.device, dtype=S.dtype)
            eigenVectors = eigenVectors.to(device=S.device, dtype=S.dtype)

        # GET THE INDICES OF THE D LARGEST EIGENVALUES
        idx = eigenValues.argsort(descending=True)[:self.dimension]

        # BUILD W AND D
        self.D = torch.diag(eigenValues[idx] ** self.power)
        self.Ws = eigenVectors[:, idx].split(self.dims)

    def getSimilarities(self, feature_1, dim_1, dim_2):
        """
        Rank the stored items of view ``dim_2`` by cosine similarity to the
        query features, after projection into the common CCA space.

        # Inputs:
        feature_1 : query features expressed in view ``dim_1``; either a
            single vector or a matrix with one query per row
        dim_1 : index (into the list given to ``solve``) of the query view
        dim_2 : index of the view whose stored features are ranked
        # Outputs:
        The result of ``Tensor.sort(descending=True)`` on the similarities:
        a (values, indices) pair sorted along the last dimension, i.e.
        independently for every query row.
        """
        W_1 = self.Ws[dim_1]
        W_2 = self.Ws[dim_2]
        scaled_proj_1 = feature_1 @ W_1 @ self.D
        scaled_proj_2 = self.features_list[dim_2] @ W_2 @ self.D
        dots = scaled_proj_1 @ scaled_proj_2.t()
        # Normalize every query by its OWN norm. A norm over the whole query
        # matrix would make each row's score depend on the other rows of the
        # batch. A zero-norm projection still divides by zero, as before.
        query_norms = scaled_proj_1.norm(dim=-1, keepdim=True)
        prods = query_norms * scaled_proj_2.norm(dim=1)
        similarities = dots / prods
        return similarities.sort(descending=True)