utils.py
import torch
import torch.nn as nn
import torch_geometric as pyg
import torch.nn.functional as F
from sklearn import metrics
from sklearn.cluster import KMeans
import community  # python-louvain package, used for calculating modularity

def vGraph_loss(c, recon_c, prior, q):
    '''
    Computes the vGraph loss: the reconstruction cross-entropy plus the
    KL divergence between q(z|c,w) and the prior p(z|w).

    Parameters:
        c: the original node values
        recon_c: the reconstructed node values (logits)
        prior: p(z|w)
        q: q(z|c,w)
    '''
    # The extra normalization is necessary; without it the dimension of c is
    # too large and the reconstruction term dominates the loss.
    BCE_loss = F.cross_entropy(recon_c, c) / c.shape[0]
    # Explicit KL(q || prior): sum over communities, then mean over the batch.
    # Equivalent to F.kl_div(torch.log(prior + 1e-20), q, reduction='batchmean');
    # the epsilons guard against log(0).
    KL_div_loss = torch.sum(q * (torch.log(q + 1e-20) - torch.log(prior + 1e-20)), -1).mean()
    loss = BCE_loss + KL_div_loss
    return loss
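
# A minimal usage sketch of vGraph_loss on random tensors. The edge count,
# node count, and community count below are illustrative assumptions, not
# values taken from this repository.
#
#   num_edges, num_nodes, num_communities = 32, 100, 7
#   c = torch.randint(0, num_nodes, (num_edges,))       # target nodes
#   recon_c = torch.randn(num_edges, num_nodes)         # reconstruction logits
#   prior = F.softmax(torch.randn(num_edges, num_communities), -1)  # p(z|w)
#   q = F.softmax(torch.randn(num_edges, num_communities), -1)      # q(z|c,w)
#   loss = vGraph_loss(c, recon_c, prior, q)
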
def load_checkpoint(ckpt_path, map_location='cpu'):
    '''
    Loads a checkpoint (state) from the specified ckpt_path location.
    '''
    ckpt = torch.load(ckpt_path, map_location=map_location)
    print(' [*] Loading checkpoint from %s succeeded!' % ckpt_path)
    return ckpt

def save_checkpoint(state, save_path):
    '''
    Saves a checkpoint (state) at the specified save_path location.
    '''
    torch.save(state, save_path)
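
# A minimal save/load round trip, assuming a hypothetical model and
# optimizer; the file name below is illustrative, not from this repository.
#
#   model = nn.Linear(10, 7)
#   optimizer = torch.optim.Adam(model.parameters())
#   save_checkpoint({'model': model.state_dict(),
#                    'optimizer': optimizer.state_dict()}, 'vgraph.ckpt')
#   ckpt = load_checkpoint('vgraph.ckpt')
#   model.load_state_dict(ckpt['model'])
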
def similarity_measure(edge_index, w, c, gpu_id):
    '''
    Calculates the coefficient alpha used in the community-smoothness loss,
    i.e. the fraction of shared neighbors between the endpoints of each edge.

    Parameters:
        edge_index: edge matrix of the graph
        w: the starting nodes of the edges
        c: the ending nodes of the edges
        gpu_id: id of the GPU to place alpha on (see cuda below)
    '''
    alpha = torch.zeros(w.shape[0], 1)
    alpha = cuda(alpha, gpu_id)
    for i in range(w.shape[0]):
        l1 = edge_index[1, :][edge_index[0, :] == w[i]].tolist()
        l2 = edge_index[1, :][edge_index[0, :] == c[i]].tolist()
        common_neighbors = len([value for value in l1 if value in l2])
        all_neighbors = len(l1) + len(l2)
        similarity = float(common_neighbors) / all_neighbors
        alpha[i, 0] = similarity
    return alpha
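
# A usage sketch on a toy 4-node cycle; the edge_index below is an
# illustrative example, not data from this repository.
#
#   edge_index = torch.tensor([[0, 1, 2, 3, 1, 2, 3, 0],
#                              [1, 2, 3, 0, 0, 1, 2, 3]])
#   w, c = edge_index[0, :], edge_index[1, :]
#   alpha = similarity_measure(edge_index, w, c, gpu_id=[0])
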
def cuda(xs, gpu_id):
    '''
    Moves a tensor (or a list/tuple of tensors) to the GPU given by gpu_id
    when CUDA is available; otherwise returns the input unchanged.
    '''
    if torch.cuda.is_available():
        if not isinstance(xs, (list, tuple)):
            return xs.cuda(int(gpu_id[0]))
        else:
            return [x.cuda(int(gpu_id[0])) for x in xs]
    return xs
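
# Example: move a tensor (or several) to GPU 0 if available.
#
#   x = cuda(torch.zeros(5, 3), gpu_id=[0])
#   a, b = cuda([torch.ones(2), torch.ones(3)], gpu_id=[0])
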
### This section is still being added to; it contains the accuracy measures
### for the overlapping and non-overlapping subproblems.
# def calculate_nonoverlap_losses(model, dataset, edge_index):
#     '''
#     Calculates the losses pertaining to the non-overlapping datasets,
#     namely Macro F1, Micro F1, Modularity, and NMI.
#     '''
#     model.eval()
#     labels = dataset.y
#     w = edge_index[0, :]
#     c = edge_index[1, :]
#     _, _, q = model(w, c, edge_index)
#     new_labels = torch.zeros(w.shape[0], 1)
#     for i in range(w.shape[0]):
#         new_labels[i] = labels[w[i]]
#     kmeans = KMeans(n_clusters=torch.unique(labels).shape[0], random_state=0).fit(q.detach().cpu().numpy())
#     ### For calculating modularity; python-louvain's modularity requires an
#     ### undirected graph, hence to_undirected=True.
#     assignment = {i: int(kmeans.labels_[i]) for i in range(q.shape[0])}
#     networkx_graph = pyg.utils.to_networkx(dataset, to_undirected=True)
#     modularity = community.modularity(assignment, networkx_graph)
#     ### For calculating the macro and micro F1 scores
#     macro_F1 = metrics.f1_score(new_labels.numpy(), kmeans.labels_, average='macro')
#     micro_F1 = metrics.f1_score(new_labels.numpy(), kmeans.labels_, average='micro')
#     return modularity, macro_F1, micro_F1
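
# The docstring above mentions NMI, which the draft does not yet compute.
# A sketch of that missing piece with sklearn, assuming new_labels and
# kmeans.labels_ as defined in the draft above:
#
#   NMI = metrics.normalized_mutual_info_score(new_labels.numpy().ravel(),
#                                              kmeans.labels_)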