-
Notifications
You must be signed in to change notification settings - Fork 115
/
fb-train-test-splits.py
63 lines (45 loc) · 1.87 KB
/
fb-train-test-splits.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import networkx as nx
import pandas as pd
import pickle
import numpy as np
from gae.preprocessing import mask_test_edges
RANDOM_SEED = 0
### ---------- Load in FB Graphs ---------- ###
FB_EGO_USERS = [0, 107, 1684, 1912, 3437, 348, 3980, 414, 686, 698]

# Maps graph name -> (adj, features) tuple. Keys are the ego user IDs above
# plus the string 'combined' for the combined network.
fb_graphs = {}

# Ego networks: each pickle is unpacked into exactly two objects, which are
# stored back as an (adj, features) tuple under the ego user's ID.
for ego_id in FB_EGO_USERS:
    ego_pkl_path = './fb-processed/{0}-adj-feat.pkl'.format(ego_id)
    with open(ego_pkl_path, 'rb') as pkl_file:
        adj, features = pickle.load(pkl_file)
    fb_graphs[ego_id] = (adj, features)

# Combined network: same two-object layout, read from its own pickle
# (file name suggests the features are stored sparse -- not verified here).
combined_pkl_path = './fb-processed/combined-adj-sparsefeat.pkl'
with open(combined_pkl_path, 'rb') as pkl_file:
    adj, features = pickle.load(pkl_file)
fb_graphs['combined'] = (adj, features)
### ---------- Generate Train-Test Splits ---------- ###
# For every (fraction hidden, graph) pair, build one edge split with
# mask_test_edges() and pickle it to
#   TRAIN_TEST_SPLITS_FOLDER + 'fb-<graph>-<frac>-hidden.pkl'
# NOTE: the output folder is not created here; it must already exist.
FRAC_EDGES_HIDDEN = [0.25, 0.5, 0.75]
TRAIN_TEST_SPLITS_FOLDER = './train-test-splits/'

# The validation share is the same for every split, so it is set once here
# rather than on every loop iteration.
val_frac = 0.1

# Iterate over fractions of edges to hide
for frac_hidden in FRAC_EDGES_HIDDEN:
    # The hidden fraction covers validation + test edges together, so the
    # test share is whatever remains after the validation share.
    test_frac = frac_hidden - val_frac

    # Iterate over each graph.
    # .items() (instead of the Python-2-only .iteritems()) works on both
    # Python 2 and Python 3.
    for g_name, graph_tuple in fb_graphs.items():
        # Only the adjacency matrix is needed to build the split; the
        # features (graph_tuple[1]) are not used here.
        adj = graph_tuple[0]

        current_graph = 'fb-{}-{}-hidden'.format(g_name, frac_hidden)

        # To regenerate only selected splits, filter on current_graph here
        # (e.g. skip unless it is in a list such as
        # ['fb-combined-0.75-hidden']).

        # Single-argument print() behaves identically as a Python 2 print
        # statement and a Python 3 function call; the two spaces reproduce
        # the output of the original `print "Current graph: ", name`.
        print("Current graph:  {}".format(current_graph))

        # Re-seed before every split so each output depends only on its own
        # graph and fraction, not on dict iteration order.
        # (presumably mask_test_edges draws from NumPy's global RNG -- confirm)
        np.random.seed(RANDOM_SEED)

        # Build the split for the current graph; the return value of
        # mask_test_edges is pickled below exactly as returned.
        train_test_split = mask_test_edges(
            adj, test_frac=test_frac, val_frac=val_frac, verbose=True)

        file_name = TRAIN_TEST_SPLITS_FOLDER + current_graph + '.pkl'

        # Save split. Protocol 2 keeps the pickle readable from both
        # Python 2 and Python 3.
        with open(file_name, 'wb') as f:
            pickle.dump(train_test_split, f, protocol=2)