diff --git a/data/karate/karate.cites b/data/karate/karate.cites new file mode 100644 index 0000000..a9aaf17 --- /dev/null +++ b/data/karate/karate.cites @@ -0,0 +1,78 @@ +0 1 +0 2 +0 3 +0 4 +0 5 +0 6 +0 7 +0 8 +0 10 +0 11 +0 12 +0 13 +0 17 +0 19 +0 21 +0 31 +1 2 +1 3 +1 7 +1 13 +1 17 +1 19 +1 21 +1 30 +2 3 +2 32 +2 7 +2 8 +2 9 +2 13 +2 27 +2 28 +3 7 +3 12 +3 13 +4 10 +4 6 +5 16 +5 10 +5 6 +6 16 +8 32 +8 30 +8 33 +9 33 +13 33 +14 32 +14 33 +15 32 +15 33 +18 32 +18 33 +19 33 +20 32 +20 33 +22 32 +22 33 +23 32 +23 25 +23 27 +23 29 +23 33 +24 25 +24 27 +24 31 +25 31 +26 33 +26 29 +27 33 +28 33 +28 31 +29 32 +29 33 +30 33 +30 32 +31 33 +31 32 +32 33 diff --git a/data/karate/karate.content b/data/karate/karate.content new file mode 100644 index 0000000..a825224 --- /dev/null +++ b/data/karate/karate.content @@ -0,0 +1,34 @@ +0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +2 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +3 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +4 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +5 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +6 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +7 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +8 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +9 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +10 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +11 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +12 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +13 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 1 +16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 +22 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 +23 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 +24 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 +25 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 +26 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 +27 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 +28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 +29 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 +30 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 +31 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 +32 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 +33 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 diff --git a/data/karate/karate.gml b/data/karate/karate.gml new file mode 100644 index 0000000..ecafe9c --- /dev/null +++ b/data/karate/karate.gml @@ -0,0 +1,530 @@ +Creator "Mark Newman on Fri Jul 21 12:39:27 2006" +graph +[ + node + [ + id 1 + ] + node + [ + id 2 + ] + node + [ + id 3 + ] + node + [ + id 4 + ] + node + [ + id 5 + ] + node + [ + id 6 + ] + node + [ + id 7 + ] + node + [ + id 8 + ] + node + [ + id 9 + ] + node + [ + id 10 + ] + node + [ + id 11 + ] + node + [ + id 12 + ] + node + [ + id 13 + ] + node + [ + id 14 + ] + node + [ + id 15 + ] + node + [ + id 16 + ] + node + [ + id 17 + ] + node + [ + id 18 + ] + node + [ + id 19 + ] + node 
+ [ + id 20 + ] + node + [ + id 21 + ] + node + [ + id 22 + ] + node + [ + id 23 + ] + node + [ + id 24 + ] + node + [ + id 25 + ] + node + [ + id 26 + ] + node + [ + id 27 + ] + node + [ + id 28 + ] + node + [ + id 29 + ] + node + [ + id 30 + ] + node + [ + id 31 + ] + node + [ + id 32 + ] + node + [ + id 33 + ] + node + [ + id 34 + ] + edge + [ + source 2 + target 1 + ] + edge + [ + source 3 + target 1 + ] + edge + [ + source 3 + target 2 + ] + edge + [ + source 4 + target 1 + ] + edge + [ + source 4 + target 2 + ] + edge + [ + source 4 + target 3 + ] + edge + [ + source 5 + target 1 + ] + edge + [ + source 6 + target 1 + ] + edge + [ + source 7 + target 1 + ] + edge + [ + source 7 + target 5 + ] + edge + [ + source 7 + target 6 + ] + edge + [ + source 8 + target 1 + ] + edge + [ + source 8 + target 2 + ] + edge + [ + source 8 + target 3 + ] + edge + [ + source 8 + target 4 + ] + edge + [ + source 9 + target 1 + ] + edge + [ + source 9 + target 3 + ] + edge + [ + source 10 + target 3 + ] + edge + [ + source 11 + target 1 + ] + edge + [ + source 11 + target 5 + ] + edge + [ + source 11 + target 6 + ] + edge + [ + source 12 + target 1 + ] + edge + [ + source 13 + target 1 + ] + edge + [ + source 13 + target 4 + ] + edge + [ + source 14 + target 1 + ] + edge + [ + source 14 + target 2 + ] + edge + [ + source 14 + target 3 + ] + edge + [ + source 14 + target 4 + ] + edge + [ + source 17 + target 6 + ] + edge + [ + source 17 + target 7 + ] + edge + [ + source 18 + target 1 + ] + edge + [ + source 18 + target 2 + ] + edge + [ + source 20 + target 1 + ] + edge + [ + source 20 + target 2 + ] + edge + [ + source 22 + target 1 + ] + edge + [ + source 22 + target 2 + ] + edge + [ + source 26 + target 24 + ] + edge + [ + source 26 + target 25 + ] + edge + [ + source 28 + target 3 + ] + edge + [ + source 28 + target 24 + ] + edge + [ + source 28 + target 25 + ] + edge + [ + source 29 + target 3 + ] + edge + [ + source 30 + target 24 + ] + edge + [ + source 30 + target 27 + ] 
+ edge + [ + source 31 + target 2 + ] + edge + [ + source 31 + target 9 + ] + edge + [ + source 32 + target 1 + ] + edge + [ + source 32 + target 25 + ] + edge + [ + source 32 + target 26 + ] + edge + [ + source 32 + target 29 + ] + edge + [ + source 33 + target 3 + ] + edge + [ + source 33 + target 9 + ] + edge + [ + source 33 + target 15 + ] + edge + [ + source 33 + target 16 + ] + edge + [ + source 33 + target 19 + ] + edge + [ + source 33 + target 21 + ] + edge + [ + source 33 + target 23 + ] + edge + [ + source 33 + target 24 + ] + edge + [ + source 33 + target 30 + ] + edge + [ + source 33 + target 31 + ] + edge + [ + source 33 + target 32 + ] + edge + [ + source 34 + target 9 + ] + edge + [ + source 34 + target 10 + ] + edge + [ + source 34 + target 14 + ] + edge + [ + source 34 + target 15 + ] + edge + [ + source 34 + target 16 + ] + edge + [ + source 34 + target 19 + ] + edge + [ + source 34 + target 20 + ] + edge + [ + source 34 + target 21 + ] + edge + [ + source 34 + target 23 + ] + edge + [ + source 34 + target 24 + ] + edge + [ + source 34 + target 27 + ] + edge + [ + source 34 + target 28 + ] + edge + [ + source 34 + target 29 + ] + edge + [ + source 34 + target 30 + ] + edge + [ + source 34 + target 31 + ] + edge + [ + source 34 + target 32 + ] + edge + [ + source 34 + target 33 + ] +] diff --git a/data/karate/karate.txt b/data/karate/karate.txt new file mode 100644 index 0000000..9569ed5 --- /dev/null +++ b/data/karate/karate.txt @@ -0,0 +1,5 @@ +The file karate.gml contains the network of friendships between the 34 +members of a karate club at a US university, as described by Wayne Zachary +in 1977. If you use these data in your work, please cite W. W. Zachary, An +information flow model for conflict and fission in small groups, Journal of +Anthropological Research 33, 452-473 (1977). 
class GCN(nn.Module):
    """Three stacked graph-convolution layers with tanh activations.

    Dropout is applied after each of the first two layers and the output of
    the third layer is normalised with a row-wise softmax.

    Args:
        nfeat: First argument given to the first ``GraphConvolution`` layer
            (presumably the number of input features per node).
        nhid1: Width of the first hidden representation.
        nhid2: Width of the second hidden representation.
        nclass: Width of the output layer, i.e. number of columns returned
            by ``forward``.
        dropout: Dropout probability used between layers while training.
    """

    def __init__(self, nfeat, nhid1, nhid2, nclass, dropout):
        super(GCN, self).__init__()

        self.gc1 = GraphConvolution(nfeat, nhid1)
        self.gc2 = GraphConvolution(nhid1, nhid2)
        self.gc3 = GraphConvolution(nhid2, nclass)
        self.dropout = dropout

    def forward(self, x, adj):
        """Return per-node class probabilities.

        Args:
            x: Node feature matrix, one row per node.
            adj: Adjacency matrix handed unchanged to every layer.

        Returns:
            A 2-D tensor with one row per node whose entries sum to 1.
        """
        x = F.tanh(self.gc1(x, adj))
        x = F.dropout(x, self.dropout, training=self.training)
        x = F.tanh(self.gc2(x, adj))
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.gc3(x, adj)

        # Normalise across the class axis explicitly. Calling softmax without
        # ``dim`` is deprecated and only picks this axis implicitly for 2-D
        # input.
        # NOTE(review): callers that feed this into nn.CrossEntropyLoss apply
        # a second (log-)softmax on top of these probabilities; returning raw
        # scores would be the usual choice but would change the output
        # contract, so it is left as is.
        return F.softmax(x, dim=1)
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim

from utils import load_data, accuracy, encode_onehot
from models2 import GCN

# Training settings
parser = argparse.ArgumentParser()
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='Disables CUDA training.')
parser.add_argument('--fastmode', action='store_true', default=False,
                    help='Validate during training pass.')
parser.add_argument('--seed', type=int, default=42, help='Random seed.')
parser.add_argument('--epochs', type=int, default=200,
                    help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.01,
                    help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=5e-4,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden1', type=int, default=200,
                    help='Number of hidden units.')
parser.add_argument('--hidden2', type=int, default=50,
                    help='Number of hidden units.')
parser.add_argument('--dropout', type=float, default=0.5,
                    help='Dropout rate (1 - keep probability).')

args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Seed every random number generator in use so runs are repeatable.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Load data
adj, features, labels, idx_train, idx_val, idx_test = load_data("../data/karate/", "karate")
# adj, features, labels, idx_train, idx_val, idx_test = load_data("../data/cora/","cora")

# Rows of the model output that receive a training label; the i-th entry is
# trained towards class i, so the network needs one output per entry.
indices = torch.LongTensor([0, 16, 18, 24])

# Model and optimizer
model = GCN(nfeat=features.shape[1],
            nhid1=args.hidden1,
            nhid2=args.hidden2,
            nclass=indices.numel(),
            dropout=args.dropout)

optimizer = optim.Adam(model.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

if args.cuda:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()
    # Keep the index tensor on the same device as the model output;
    # index_select rejects an index that lives on a different device.
    indices = indices.cuda()


criterion = nn.CrossEntropyLoss()
def train(epoch):
    """Run one optimisation step on the rows selected by ``indices``.

    The i-th selected row of the model output is trained towards class i.
    Uses the module-level ``model``, ``optimizer``, ``criterion``,
    ``features``, ``adj``, ``indices`` and ``args``.

    Args:
        epoch: Zero-based epoch counter; only used in the progress line.
    """
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)

    # Create the index and target tensors on the device of the model output
    # so the step also works when everything has been moved to the GPU.
    anchor_rows = indices.to(output.device)
    targets = torch.arange(anchor_rows.numel(), device=output.device)
    loss = criterion(torch.index_select(output, 0, anchor_rows), targets)
    loss.backward()
    optimizer.step()

    if not args.fastmode:
        # Extra forward pass in evaluation mode. Its result is not used yet;
        # it is kept (without gradient tracking) so the reported time and
        # the model's eval state after each epoch match the previous
        # behaviour.
        model.eval()
        with torch.no_grad():
            output = model(features, adj)

    print('Epoch: {:04d}'.format(epoch+1),
          'time: {:.4f}s'.format(time.time() - t))


def PCA(data, dims_rescaled_data=2):
    """Project ``data`` onto its leading principal components.

    Args:
        data: 2-D array-like with one row per sample and one column per
            feature. The input is left untouched.
        dims_rescaled_data: Number of principal components to keep.

    Returns:
        A tuple ``(projected, evals, evecs)``: the data expressed in the
        kept components (one row per sample), all eigenvalues of the
        covariance matrix in decreasing order, and the kept eigenvectors
        as columns.
    """
    from scipy import linalg

    # Centre a float copy instead of subtracting in place: the in-place form
    # modified the caller's array (and any tensor sharing its memory) and
    # raised for integer input.
    centred = np.asarray(data, dtype=float)
    centred = centred - centred.mean(axis=0)

    covariance = np.cov(centred, rowvar=False)
    # eigh rather than eig: the covariance matrix is symmetric.
    evals, evecs = linalg.eigh(covariance)

    # Order components by decreasing eigenvalue, then keep the leading ones.
    order = np.argsort(evals)[::-1]
    evals = evals[order]
    evecs = evecs[:, order[:dims_rescaled_data]]

    return np.dot(centred, evecs), evals, evecs


def _assign_communities(embedding, anchors):
    """Group the rows of ``embedding`` by their nearest anchor row.

    Args:
        embedding: 2-D array with one row per node.
        anchors: Row numbers of the reference nodes.

    Returns:
        A dict mapping the position of an anchor in ``anchors`` to the list
        of row numbers closest to it. Ties go to the earliest anchor.
    """
    community = dict()
    for node, point in enumerate(embedding):
        distances = [np.linalg.norm(point - embedding[a, :]) for a in anchors]
        nearest = distances.index(min(distances))
        community.setdefault(nearest, []).append(node)
    return community


if __name__ == "__main__":
    for epoch in range(args.epochs):
        train(epoch)

    # Switch dropout off for the final embedding; with --fastmode the model
    # would otherwise still be in training mode here.
    model.eval()
    output = model(features, adj)
    print(output)
    output = PCA(output.detach().cpu().numpy())[0]
    print(output)

    # draw
    import matplotlib.pyplot as plt
    plt.scatter(output[:, 0], output[:, 1])
    # To label the points, call plt.annotate(i, (x, y)) for every row i
    # before showing the figure.
    plt.show()

    # classify: every node joins the anchor it lies closest to in the
    # two-dimensional embedding.
    community = _assign_communities(output, indices.tolist())
    print(community)