-
-
Save TheExGenesis/6b847c790c2d9237162b48d1f5a9d7e8 to your computer and use it in GitHub Desktop.
Regression with GCN. Run with `--many-graphs` to train it one graph at a time; the default is one batched graph trained over many epochs.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#%% | |
"""GCN using DGL nn package | |
References: | |
- Semi-Supervised Classification with Graph Convolutional Networks | |
- Paper: https://arxiv.org/abs/1609.02907 | |
- Code: https://github.com/tkipf/gcn | |
""" | |
from random import randint | |
import torch | |
import torch.nn as nn | |
from dgl.nn.pytorch import GraphConv | |
class GCN(nn.Module):
    """Multi-layer graph convolutional network (Kipf & Welling, arXiv:1609.02907).

    Architecture: one input GraphConv (in_feats -> n_hidden), `n_layers - 1`
    hidden GraphConvs (n_hidden -> n_hidden), and a linear output GraphConv
    (n_hidden -> n_classes). Dropout is applied before every layer except
    the first.
    """

    def __init__(self, g, in_feats, n_hidden, n_classes, n_layers, activation, dropout):
        super(GCN, self).__init__()
        # `g` is accepted for backward compatibility with older callers;
        # the graph itself is supplied to forward() on each call.
        widths = [in_feats, n_hidden] + [n_hidden] * (n_layers - 1)
        convs = [
            GraphConv(w_in, w_out, activation=activation)
            for w_in, w_out in zip(widths[:-1], widths[1:])
        ]
        # Output layer has no activation (raw scores / regression output).
        convs.append(GraphConv(widths[-1], n_classes))
        self.layers = nn.ModuleList(convs)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, g, features):
        """Run the stacked graph convolutions on `g` over node `features`."""
        h = features
        for idx, conv in enumerate(self.layers):
            if idx > 0:
                h = self.dropout(h)
            h = conv(g, h)
        return h
def train_test_val_mask(N, train_size, test_size, val_size):
    """Split ``N`` nodes into contiguous train/test/val boolean masks.

    The first ``train_size`` indices belong to train, the next
    ``test_size`` to test, and the following ``val_size`` to val.

    Args:
        N: total number of nodes.
        train_size, test_size, val_size: sizes of the three splits.

    Returns:
        ``(train_mask, test_mask, val_mask)`` — three ``torch.bool``
        tensors of length ``N``. (The original docstring incorrectly
        claimed numpy arrays were returned.)

    Raises:
        ValueError: if the requested splits do not fit in ``N`` nodes.
    """
    if train_size + test_size + val_size > N:
        raise ValueError("train_size + test_size + val_size exceeds N")
    # Build boolean tensors directly instead of float zeros + .bool().
    train_mask = torch.zeros(N, dtype=torch.bool)
    train_mask[:train_size] = True
    test_mask = torch.zeros(N, dtype=torch.bool)
    test_mask[train_size : train_size + test_size] = True
    val_mask = torch.zeros(N, dtype=torch.bool)
    val_mask[train_size + test_size : train_size + test_size + val_size] = True
    return train_mask, test_mask, val_mask
def gen_random_graph(n_nodes, n_edges):
    """
    Generate a random DGL graph with ``n_nodes`` nodes and ``n_edges`` random
    edges plus one self-loop per node, with the following ndata fields:
        strat: 0.0 or 1.0, uniform per node
        degree: the node's in-degree (float)
        feat: (degree, strat) stacked as an (N, 2) float feature
        label: degree * strat (regression target; original docstring had a
               typo, "degree * label")
        train_mask / test_mask / val_mask: 80/10/10 boolean node splits
    """
    g = dgl.rand_graph(n_nodes, n_edges)
    # Bug fix: dgl.add_self_loop returns a NEW graph; the original call
    # discarded its result, so self-loops were never actually added.
    g = dgl.add_self_loop(g)
    g.ndata["strat"] = torch.tensor([randint(0, 1) for _ in range(n_nodes)]).float()
    # Vectorized in_degrees() over all nodes — same values as the original
    # per-node Python loop.
    g.ndata["degree"] = g.in_degrees().float()
    g.ndata["feat"] = torch.stack((g.ndata["degree"], g.ndata["strat"]), dim=1).float()
    g.ndata["label"] = g.ndata["degree"] * g.ndata["strat"]
    # g.ndata["label"] = (g.ndata["degree"] * g.ndata["strat"] > 7).long()  # classif
    # Bug fix: train_test_val_mask returns (train, test, val); the original
    # unpacked it as (train_mask, val_mask, test_mask), silently swapping
    # the validation and test splits.
    (
        g.ndata["train_mask"],
        g.ndata["test_mask"],
        g.ndata["val_mask"],
    ) = train_test_val_mask(
        n_nodes, int(n_nodes * 0.8), int(n_nodes * 0.1), int(n_nodes * 0.1)
    )
    return g
import argparse | |
import time | |
import numpy as np | |
import torch | |
import torch.nn.functional as F | |
import dgl | |
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset | |
# from gcn_mp import GCN | |
# from gcn_spmv import GCN | |
def evaluate(model, g, features, labels, mask):
    """Return the classification accuracy of `model` on the nodes selected
    by the boolean `mask` (fraction of masked nodes whose argmax over the
    output dimension matches `labels`)."""
    model.eval()
    with torch.no_grad():
        predictions = model(g, features)[mask].argmax(dim=1)
        targets = labels[mask]
        n_correct = (predictions == targets).sum().item()
        return n_correct / len(targets)
def main(args):
    """Train a GCN regressor on synthetic random graphs.

    Builds `batch_size` mini-batches of `num_graphs` random graphs each,
    plus one fully batched graph `g` over all of them. Default mode trains
    on the full batched graph every epoch; with ``--many-graphs`` it steps
    once per mini-batch instead. Validation/test evaluation always uses the
    full batched graph.
    """
    # load and preprocess dataset
    num_graphs = 10
    batch_size = 50
    gs = [
        dgl.batch([gen_random_graph(10, 70) for _ in range(num_graphs)])
        for _ in range(batch_size)
    ]
    g = dgl.batch(gs)
    cuda = args.gpu >= 0
    if cuda:
        # Bug fix: also move the per-epoch mini-batches; the original moved
        # only `g`, so --many-graphs crashed on GPU with CPU-resident graphs.
        gs = [gi.int().to(args.gpu) for gi in gs]
        g = g.int().to(args.gpu)
    features = g.ndata["feat"]
    labels = g.ndata["label"]
    train_mask = g.ndata["train_mask"]
    val_mask = g.ndata["val_mask"]
    test_mask = g.ndata["test_mask"]
    in_feats = features.shape[1]
    n_classes = 1  # regression: a single output per node
    n_edges = g.num_edges()
    print(
        """----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d"""
        % (
            n_edges,
            n_classes,
            train_mask.int().sum().item(),
            val_mask.int().sum().item(),
            test_mask.int().sum().item(),
        )
    )
    # add self loop
    if args.self_loop:
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
        n_edges = g.number_of_edges()
    # create GCN model (the `g` argument is kept for interface compatibility;
    # the graph is passed to forward() on each call)
    model = GCN(
        g, in_feats, args.n_hidden, n_classes, args.n_layers, F.relu, args.dropout
    )
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.MSELoss()
    # loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay
    )
    dur = []  # per-epoch durations (after a 3-epoch warmup)
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        losses = []
        # forward
        if args.many_graphs:
            # Bug fix: use a distinct loop variable; the original iterated
            # `for g in gs`, clobbering the full batched graph `g` so the
            # later evaluations ran on only the last mini-batch.
            for gi in gs:
                gi_features = gi.ndata["feat"]
                gi_labels = gi.ndata["label"]
                gi_train_mask = gi.ndata["train_mask"]
                logits = model(gi, gi_features)
                loss = loss_fcn(logits[gi_train_mask], gi_labels[gi_train_mask])
                losses.append(loss.item())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        else:
            logits = model(g, features)
            loss = loss_fcn(logits[train_mask], labels[train_mask])
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, g, features, labels, val_mask)
        # Guard against np.mean([]) (RuntimeWarning + nan) during warmup,
        # and actually report the validation accuracy (the original computed
        # `acc` but never printed it).
        mean_dur = np.mean(dur) if dur else float("nan")
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(
                epoch, mean_dur, np.mean(losses), acc, n_edges / mean_dur / 1000
            )
        )
    print()
    acc = evaluate(model, g, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
#%% | |
if __name__ == "__main__": | |
parser = argparse.ArgumentParser(description="GCN") | |
parser.add_argument( | |
"--dataset", | |
type=str, | |
default="cora", | |
help="Dataset name ('cora', 'citeseer', 'pubmed').", | |
) | |
parser.add_argument( | |
"--dropout", type=float, default=0.5, help="dropout probability" | |
) | |
parser.add_argument("--gpu", type=int, default=-1, help="gpu") | |
parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") | |
parser.add_argument( | |
"--n-epochs", type=int, default=200, help="number of training epochs" | |
) | |
parser.add_argument( | |
"--n-hidden", type=int, default=16, help="number of hidden gcn units" | |
) | |
parser.add_argument( | |
"--n-layers", type=int, default=1, help="number of hidden gcn layers" | |
) | |
parser.add_argument( | |
"--weight-decay", type=float, default=5e-4, help="Weight for L2 loss" | |
) | |
parser.add_argument( | |
"--self-loop", action="store_true", help="graph self-loop (default=False)" | |
) | |
parser.add_argument( | |
"--many-graphs", action="store_true", help="graph many-graphs (default=False)" | |
) | |
parser.set_defaults(self_loop=False) | |
parser.set_defaults(many_graphs=False) | |
args = parser.parse_args() | |
print(args) | |
main(args) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment