Skip to content

Instantly share code, notes, and snippets.

@TheExGenesis
Last active Oct 8, 2021
Embed
What would you like to do?
Regression with a GCN. Run with `--many-graphs` to train on one graph at a time; by default it trains on a single batched graph for many epochs.
#%%
"""GCN using DGL nn package
References:
- Semi-Supervised Classification with Graph Convolutional Networks
- Paper: https://arxiv.org/abs/1609.02907
- Code: https://github.com/tkipf/gcn
"""
from random import randint
import torch
import torch.nn as nn
from dgl.nn.pytorch import GraphConv
class GCN(nn.Module):
    """Stack of DGL ``GraphConv`` layers with dropout between them.

    The stack is: one input layer (``in_feats -> n_hidden``),
    ``n_layers - 1`` hidden layers (``n_hidden -> n_hidden``) and one output
    layer (``n_hidden -> n_classes``). The input and hidden layers are built
    with ``activation``; the output layer is built without an ``activation``
    argument.

    Args:
        g: Accepted for signature compatibility; not stored or used by the
            constructor. The graph is supplied to ``forward`` instead.
        in_feats: Size of each input node feature vector.
        n_hidden: Width of the hidden representations.
        n_classes: Width of the output layer.
        n_layers: Number of ``n_hidden``-wide layers (input layer included).
        activation: Callable handed to every non-output ``GraphConv``.
        dropout: Probability passed to ``nn.Dropout``.
    """

    def __init__(self, g, in_feats, n_hidden, n_classes, n_layers, activation, dropout):
        super().__init__()
        # Build the layers in input -> hidden -> output order.
        convs = [GraphConv(in_feats, n_hidden, activation=activation)]
        convs.extend(
            GraphConv(n_hidden, n_hidden, activation=activation)
            for _ in range(n_layers - 1)
        )
        convs.append(GraphConv(n_hidden, n_classes))
        self.layers = nn.ModuleList(convs)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, g, features):
        """Run ``features`` through every layer on graph ``g``.

        Dropout is applied to the input of every layer except the first.
        """
        first_conv, *later_convs = self.layers
        h = first_conv(g, features)
        for conv in later_convs:
            h = conv(g, self.dropout(h))
        return h
def train_test_val_mask(N, train_size, test_size, val_size):
    """Build three boolean masks of length ``N`` over consecutive index ranges.

    The ranges are ``[0, train_size)``, then the next ``test_size`` indices,
    then the next ``val_size`` indices. Ranges that run past ``N`` are
    truncated by normal slice semantics.

    Returns:
        A tuple ``(train_mask, test_mask, val_mask)`` of ``torch.bool``
        tensors. Note the order: the test mask comes before the val mask.
    """
    test_end = train_size + test_size
    val_end = test_end + val_size
    spans = (
        slice(None, train_size),
        slice(train_size, test_end),
        slice(test_end, val_end),
    )

    masks = []
    for span in spans:
        mask = torch.zeros(N, dtype=torch.bool)
        mask[span] = True
        masks.append(mask)
    return tuple(masks)
def gen_random_graph(n_nodes, n_edges):
    """Generate a random DGL graph with synthetic regression data on its nodes.

    The graph starts from ``dgl.rand_graph(n_nodes, n_edges)`` and is then
    given exactly one self-loop per node, so the final edge count differs
    from ``n_edges``.

    Node data written to ``g.ndata``:
        strat: random 0.0 or 1.0 per node.
        degree: the node's in-degree (self-loop included), as float.
        feat: ``[degree, strat]`` stacked along dim 1.
        label: ``degree * strat`` (the regression target).
        train_mask / val_mask / test_mask: boolean masks covering the first
            80%, and then two consecutive 10% slices, of the node indices.

    Args:
        n_nodes: Number of nodes in the graph.
        n_edges: Number of random edges requested from ``dgl.rand_graph``.

    Returns:
        The populated DGL graph.
    """
    g = dgl.rand_graph(n_nodes, n_edges)
    # dgl.add_self_loop returns a NEW graph; the original code discarded the
    # result, so no self-loops were ever added. Existing self-loops are removed
    # first (same pattern as the --self-loop path in main) to avoid duplicates.
    g = dgl.add_self_loop(dgl.remove_self_loop(g))

    g.ndata["strat"] = torch.tensor([randint(0, 1) for _ in range(n_nodes)]).float()
    # One vectorised call instead of one in_degrees(i) call per node.
    g.ndata["degree"] = g.in_degrees().float()
    g.ndata["feat"] = torch.stack((g.ndata["degree"], g.ndata["strat"]), dim=1).float()
    g.ndata["label"] = g.ndata["degree"] * g.ndata["strat"]
    # g.ndata["label"] = (g.ndata["degree"] * g.ndata["strat"] > 7).long()  # classif

    N = n_nodes
    # train_test_val_mask returns (train, test, val) in that order; unpack in
    # the same order so each mask is stored under its own name.
    train_mask, test_mask, val_mask = train_test_val_mask(
        N, int(N * 0.8), int(N * 0.1), int(N * 0.1)
    )
    g.ndata["train_mask"] = train_mask
    g.ndata["val_mask"] = val_mask
    g.ndata["test_mask"] = test_mask
    return g
import argparse
import time
import numpy as np
import torch
import torch.nn.functional as F
import dgl
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
# from gcn_mp import GCN
# from gcn_spmv import GCN
def evaluate(model, g, features, labels, mask):
    """Return the arg-max accuracy of ``model`` on the nodes selected by ``mask``.

    The model is switched to eval mode and called as ``model(g, features)``
    with gradients disabled. For each selected row, the index of the largest
    output along dim 1 is compared with the corresponding label.

    Returns:
        The fraction (float) of selected nodes whose predicted index equals
        the label.

    Raises:
        ZeroDivisionError: if ``mask`` selects no nodes.
    """
    model.eval()
    with torch.no_grad():
        scores = model(g, features)[mask]
        targets = labels[mask]
        predicted = torch.max(scores, dim=1).indices
        n_hits = torch.sum(predicted == targets).item()
    return n_hits * 1.0 / len(targets)
def _prepare_graph(graph, device, self_loop):
    """Move ``graph`` to ``device`` and, if requested, give every node one self-loop."""
    if device.type == "cuda":
        graph = graph.int().to(device)
    if self_loop:
        graph = dgl.add_self_loop(dgl.remove_self_loop(graph))
    return graph


def _masked_mse(model, graph, mask, loss_fcn):
    """Return ``loss_fcn`` of the model's predictions on the nodes selected by ``mask``."""
    model.eval()
    with torch.no_grad():
        preds = model(graph, graph.ndata["feat"]).squeeze(-1)
        return loss_fcn(preds[mask], graph.ndata["label"][mask]).item()


def main(args):
    """Train a GCN regressor on randomly generated graphs and report test MSE.

    50 mini-batches of 10 random graphs each are generated. By default the
    model takes one optimizer step per epoch on the single graph obtained by
    batching everything together; with ``args.many_graphs`` it takes one step
    per mini-batch instead. Evaluation always uses the full batched graph.

    Args:
        args: Parsed command-line namespace. Attributes read: ``gpu``,
            ``self_loop``, ``n_hidden``, ``n_layers``, ``dropout``, ``lr``,
            ``weight_decay``, ``n_epochs`` and ``many_graphs``.
    """
    # generate the dataset
    num_graphs = 10
    batch_size = 50
    gs = [
        dgl.batch([gen_random_graph(10, 70) for _ in range(num_graphs)])
        for _ in range(batch_size)
    ]
    g = dgl.batch(gs)

    cuda = args.gpu >= 0
    device = torch.device("cuda", args.gpu) if cuda else torch.device("cpu")

    in_feats = g.ndata["feat"].shape[1]
    n_classes = 1  # single regression output per node
    n_edges = g.num_edges()
    print(
        "----Data statistics------'\n"
        "#Edges %d\n"
        "#Classes %d\n"
        "#Train samples %d\n"
        "#Val samples %d\n"
        "#Test samples %d"
        % (
            n_edges,
            n_classes,
            g.ndata["train_mask"].int().sum().item(),
            g.ndata["val_mask"].int().sum().item(),
            g.ndata["test_mask"].int().sum().item(),
        )
    )

    # Device placement and self-loop handling are applied uniformly to the
    # full graph and to the per-step graphs, so both training modes see the
    # same kind of input on the same device.
    g = _prepare_graph(g, device, args.self_loop)
    n_edges = g.num_edges()
    if args.many_graphs:
        train_graphs = [_prepare_graph(sub, device, args.self_loop) for sub in gs]
    else:
        train_graphs = [g]

    # create GCN model
    model = GCN(
        g, in_feats, args.n_hidden, n_classes, args.n_layers, F.relu, args.dropout
    )
    # Use the same device as the graphs (model.cuda() would ignore --gpu).
    model.to(device)
    loss_fcn = torch.nn.MSELoss()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay
    )

    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        t0 = time.time()
        losses = []
        # A distinct loop name keeps the full graph `g` intact for evaluation.
        for graph in train_graphs:
            mask = graph.ndata["train_mask"]
            # The model emits shape (num_nodes, 1) while labels are
            # (num_nodes,). Without the squeeze, MSELoss broadcasts the pair
            # to (M, M) and computes the wrong loss.
            preds = model(graph, graph.ndata["feat"]).squeeze(-1)
            loss = loss_fcn(preds[mask], graph.ndata["label"][mask])
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # The first three epochs are treated as warm-up and excluded from timing.
        if epoch >= 3:
            dur.append(time.time() - t0)
        # Avoid np.mean([]) (RuntimeWarning) while no epoch has been timed yet.
        mean_dur = np.mean(dur) if dur else float("nan")
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(
                epoch, mean_dur, np.mean(losses), n_edges / mean_dur / 1000
            )
        )

    print()
    # Arg-max accuracy is meaningless for a single-output regressor, so the
    # held-out mean squared error is reported instead.
    test_mse = _masked_mse(model, g, g.ndata["test_mask"], loss_fcn)
    print("Test MSE {:.4f}".format(test_mse))
#%%
if __name__ == "__main__":
    # Command-line entry point: declare the options, parse them, echo the
    # parsed namespace and hand it to main().
    parser = argparse.ArgumentParser(description="GCN")

    # (flag, add_argument keyword arguments), registered in this order.
    # NOTE: --dataset is accepted, but main() in this file never reads it.
    cli_options = [
        (
            "--dataset",
            {
                "type": str,
                "default": "cora",
                "help": "Dataset name ('cora', 'citeseer', 'pubmed').",
            },
        ),
        ("--dropout", {"type": float, "default": 0.5, "help": "dropout probability"}),
        ("--gpu", {"type": int, "default": -1, "help": "gpu"}),
        ("--lr", {"type": float, "default": 1e-2, "help": "learning rate"}),
        (
            "--n-epochs",
            {"type": int, "default": 200, "help": "number of training epochs"},
        ),
        (
            "--n-hidden",
            {"type": int, "default": 16, "help": "number of hidden gcn units"},
        ),
        (
            "--n-layers",
            {"type": int, "default": 1, "help": "number of hidden gcn layers"},
        ),
        (
            "--weight-decay",
            {"type": float, "default": 5e-4, "help": "Weight for L2 loss"},
        ),
        (
            "--self-loop",
            {"action": "store_true", "help": "graph self-loop (default=False)"},
        ),
        (
            "--many-graphs",
            {"action": "store_true", "help": "graph many-graphs (default=False)"},
        ),
    ]
    for flag, spec in cli_options:
        parser.add_argument(flag, **spec)

    parser.set_defaults(self_loop=False, many_graphs=False)
    args = parser.parse_args()
    print(args)
    main(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment