@callumm-graphcore
Created October 4, 2022 08:10
Perform coordinate checking on IPU/CPU or compare results for IPU/CPU
import argparse
import mup
import poptorch
import torch
from torch import nn
torch.manual_seed(17244)
parser = argparse.ArgumentParser()
parser.add_argument('--ipu', help="Run coordinate checking on IPU", action="store_true")
parser.add_argument('--cpu', help="Run coordinate checking on CPU", action="store_true")
parser.add_argument('--compare', help="Run on IPU and CPU and compare results", action="store_true")
parser.add_argument('--width-lower', help="Smallest width to test", type=int)
parser.add_argument('--width-upper', help="Largest width to test, inclusive", type=int)
parser.add_argument('--width-step', help="Step between widths to test", type=int)
parser.add_argument('--training-steps',
                    help="Number of training steps to run (must be >= 2)", type=int)
parser.add_argument('--lr', help="Learning rate", type=float)
args = parser.parse_args()
num_specified = sum(int(x) for x in [args.ipu, args.cpu, args.compare])
if num_specified != 1:
raise ValueError("Exactly one of --ipu, --cpu, --compare should be specified")
if args.training_steps < 2 and (args.cpu or args.ipu):
raise ValueError("Must run at least 2 training steps for coordinate checking")
BASE_WIDTH = 8
DELTA_WIDTH = 16
# width_upper is inclusive
widths_to_test = list(range(
    args.width_lower,
    args.width_upper + args.width_step,
    args.width_step
))
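
# Worked example (assumed flag values): --width-lower 64 --width-upper 256
# --width-step 64 gives widths_to_test == [64, 128, 192, 256].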


class PopReadout(mup.MuReadout):
    """
    Not quite a drop-in replacement for `mup.MuReadout` - you need to specify
    `base_width`.

    Set `base_width` to the width of the base model passed to
    `mup.set_base_shapes` to get the same results on IPU and CPU. It should
    still "work" with any other value, but won't give the same results as the
    CPU run.
    """

    def __init__(self, in_features, *args, base_width=None, **kwargs):
        if base_width is None:
            raise ValueError("base_width must be specified in PopReadout")
        # Set these before super().__init__() in case width_mult() is queried
        # during initialisation.
        self._base_width = base_width
        self._absolute_width = float(in_features)
        super().__init__(in_features, *args, **kwargs)

    def width_mult(self):
        # Fixed ratio of actual width to base width, rather than mup's
        # infshape-derived multiplier.
        return self._absolute_width / self._base_width
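
# Illustrative note (width 64 is an assumed example value): with BASE_WIDTH = 8,
# PopReadout(64, 10, base_width=BASE_WIDTH).width_mult() returns 64 / 8 = 8.0,
# matching the infshape-based multiplier mup.MuReadout reports on CPU once base
# shapes are set against the width-8 base model.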


class CIFAR10MLP(nn.Module):
    def __init__(self, width, ipu):
        super().__init__()
        self.width = width
        self.lin1 = nn.Linear(3 * 32 * 32, width)
        self.lin2 = nn.Linear(width, width)
        if ipu:
            self.lin3 = PopReadout(width, 10, base_width=BASE_WIDTH)
        else:
            self.lin3 = mup.MuReadout(width, 10)
        self.log_softmax = nn.LogSoftmax(dim=1)
        self.loss = nn.NLLLoss()

    def forward(self, x, labels=None):
        x = torch.flatten(x, start_dim=1)
        preact1 = self.lin1(x)
        act1 = preact1.relu()
        preact2 = self.lin2(act1)
        act2 = preact2.relu()
        logits = self.lin3(act2)
        out = self.log_softmax(logits)
        if self.training:
            # Return the hidden activations alongside the loss so the caller
            # can track how far they drift from their initial values.
            return (act1, act2, out), self.loss(out, labels)
        return out
base_model = CIFAR10MLP(width=BASE_WIDTH, ipu=False)
delta_model = CIFAR10MLP(width=DELTA_WIDTH, ipu=False)
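
# Random stand-in data with CIFAR-10 shapes: batches of 16 images of size
# 3x32x32 and integer labels drawn from [0, 10).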
examples = [
    (torch.rand(16, 3, 32, 32), torch.randint(high=10, size=(16,)))
    for _ in range(args.training_steps)
]


def get_model(width, ipu, seed=0):
    torch.manual_seed(seed)
    model = CIFAR10MLP(width=width, ipu=ipu)
    mup.set_base_shapes(model, base_model, delta=delta_model)
    for param in model.parameters():
        mup.init.uniform_(param, -0.1, 0.1)
    model.train()
    if ipu:
        opts = poptorch.Options()
        optimizer = mup.MuAdam(model.parameters(), lr=args.lr, impl=poptorch.optim.Adam)
        poptorch_model = poptorch.trainingModel(model, options=opts, optimizer=optimizer)
        return poptorch_model
    else:
        return model


for width in widths_to_test:
    print(f"Testing width: {width}")

    if args.ipu:
        poptorch_model = get_model(width, ipu=True)
        for index, (inp, label) in enumerate(examples):
            (act1, act2, out), _ = poptorch_model(inp, label)
            if index == 0:
                act1_at_t0 = act1
                act2_at_t0 = act2
            else:
                print(f"stddev(x_{index} - x_0): "
                      f"{(act1 - act1_at_t0).std().item()}, "
                      f"{(act2 - act2_at_t0).std().item()}")

    if args.cpu:
        model = get_model(width, ipu=False)
        optimizer = mup.MuAdam(model.parameters(), lr=args.lr)
        for index, (inp, label) in enumerate(examples):
            optimizer.zero_grad()
            (act1, act2, out), loss = model(inp, label)
            loss.backward()
            optimizer.step()
            if index == 0:
                act1_at_t0 = act1
                act2_at_t0 = act2
            else:
                print(f"stddev(x_{index} - x_0): "
                      f"{(act1 - act1_at_t0).std().item()}, "
                      f"{(act2 - act2_at_t0).std().item()}")

    if args.compare:
        # Generate IPU and CPU models with the same seed
        # so their initialisations are the same.
        cpu_model = get_model(width, ipu=False, seed=width)
        ipu_model = get_model(width, ipu=True, seed=width)
        optimizer = mup.MuAdam(cpu_model.parameters(), lr=args.lr)
        for index, (inp, label) in enumerate(examples):
            optimizer.zero_grad()
            (act1_cpu, act2_cpu, out_cpu), loss = cpu_model(inp, label)
            loss.backward()
            optimizer.step()
            (act1_ipu, act2_ipu, out_ipu), _ = ipu_model(inp, label)
            act1_max_abs_error = (act1_cpu - act1_ipu).abs().max().item()
            act2_max_abs_error = (act2_cpu - act2_ipu).abs().max().item()
            out_max_abs_error = (out_cpu - out_ipu).abs().max().item()
            print(f"{act1_max_abs_error}, {act2_max_abs_error}, {out_max_abs_error}")