gist:071da5c68c884b750ca4ec81dc414183
import time
import torch
import torch.nn as nn
from apex.normalization import FusedLayerNorm
torch.backends.cudnn.benchmark = True
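
The preview cuts off here. A minimal sketch of the kind of timing comparison these imports point at, i.e. nn.LayerNorm versus apex's FusedLayerNorm on the same input (the shapes, iteration count, and the time_layernorm helper are illustrative assumptions; it needs a CUDA device with apex installed):

import time
import torch
import torch.nn as nn
from apex.normalization import FusedLayerNorm

torch.backends.cudnn.benchmark = True

def time_layernorm(module, x, iters=100):
    # warm up, then average `iters` timed forward passes with CUDA syncs
    for _ in range(10):
        module(x)
    torch.cuda.synchronize()
    start = time.time()
    for _ in range(iters):
        module(x)
    torch.cuda.synchronize()
    return (time.time() - start) / iters

x = torch.randn(32, 512, 1024, device="cuda")
print("nn.LayerNorm:   ", time_layernorm(nn.LayerNorm(1024).cuda(), x))
print("FusedLayerNorm: ", time_layernorm(FusedLayerNorm(1024).cuda(), x))
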
gist:b5d6b5d2f0a9f3fc4e2a5797d41aa8c7
#!/usr/bin/env python3
import argparse
import time
import torch
import torch_xla
import torch_xla.core.xla_model as xm
import torch_xla.distributed.xla_multiprocessing as xmp
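
The rest of this gist is truncated in the preview. A rough sketch of how a torch_xla multi-process benchmark is usually wired up; the linear model, step count, and nprocs value are illustrative assumptions, and the spawn API has shifted somewhat across torch_xla releases:

import torch
import torch.nn as nn
import torch_xla.core.xla_model as xm
import torch_xla.distributed.xla_multiprocessing as xmp

def _mp_fn(index):
    device = xm.xla_device()              # one TPU core per process
    model = nn.Linear(1024, 1024).to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    for step in range(10):
        optimizer.zero_grad()
        loss = model(torch.randn(8, 1024, device=device)).sum()
        loss.backward()
        xm.optimizer_step(optimizer)      # all-reduce gradients, then step
        xm.mark_step()                    # cut and execute the pending XLA graph

if __name__ == "__main__":
    xmp.spawn(_mp_fn, args=(), nprocs=8)  # e.g. 8 cores on a v3-8
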
gist:cdf685b8b3ce20b0221e1842782bce74
text: a b c </s> d e f g </s>
Suppose the model is trained with a context length of 4.
Then the most favorable way to evaluate your model's perplexity is:
batch 1: a b c </s>
        |----------|   <-- count perplexity of this
batch 2: b c </s> d
                 |-|   <-- count perplexity of this
batch 3: c </s> d e
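
A hedged sketch of how this scheme can be implemented; model is assumed to map a (1, n) tensor of token ids to (1, n, vocab) logits, and the very first token of the text goes unscored since it has no left context:

import math
import torch
import torch.nn.functional as F

def sliding_window_ppl(model, tokens, ctx):
    # slide a window of `ctx` tokens one position at a time; score every
    # position in the first window, only the final position afterwards
    nlls = []
    for start in range(len(tokens) - ctx + 1):
        window = torch.tensor(tokens[start:start + ctx]).unsqueeze(0)  # (1, ctx)
        logits = model(window[:, :-1])
        nll = F.cross_entropy(logits.view(-1, logits.size(-1)),
                              window[:, 1:].reshape(-1), reduction="none")
        nlls.extend(nll.tolist() if start == 0 else [nll[-1].item()])
    return math.exp(sum(nlls) / len(nlls))
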
gist:a66ba69601cbd21a5a2218a33b6363f8
import sys
import time
import torch
import torch.nn as nn
import torch.optim as optim
platform = "gpu" if torch.cuda.is_available() else "tpu"
gist:fa49c10039c89b9472e6b0c59590b10b
Metric: CompileTime
  TotalSamples: 2
  Accumulator: 226ms137.620us
  ValueRate: 439ms863.608us / second
  Rate: 3.88139 / second
  Percentiles: 1%=109ms635.741us; 5%=109ms635.741us; 10%=109ms635.741us; 20%=109ms635.741us; 50%=118ms501.879us; 80%=118ms501.879us; 90%=118ms501.879us; 95%=118ms501.879us; 99%=118ms501.879us
Metric: DeviceLockWait
  TotalSamples: 101
  Accumulator: 31s573ms487.494us
  ValueRate: 754ms252.918us / second
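
For reference, a report like the one above is what torch_xla's debug metrics module prints; a minimal way to produce one after forcing some work onto an XLA device:

import torch
import torch_xla.core.xla_model as xm
import torch_xla.debug.metrics as met

device = xm.xla_device()
x = torch.randn(8, 1024, device=device)
print((x @ x.t()).sum().item())   # forces compilation and execution
print(met.metrics_report())       # CompileTime, DeviceLockWait, etc.
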
benchmark.py
import sys
import time
import torch
import torch.nn as nn
import torch.optim as optim
#platform = "tpu"
platform = "gpu"
gist:333fc9c2e1dc8e3ebbec933905ccc905
# maybe change "fairseq-benchmark" below to your own tag
BENCHMARK=4node aws ec2 run-instances \
  --image-id ami-0707a23a1930bb11c \
  --count 4 \
  --instance-type p3dn.24xlarge \
  --key-name fair-$USER \
  --tag-specifications "ResourceType=instance,Tags=[{Key=fair-user,Value=$USER},{Key=fairseq-benchmark,Value=''}]" \
  --placement "GroupName=${PLACEMENT_GROUP_NAME}" \
  --network-interfaces "DeviceIndex=0,InterfaceType=efa,Groups=${SECURITY_GROUP_ID},SubnetId=${SUBNET_ID}" \
  --block-device-mappings file://block_device_mappings.gp2.json
fairseq_benchmark_masked_lm.c10d.200mb.sh
#!/bin/bash
source activate fairseq
# expects DDP_BACKEND, BUCKET_CAP_MB and DATASET_SIZE to be set in the environment
fairseq-train \
  --no-save --disable-validation \
  --task dummy_masked_lm --masked-lm-only \
  --arch bert_large --num-segment 0 \
  --optimizer adam --lr 1e-4 \
  --max-sentences 8 --update-freq 1 \
  --fp16 \
  --ddp-backend $DDP_BACKEND --bucket-cap-mb $BUCKET_CAP_MB \
  --tokens-per-sample 512 --dataset-size $DATASET_SIZE \
  --criterion masked_lm_loss \
  --log-format json --log-interval 10 \
  --max-epoch 1