Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/env python
import argparse
import gzip
import json
import requests
from io import BytesIO, StringIO
from urllib.parse import urlencode
#!/bin/bash
# Split a generation log into hypothesis and reference files and score them.
#
# Usage: <this script> GEN
#   GEN  path to the generation log. Lines starting with "H" supply the system
#        output (tab-separated fields 3 onward); lines starting with "T" supply
#        the references (tab-separated fields 2 onward).
#        NOTE(review): this looks like fairseq-generate output -- confirm.
#
# Writes GEN.sys and GEN.ref, then runs score.py on the two files.
if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    # Without a file operand grep would silently block reading stdin.
    echo "usage: $0 GEN" >&2
    exit 1
fi
GEN=$1
SYS=$GEN.sys
REF=$GEN.ref
# The perl substitution rewrites a hyphen sitting between two non-space
# characters as " ##AT##-##AT## ", splitting hyphenated compounds into tokens.
# Expansions are quoted so paths containing spaces or glob characters work.
grep '^H' "$GEN" | cut -f3- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > "$SYS"
grep '^T' "$GEN" | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > "$REF"
python score.py --sys "$SYS" --ref "$REF"
#!/bin/bash
# Benchmark run of fairseq-train on the dummy masked-LM task with bert_large.
# Checkpoints are not saved and validation is disabled; the run lasts one
# epoch and logs JSON every 10 updates.
#
# Required environment variables:
#   DDP_BACKEND    value passed to --ddp-backend
#   BUCKET_CAP_MB  value passed to --bucket-cap-mb
#   DATASET_SIZE   value passed to --dataset-size

# Fail fast with a clear message instead of letting an empty expansion make a
# flag swallow the option that follows it.
: "${DDP_BACKEND:?DDP_BACKEND must be set}"
: "${BUCKET_CAP_MB:?BUCKET_CAP_MB must be set}"
: "${DATASET_SIZE:?DATASET_SIZE must be set}"

source activate fairseq
fairseq-train \
    --no-save \
    --disable-validation \
    --task dummy_masked_lm \
    --masked-lm-only \
    --arch bert_large \
    --num-segment 0 \
    --optimizer adam \
    --lr 1e-4 \
    --max-sentences 8 \
    --update-freq 1 \
    --fp16 \
    --ddp-backend "$DDP_BACKEND" \
    --bucket-cap-mb "$BUCKET_CAP_MB" \
    --tokens-per-sample 512 \
    --dataset-size "$DATASET_SIZE" \
    --criterion masked_lm_loss \
    --log-format json \
    --log-interval 10 \
    --max-epoch 1
# Launch four p3dn.24xlarge instances with an EFA network interface.
# Reads PLACEMENT_GROUP_NAME, SECURITY_GROUP_ID, SUBNET_ID and USER from the
# environment and block_device_mappings.gp2.json from the current directory.
# The "fairseq-benchmark" tag key below can be replaced with your own tag.
BENCHMARK=4node aws ec2 run-instances \
    --instance-type p3dn.24xlarge \
    --count 4 \
    --image-id ami-0707a23a1930bb11c \
    --key-name fair-${USER} \
    --placement "GroupName = ${PLACEMENT_GROUP_NAME}" \
    --network-interfaces "DeviceIndex=0,InterfaceType=efa,Groups=${SECURITY_GROUP_ID},SubnetId=${SUBNET_ID}" \
    --block-device-mappings file://block_device_mappings.gp2.json \
    --tag-specifications "ResourceType=instance,Tags=[{Key=fair-user,Value=$USER},{Key=fairseq-benchmark,Value=''}]"
# Launch four p3dn.24xlarge instances with an EFA network interface.
# Reads PLACEMENT_GROUP_NAME, SECURITY_GROUP_ID, SUBNET_ID and USER from the
# environment and block_device_mappings.gp2.json from the current directory.
# The "fairseq-benchmark" tag key below can be replaced with your own tag.
BENCHMARK=4node aws ec2 run-instances \
    --instance-type p3dn.24xlarge \
    --count 4 \
    --image-id ami-0707a23a1930bb11c \
    --key-name fair-${USER} \
    --placement "GroupName = ${PLACEMENT_GROUP_NAME}" \
    --network-interfaces "DeviceIndex=0,InterfaceType=efa,Groups=${SECURITY_GROUP_ID},SubnetId=${SUBNET_ID}" \
    --block-device-mappings file://block_device_mappings.gp2.json \
    --tag-specifications "ResourceType=instance,Tags=[{Key=fair-user,Value=$USER},{Key=fairseq-benchmark,Value=''}]"
import sys
import time
import torch
import torch.nn as nn
import torch.optim as optim
# Platform name used by this script; the alternative value is "tpu".
platform = "gpu"
Metric: CompileTime
TotalSamples: 2
Accumulator: 226ms137.620us
ValueRate: 439ms863.608us / second
Rate: 3.88139 / second
Percentiles: 1%=109ms635.741us; 5%=109ms635.741us; 10%=109ms635.741us; 20%=109ms635.741us; 50%=118ms501.879us; 80%=118ms501.879us; 90%=118ms501.879us; 95%=118ms501.879us; 99%=118ms501.879us
Metric: DeviceLockWait
TotalSamples: 101
Accumulator: 31s573ms487.494us
ValueRate: 754ms252.918us / second
import sys
import time
import torch
import torch.nn as nn
import torch.optim as optim
# Choose the platform name from CUDA availability: "gpu" when CUDA is usable,
# otherwise "tpu".
if torch.cuda.is_available():
    platform = "gpu"
else:
    platform = "tpu"
text: a b c </s> d e f g </s>
Suppose the model is trained with a context length of 4.
Then the most favorable way to evaluate your model's perplexity is:
batch 1: a b c </s>
        |----------| <-- count perplexity of this
batch 2: b c </s> d
                 |-| <-- count perplexity of this
batch 3: c </s> d e
#!/usr/bin/env python3
import argparse
import time
import torch
import torch_xla
import torch_xla.core.xla_model as xm
import torch_xla.distributed.xla_multiprocessing as xmp