Skip to content

Instantly share code, notes, and snippets.


Heikki Arponen harpone

View GitHub Profile
harpone /
Created Jan 15, 2021
Test Webdataset with torch-xla multiprocessing distributed setting
from itertools import islice
import os
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
import numpy as np
import torch_xla.distributed.parallel_loader as pl
import torch_xla.core.xla_model as xm
import torch_xla.distributed.xla_multiprocessing as xmp
harpone /
Last active Dec 3, 2020
Testing/profiling webdataset data loading speed issue
from itertools import islice
from munch import Munch
import sys, os
from torch.utils.data import DataLoader
from torchvision import transforms
import time
import webdataset as wds
harpone /
Last active Dec 2, 2020
Differentiable k-nearest neighbor (Kozachenko-Leonenko) based estimates of KL-divergence and entropy
MIT License
knn, kl_div, entropy Copyright (c) 2017 Heikki Arponen
import torch
def knn(x, y, k=3, last_only=False, discard_nearest=True):
harpone /
Created Sep 24, 2020
Lamb optimizer that doesn't work on TPUs
class Lamb(Optimizer):
r"""Implements Lamb algorithm.
It has been proposed in `Large Batch Optimization for Deep Learning: Training BERT in 76 minutes`_.
params (iterable): iterable of parameters to optimize or dicts defining
parameter groups
lr (float, optional): learning rate (default: 1e-3)
betas (Tuple[float, float], optional): coefficients used for computing
running averages of gradient and its square (default: (0.9, 0.999))
eps (float, optional): term added to the denominator to improve
harpone / open_images_metrics_report.txt
Created Jun 10, 2020
torch-xla metrics report for Open Images segmentation model
View open_images_metrics_report.txt
Metric: CompileTime
TotalSamples: 6
Accumulator: 02m08s502ms131.239us
ValueRate: 586ms507.322us / second
Rate: 0.0275528 / second
Percentiles: 1%=054ms401.673us; 5%=054ms401.673us; 10%=054ms401.673us; 20%=04s843ms345.108us; 50%=37s174ms061.499us; 80%=39s225ms175.754us; 90%=41s025ms091.573us; 95%=41s025ms091.573us; 99%=41s025ms091.573us
Metric: DeviceLockWait
TotalSamples: 577
Accumulator: 05m37s032ms777.084us
ValueRate: 362ms844.572us / second
View gist:c50b80a1f0c2b0bd401e720fd248d0b0
path = './National_Custom_Data.csv' # obtained by downloading all 'National' data from CDC for all age groups and seasons
df = pd.read_csv(path, thousands=',')
fig, ax = plt.subplots(1, 2, figsize=(20, 8))
deaths = df['TOTAL DEATHS'].values[::-1].copy()
# Last season seems to be in reverse:
deaths[-34:] = deaths[-34:][::-1]
class GCSDataset(Dataset):
"""Generic PyTorch dataset for GCS. Streams data from GCS and (optionally) caches to local disk.
def __init__(self,
path_list=None, # TODO: list bucket/path contents if None
View metrics_report.txt
Metric: CompileTime
TotalSamples: 4
Accumulator: 30s441ms477.869us
ValueRate: 970ms854.584us / second
Rate: 0.127439 / second
Percentiles: 1%=015ms613.544us; 5%=015ms613.544us; 10%=015ms613.544us; 20%=015ms613.544us; 50%=14s494ms393.979us; 80%=16s710ms73.162us; 90%=16s710ms73.162us; 95%=16s710ms73.162us; 99%=16s710ms73.162us
Metric: DeviceLockWait
TotalSamples: 199
Accumulator: 26s508ms379.925us
ValueRate: 277ms99.827us / second
import tensorflow as tf
from tensorflow.python.framework import ops
import numpy as np
# Define custom py_func which takes also a grad op as argument:
def py_func(func, inp, Tout, stateful=True, name=None, grad=None):
# Need to generate a unique name to avoid duplicates:
rnd_name = 'PyFuncGrad' + str(np.random.randint(0, 1E+8))
View gist:3e5f53edee3ecb615f8f
import numpy as np
cimport cython
cimport numpy as np
from libc.stdint cimport uint32_t, int32_t
from libc.math cimport sqrt
from libc.math cimport fabs
from libc.math cimport pow