Skip to content

Instantly share code, notes, and snippets.

View pzelasko's full-sized avatar
🌴
Building stuff!

Piotr Żelasko pzelasko

🌴
Building stuff!
View GitHub Profile
@pzelasko
pzelasko / analyze_wer.py
Created April 5, 2024 23:44
Analyze where the most errors are found in ASR transcripts using a NeMo manifest with `text` and `pred_text` keys.
"""
Make sure to first run:
$ pip install click pandas lhotse kaldialign
"""
import click
import pandas as pd
from lhotse.serialization import load_jsonl
from kaldialign import align, bootstrap_wer_ci
EPS = '*'
@pzelasko
pzelasko / download_citations.py
Created July 25, 2023 21:13
Download Google Scholar citation list as a persistent Python dict
# Note: after downloading ~250 citations further connections might be blocked by Google Scholar.
import time
import shelve
# Make sure to run: pip install scholarly tqdm
from scholarly import scholarly
from tqdm.auto import tqdm
search_query = scholarly.search_author('Piotr Żelasko') # replace the author
@pzelasko
pzelasko / lhotse_datapipes.py
Last active November 5, 2021 16:06
A draft of Lhotse + DataPipes integration
#!/usr/bin/env python
import warnings
from collections import deque, defaultdict
from functools import partial
from pathlib import Path
from typing import Optional
from lhotse import CutSet, load_manifest
from lhotse.utils import Seconds
@pzelasko
pzelasko / debug_pickle.py
Created May 23, 2021 02:52 — forked from jneight/debug_pickle.py
Debug pickling errors.
# from http://stackoverflow.com/questions/569754/how-to-tell-for-which-object-attribute-pickle-fails
"""
Show which fields cannot be pickled
"""
import pickle
def get_pickling_errors(obj,seen=None):
if seen is None:
seen = []
@pzelasko
pzelasko / install_k2.sh
Last active February 25, 2022 05:40
Steps needed to install K2 from scratch
#!/usr/bin/env bash
# Common steps
conda create -n k2 python=3.8
conda activate k2
conda install -c nvidia cudnn=7.6.5 cudatoolkit=10.2
conda install -c pytorch pytorch torchaudio
pip install cmake
mkdir build
pushd build
@pzelasko
pzelasko / pytorch_shared_memory_disable.py
Created March 1, 2021 16:59
Disable shared memory in PyTorch dataloader
import sys
import torch
from torch.utils.data import dataloader
from torch.multiprocessing import reductions
from multiprocessing.reduction import ForkingPickler
default_collate_func = dataloader.default_collate
def default_collate_override(batch):
@pzelasko
pzelasko / interactive_audio_plots.py
Created June 9, 2020 20:16 — forked from scottire/interactive_audio_plots.py
Interactive and clickable plots using Panel, Holoviz and Bokeh in Jupyter Notebooks
import numpy as np
import panel as pn
pn.extension()
import holoviews as hv
hv.notebook_extension("bokeh")
# hv.extension('matplotlib')
from holoviews.streams import Stream, Params
from scipy.io import wavfile
from scipy.signal import spectrogram
@pzelasko
pzelasko / gist:41d46cef4fd43caaa8eb8efbef7d4137
Created May 17, 2020 21:27
Very poor man's English text normalization
# Crude English text normalization filter (reads stdin, writes stdout):
#   1. sed  - delete every character that is not alphanumeric, whitespace,
#             an apostrophe or a hyphen. The /[[:punct:]]*/ address can match
#             the empty string, so the substitution is applied to every line.
#   2. tr   - convert uppercase letters to lowercase.
#   3. grep - drop every line that still contains a digit.
sed "/[[:punct:]]*/{ s/[^[:alnum:][:space:]'-]//g}" | tr '[:upper:]' '[:lower:]' | grep -v '[0-9]'
@pzelasko
pzelasko / fancy_logging.py
Last active March 30, 2020 21:38
Enable fancy logging
import logging
from sys import stdout
def fancy_logging(level=logging.DEBUG, stream=stdout) -> None:
    """Configure the root logger with a detailed, timestamped line format.

    Each record is rendered as
    ``[HH:MM:SS] LEVEL [logger_name.function_name:line_number] message``.

    Args:
        level: Logging threshold forwarded to ``logging.basicConfig``.
            Defaults to ``logging.DEBUG``.
        stream: Stream forwarded to ``logging.basicConfig``. Defaults to the
            ``sys.stdout`` object that was bound when this module was imported.

    Note:
        ``logging.basicConfig`` does nothing if the root logger already has
        handlers configured, so call this before any other logging setup.
    """
    logging.basicConfig(
        level=level,
        format="[%(asctime)s] %(levelname)s [%(name)s.%(funcName)s:%(lineno)d] %(message)s",
        datefmt="%H:%M:%S",
        stream=stream,
    )
@pzelasko
pzelasko / parsync
Last active May 19, 2023 02:55
Parallel rsync for downloading a directory from remote server
#!/bin/bash
# Download a directory from a remote server in parallel: list the entries of
# SOURCE_DIR on HOST over ssh, then start one rsync per entry via GNU parallel.
#
# Usage: parsync HOST SOURCE_DIR TARGET_DIR

# Fail early with a usage message instead of invoking ssh/rsync with an empty
# host or empty paths when arguments are missing.
if [ "$#" -lt 3 ]; then
  echo "Usage: $0 HOST SOURCE_DIR TARGET_DIR" >&2
  exit 1
fi

host="$1"
source_dir="$2"
target_dir="$3"

# Each line printed by the remote `ls` is substituted for {} in the rsync
# command; -j8 lets parallel run up to 8 rsync jobs at the same time.
ssh "$host" ls "$source_dir" | parallel -j8 -v --sshdelay 0.2 rsync -raz --progress "$host":"$source_dir"/{} "$target_dir"/{}