Skip to content

Instantly share code, notes, and snippets.

View vmarkovtsev's full-sized avatar

Vadim Markovtsev vmarkovtsev

View GitHub Profile
def sentry_span(func):
if asyncio.iscoroutinefunction(func):
@async_wraps(func)
async def wrapped_async_sentry_span(*args, **kwargs):
__tracebackhide__ = True
with sentry_sdk.Hub(sentry_sdk.Hub.current):
with sentry_sdk.start_span(op=func.__qualname__):
return await func(*args, **kwargs)
return wrapped_async_sentry_span
@vmarkovtsev
vmarkovtsev / .bashrc
Last active July 18, 2021 09:21
Shell notifications
# apt install xdotool dateutils
# if "history" prints leading whitespace before the entry number, change -f3 to -f4 in the cut call below
# insert at the bottom:
# Add an "alert" alias for long running commands. Use like so:
# sleep 10; alert
alias alert='notify-send --urgency=low -t 5000 -i "$([ $? = 0 ] && echo terminal || echo error)" "$(dateutils.ddiff $(HISTTIMEFORMAT="%FT%T " history 1|cut -d\ -f3) $(date +%FT%T))" "$(HISTTIMEFORMAT= history 1|sed -e '\''s/^\s*[0-9]\+\s*//;s/[;&|]\s*alert$//'\'')"'
import random
random.seed(777)
dataset = {}
for acc, members in acc_membership.items():
if len(members) < 10 or len(members) > 1000:
# too small or too big accounts
continue
group1 = {}
group2 = {}
for member in members:
import gzip
from collections import defaultdict
import numpy as np
import pandas as pd
from tqdm import tqdm
# Number of rows to allocate for; hard-coded so the arrays below can be sized up front.
# Calculate the number of lines with: zcat commits.csv.gz | wc -l
size = 485226041
# One zero-initialized 20-byte string slot per line
# (presumably a raw commit hash -- TODO confirm against the loading code, not shown here).
hashes = np.zeros(size, dtype="S20")
# One unsigned 32-bit integer per line
# (presumably an author identifier -- TODO confirm against the loading code).
authors = np.zeros(size, dtype="u4")
import re
from metaphone import doublemetaphone
from unidecode import unidecode
# Runs of characters that are neither word characters nor spaces.
nonalphanumeric_re = re.compile(r"[^\w ]+")
# Runs of one or more spaces.
whitespace_re = re.compile(r" +")


def normalize_simple(name: str) -> str:
    """Lowercase *name* and reduce it to word characters split by single spaces.

    Each run of characters that is neither a word character nor a space is
    replaced with one space, after which consecutive spaces are collapsed
    into one. Leading and trailing spaces are kept (as a single space).
    """
    lowered = name.lower()
    without_symbols = nonalphanumeric_re.sub(" ", lowered)
    return whitespace_re.sub(" ", without_symbols)
from fuzzywuzzy import fuzz
def distance_ratio(s1: str, s2: str) -> int:
    """Return ``100 - fuzz.ratio(s1, s2)``.

    The similarity score is subtracted from 100 so that the result can be
    used as a distance (smaller means more alike).
    """
    similarity = fuzz.ratio(s1, s2)
    return 100 - similarity
def distance_join_ratio(s1: str, s2: str) -> int:
    """Return ``100 - fuzz.ratio`` of the two strings with all spaces removed.

    Stripping the spaces first makes the comparison ignore where (and
    whether) the inputs are split into words.
    """
    joined1 = s1.replace(" ", "")
    joined2 = s2.replace(" ", "")
    return 100 - fuzz.ratio(joined1, joined2)
def distance_sort_ratio(s1: str, s2: str) -> int:
    """Return ``100 - fuzz.token_sort_ratio(s1, s2)``.

    The similarity score is subtracted from 100 so that the result can be
    used as a distance (smaller means more alike).
    """
    similarity = fuzz.token_sort_ratio(s1, s2)
    return 100 - similarity
from lapjv import lapjv
import numpy as np
from typing import Callable, List
def greedy_match(names1: List[str],
names2: List[str],
compare: Callable[[str, str], int],
) -> np.ndarray:
result = np.zeros(len(names1), dtype=int)
for x, sx in enumerate(names1):
min_dist = 100
from tqdm.notebook import tqdm
results = []
for norm_name, normalize in tqdm((("simple", normalize_simple),
("unidecode", normalize_unidecode),
("unidecode_metaphone", normalize_unidecode_metaphone))):
for comp_name, compare in tqdm((("ratio", distance_ratio),
("sort_ratio", distance_sort_ratio),
("set_ratio", distance_set_ratio),
("ratio_join", distance_ratio_join),
@vmarkovtsev
vmarkovtsev / README.md
Last active January 21, 2021 12:52
How to configure bash to send a notification when a command finishes while the terminal is unfocused

Open ~/.bashrc. Add export WINDOWID=... near the top of the file, as follows:

# ~/.bashrc: executed by bash(1) for non-login shells.
# see /usr/share/doc/bash/examples/startup-files (in the package bash-doc)
# for examples

# If not running interactively, don't do anything
case $- in
 *i*) ;;
def save_image(tensor: Union[np.ndarray, tf.Tensor], path: str):
    """Save *tensor* to *path* as an RGB image.

    Objects exposing a ``numpy()`` method are converted through it first.
    Values are clamped to the 0..255 range before the cast to ``uint8``,
    and all size-1 axes are squeezed away before the image is built.
    """
    array = tensor.numpy() if hasattr(tensor, "numpy") else tensor
    # Clamp before casting so out-of-range values cannot overflow uint8.
    clipped = np.clip(array, 0, 255)
    pixels = np.squeeze(clipped).astype(np.uint8)
    Image.fromarray(pixels, "RGB").save(path)