Skip to content

Instantly share code, notes, and snippets.

@xLaszlo
xLaszlo / DataFile
Last active January 6, 2023 17:53
File-like class to store dataclasses or pydantic classes as gzipped JSONL files
import gzip
import json
from dataclasses import asdict
from pydantic import BaseModel
class DataFile:
def __init__(self, data_type, filename, mode, loader=None):
if mode not in ['r', 'w', 'rt', 'wt']:
@xLaszlo
xLaszlo / repo_data.py
Created December 19, 2022 19:03
How to get information about a Git repository in Python
# !pip install GitPython
from git import Repo
from git.exc import NoSuchPathError
from git.exc import InvalidGitRepositoryError
class RepoData:
def __init__(self, repo_directory):
self.repo_directory = repo_directory
self.repo_exists = False
import re
import os
import json
import typer
import pandas as pd
def nb_query(query, fnames=None):
if isinstance(query, str):
query_fun = lambda line: re.match(f'.*{query}.*', line)
@xLaszlo
xLaszlo / dataclasses_howto.py
Created August 13, 2022 11:50
Dataclasses howto (code for the carbon.now.sh images)
import json
from datetime import datetime
from datetime import date
from pydantic.dataclasses import dataclass
from dataclasses import asdict
@dataclass(frozen=True, order=True)
class Foo:
name: str
int_value: int
@xLaszlo
xLaszlo / VoseAliasRNG
Created December 19, 2020 12:11
Fast alias sampling using Vose's initialisation
# from http://www.keithschwarz.com/darts-dice-coins/ by Keith Schwarz (htiek@cs.stanford.edu)
# and https://github.com/asmith26/Vose-Alias-Method/blob/master/vose_sampler/vose_sampler.py
class VoseAlias:
def __init__(self, probs, seed=42, rng=None):
self.N = len(probs)
self.rng = rng or np.random.default_rng(seed)
self.aliases = np.zeros(self.N, dtype=np.int64)
self.probs = np.zeros(self.N)
@xLaszlo
xLaszlo / LICENSE
Last active March 5, 2024 17:33
This license applies to all public gists at https://gist.github.com/xLaszlo
MIT License
Copyright (c) 2024 Laszlo Sragner
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
@xLaszlo
xLaszlo / string_searcher
Last active December 19, 2020 12:08
Simple approximate string search
import numpy as np
import scipy.sparse as sps
from collections import Counter
from sklearn.feature_extraction.text import CountVectorizer
# Use sklearn's vectorizers with a custom tokenizer to turn a string into a one-hot vector of its 2- and 3-character substrings.
# Store a normalised version in a sparse matrix.
class StringSearcher: