This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gzip | |
import json | |
from dataclasses import asdict | |
from pydantic import BaseModel | |
class DataFile: | |
def __init__(self, data_type, filename, mode, loader=None): | |
if mode not in ['r', 'w', 'rt', 'wt']: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# !pip install GitPython | |
from git import Repo | |
from git.exc import NoSuchPathError | |
from git.exc import InvalidGitRepositoryError | |
class RepoData: | |
def __init__(self, repo_directory): | |
self.repo_directory = repo_directory | |
self.repo_exists = False |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import os | |
import json | |
import typer | |
import pandas as pd | |
def nb_query(query, fnames=None): | |
if isinstance(query, str): | |
query_fun = lambda line: re.match(f'.*{query}.*', line) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from datetime import datetime | |
from datetime import date | |
from pydantic.dataclasses import dataclass | |
from dataclasses import asdict | |
@dataclass(frozen=True, order=True) | |
class Foo: | |
name: str | |
int_value: int |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# from http://www.keithschwarz.com/darts-dice-coins/ by Keith Schwarz (htiek@cs.stanford.edu) | |
# and https://github.com/asmith26/Vose-Alias-Method/blob/master/vose_sampler/vose_sampler.py | |
class VoseAlias: | |
def __init__(self, probs, seed=42, rng=None): | |
self.N = len(probs) | |
self.rng = rng or np.random.default_rng(seed) | |
self.aliases = np.zeros(self.N, dtype=np.int64) | |
self.probs = np.zeros(self.N) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
MIT License | |
Copyright (c) 2024 Laszlo Sragner | |
Permission is hereby granted, free of charge, to any person obtaining a copy | |
of this software and associated documentation files (the "Software"), to deal | |
in the Software without restriction, including without limitation the rights | |
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
copies of the Software, and to permit persons to whom the Software is | |
furnished to do so, subject to the following conditions: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import scipy.sparse as sps | |
from collections import Counter | |
from sklearn.feature_extraction.text import CountVectorizer | |
# Use sklearn's vectorizers with a custom tokenizer to turn a string into a one-hot vector of its 2- and 3-character substrings. | |
# Store a normalised version of these vectors in a sparse matrix. | |
class StringSearcher: | |