Skip to content

Instantly share code, notes, and snippets.

@xflr6
xflr6 / MakeElanSentences.ipynb
Last active October 19, 2022 13:51
Make a skeleton ELAN document from (text, translation) pairs
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@xflr6
xflr6 / Pandas_read_sparql_query.ipynb
Last active May 22, 2022 09:57
Read pandas.DataFrame from SPARQL query
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@xflr6
xflr6 / ControlCharacters.ipynb
Last active May 22, 2022 09:56
Drop Glottolog bibfiles for control characters
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@xflr6
xflr6 / Wikidata.ipynb
Last active May 22, 2022 09:56
Check Glottolog -> Wikidata mapping
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
"""Benchmark FCA concepts lattice generation with bob_ross.csv."""
from collections.abc import Iterable, Iterator, Sequence
import csv
import os
import pathlib
import time
from typing import NamedTuple, Optional
import urllib.request
@xflr6
xflr6 / fca_variants.py
Last active June 4, 2022 14:16
Compare different methods to brute-force FCA concept generation (sets, longs, gmpy2, NumPy bools, NumPy uint64)
"""Compare different brute-force FCA concept generation methods."""
from collections.abc import Iterator, Sequence
from itertools import combinations, compress
import time
import gmpy2
import numpy as np
OBJECTS = ('1s', '1de', '1pe', '1di', '1pi',
@xflr6
xflr6 / walk_subdirs.py
Last active June 4, 2022 13:51
Compare subdirectory generator using os.walk() with one using scandir.scandir()
"""Compare two ways to iterate over subdirectories of a tree."""
from collections.abc import Iterator
import os
import platform
import time
START_DIR = 'c:\\Users' if platform.system() == 'Windows' else '/usr'
@xflr6
xflr6 / shasum_chunked.py
Last active June 4, 2022 18:02
Compare while-loop with break to for-loop with two-argument iter() for iterating over a large file in chunks
"""Compare three ways to iterate over a large file in chunks."""
import functools
import hashlib
import mmap
import os
import pathlib
import shutil
import time
import types
@xflr6
xflr6 / sa_sqlite3_regex.py
Last active June 4, 2022 13:37
Register a Python stdlib re handler with sqlite3 create_function() to use the SQLite REGEXP operator under SQLAlchemy
"""Use Python re for sqlite3 REGEXP operator with SQLAlchemy.
added in https://docs.sqlalchemy.org/en/14/changelog/migration_14.html#support-for-sql-regular-expression-operators
"""
import re
from typing import Optional
import sqlalchemy as sa
import sqlalchemy.orm
@xflr6
xflr6 / autotyp_nlevels.py
Last active June 4, 2022 13:32
Add missing autotyp variable N.levels information from metadata_overview.csv to metadata/*.yaml files
"""Insert missing https://www.autotyp.uzh.ch N.levels from overview into metadata files
see https://github.com/autotyp/autotyp-data/pull/7
"""
import csv
import operator
import pathlib
import regex