Skip to content

Instantly share code, notes, and snippets.

@xflr6
xflr6 / glottolog.ipynb
Last active December 17, 2018 20:34
Glottolog with Python
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@xflr6
xflr6 / rdfbuild.py
Last active June 4, 2022 07:45
Example building RDF with rdflib
"""Build RDF with rdflib and serialize in turtle format."""
import rdflib
from rdflib.namespace import DCTERMS, RDF, RDFS, SKOS
GOLD = rdflib.Namespace('http://purl.org/linguistics/gold/')
LANGUOID = rdflib.Namespace('http://glottolog.org/resource/languoid/id/')
VOID = rdflib.Namespace('http://rdfs.org/ns/void#')
@xflr6
xflr6 / bench_pg_array.py
Last active June 4, 2022 08:29
Benchmark PostgreSQL array vs. join performance
"""Benchmark PostgreSQL array vs. join performance.
Replicate http://shon.github.io/2015/12/21/postgres_array_performance.html
with proper join table indexes (uniqueness constraints) using SQLAlchemy.
$ python -i bench_pg_array.py
>>> setup()
$ python -m timeit -s "import bench_pg_array" "bench_pg_array.test_join()"
500 loops, best of 5: 445 usec per loop
@xflr6
xflr6 / languoids_csv.py
Last active January 8, 2022 14:08
Dump basic https://glottolog.org languoid info into CSV file
"""Dump basic https://glottolog.org languoid info to CSV file."""
import pandas as pd
ENGINE = 'postgresql://postgres@/glottolog3'
QUERY = '''
SELECT
l.id AS glottocode,
l.name,
@xflr6
xflr6 / fca_numpy.py
Last active June 4, 2022 14:00
Brute force FCA concepts by trying all combinations with NumPy
"""Generate concepts by trying all combinations with NumPy."""
from collections.abc import Iterator, Sequence
import itertools
import numpy as np
OBJECTS = ('1s', '1de', '1pe', '1di', '1pi',
'2s', '2d', '2p',
'3s.m', '3s.f', '3s.n',
@xflr6
xflr6 / langdoc_csv.py
Last active January 8, 2022 14:06
Download and combine https://glottolog.org/langdoc.csv parts using pandas
"""Combine https://glottolog.org/langdoc.csv parts."""
import urllib.parse
import pandas as pd
ENDPOINT = urllib.parse.urlparse('https://glottolog.org/langdoc.csv')
QUERY = {'sEcho': 1,
'iSortingCols': 1,
@xflr6
xflr6 / sa_iterparams.py
Last active June 4, 2022 08:40
Compare sqlalchemy inserts with sqlite3 executemany consuming an iterator
"""SQLAlchemy inserts vs. sqlite3 lazy executemany()."""
from collections.abc import Iterator
import time
import sqlalchemy as sa
import sqlalchemy.orm
ENGINE = sa.create_engine('sqlite:///spam.sqlite3')
@xflr6
xflr6 / sa_log_psycopg2.py
Last active June 4, 2022 08:43
Use psycopg2 LoggingConnection with sqlalchemy
"""Use psycopg2 LoggingConnection with SQLAlchemy."""
import logging
import psycopg2.extras
import sqlalchemy as sa
logging.basicConfig(level=logging.DEBUG)
@xflr6
xflr6 / subclasses.py
Last active June 4, 2022 10:54
Find all subclasses of a given class
"""Find all subclasses of a class with queue or stack."""
import collections
from collections.abc import Collection, Iterator
def itersubclasses(parent: type, *,
proper: bool = True,
exclude: Collection[type] = (type,)) -> Iterator[type]:
"""Yield `parent` subclasses recursively in breadth-first order."""
@xflr6
xflr6 / memorymapped.py
Last active June 4, 2022 18:12
More convenient context manager for mmap.mmap
"""More convenient context manager for mmap.mmap."""
from collections.abc import Iterator
import contextlib
import mmap
import os
@contextlib.contextmanager
def memorymapped_compat(path: os.PathLike | str) -> Iterator[mmap.mmap]: