Skip to content

Instantly share code, notes, and snippets.

View bob_ross.py
# bob_ross.py - benchmark FCA concepts lattice generation with bob_ross.csv
import csv
import pathlib
import time
import urllib.request
import concepts
URL = ('https://raw.githubusercontent.com/fivethirtyeight/data'
@xflr6
xflr6 / fca_variants.py
Last active Jan 1, 2021
Compare different methods to brute-force FCA concept generation (sets, longs, gmpy2, NumPy bools, NumPy uint64)
View fca_variants.py
# fca_variants.py - compare different brute-force concept generation methods
from itertools import combinations, compress
import time
import gmpy2
import numpy as np
OBJECTS = ('1s', '1de', '1pe', '1di', '1pi',
'2s', '2d', '2p',
@xflr6
xflr6 / walk_subdirs.py
Last active Jan 1, 2021
Compare subdirectory generator using os.walk() with one using scandir.scandir()
View walk_subdirs.py
# walk_subdirs.py - compare two ways to iterate over subdirectories of a tree
import os
import platform
import time
START_DIR = 'c:\\Users' if platform.system() == 'Windows' else '/usr'
def itersubdirs_walk(start_dir):
@xflr6
xflr6 / shasum_chunked.py
Last active Jan 1, 2021
Compare while-loop with break to for-loop with two-argument iter() for iterating over a large file in chunks
View shasum_chunked.py
# shasum_chunked.py - compare three ways to iterate over a large file in chunks
import contextlib
import functools
import hashlib
import mmap
import pathlib
import time
PATH = pathlib.Path('spam.bin')
@xflr6
xflr6 / sa_sqlite3_regex.py
Last active Dec 14, 2020
Register a Python stdlib re handler with sqlite3 create_function() to use the SQLite REGEXP operator under SQLAlchemy
View sa_sqlite3_regex.py
# sa_sqlite3_regex.py - use Python re for sqlite3 REGEXP operator w/ sqlalchemy
import re
import sqlalchemy as sa
import sqlalchemy.ext.declarative
@sa.event.listens_for(sa.engine.Engine, 'connect')
def sqlite_engine_connect(dbapi_conn, connection_record):
dbapi_conn.create_function('regexp', 2, _regexp)
@xflr6
xflr6 / autotyp_nlevels.py
Last active Apr 17, 2020
Add missing autotyp variable N.levels information from metadata_overview.csv to metadata/*.yaml files
View autotyp_nlevels.py
#!/usr/bin/env python3
# autotyp_nlevels.py - insert missing N.levels from overview into metadata files
import csv
import operator
import pathlib
import regex
import yaml
@xflr6
xflr6 / unique_null.py
Last active Jan 1, 2021
Compare different ways to have unique columns with nulls under SQLite and PostgreSQL
View unique_null.py
# unique_null.py - compare ways to have unique columns w/ nulls
import os
import subprocess
import time
import uuid
import sqlalchemy as sa
import sqlalchemy.ext.declarative
@xflr6
xflr6 / pl_pgsql.py
Last active Jan 1, 2021
SQL-injection-safe dynamic query execution via PL/pgSQL quote_ident() and format('%I')
View pl_pgsql.py
# pl_pgsql.py - sql injection safe dynamic query with pl/pgsql
import sqlalchemy as sa
UNIQUE_NULL = [('contributioncontributor', ['contribution_pk', 'contributor_pk'], []),
('contributionreference', ['contribution_pk', 'source_pk', 'description'], []),
('editor', ['dataset_pk', 'contributor_pk'], []),
('languageidentifier', ['language_pk', 'identifier_pk'], []),
('languagesource', ['language_pk', 'source_pk'], []),
('sentencereference', ['sentence_pk', 'source_pk', 'description'], []),
@xflr6
xflr6 / decorator.py
Last active Sep 23, 2017
Decorator with an optional parameter
View decorator.py
import functools
funcs = {}
def register(func=None, name=None):
if func is None:
return functools.partial(register, name=name)
if name is None:
name = func.__name__
funcs[name] = func
@xflr6
xflr6 / itersplit.py
Last active Apr 17, 2020
Split a string into chunks by a pattern matching at the start of each item
View itersplit.py
"""Split a string into chunks by a pattern matching at the start of each item.
>>> list(itersplit(r'!', 'spam !eggs !ham'))
['spam ', '!eggs ', '!ham']
>>> list(itersplit(r'X', 'spam !eggs !ham'))
['spam !eggs !ham']
>>> list(itersplit(r'!', '!spam !eggs !ham'))
['', '!spam ', '!eggs ', '!ham']