Skip to content

Instantly share code, notes, and snippets.

@xflr6
xflr6 / unique_null.py
Last active June 4, 2022 13:19
Compare different ways to have unique columns with nulls under SQLite and PostgreSQL
"""Compare ways to have unique columns with NULLs."""
import os
import subprocess
import time
import uuid
import sqlalchemy as sa
import sqlalchemy.orm
@xflr6
xflr6 / pl_pgsql.py
Last active December 22, 2021 23:42
SQL injection safe dynamic query execution via PL/pgSQL quote_ident() and format('%I')
"""SQL-injection safe dynamic query with pl/pgsql."""
import sqlalchemy as sa
UNIQUE_NULL = [('contributioncontributor', ['contribution_pk', 'contributor_pk'], []),
('contributionreference', ['contribution_pk', 'source_pk', 'description'], []),
('editor', ['dataset_pk', 'contributor_pk'], []),
('languageidentifier', ['language_pk', 'identifier_pk'], []),
('languagesource', ['language_pk', 'source_pk'], []),
('sentencereference', ['sentence_pk', 'source_pk', 'description'], []),
@xflr6
xflr6 / decorator.py
Last active June 4, 2022 13:13
Decorator with an optional parameter
"""Decorator with an optional parameter."""
from collections.abc import Callable
import functools
from typing import Optional
FUNCS = {}
def register(func: Optional[Callable] = None,
@xflr6
xflr6 / itersplit.py
Last active June 4, 2022 13:09
Split a string into chunks by a pattern matching at the start of each item
"""Split a string into chunks by a pattern matching at the start of each item.
>>> list(itersplit(r'!', 'spam !eggs !ham'))
['spam ', '!eggs ', '!ham']
>>> list(itersplit(r'X', 'spam !eggs !ham'))
['spam !eggs !ham']
>>> list(itersplit(r'!', '!spam !eggs !ham'))
['', '!spam ', '!eggs ', '!ham']
@xflr6
xflr6 / walk_gdrive.py
Last active January 31, 2024 19:11
Recursively traverse the directory tree of a Google Drive folder as a variation of os.walk()
"""os.walk() variation with Google Drive API."""
import os
from apiclient.discovery import build # pip install google-api-python-client
FOLDER = 'application/vnd.google-apps.folder'
def get_credentials(scopes, *,
@xflr6
xflr6 / gsheets.py
Last active December 7, 2023 15:37
Download all sheets of a Google Docs spreadsheet and export to individual CSV files
"""Download all sheets of a Google Docs spreadsheet as CSV."""
import contextlib, csv, itertools, os
from apiclient.discovery import build # pip install google-api-python-client
SHEET = '1dR13B3Wi_KJGUJQ0BZa2frLAVxhZnbz0hpwCcWSvb20'
def get_credentials(scopes, *, secrets='~/client_secrets.json', storage='~/storage.json'):
@xflr6
xflr6 / iceportal.py
Last active April 22, 2024 20:56
Download all available audio books from ICE portal
"""Download all available audio books from DB ICE Portal."""
import json
import os
import urllib.parse
import urllib.request
BASE = 'http://iceportal.de/api1/rs/'
@xflr6
xflr6 / feedsizes.py
Last active December 18, 2021 00:13
Compare the RSS feed enclosure length with the Content-Length header of the file when downloading the URL
"""Compare feed enclosure length with content-length of file url."""
import urllib.request
import xml.etree.ElementTree as etree
URL = 'https://feeds.feedburner.com/thebuglefeed?format=xml'
with urllib.request.urlopen(URL) as f:
tree = etree.parse(f)
@xflr6
xflr6 / xpath.py
Last active June 5, 2022 10:11
Use some advanced XPath features of lxml for scraping HTML/XML
"""Use advanced XPath features of lxml (see also scrapy parsel)."""
from __future__ import annotations
from typing import Optional
import urllib.request
import lxml.etree
import lxml.html
@xflr6
xflr6 / parse_url.py
Last active June 5, 2022 16:41
Compare different ways to get an HTML tree from a URL with streaming
"""Compare ways to return HTML tree streamed and parsed from a given URL."""
import contextlib
from typing import Literal, overload
import urllib.request
import xml.etree.ElementTree as etree
import certifi
import html5lib
import lxml.html