Skip to content

Instantly share code, notes, and snippets.

@xflr6
xflr6 / iceportal.py
Last active May 8, 2024 07:40
Download all available audio books from ICE portal
"""Download all available audio books from DB ICE Portal."""
import json
import os
import urllib.parse
import urllib.request
BASE = 'http://iceportal.de/api1/rs/'
@xflr6
xflr6 / walk_gdrive.py
Last active January 31, 2024 19:11
Recursively traverse the directory tree of a Google Drive folder as a variation of os.walk()
"""os.walk() variation with Google Drive API."""
import os
from apiclient.discovery import build # pip install google-api-python-client
FOLDER = 'application/vnd.google-apps.folder'
def get_credentials(scopes, *,
@xflr6
xflr6 / gsheets.py
Last active December 7, 2023 15:37
Download all sheets of a Google Docs spreadsheet and export to individual CSV files
"""Download all sheets of a Google Docs spreadsheet as CSV."""
import contextlib, csv, itertools, os
from apiclient.discovery import build # pip install google-api-python-client
SHEET = '1dR13B3Wi_KJGUJQ0BZa2frLAVxhZnbz0hpwCcWSvb20'
def get_credentials(scopes, *, secrets='~/client_secrets.json', storage='~/storage.json'):
@xflr6
xflr6 / urlretrieve.py
Last active June 20, 2023 15:11
Replacement for urllib.urlretrieve(url, filename) using the requests library
"""Implement `urllib.urlretrieve(url, filename)` with requests library."""
import contextlib
import os
import urllib
import requests
def urlretrieve(url: str,
@xflr6
xflr6 / MakeElanSentences.ipynb
Last active October 19, 2022 13:51
Make a skeleton ELAN document from (text, translation) pairs
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@xflr6
xflr6 / ethnologue.py
Last active August 16, 2022 19:44
Download and parse ethnologue.com language code tables
"""Download and parse https://www.ethnologue.com code files."""
from __future__ import annotations
from collections.abc import Iterable, Iterator, Mapping
import contextlib
import csv
import enum
import fnmatch
import functools
@xflr6
xflr6 / iso639p3.py
Last active August 16, 2022 19:44
Download and parse ISO 639-3 code tables from sil.org
"""Download and parse ISO 639-3 code tables from https://www.sil.org."""
from __future__ import annotations
from collections.abc import Iterable, Iterator, Mapping
import contextlib
import csv
import enum
import fnmatch
import functools
"""Benchmark FCA concepts lattice generation with bob_ross.csv."""
from collections.abc import Iterable, Iterator, Sequence
import csv
import os
import pathlib
import time
from typing import NamedTuple, Optional
import urllib.request
@xflr6
xflr6 / parse_url.py
Last active June 5, 2022 16:41
Compare different ways to get an HTML tree from a URL with streaming
"""Compare ways to return HTML tree streamed and parsed from a given URL."""
import contextlib
from typing import Literal, overload
import urllib.request
import xml.etree.ElementTree as etree
import certifi
import html5lib
import lxml.html
@xflr6
xflr6 / xpath.py
Last active June 5, 2022 10:11
Use some advanced XPath features of lxml for scraping html/xml
"""Use advanced XPath features of lxml (see also scrapy parsel)."""
from __future__ import annotations
from typing import Optional
import urllib.request
import lxml.etree
import lxml.html