Skip to content

Instantly share code, notes, and snippets.

View mnot's full-sized avatar

Mark Nottingham mnot

View GitHub Profile
@mnot
mnot / RSS.py
Created October 29, 2009 12:48
RSS.py: work with RSS channels as data structures
#!/usr/bin/env python
"""
RSS.py
Classes for working with RSS channels as arbitrary data structures.
Requires Python 2.2 or newer and PyXML 0.7.1 or newer.
ChannelBase - Base class for RSS Channels.
CollectionChannel - RSS Channel modeled as a URI-per-entry
@mnot
mnot / snowden-ietf93.md
Last active September 12, 2023 13:40
Transcript of Edward Snowden's comments at IETF93.
@mnot
mnot / crossref-lookup.applescript
Last active April 30, 2023 20:29
Look up PDFs on crossref.org in DevonThink
-- Look up document metadata on CrossRef.org in DevonThink 3
--
-- Currently sets:
-- * Created date to the document's publication date
-- * Title in document properties
-- * Author in document properties (first author only)
-- * Crossref DOI URL in document properties
-- * A tag for the type of document
-- * Finder comments to a formatted citation (see below)
@mnot
mnot / iri_to_uri.py
Created January 20, 2013 03:15
Convert an IRI to a URI
import urllib
import urlparse
def iri_to_uri(iri, encoding='Latin-1'):
"Takes a Unicode string that can contain an IRI and emits a URI."
scheme, authority, path, query, frag = urlparse.urlsplit(iri)
scheme = scheme.encode(encoding)
if ":" in authority:
host, port = authority.split(":", 1)
authority = host.encode('idna') + ":%s" % port
from collections import defaultdict
from datetime import datetime
import csv
import sys
__doc__ = """
This is a script that uses CSV input data (for example, collected by a Google Form) to find the acceptable times for a meeting.
The CSV columns are:
from typing import Tuple
def decode_integer(data: bytes) -> Tuple[int, int]:
v = data[0]
prefix = v >> 6
length = 1 << prefix
v = v & 0x3F
for i in range(1, length):
v = (v << 8) + data[i]
@mnot
mnot / uri_validate.py
Last active March 3, 2022 14:32
uri_validate.py: Validation regex for URIs, URI references, and relative URIs
#!/usr/bin/env python
"""
Regex for URIs
These regexes are directly derived from the collected ABNF in RFC 3986
(except for DIGIT, ALPHA and HEXDIG, which are defined by RFC 2234).
Additional regexes are defined to validate the following schemes according to
their respective specifications:
@mnot
mnot / wikipedia_links.py
Created February 25, 2011 00:30
Pull links out of a Wikipedia XML dump, real fast.
#!/usr/bin/env python3
""" Use the W3C API to understand what document licenses are in use."""
import re
import sys
import time
from urllib.parse import urlparse, urlunparse, urljoin
from bs4 import BeautifulSoup, SoupStrainer
#!/usr/bin/python
from __future__ import division
from bitarray import bitarray
from hashlib import sha256
from math import log
import struct
def gcs_hash(w, (N,P)):