Skip to content

Instantly share code, notes, and snippets.

Created December 7, 2020 21:00
Convert the news section of the Datasette README to YAML
from collections import OrderedDict

import dateutil
import dateutil.parser
import httpx
import yaml
# NOTE(review): the URL is empty in this copy of the script; it must be set to
# the address of the raw README text (one containing "## News" and
# "## Installation" headings) before the script can run.
url = ""
response = httpx.get(url)
# Fail loudly on an HTTP error status instead of trying to parse an error page,
# which would otherwise surface as an IndexError in the split below.
response.raise_for_status()
content = response.text
# Keep only the text between the "## News" and "## Installation" headings.
news = content.split("## News")[1].split("## Installation")[0].strip()
# Group the news text into items. A line starting with `* ` opens a new news
# item; any other line is a continuation of the previous item.
items = []
current = []
for line in news.split("\n"):
    if line.startswith("* "):
        # A new bullet begins: store the item collected so far, if any.
        if current:
            items.append(current)
        current = [line]
    else:
        current.append(line)
# Store the final item, which no following bullet has flushed.
if current:
    items.append(current)
# Turn each grouped item (a list of lines) into a {"date", "body"} record.
cleaned = []
for item in items:
    # Drop the two-character "* " marker from the first line, then split on
    # the first colon only so that colons later in the text are preserved.
    date, rest = item[0][2:].split(":", 1)
    lines = [rest.strip()] + item[1:]
    cleaned.append(
        {
            "date": dateutil.parser.parse(date).date().isoformat(),
            "body": "\n".join(lines),
        }
    )
class literal(str):
    """Marker ``str`` subclass so a dedicated YAML representer can be attached.

    It adds no behavior of its own; instances compare and behave like ``str``.
    """
def literal_presenter(dumper, data):
    """Represent *data* as a YAML string scalar using the ``>`` scalar style.

    Args:
        dumper: Object providing ``represent_scalar`` (a PyYAML dumper).
        data: The string value to emit.

    Returns:
        Whatever ``dumper.represent_scalar`` returns for the scalar.
    """
    # The standard YAML string tag is "tag:yaml.org,2002:str"; the previous
    # ",2002:str" was missing its "tag:yaml.org" prefix.
    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=">")
# Register literal_presenter as the YAML representer for `literal` instances.
yaml.add_representer(literal, literal_presenter)
def represent_ordereddict(dumper, data):
    """Represent a mapping as a YAML mapping node, keeping its iteration order.

    Args:
        dumper: Object providing ``represent_data`` (a PyYAML dumper).
        data: Mapping whose ``items()`` are emitted in iteration order.

    Returns:
        A ``yaml.nodes.MappingNode`` holding one (key node, value node) pair
        per entry of *data*.
    """
    value = [
        (dumper.represent_data(item_key), dumper.represent_data(item_value))
        for item_key, item_value in data.items()
    ]
    # The standard YAML mapping tag is "tag:yaml.org,2002:map"; the previous
    # ",2002:map" was missing its "tag:yaml.org" prefix.
    return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value)
# Register represent_ordereddict as the YAML representer for OrderedDict.
yaml.add_representer(OrderedDict, represent_ordereddict)
# Print the cleaned items as a YAML list of date/body mappings. Each date
# string is parsed back into a `date` object and each body is wrapped in
# `literal` before dumping.
# NOTE(review): the opening `print(yaml.dump([OrderedDict([` was missing from
# this copy of the script and has been reconstructed from the closing brackets
# and the `width=100` keyword - confirm against the original.
print(
    yaml.dump(
        [
            OrderedDict(
                [
                    ("date", dateutil.parser.parse(item["date"]).date()),
                    ("body", literal(item["body"])),
                ]
            )
            for item in cleaned
        ],
        width=100,
    )
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment