Skip to content

Instantly share code, notes, and snippets.

@simonw
Created December 7, 2020 21:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save simonw/6a59833eee83bec1f1317c7f80406275 to your computer and use it in GitHub Desktop.
Save simonw/6a59833eee83bec1f1317c7f80406275 to your computer and use it in GitHub Desktop.
Convert the news section of the Datasette README to YAML
from collections import OrderedDict

import httpx, dateutil, yaml
import dateutil.parser
# The README is fetched at a fixed commit hash (embedded in the URL), so the
# headings this script splits on are the ones present in that revision.
url = "https://raw.githubusercontent.com/simonw/datasette/8ae0f9f7f0d644b0161165a1084f53acd2786f7c/README.md"
response = httpx.get(url)
# Fail loudly on an HTTP error rather than trying to parse an error page
# (which would otherwise surface as an IndexError on the split below).
response.raise_for_status()
content = response.text

# Keep only the text between the "## News" and "## Installation" headings.
news = content.split("## News")[1].split("## Installation")[0].strip()

# If a line starts with `* ` then it is a news item - if it does not it is a
# continuation of the previous item.
items = []
current = []
for line in news.split("\n"):
    if line.startswith("* "):
        if current:
            items.append(current)
        current = [line]
    elif current:
        current.append(line)
    # Otherwise the line precedes the first bullet: it belongs to no item, and
    # keeping it would create an entry whose first line is not "* <date>: ...".
if current:
    items.append(current)

# Turn each group of lines into {"date": ISO date string, "body": text}.
cleaned = []
for item in items:
    # item[0] has the form "* <date>: <text>". Drop the "* " marker and split
    # on the first colon only, so colons inside the text are preserved.
    date, rest = item[0][2:].split(":", 1)
    lines = [rest.strip()] + item[1:]
    cleaned.append(
        {
            "date": dateutil.parser.parse(date).date().isoformat(),
            "body": "\n".join(lines),
        }
    )
class literal(str):
    """Marker subclass of ``str`` that adds no behaviour of its own.

    Wrapping a string in this type lets a YAML representer be registered for
    it specifically, leaving plain ``str`` values to be dumped as usual.
    """
def literal_presenter(dumper, data):
    """Represent ``data`` as a YAML string scalar using block style ``>``.

    Note that ``>`` is YAML's *folded* block style (the literal style would be
    ``|``), despite this function's name.

    Args:
        dumper: object exposing ``represent_scalar(tag, value, style=...)``.
        data: the string value to represent.

    Returns:
        Whatever ``dumper.represent_scalar`` returns for the value.
    """
    string_tag = 'tag:yaml.org,2002:str'
    return dumper.represent_scalar(string_tag, data, style='>')
# Register literal_presenter as the representer PyYAML uses for `literal` values.
yaml.add_representer(data_type=literal, representer=literal_presenter)
def represent_ordereddict(dumper, data):
    """Build a YAML mapping node from ``data``, keeping its iteration order.

    Each key and each value is converted with ``dumper.represent_data`` (key
    first, then value), and the resulting node pairs are placed in a
    ``MappingNode`` in the order ``data.items()`` yields them.

    Args:
        dumper: object exposing ``represent_data(value)``.
        data: mapping whose items are to be represented.

    Returns:
        A ``yaml.nodes.MappingNode`` tagged ``tag:yaml.org,2002:map``.
    """
    pairs = [
        (dumper.represent_data(key), dumper.represent_data(val))
        for key, val in data.items()
    ]
    return yaml.nodes.MappingNode('tag:yaml.org,2002:map', pairs)
# Have PyYAML dump OrderedDict instances through represent_ordereddict.
yaml.add_representer(OrderedDict, represent_ordereddict)

# Build one ordered mapping per news item: "date" first (as a date object),
# then "body" wrapped in `literal` so it goes through literal_presenter.
records = []
for item in cleaned:
    record = OrderedDict()
    record["date"] = dateutil.parser.parse(item["date"]).date()
    record["body"] = literal(item["body"])
    records.append(record)

print(yaml.dump(records, width=100))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment