simonw/news_to_yaml.py

## news_to_yaml.py
from collections import OrderedDict

import dateutil
import dateutil.parser
import httpx
import yaml

# The URL embeds a commit hash, so the README layout this script relies on
# ("## News" followed later by "## Installation") stays fixed.
url = "https://raw.githubusercontent.com/simonw/datasette/8ae0f9f7f0d644b0161165a1084f53acd2786f7c/README.md"

response = httpx.get(url)
# Fail loudly on an HTTP error instead of trying to parse an error page.
response.raise_for_status()
content = response.text

# Keep only the text between the "## News" and "## Installation" headings.
news = content.split("## News")[1].split("## Installation")[0].strip()

# Group the section into items: a line starting with "* " opens a new item and
# any other line is a continuation of the item before it.
items = []
current = []
for line in news.split("\n"):
    if line.startswith('* '):
        if current:
            items.append(current)
        current = [line]
    elif current:
        current.append(line)
    # Lines ahead of the first bullet belong to no item and are skipped; they
    # would otherwise form a bogus item that has no "<date>:" prefix.
if current:
    items.append(current)

# Split the first line of every item into its date prefix and remaining text.
cleaned = []
for item in items:
    # item[0] is "* <date>: <text>": drop the "* " marker and split on the
    # first colon only, because the text itself may contain colons.
    date, rest = item[0][2:].split(":", 1)
    lines = [rest.strip()] + item[1:]
    cleaned.append({"date": dateutil.parser.parse(date).date().isoformat(), "body": "\n".join(lines)})

class literal(str):
    """String subtype that marks a value for custom YAML representation.

    It adds nothing to ``str``; the distinct type only exists so that a
    representer can be registered for it without affecting plain strings.
    """

def literal_presenter(dumper, data):
    """Represent ``data`` as a YAML string scalar using the ``>`` style.

    Args:
        dumper: Object exposing ``represent_scalar(tag, value, style=...)``.
        data: The string value to represent.

    Returns:
        Whatever ``dumper.represent_scalar`` returns for the value.
    """
    string_tag = 'tag:yaml.org,2002:str'
    return dumper.represent_scalar(string_tag, data, style='>')
# Register literal_presenter as the representer for `literal` instances.
yaml.add_representer(literal, literal_presenter)

def represent_ordereddict(dumper, data):
    """Build a YAML mapping node from ``data``, one entry per dict item.

    Keys and values are each converted with ``dumper.represent_data`` and the
    resulting node pairs are kept in the order ``data.items()`` yields them.

    Args:
        dumper: Object exposing ``represent_data(value)``.
        data: The mapping to represent.

    Returns:
        A ``yaml.nodes.MappingNode`` tagged ``tag:yaml.org,2002:map``.
    """
    node_pairs = [
        (dumper.represent_data(key), dumper.represent_data(val))
        for key, val in data.items()
    ]
    return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', node_pairs)

# Route OrderedDict instances through represent_ordereddict when dumping.
yaml.add_representer(OrderedDict, represent_ordereddict)

# One mapping per news item: the ISO date string is parsed back into a date
# object and the body is wrapped in `literal` so its representer applies.
news_documents = [
    OrderedDict(
        [
            ("date", dateutil.parser.parse(entry["date"]).date()),
            ("body", literal(entry["body"])),
        ]
    )
    for entry in cleaned
]
print(yaml.dump(news_documents, width=100))
	from collections import OrderedDict
	import httpx, dateutil, yaml

	# The URL embeds a commit hash, so the README layout this script relies on
	# ("## News" followed later by "## Installation") stays fixed.
	url = "https://raw.githubusercontent.com/simonw/datasette/8ae0f9f7f0d644b0161165a1084f53acd2786f7c/README.md"

	response = httpx.get(url)
	# Fail loudly on an HTTP error instead of trying to parse an error page.
	response.raise_for_status()
	content = response.text

	# Keep only the text between the "## News" and "## Installation" headings.
	news = content.split("## News")[1].split("## Installation")[0].strip()

	# Group the section into items: a line starting with "* " opens a new item
	# and any other line is a continuation of the item before it.
	items = []
	current = []
	for line in news.split("\n"):
	    if line.startswith('* '):
	        if current:
	            items.append(current)
	        current = [line]
	    elif current:
	        current.append(line)
	    # Lines ahead of the first bullet belong to no item and are skipped;
	    # they would otherwise form a bogus item with no "<date>:" prefix.
	if current:
	    items.append(current)

	# Split the first line of every item into its date prefix and remaining text.
	cleaned = []
	for item in items:
	    # item[0] is "* <date>: <text>": drop the "* " marker and split on the
	    # first colon only, because the text itself may contain colons.
	    date, rest = item[0][2:].split(":", 1)
	    lines = [rest.strip()] + item[1:]
	    cleaned.append({"date": dateutil.parser.parse(date).date().isoformat(), "body": "\n".join(lines)})

	class literal(str):
	    """String subtype that marks a value for custom YAML representation.

	    It adds nothing to ``str``; the distinct type only exists so that a
	    representer can be registered for it without affecting plain strings.
	    """

	def literal_presenter(dumper, data):
	    """Represent ``data`` as a YAML string scalar using the ``>`` style.

	    Args:
	        dumper: Object exposing ``represent_scalar(tag, value, style=...)``.
	        data: The string value to represent.

	    Returns:
	        Whatever ``dumper.represent_scalar`` returns for the value.
	    """
	    return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='>')
	# Register literal_presenter as the representer for `literal` instances.
	yaml.add_representer(literal, literal_presenter)

	def represent_ordereddict(dumper, data):
	    """Build a YAML mapping node from ``data``, one entry per dict item.

	    Keys and values are each converted with ``dumper.represent_data`` and
	    the resulting node pairs are kept in the order ``data.items()`` yields
	    them.

	    Args:
	        dumper: Object exposing ``represent_data(value)``.
	        data: The mapping to represent.

	    Returns:
	        A ``yaml.nodes.MappingNode`` tagged ``tag:yaml.org,2002:map``.
	    """
	    value = []

	    for item_key, item_value in data.items():
	        node_key = dumper.represent_data(item_key)
	        node_value = dumper.represent_data(item_value)

	        value.append((node_key, node_value))

	    return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value)

	# Route OrderedDict instances through represent_ordereddict when dumping.
	yaml.add_representer(OrderedDict, represent_ordereddict)

	# One mapping per news item: the ISO date string is parsed back into a date
	# object and the body is wrapped in `literal` so its representer applies.
	news_documents = [
	    OrderedDict(
	        [
	            ("date", dateutil.parser.parse(entry["date"]).date()),
	            ("body", literal(entry["body"])),
	        ]
	    )
	    for entry in cleaned
	]
	print(yaml.dump(news_documents, width=100))