Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Convert Blogger Dump to Hugo Markdown Files
#!/usr/bin/env python3
# Author: Kirubakaran Athmanathan
# Website: https://kirubakaran.com/
import feedparser
import django
from django.utils import html as h
import moment
from collections import defaultdict
# Parse the Blogger export; posts and comments arrive together as one flat
# list of feed entries.
d = feedparser.parse('blog-07-22-2017.xml')
i = 0  # entry counter used by the conversion loop below

# Index every comment entry under the id of the post it replies to, so the
# conversion loop can look up a post's comments by the post's own id.
commentdict = defaultdict(list)
for candidate in d.entries:
    # Only comment entries carry a 'thr_in-reply-to' element.
    if 'thr_in-reply-to' in candidate.keys():
        parent_ref = candidate['thr_in-reply-to']['ref']
        commentdict[parent_ref].append(candidate)
# Markup for a single comment: author markup, formatted date, comment body.
_COMMENT_TEMPLATE = """
<div class='blog-comment'>
{} wrote on {}:
{}
</div>
"""

# Hugo page: front matter, post body, then the comment section. The comment
# section is wrapped in the opening/closing "md" shortcode tags that are passed
# in as ordinary format arguments.
_POST_TEMPLATE = """---
title: "{}"
date: {}
slug: "{}"
origlink: "{}"
aliases: ["{}"]
draft: false
---
{}
<div class='blog-comments'>
{}
{}
{}
</div>
"""


def _render_comment(cc):
    """Return the HTML snippet for one comment entry.

    The author's e-mail is shown unless it is missing or is Blogger's
    'noreply@blogger.com' placeholder. The author name is rendered as a link
    when the author detail has an ``href`` and as plain text otherwise. A
    comment without a ``content`` attribute is rendered with an empty body.
    """
    xdt = moment.date(cc.published)
    xa = cc.author_detail

    try:
        if xa.email != 'noreply@blogger.com':
            xae = "<span class='blog-comment-email'>{}</span>".format(xa.email)
        else:
            xae = ""
    except AttributeError:
        xae = ""

    try:
        author = "<span class='blog-comment-author'><a href='{}' rel='nofollow'>{}</a> {}</span>".format(xa.href, xa.name, xae)
    except AttributeError:
        author = "<span class='blog-comment-author'>{} {}</span>".format(xa.name, xae)

    try:
        comment_content = "<div class='blog-comment-content'>{}</div>".format(cc.content[0].value)
    except AttributeError:
        comment_content = ""

    return _COMMENT_TEMPLATE.format(author, xdt.format('MMM DD, YYYY'), comment_content)


# Convert every post in the feed to md/<slug>.md, with its comments appended.
for i, entry in enumerate(d.entries, start=1):
    # If 'thr_total' is present the entry is a post written by the blog owner;
    # comment entries carry 'thr_in-reply-to' instead and are skipped here.
    if 'thr_total' not in entry.keys():
        continue

    ztitle = h.escape(entry.title)
    if ztitle == "":
        ztitle = "Untitled"

    lnk = entry.link
    if lnk.startswith("tag:blogger.com"):
        # The entry has no real permalink, only a tag: URI, so build a URL
        # from the publication year/month and the title.
        dt = moment.date(entry.published)
        lnk = "http://blog.kirubakaran.com/{}/{:02}/{}.html".format(dt.year, dt.month, ztitle).replace(' ','-').replace('!','')
        # NOTE(review): the original indentation was lost; these debug prints
        # are assumed to belong to this branch -- confirm.
        print(entry.link)
        print(lnk)
        print()

    try:
        link = lnk.split('blog.kirubakaran.com/')[1]
    except IndexError:
        # The URL is not under blog.kirubakaran.com, so no slug can be derived.
        # Report the entry and skip it; carrying on would use an unbound or
        # stale `link` and crash or overwrite another post's file.
        print(">"*80)
        print(i)
        print(entry.link)
        print(entry.title)
        print("<"*80)
        continue

    slug = link.replace('/','-').replace('.html','')
    alias = entry.link.replace('http://blog.kirubakaran.com/','/blogger/')

    # Blogger stores line breaks as <br />; turn them back into newlines.
    content = "".join(c.value.replace('<br />','\n') + '\n' for c in entry.content)

    currcomments = commentdict[entry.id]
    cstr = "comment" if len(currcomments) == 1 else "comments"
    comment_op = "<div class='blog-comment-count'>{} {}</div>".format(len(currcomments), cstr)
    comment_op += "".join(_render_comment(cc) for cc in currcomments)

    op = _POST_TEMPLATE.format(ztitle, entry.published, slug, entry.link, alias, content, "{{% md %}}", comment_op, "{{% /md %}}")

    # Explicit UTF-8 so non-ASCII post text does not depend on the platform's
    # default encoding; the context manager closes the file even on error.
    with open("md/{}.md".format(slug), 'w', encoding='utf-8') as f:
        f.write(op)

print("done")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment