Skip to content

Instantly share code, notes, and snippets.

@alex
Created January 18, 2010 04:04
Show Gist options
  • Save alex/279772 to your computer and use it in GitHub Desktop.
Save alex/279772 to your computer and use it in GitHub Desktop.
import re
from urllib import urlopen
from django.contrib.auth.models import User
from django.core.management.base import BaseCommand, CommandError
from django.template.defaultfilters import slugify
from dateutil.parser import parse as parse_date
from pyquery import PyQuery
from blago.models import Post
class Command(BaseCommand):
    """Import a single Blogspot post into the local blog as a ``Post``.

    The post page is fetched over HTTP, its title, date, body and labels are
    scraped with PyQuery, and the body HTML is converted to reStructuredText
    through a series of string and regex substitutions before the ``Post``
    row is created.
    """

    help = "Imports a blog post for blogspot"

    def handle(self, url=None, **options):
        """Fetch ``url``, convert the post it contains and store it.

        ``url`` is the address of the Blogspot post page to import.

        Raises ``CommandError`` when no URL is given or when the page lacks
        one of the elements the importer needs (title, date header, body).
        """
        if not url:
            raise CommandError("Provide a post!")

        # Close the HTTP response explicitly instead of leaving the socket
        # open until the object happens to be garbage collected.
        response = urlopen(url)
        try:
            html = response.read()
        finally:
            response.close()
        page = PyQuery(html)

        title = self._required_html(page, ".post-title a").strip()
        kwargs = {
            "title": title,
            "slug": slugify(title),
            "author": User.objects.get(username="alex"),
            "published": parse_date(
                self._required_html(page, ".date-header")
            ),
            "body": self._html_to_rest(
                self._required_html(page, ".post-body")
            ),
        }

        # Read each label link on its own: calling .text() on the whole
        # selection joins the labels with spaces, and splitting that string
        # again would break a multi-word label into several tags.
        tags = []
        for link in page.find(".post-labels a"):
            label = PyQuery(link).text()
            if label and label.strip():
                tags.append(label.strip())

        post = Post.objects.create(**kwargs)
        if tags:
            post.tags.add(*tags)
        # Single-argument form prints the same text on Python 2 and 3.
        print("Imported: %s" % post.title)

    @staticmethod
    def _required_html(page, selector):
        """Return the inner HTML of ``selector``, or raise ``CommandError``.

        ``.html()`` yields ``None`` when nothing matches; turning that into a
        ``CommandError`` gives the user a readable message instead of an
        ``AttributeError`` further down.
        """
        markup = page.find(selector).html()
        if markup is None:
            raise CommandError(
                "Could not find %r in the page; is this a Blogspot post?"
                % selector
            )
        return markup

    @staticmethod
    def _html_to_rest(body):
        """Convert the post body HTML into reStructuredText-flavoured text."""
        # Convert line breaks, whichever way the serializer spelled the tag.
        body = re.sub(r"<br\s*/?>", "\n", body)
        # Unwrap the spell-checker <span> markers left in the post HTML,
        # keeping only the text they enclose.
        body = re.sub(
            r'<span class="blsp-spelling-error" id="SPELLING_ERROR_\d+">(.*?)</span>',
            lambda match: match.group(1),
            body
        )
        # Turn links into reST hyperlink references: `text <url>`_
        body = re.sub(
            r'<a href="(.*?)">(.*?)</a>',
            lambda match: "`%s <%s>`_" % (match.group(2), match.group(1)),
            body
        )
        # Drop the empty float-clearing div.
        body = body.replace('<div style="clear: both;"></div>', "")
        # Drop the list containers; the items themselves are converted below.
        body = body.replace("<ul>", "").replace("</ul>", "")
        body = body.replace("<ol>", "").replace("</ol>", "")
        # Turn list elements into reST bullet items, one per line.
        body = body.replace("</li>", "\n").replace("<li>", " * ")

        # Convert <pre> blocks into sourcecode directives, prefixing every
        # line of the block so it sits under the directive.
        def to_sourcecode(match):
            lines = match.group(1).splitlines()
            return "\n.. sourcecode:: python\n" + "\n".join(
                [" " + line for line in lines]
            )

        body = re.compile(r"<pre>(.*?)</pre>", re.DOTALL).sub(
            to_sourcecode, body
        )
        return body.strip()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment