Skip to content

Instantly share code, notes, and snippets.

@fedepaol
Created August 24, 2014 21:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fedepaol/e46635e3d7de475b0546 to your computer and use it in GitHub Desktop.
Save fedepaol/e46635e3d7de475b0546 to your computer and use it in GitHub Desktop.
import argparse
import xml.etree.cElementTree as ET
import os
def is_post(node):
    """Tell whether an Atom ``entry`` element is a Blogger post to keep.

    An entry is accepted when it has no ``app:control`` child and at least
    one of its Atom ``category`` children carries the Blogger "post" kind
    term.

    :param node: ElementTree element for one ``entry`` of the dump.
    :returns: ``True`` when the entry is accepted, ``False`` otherwise.
    """
    post_kind = 'http://schemas.google.com/blogger/2008/kind#post'

    # Compare with None explicitly: an Element without children is falsy, so
    # ``not element`` cannot tell "missing" from "present but empty".
    # NOTE(review): app:control presumably marks drafts -- confirm.
    if node.find('{http://purl.org/atom/app#}control') is not None:
        return False

    # Inspect every category instead of only the first one; this also copes
    # with entries that have no category at all (find() would return None).
    categories = node.findall('{http://www.w3.org/2005/Atom}category')
    return any(category.get('term') == post_kind for category in categories)
def get_posts_id(blogger_dump):
    """Build a ``{post title: id number}`` mapping from a Blogger dump.

    Parses the XML at ``blogger_dump``, walks the Atom ``entry`` children of
    the root element that ``is_post`` accepts and, for each of them, stores
    the third dash-separated piece of its ``id`` text under its ``title``
    text.

    :param blogger_dump: file name or file object accepted by ``ET.parse``.
    :returns: dict mapping each title text to its id number string.
    """
    atom = '{http://www.w3.org/2005/Atom}'
    root = ET.parse(blogger_dump).getroot()

    ids_by_title = {}
    for entry in root.findall(atom + 'entry'):
        if not is_post(entry):
            continue
        # The id number is the third dash-separated piece of the id text.
        number = entry.find(atom + 'id').text.split('-')[2]
        ids_by_title[entry.find(atom + 'title').text] = number
    return ids_by_title
def _front_matter_title(line):
    """Return the title carried by a front-matter ``title`` line."""
    pieces = line.split('"')
    if len(pieces) > 1:
        # Double-quoted title: keep what sits between the first two quotes.
        return pieces[1]
    # Unquoted title: take what follows the first colon instead of failing
    # with an IndexError.
    return line.partition(':')[2].strip().strip("'")


def inject_redirect(file_name, title_map):
    """Rewrite ``file_name`` adding a ``redirect_from`` entry before its title.

    The file is first renamed to ``file_name + 'old'`` (that copy is left on
    disk) and then written again line by line. Until the second ``---``
    separator line has been seen, every line starting with ``title`` is
    looked up in ``title_map``; on a hit, a ``redirect_from`` list pointing
    at ``/blog/<id>/`` is written right before the title line. Everything
    from the second separator onwards is copied untouched.

    :param file_name: path of the file to rewrite.
    :param title_map: dict mapping a title to its id number.
    :returns: ``None``.
    """
    separators_seen = 0
    old_file = file_name + 'old'
    # Single-argument print calls behave the same on Python 2 and Python 3.
    print(file_name)
    print(old_file)
    os.rename(file_name, old_file)

    # Context managers guarantee both handles are flushed and closed.
    with open(old_file, 'r') as source, open(file_name, 'w') as target:
        for line in source:
            # Lines keep their trailing newline, so strip before comparing;
            # otherwise the separator is never recognised.
            if line.strip() == '---':
                separators_seen += 1

            # ">=" so that a later '---' line cannot re-enable title
            # processing for the rest of the file.
            if separators_seen >= 2:
                target.write(line)
                continue

            if line.startswith('title'):
                title = _front_matter_title(line)
                try:
                    post_id = title_map[title]
                except KeyError:
                    print('id not found for ' + title)
                else:
                    target.write('redirect_from:\n')
                    target.write(' - /blog/%s/\n' % post_id)
            target.write(line)
def set_redirect(posts_path, titles_map):
    """Run ``inject_redirect`` on each entry of ``posts_path`` ending in ``html``.

    :param posts_path: directory whose entries are listed.
    :param titles_map: mapping handed unchanged to ``inject_redirect``.
    :returns: ``None``.
    """
    html_names = [
        name for name in os.listdir(posts_path) if name.endswith('html')
    ]
    for name in html_names:
        inject_redirect(os.path.join(posts_path, name), titles_map)
if __name__ == '__main__':
    # Command-line entry point: read the title -> id map from the Blogger
    # dump, then inject redirects into the files found under the posts path.
    parser = argparse.ArgumentParser()
    # Both options are needed by the calls below; marking them required makes
    # argparse print a usage error instead of failing later with a traceback
    # caused by a None path.
    parser.add_argument('-p', '--posts', dest='posts_path', required=True,
                        help='path of the converted _posts')
    parser.add_argument('-b', '--blogger', dest='blogger_dump', required=True,
                        help='blogger dump file')
    args = parser.parse_args()
    titles_map = get_posts_id(args.blogger_dump)
    set_redirect(args.posts_path, titles_map)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment