Skip to content

Instantly share code, notes, and snippets.

@shreeshga
Created January 16, 2012 06:27
Show Gist options
  • Save shreeshga/1619396 to your computer and use it in GitHub Desktop.
Save shreeshga/1619396 to your computer and use it in GitHub Desktop.
Python code to convert Hyde blog posts to reStructuredText (rst) format
"""
* Creation Date : 14-01-2012
* Last Modified :
* Created By : Shreesh Ayachit (shreesh.ayachit@gmail.com)
* Description : Convert Hyde blog posts (.html files with '---' front matter) to reStructuredText via pandoc.
"""
import os
import sys
import re
from subprocess import call
# All patterns are raw strings so that '\s' and '\d' reach the regex engine
# as written instead of being treated as (invalid) string escapes.

# Front-matter block: everything between a '---\n' delimiter and the next
# '---\n'. The header must not start with '-'; DOTALL lets the lazy '.*?'
# span several header lines.
pat = re.compile(r'---\n(?P<header>[^-].*?)---\n', re.DOTALL)
# 'title: <text>' entry; captures the rest of the line.
TITLE = re.compile(r'\s*title:\s*(?P<title>.*)')
# A YYYY-MM-DD date.
DATE = re.compile(r'(?P<date>\d{4}-\d{2}-\d{2})')
# An HH:MM time.
TIME = re.compile(r'(?P<time>\d{2}:\d{2})')
# 'snip: <text>' entry; captures the rest of the line. The capture is greedy:
# a trailing lazy '.*?' would always match the empty string.
SNIP = re.compile(r'\s*snip:\s*(?P<snip>.*)')
# '- <text>' list item; captures the text after the dash.
TAGS = re.compile(r'\s*-\s+(?P<tags>.*)')
def usage():
    """Write the command-line usage line to standard error."""
    # A plain write call is valid on both Python 2 and Python 3, and keeps
    # the diagnostic out of standard output.
    sys.stderr.write('usage: port.py <blog_dir>\n')
def fileList(source):
    """Collect the paths of the post pages found under ``source``.

    Walks the directory tree rooted at ``source`` and keeps every file whose
    name ends in ``.html`` and contains neither ``index`` nor ``listing``.

    Returns a list of paths, each formed by joining the walked directory
    with the file name, in the order ``os.walk`` produces them.
    """
    def is_post(name):
        # Only .html files qualify, and index/listing pages are excluded.
        if not name.endswith('.html'):
            return False
        return 'index' not in name and 'listing' not in name

    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(source)
        for name in names
        if is_post(name)
    ]
def read_file(name):
    """Return the entire contents of the file ``name`` as a single string."""
    with open(name, 'r') as handle:
        return handle.read()
def _header_field(pattern, header, group):
    """Return the named ``group`` of ``pattern`` in ``header``, or '' if absent."""
    found = pattern.search(header)
    if found is None:
        return ''
    return found.group(group) or ''


def _rst_header(title, author, date, time):
    """Build the reST metadata lines placed at the top of a converted post."""
    rst_head = '.. title:: ' + title + '\n'
    rst_head += '.. author:: ' + author + '\n'
    rst_head += '.. updated:: ' + date + ' ' + time + '\n'
    rst_head += '.. timezone:: UTC' + '\n'
    rst_head += '.. feed:: all' + '\n'
    rst_head += '.. copyright:: Creative Commons Attribution 3.0 Unported' + '\n'
    return rst_head


def convert_rst(from_file, out_dir='shreesh/posts/'):
    """Convert one post file to an ``.rst`` file using pandoc.

    The file's '---' delimited header is parsed for title, date and time;
    the remaining text is passed through ``pandoc -s -w rst`` and written,
    below a block of reST metadata lines, to
    ``<out_dir><date>-<title-with-dashes>.rst``.

    Args:
        from_file: Path of the post to convert.
        out_dir: Directory that receives the ``.rst`` file. It is created
            when it does not exist. Defaults to the original hard-coded
            location.

    Raises:
        IOError: If pandoc exits with a non-zero status.
        OSError: If the pandoc executable cannot be started.

    Files without a header block are skipped with a message on standard
    error. Header fields that are missing default to the empty string.
    """
    print(from_file)
    string = read_file(from_file)
    match = pat.search(string)
    if match is None:
        # Without a header there is no title or date to name the output.
        sys.stderr.write('skipping %s: no front-matter header found\n' % from_file)
        return

    header = match.group('header')
    # Remove only the header that was parsed; a later '---' ... '---' span
    # inside the post body is content and must survive.
    body = string[:match.start()] + string[match.end():]
    author = 'Shreesh'
    title = _header_field(TITLE, header, 'title')
    time = _header_field(TIME, header, 'time')
    date = _header_field(DATE, header, 'date')
    print(body)

    # The metadata keeps the title as written; only the file name gets the
    # whitespace-to-dash form.
    newhead = _rst_header(title, author, date, time)
    slug = re.sub(r'\s+', '-', title)

    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    try:
        with open('temp1.txt', 'w') as temp1:
            temp1.write(body + '\n')
        status = call(['pandoc', '-s', '-w', 'rst', 'temp1.txt', '-o', 'temp2.txt'])
        if status != 0:
            raise IOError('pandoc exited with status %d for %s' % (status, from_file))
        lines = read_file('temp2.txt')
    finally:
        # Scratch files are removed even when pandoc or a read fails.
        for scratch in ('temp1.txt', 'temp2.txt'):
            if os.path.exists(scratch):
                os.remove(scratch)

    with open(os.path.join(out_dir, date + '-' + slug + '.rst'), 'w') as f2:
        f2.write(newhead + '\n')
        f2.write(lines + '\n')
if __name__ == "__main__":
    # The blog directory is the single required argument.
    if len(sys.argv) < 2:
        usage()
        # sys.exit is always available; the bare exit() helper is injected
        # by the site module and is meant for interactive sessions.
        sys.exit(1)
    blog_dir = sys.argv[1]
    # Convert every post page found under the blog directory.
    for file_name in fileList(blog_dir):
        convert_rst(file_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment