Created
January 16, 2012 06:27
-
-
Save shreeshga/1619396 to your computer and use it in GitHub Desktop.
Python code to convert hyde blog posts to rst format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
* Creation Date : 14-01-2012
* Last Modified :
* Created By : Shreesh Ayachit (shreesh.ayachit@gmail.com)
* Description : Convert hyde blog posts (HTML files) to rst format using pandoc.
""" | |
import os | |
import sys | |
import re | |
from subprocess import call | |
# Header block of a hyde post: the text between two '---' lines.  DOTALL lets
# the lazy '.*?' run across several header lines; '[^-]' requires the header
# to start with something other than a dash.
pat = re.compile(r'---\n(?P<header>[^-].*?)---\n', re.DOTALL)
# 'title: ...' line; captures the rest of that line.
TITLE = re.compile(r'\s*title:\s*(?P<title>.*)')
# First YYYY-MM-DD and HH:MM found anywhere in the header.
DATE = re.compile(r'(?P<date>\d{4}-\d{2}-\d{2})')
TIME = re.compile(r'(?P<time>\d{2}:\d{2})')
# 'snip: ...' line.  The capture is greedy: a trailing lazy '.*?' with nothing
# after it always matched the empty string.
SNIP = re.compile(r'\s*snip:\s*(?P<snip>.*)')
# First '- item' list entry; captures the text after the dash.
TAGS = re.compile(r'\s*-\s+(?P<tags>.*)')
def usage():
    """Print the one-line command-line synopsis to standard output."""
    # The parenthesised single-argument form prints the same text on
    # Python 2 and Python 3, unlike the bare print statement.
    print('usage: port.py <blog_dir>')
def fileList(source):
    """Collect the paths of the post files found anywhere below ``source``.

    A file qualifies when its name ends in ``.html`` and contains neither
    ``index`` nor ``listing`` anywhere in the name.

    Args:
        source: Directory to walk recursively.

    Returns:
        A list of paths, each joined from the walked directory and file name,
        in the order ``os.walk`` produced them.
    """
    skipped_words = ('index', 'listing')
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(source)
        for name in names
        if name.endswith('.html')
        and not any(word in name for word in skipped_words)
    ]
def read_file(name):
    """Return the entire contents of the text file ``name`` as one string.

    Args:
        name: Path of the file to read.

    Returns:
        The file's text, line endings included; ``''`` for an empty file.
    """
    with open(name, 'r') as handle:
        contents = handle.read()
    return contents
def convert_rst(from_file):
    """Convert one hyde post into an rst file under ``shreesh/posts/``.

    The ``---`` delimited header is searched for a title, a date and a time,
    then stripped from the text.  The remaining body is written to a scratch
    file and converted to rst by the external ``pandoc`` program.  The result
    is saved as ``shreesh/posts/<date>-<title>.rst`` (whitespace runs in the
    title become dashes), preceded by a block of rst metadata lines.

    The path and the header-stripped body are echoed to standard output.

    Args:
        from_file: Path of the hyde post to convert.

    Raises:
        RuntimeError: If pandoc exits with a non-zero status.
    """
    print(from_file)
    string = read_file(from_file)
    match = pat.search(string)
    author = 'Shreesh'
    body = pat.sub('', string)
    # A post without a header previously left title/date/time unbound and
    # failed later with NameError; treat it as an empty header instead.
    header = match.group('header') if match else ''

    def header_field(regex, group):
        # A field missing from the header yields '' rather than an
        # AttributeError from calling .group() on None.
        found = regex.search(header)
        return found.group(group) if found else ''

    title = header_field(TITLE, 'title')
    time_of_day = header_field(TIME, 'time')
    date = header_field(DATE, 'date')
    print(body)

    # The metadata block uses the title as written; only the file name below
    # gets the dashed form.
    newhead = ''.join([
        '.. title:: ' + title + '\n',
        '.. author:: ' + author + '\n',
        '.. updated:: ' + date + ' ' + time_of_day + '\n',
        '.. timezone:: UTC' + '\n',
        '.. feed:: all' + '\n',
        '.. copyright:: Creative Commons Attribution 3.0 Unported' + '\n',
    ])
    # A '/' in the title would be read as a directory separator in the path.
    slug = re.sub(r'\s+', '-', title).replace('/', '-')

    try:
        with open('temp1.txt', 'w') as temp1:
            temp1.write(body + '\n')
        status = call(['pandoc', '-s', '-w', 'rst', 'temp1.txt', '-o', 'temp2.txt'])
        if status != 0:
            raise RuntimeError(
                'pandoc exited with status %d for %s' % (status, from_file))
        lines = read_file('temp2.txt')
    finally:
        # Remove the scratch files even when the conversion fails.
        for scratch in ('temp1.txt', 'temp2.txt'):
            if os.path.exists(scratch):
                os.remove(scratch)

    out_dir = 'shreesh/posts'
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    with open(out_dir + '/' + date + '-' + slug + '.rst', 'w') as f2:
        f2.write(newhead + '\n')
        f2.write(lines + '\n')
if __name__ == "__main__":
    # One positional argument is required: the root directory of the blog.
    if len(sys.argv) < 2:
        usage()
        # sys.exit instead of the bare exit() helper, which is injected by
        # the site module and is not guaranteed to exist (e.g. python -S).
        sys.exit(1)
    blog_dir = sys.argv[1]
    for file_name in fileList(blog_dir):
        convert_rst(file_name)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment