# shreeshga/hyde2rst.py

## hyde2rst.py
"""
* Creation Date : 14-01-2012
* Last Modified :
* Created By :  Shreesh Ayachit (shreesh.ayachit@gmail.com)
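* Description : Converts the .html posts found under a blog directory to reStructuredText (.rst) files using pandoc.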
"""
import os
import sys
import re
from subprocess import call
# Patterns used to pick apart a post's header block.
# All patterns are raw strings so that `\s` and `\d` reach the regex engine
# verbatim instead of relying on Python passing unknown escapes through.

# Header block: text between a '---' line and the next '---' line; DOTALL lets
# the lazy `.*?` span several lines.
pat = re.compile(r'---\n(?P<header>[^-].*?)---\n', re.DOTALL)
# "title: <rest of line>"
TITLE = re.compile(r'\s*title:\s*(?P<title>.*)')
# First YYYY-MM-DD looking run of digits.
DATE = re.compile(r'(?P<date>\d{4}-\d{2}-\d{2})')
# First HH:MM looking run of digits.
TIME = re.compile(r'(?P<time>\d{2}:\d{2})')
# "snip: <rest of line>".  The group is greedy: a trailing lazy `.*?` can
# always succeed with zero characters, so it would capture an empty string.
SNIP = re.compile(r'\s*snip:\s*(?P<snip>.*)')
# First "- <item>" list entry.
TAGS = re.compile(r'\s*-\s+(?P<tags>.*)')


def usage():
    """Print the one-line command-line usage message to standard output."""
    # NOTE(review): the message names port.py although the header comment of
    # this file calls it hyde2rst.py - confirm which name is intended.
    # The parenthesised single-argument form prints the same text under both
    # Python 2 and Python 3, unlike the bare print statement.
    print('usage: port.py  <blog_dir>')

def fileList(source):
    """Collect the paths of the .html files found under *source*.

    The tree rooted at ``source`` is traversed with ``os.walk``.  A file is
    kept when its name ends in '.html' and contains neither 'index' nor
    'listing'.

    Returns a list of paths, each one the walked directory joined with the
    file name, in the order ``os.walk`` produced them.
    """
    return [
        os.path.join(folder, entry)
        for folder, _subdirs, entries in os.walk(source)
        for entry in entries
        if entry.endswith('.html')
        and 'index' not in entry
        and 'listing' not in entry
    ]

def read_file(name):
    """Return the whole content of the file *name* as a single string.

    The file is opened in text read mode and closed again before returning.
    """
    with open(name, 'r') as handle:
        return handle.read()


def _header_field(pattern, header, group):
    """Return the named *group* of *pattern*'s first match in *header*, or '' when there is none."""
    found = pattern.search(header)
    if found is None:
        return ''
    return found.group(group) or ''


def convert_rst(from_file, out_dir='shreesh/posts/'):
    """Convert one post to reStructuredText and write it under *out_dir*.

    The header block matched by ``pat`` is stripped from the file; the rest is
    run through ``pandoc -s -w rst``.  The title, date and time found in the
    header are written as ``.. title::`` / ``.. updated::`` directives in front
    of the converted body.  Fields missing from the header become empty
    strings.  Only title, date and time are carried over from the header.

    Parameters:
        from_file: path of the source post.
        out_dir: directory receiving ``<date>-<title>.rst`` (whitespace runs
            in the title replaced by '-').  Created if it does not exist.

    Side effects: prints the source path and the post body to standard
    output, and uses ``temp1.txt`` / ``temp2.txt`` in the current directory
    as scratch files (always removed again, even on failure).

    Raises:
        RuntimeError: pandoc exited with a non-zero status.
    """
    print(from_file)
    text = read_file(from_file)
    match = pat.search(text)
    header = match.group('header') if match else ''
    body = pat.sub('', text)
    author = 'Shreesh'

    # A missing field must not abort the conversion: fall back to ''.
    title = _header_field(TITLE, header, 'title')
    time = _header_field(TIME, header, 'time')
    date = _header_field(DATE, header, 'date')
    print(body)

    # The directive block uses the title as written, before it is slugged.
    newhead = ''.join([
        '.. title:: ' + title + '\n',
        '.. author:: ' + author + '\n',
        '.. updated:: ' + date + ' ' + time + '\n',
        '.. timezone:: UTC' + '\n',
        '.. feed:: all' + '\n',
        '.. copyright:: Creative Commons Attribution 3.0 Unported' + '\n',
    ])

    try:
        with open('temp1.txt', 'w') as temp1:
            # Trailing newline kept to match what `print >>temp1, body` wrote.
            temp1.write(body + '\n')
        status = call(['pandoc', '-s', '-w', 'rst', 'temp1.txt', '-o', 'temp2.txt'])
        if status != 0:
            raise RuntimeError('pandoc exited with status %d while converting %s'
                               % (status, from_file))
        converted = read_file('temp2.txt')
    finally:
        # Never leave scratch files behind, whichever step failed.
        for scratch in ('temp1.txt', 'temp2.txt'):
            if os.path.exists(scratch):
                os.remove(scratch)

    slug = re.sub(r'\s+', '-', title)
    if not slug:
        # No usable title: name the output after the source file so that
        # several such posts do not overwrite one another.
        slug = os.path.splitext(os.path.basename(from_file))[0]
    file_name = '-'.join(part for part in (date, slug) if part) + '.rst'

    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, file_name), 'w') as f2:
        f2.write(newhead + '\n')
        f2.write(converted + '\n')

if __name__ == "__main__":
    # Script entry point: the blog directory is the first command-line argument.
    if len(sys.argv) < 2:
        usage()
        # sys.exit rather than the bare exit(), which is only injected by the
        # site module and is not guaranteed to exist.
        sys.exit(1)
    blog_dir = sys.argv[1]
    # Convert every post that fileList() selects under the blog directory.
    for file_name in fileList(blog_dir):
        convert_rst(file_name)