Skip to content

Instantly share code, notes, and snippets.

Created October 21, 2012 21:33
Show Gist options
  • Save rufuspollock/3928586 to your computer and use it in GitHub Desktop.
Save rufuspollock/3928586 to your computer and use it in GitHub Desktop.
Upload data wrangling handbook to wordpress
'''Upload the data wrangling handbook to a WordPress site.

Copy this file to the same directory as your Sphinx build directory and then
run it with Python, passing -h to see the available actions:

    python <this_file> -h

NB: You need to enable XML-RPC access to the WordPress site (via
Settings -> Writing).

NB: this requires pywordpress (pip install pywordpress) and its associated
config file - see the pywordpress documentation.
'''
import os
import optparse
import pywordpress
# TODO: deal with utf8 encoding
def prepare_html(fileobj):
    '''Extract the main content of a built page and rewrite its links.

    The text between the ``<div class="content">`` marker and the
    ``<div class="well sidebar-nav">`` marker is kept, minus its last three
    lines. Local links are then rewritten to directory-style URLs:
    ``index.html`` becomes ``./`` (or just the containing directory) and
    ``name.html`` becomes ``name/``.

    :param fileobj: readable file-like object whose ``read()`` returns the
        page text.
    :return: the extracted, link-rewritten HTML fragment as a string.
    :raises ValueError: if either marker ``<div>`` is missing from the page
        (raised by ``str.index``).
    '''
    import re

    data = fileobj.read()
    # just pull out the main content
    start = data.index('<div class="content">')
    end = data.index('<div class="well sidebar-nav">')
    out = data[start:end]
    # strip last 3 lines
    out = '\n'.join(out.split('\n')[:-3])

    # TODO: do we want to extract the title
    # Do we want title at all?

    # TODO: insert toc (??)
    # insert after h1 on 4th line
    # lines = out.split('\n')
    # out = '\n'.join(lines[:4] + [ '[toc]' ] + lines[4:])

    # now various regex
    # replace .html with / and index.html with simple ./
    # "<something>index.html" -> "<something>" (at least one leading char)
    out = re.sub(r'(href=".[^"]*)index\.html"', r'\1"', out)
    # a bare "index.html" -> "./"
    out = re.sub(r'href="index\.html"', 'href="./"', out)
    # "name.html" -> "name/"; the dot is escaped so that only a literal
    # ".html" suffix matches (an unescaped dot would also hit "axhtml")
    out = re.sub(r'(href="[^"]*)\.html"', r'\1/"', out)
    return out
def upload(wordpress_site_url='', handbook_path='/handbook/'):
    '''Convert and upload built sphinx content to destination site

    1. Clean up and extract html for uploading
    2. Upload

    NB: you'll need a config.ini to exist as per pywordpress requirements

    :param wordpress_site_url: not used by this function; kept so existing
        callers keep working.
    :param handbook_path: URL prefix under which every page is created.
    '''
    build_dir = 'build/html'
    pages = {}
    for (root, dirs, files) in os.walk(build_dir):
        # NOTE(review): presumably Sphinx's copies of the page sources -
        # nothing in there is a page to upload.
        if '_sources' in root:
            continue
        # directory of this file relative to the build root ('' at the top)
        reldir = os.path.relpath(root, build_dir)
        if reldir == os.curdir:
            reldir = ''
        for f in files:
            # Only built HTML pages can be converted; other build artefacts
            # lack the content markers prepare_html looks for.
            if not f.endswith('.html'):
                continue
            path = os.path.join(root, f)
            print(path)
            subpath = os.path.join(
                reldir,
                # index.html => /
                f.replace('index.html', '')
            )
            urlpath = handbook_path + os.path.splitext(subpath)[0]
            # URLs always use forward slashes, whatever the local separator
            urlpath = urlpath.replace(os.sep, '/')
            # everything has a trailing '/' e.g. /handbook/introduction/
            if not urlpath.endswith('/'):
                urlpath += '/'
            # close the file promptly instead of leaking the handle
            with open(path) as fileobj:
                out = prepare_html(fileobj)
            pages[urlpath] = {
                # urlpath ends with '/', so drop it before taking the last
                # segment; otherwise the title would always be empty
                'title': urlpath.rstrip('/').split('/')[-1].capitalize(),
                'description': out
            }

    # do the upload
    wp = pywordpress.Wordpress.init_from_config('config.ini')
    wp.verbose = True
    print('Creating pages in wordpress')
    changes = wp.create_many_pages(pages)
    print('Summary of changes')
    print(changes)
# Command-line entry point: the first positional argument names the action.
if __name__ == '__main__':
    usage = '''%prog {action}
upload: upload handbook to website
'''
    parser = optparse.OptionParser(usage)
    options, args = parser.parse_args()
    if len(args) < 1:
        # parser.error prints the usage text and exits with a non-zero
        # status, so args[0] below is never reached without an action
        parser.error('no action given')
    action = args[0]
    if action == 'upload':
        upload()
    else:
        parser.error('unknown action: %s' % action)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment