Skip to content

Instantly share code, notes, and snippets.

@robla
Last active December 16, 2015 09:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save robla/5416776 to your computer and use it in GitHub Desktop.
Save robla/5416776 to your computer and use it in GitHub Desktop.
Work-in-progress for publishing the WMF Roadmap back to mediawiki.org
#!/usr/bin/env python
#
# Copyright (c) 2013 Rob Lanphier, Wikimedia Foundation
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import os
import optparse
import wikipedia as pywikibot
import shutil
import subprocess
import tempfile
import lxml.objectify
import re
import string
import zipfile
# Edit summary used when publishing to the wiki; it is the default value of
# the -c/--comment command-line option.
DEFAULTCOMMENT = "Sync with 'Engineering Projects Roadmap' GDoc"
def load_roadmap_doc(googledocname, savedoc=None):
    """Download a Google Docs spreadsheet and return its ``content.xml``.

    The document is fetched into a private temporary directory by running
    the external ``google docs get`` command, then opened as a zip archive.
    The temporary directory is removed again whether or not a step fails.

    Args:
        googledocname: Name of the Google document to download.
        savedoc: Optional path; when given, the downloaded file is also
            copied there before it is read.

    Returns:
        The contents of the ``content.xml`` member of the downloaded
        archive.
    """
    tempdir = tempfile.mkdtemp()
    try:
        tempfilename = os.path.join(tempdir, 'roadmap.ods')
        # NOTE(review): the exit status of the download command is not
        # checked; a failed download shows up as an error as soon as the
        # (missing) file is used below.
        subprocess.call(["google", "docs", "get", googledocname, tempfilename])
        if savedoc is not None:
            shutil.copy(tempfilename, savedoc)
        tempzip = zipfile.ZipFile(tempfilename)
        try:
            return tempzip.read('content.xml')
        finally:
            # Release the file handle before the directory is deleted.
            tempzip.close()
    finally:
        # Best-effort cleanup that must not mask an error raised above.
        shutil.rmtree(tempdir, ignore_errors=True)
def convert_doc_to_object(roadmap_doc):
    """Parse the roadmap spreadsheet XML into plain dicts and lists.

    Args:
        roadmap_doc: Contents of the spreadsheet's ``content.xml``.

    Returns:
        A dict with two keys:

        * ``months``: list with the text of every header-row cell after the
          first two.
        * ``groups``: list of group dicts, each holding ``name``, an
          optional ``link`` and ``activities``.  Every activity is a dict
          with ``name``, an optional ``link`` and ``plans`` (one entry per
          remaining column, ``''`` for an empty cell).
    """
    tablens = '{urn:oasis:names:tc:opendocument:xmlns:table:1.0}'
    celltag = tablens + 'table-cell'
    stylekey = tablens + 'style-name'
    repeatkey = tablens + 'number-columns-repeated'

    obj = lxml.objectify.fromstring(roadmap_doc)
    table = obj['body']['spreadsheet'][tablens + 'table']
    retval = {'months': [], 'groups': []}
    rowiter = table.iterchildren(tag=tablens + 'table-row')

    # load the header from the top row
    toprow = next(rowiter)
    celliter = toprow.iterchildren(tag=celltag)
    # skip over the first two cells, which as of this writing, contain
    # "WMF Engineering" and "(links)" respectively
    next(celliter)
    next(celliter)
    for cell in celliter:
        retval['months'].append(str(cell.getchildren()[0]))

    # loop through the remaining rows, using styles to figure out if
    # the row is a group header or an activity line.  We'll assume the
    # second line is a group header line, and whatever lines that have
    # the same style are also group header lines
    groupstyle = None
    for row in rowiter:
        celliter = row.iterchildren(tag=celltag)
        leftcell = next(celliter, None)
        if leftcell is None:
            # row without any cells
            continue
        rowstyle = leftcell.attrib[stylekey]
        if groupstyle is None:
            groupstyle = rowstyle
        if rowstyle == groupstyle:
            newgroup = {}
            newgroup['name'] = leftcell.getchildren()[0]
            # A row may end after its first cell; treat that as "no link"
            # instead of letting StopIteration escape.
            nextcell = next(celliter, None)
            if nextcell is not None and nextcell.countchildren() > 0:
                newgroup['link'] = str(nextcell.getchildren()[0])
            newgroup['activities'] = []
            retval['groups'].append(newgroup)
        else:
            activity = {}
            # if there's no text in the far left cell, assume this row doesn't
            # contain any useful information
            try:
                activity['name'] = leftcell.getchildren()[0]
            except IndexError:
                continue
            linkcell = next(celliter, None)
            if linkcell is not None and linkcell.countchildren() > 0:
                activity['link'] = str(linkcell.getchildren()[0])
            activity['plans'] = []
            for cell in celliter:
                # One cell element can stand for several identical columns.
                repeatcount = int(cell.attrib.get(repeatkey, 1))
                try:
                    plan = cell.getchildren()[0]
                except IndexError:
                    plan = ''
                activity['plans'].extend([plan] * repeatcount)
            retval['groups'][-1]['activities'].append(activity)
    return retval
def render_link(linkobj):
    """Render an item as wikitext: wiki link, external link or bare name.

    Args:
        linkobj: Dict with a ``name`` entry and an optional ``link`` URL.

    Returns:
        * the name alone when there is no ``link`` entry;
        * ``[[Page]]`` or ``[[Page|name]]`` when the link points below
          ``http(s)://www.mediawiki.org/wiki/`` (underscores in the page
          title become spaces);
        * ``[url name]`` for any other link.
    """
    if 'link' not in linkobj:
        # no link
        return linkobj['name']
    # Dots are escaped so that only the real host name matches.
    mobj = re.match(r'https?://www\.mediawiki\.org/wiki/(.*)', linkobj['link'])
    if not mobj:
        # full hyperlink
        return "[" + linkobj['link'] + " " + linkobj['name'] + "]"
    # internal page link
    wikipage = mobj.group(1).replace('_', ' ')
    if linkobj['name'] == wikipage:
        return "[[" + wikipage + "]]"
    return "[[" + wikipage + "|" + linkobj['name'] + "]]"
def item_link(item):
    """Classify the optional ``link`` of a group or activity.

    Args:
        item: Dict that may contain a ``link`` URL.

    Returns:
        ``{'page': title}`` when the link points below
        ``http(s)://www.mediawiki.org/wiki/`` (underscores in the title
        become spaces), ``{'link': url}`` for any other link, and ``{}``
        when the item has no ``link`` entry.
    """
    if 'link' not in item:
        # no link
        return {}
    # Dots are escaped so that only the real host name matches.
    mobj = re.match(r'https?://www\.mediawiki\.org/wiki/(.*)', item['link'])
    if mobj:
        # internal page link
        return {'page': mobj.group(1).replace('_', ' ')}
    # full hyperlink
    return {'link': item['link']}
def optional_group_params(group):
    """Build the optional link parameters of a group for a template call.

    Args:
        group: Group dict; its optional ``link`` is classified by
            ``item_link``.

    Returns:
        ``"|grouppage=<title>"`` and/or ``"|grouplink=<url>"`` depending on
        which keys ``item_link`` reports, or ``''`` when it reports none.
    """
    link = item_link(group)
    retval = ''
    if 'page' in link:
        retval += "|grouppage=" + link['page']
    if 'link' in link:
        retval += "|grouplink=" + link['link']
    return retval
def optional_activity_params(activity):
    """Build the optional link parameters of an activity for a template call.

    Args:
        activity: Activity dict; its optional ``link`` is classified by
            ``item_link``.

    Returns:
        ``"|activitypage=<title>"`` and/or ``"|activitylink=<url>"``
        depending on which keys ``item_link`` reports, or ``''`` when it
        reports none.
    """
    link = item_link(activity)
    retval = ''
    if 'page' in link:
        retval += "|activitypage=" + link['page']
    if 'link' in link:
        retval += "|activitylink=" + link['link']
    return retval
def group_header_template(group):
    """Return the ``{{/groupHeader|...}}`` template call for ``group``.

    The call carries the group name followed by the group's optional link
    parameters and ends with a newline.
    """
    return (
        "{{/groupHeader|groupname=" + group['name']
        + optional_group_params(group)
        + "}}\n"
    )
def activity_header_template(group, activity):
    """Return the ``{{/activityHeader|...}}`` template call for an activity.

    The call carries the group name, the activity name, then the optional
    link parameters of the group and of the activity, and ends with a
    newline.
    """
    return (
        "{{/activityHeader|groupname=" + group['name']
        + ("|activityname=" + activity['name'])
        + optional_group_params(group)
        + optional_activity_params(activity)
        + "}}\n"
    )
def activity_month_template(group, activity, month, plan):
    """Return the ``{{/activityMonth|...}}`` template call for one plan.

    The call carries the group name, the activity name, the optional link
    parameters of the group and of the activity, then ``month`` and
    ``plan``, and ends with a newline.
    """
    return (
        "{{/activityMonth|groupname=" + group['name']
        + ("|activityname=" + activity['name'])
        + optional_group_params(group)
        + optional_activity_params(activity)
        + ("|month=" + month)
        + ("|plan=" + plan)
        + "}}\n"
    )
def generate_wikitext(roadmap_obj):
    """Render the parsed roadmap as wikitext made of template calls.

    Args:
        roadmap_obj: Dict with ``months`` and ``groups`` entries (the shape
            built by ``convert_doc_to_object``).

    Returns:
        ``{{/header}}``, then for every group its header, for every
        activity its header plus one month entry per non-blank plan, and
        finally ``{{/footer}}``.
    """
    chunks = ["{{/header}}\n"]
    for group in roadmap_obj['groups']:
        chunks.append(group_header_template(group))
        for activity in group['activities']:
            chunks.append(activity_header_template(group, activity))
            for column, plan in enumerate(activity['plans']):
                # Blank plans produce no output.  The `+ ' '` is kept from
                # the original; presumably it coerces non-str cell objects
                # to text before stripping.
                if not (plan + ' ').strip():
                    continue
                chunks.append(activity_month_template(
                    group,
                    activity,
                    roadmap_obj['months'][column],
                    plan))
    chunks.append("{{/footer}}\n")
    return "".join(chunks)
def publish_to_wiki(pagename, newcontent, summary):
    """Save ``newcontent`` as the text of wiki page ``pagename``.

    The page is looked up on the site obtained with code and family
    ``'mediawiki'``; ``summary`` is passed as the edit comment.
    """
    target = pywikibot.Page(
        pywikibot.getSite(code='mediawiki', fam='mediawiki'),
        pagename)
    target.put(newtext=newcontent, comment=summary)
def main():
    '''Loads GDoc Roadmap, converts to wikitext, and publishes

    Command-line options:
      -l/--loaddoc  read a local document file instead of downloading
      -s/--savedoc  keep a copy of the downloaded document
      -p/--publish  save the result to the wiki page 'Roadmap'; without it
                    the wikitext is printed to standard output
      -c/--comment  edit summary used when publishing
    '''
    usage = "usage: %prog [options]"
    parser = optparse.OptionParser(usage)
    parser.add_option("-l", "--loaddoc", dest="loaddoc",
                      default=None,
                      help="Load local doc file instead of pulling from Google Docs")
    parser.add_option("-p", "--publish", dest="publish", action="store_true",
                      default=False,
                      help="Publish result to wiki")
    parser.add_option("-s", "--savedoc", dest="savedoc",
                      default=None,
                      help="Save doc file from Google Docs to given location")
    parser.add_option("-c", "--comment", dest="comment",
                      default=DEFAULTCOMMENT,
                      help='Comment to pass (default: "' + DEFAULTCOMMENT + '")')
    (options, args) = parser.parse_args()

    if options.loaddoc is None:
        roadmap_doc = load_roadmap_doc("Engineering Projects Roadmap", options.savedoc)
    else:
        tempzip = zipfile.ZipFile(options.loaddoc)
        try:
            roadmap_doc = tempzip.read('content.xml')
        finally:
            tempzip.close()

    roadmap_obj = convert_doc_to_object(roadmap_doc)
    wikitext = generate_wikitext(roadmap_obj)

    if options.publish:
        # An explicitly empty --comment falls back to the default summary.
        # The original fallback read `options.interactive`, an option that
        # is never defined, and so raised AttributeError.
        comment = options.comment or DEFAULTCOMMENT
        publish_to_wiki(u'Roadmap', wikitext, comment)
    else:
        # With a single argument the parenthesised form behaves exactly
        # like the Python 2 print statement.
        print(wikitext.encode('utf-8'))
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment