robla/roadmap-publisher.py

## roadmap-publisher.py
#!/usr/bin/env python
#
# Copyright (c) 2013 Rob Lanphier, Wikimedia Foundation
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import os
import optparse
import wikipedia as pywikibot
import shutil
import subprocess
import tempfile
import lxml.objectify
import re
import string
import zipfile

# Default value of the --comment option; it ends up as the edit summary
# passed to the wiki page save when publishing.
DEFAULTCOMMENT = "Sync with 'Engineering Projects Roadmap' GDoc"

def load_roadmap_doc(googledocname, savedoc=None):
    '''Download a Google Docs spreadsheet and return its content.xml.

    The document is fetched as an ODS file by running the external
    "google docs get" command, writing into a private temporary directory
    that is removed again before returning (also when an error occurs).

    googledocname -- document title handed to "google docs get"
    savedoc       -- optional path; when not None, the downloaded ODS file
                     is also copied there before it is unpacked

    Returns the raw contents of the "content.xml" member of the ODS
    archive.
    '''
    tempdir = tempfile.mkdtemp()
    try:
        tempfilename = os.path.join(tempdir, 'roadmap.ods')
        # NOTE(review): the exit status is not checked here; a failed
        # download shows up below when the missing file is opened.
        subprocess.call(["google", "docs", "get", googledocname, tempfilename])
        if savedoc is not None:
            shutil.copy(tempfilename, savedoc)
        tempzip = zipfile.ZipFile(tempfilename)
        try:
            return tempzip.read('content.xml')
        finally:
            # close the archive before the file underneath it is deleted
            tempzip.close()
    finally:
        # always drop the scratch directory, even if the download, the
        # copy or the unzip step raised
        shutil.rmtree(tempdir, ignore_errors=True)

def convert_doc_to_object(roadmap_doc):
    '''Parse ODS content.xml into a months/groups/activities structure.

    The first table row supplies the month labels.  Every later row is
    either a group header or an activity line; the two are told apart by
    the style of the row's first cell.

    roadmap_doc -- XML text of the spreadsheet's content.xml

    Returns a dict with two keys:
      'months' -- list of str, one per header cell after the first two
      'groups' -- list of dicts with 'name', optional 'link' and
                  'activities'; each activity is a dict with 'name',
                  optional 'link' and 'plans' (one entry per spreadsheet
                  column, '' for an empty cell)
    Names and plans are stored as the lxml objects found in the cells (they
    are not passed through str()); months and links are plain strings.
    '''
    tablens = '{urn:oasis:names:tc:opendocument:xmlns:table:1.0}'
    celltag = tablens + 'table-cell'
    styleattr = tablens + 'style-name'
    repeatattr = tablens + 'number-columns-repeated'

    obj = lxml.objectify.fromstring(roadmap_doc)
    table = obj['body']['spreadsheet'][tablens + 'table']

    retval = {'months': [], 'groups': []}
    rowiter = table.iterchildren(tag=tablens + 'table-row')

    # load the header from the top row
    toprow = next(rowiter)
    celliter = toprow.iterchildren(tag=celltag)
    # skip over the first two cells, which as of this writing, contain
    # "WMF Engineering" and "(links)" respectively
    next(celliter)
    next(celliter)
    for cell in celliter:
        # NOTE(review): getchildren() is kept on purpose; per the lxml docs,
        # indexing an objectify element selects same-tag siblings, not
        # children -- confirm before "simplifying" to cell[0]
        retval['months'].append(str(cell.getchildren()[0]))

    # loop through the remaining rows, using styles to figure out if
    # the row is a group header or an activity line.  We'll assume the
    # second line is a group header line, and whatever lines that have
    # the same style are also group header lines
    groupstyle = None
    for row in rowiter:
        celliter = row.iterchildren(tag=celltag)
        try:
            leftcell = next(celliter)
        except StopIteration:
            # a row without any cell has nothing to record
            continue
        if groupstyle is None:
            groupstyle = leftcell.attrib[styleattr]
        if leftcell.attrib[styleattr] == groupstyle:
            newgroup = {}
            newgroup['name'] = leftcell.getchildren()[0]
            nextcell = next(celliter)
            if nextcell.countchildren() > 0:
                newgroup['link'] = str(nextcell.getchildren()[0])
            newgroup['activities'] = []
            retval['groups'].append(newgroup)
        else:
            activity = {}
            # if there's no text in the far left cell, assume this row doesn't
            # contain any useful information
            try:
                activity['name'] = leftcell.getchildren()[0]
            except IndexError:
                continue
            linkcell = next(celliter)
            if linkcell.countchildren() > 0:
                activity['link'] = str(linkcell.getchildren()[0])
            activity['plans'] = []
            for cell in celliter:
                # one XML cell may stand for several identical columns
                repeatcount = int(cell.attrib.get(repeatattr, 1))
                children = cell.getchildren()
                if children:
                    plan = children[0]
                else:
                    plan = ''
                activity['plans'].extend([plan] * repeatcount)
            retval['groups'][-1]['activities'].append(activity)
    return retval

def render_link(linkobj):
    '''Render a group/activity dict as a piece of wikitext.

    linkobj -- dict with a 'name' entry and an optional 'link' (URL)

    Returns "[[Page]]" or "[[Page|name]]" when the link points below
    www.mediawiki.org/wiki/ (underscores in the page title become spaces),
    "[url name]" for any other URL, and the bare name when there is no
    'link' entry.
    '''
    name = linkobj['name']
    if 'link' not in linkobj:
        # no link
        return name
    url = linkobj['link']
    # dots are escaped so that only the real host name matches
    mobj = re.match(r'https?://www\.mediawiki\.org/wiki/(.*)', url)
    if mobj is None:
        # full hyperlink
        return "[" + url + " " + name + "]"
    # internal page link
    wikipage = mobj.group(1).replace('_', ' ')
    if name == wikipage:
        return "[[" + wikipage + "]]"
    return "[[" + wikipage + "|" + name + "]]"

def item_link(item):
    '''Classify the optional 'link' entry of a group or activity dict.

    item -- dict that may carry a 'link' (URL string)

    Returns {'page': title} for a URL below www.mediawiki.org/wiki/ (with
    underscores in the title turned into spaces), {'link': url} for any
    other URL, and {} when the dict has no 'link' entry.
    '''
    if 'link' not in item:
        # no link
        return {}
    # dots are escaped so that only the real host name matches
    mobj = re.match(r'https?://www\.mediawiki\.org/wiki/(.*)', item['link'])
    if mobj:
        # internal page link
        return {'page': mobj.group(1).replace('_', ' ')}
    # full hyperlink
    return {'link': item['link']}

def optional_group_params(group):
    '''Return the link-related template parameters for a group.

    group -- group dict, classified through item_link()

    Returns "|grouppage=<title>" for a wiki page link, "|grouplink=<url>"
    for an external link, or '' when the group has no link.
    '''
    link = item_link(group)
    retval = ''
    if 'page' in link:
        retval += "|grouppage=" + link['page']
    if 'link' in link:
        retval += "|grouplink=" + link['link']
    return retval

def optional_activity_params(activity):
    '''Return the link-related template parameters for an activity.

    activity -- activity dict, classified through item_link()

    Returns "|activitypage=<title>" for a wiki page link,
    "|activitylink=<url>" for an external link, or '' when the activity
    has no link.
    '''
    link = item_link(activity)
    retval = ''
    if 'page' in link:
        retval += "|activitypage=" + link['page']
    if 'link' in link:
        retval += "|activitylink=" + link['link']
    return retval

def group_header_template(group):
    '''Build the {{/groupHeader}} template call for one group.

    group -- group dict; its 'name' is emitted as groupname, followed by
             whatever optional_group_params() yields

    Returns the template call as one line ending in a newline.
    '''
    # plain "+" is used throughout because the name may be an lxml object
    opening = "{{/groupHeader|groupname=" + group['name']
    return opening + optional_group_params(group) + "}}\n"

def activity_header_template(group, activity):
    '''Build the {{/activityHeader}} template call for one activity.

    group    -- the group dict the activity belongs to
    activity -- the activity dict

    Returns the template call (group name, activity name, then the
    optional link parameters of both) as one line ending in a newline.
    '''
    # plain "+" is used throughout because the names may be lxml objects
    opening = "{{/activityHeader|groupname=" + group['name']
    opening = opening + ("|activityname=" + activity['name'])
    return (opening
            + optional_group_params(group)
            + optional_activity_params(activity)
            + "}}\n")

def activity_month_template(group, activity, month, plan):
    '''Build the {{/activityMonth}} template call for one plan entry.

    group    -- the group dict the activity belongs to
    activity -- the activity dict
    month    -- value emitted as the month parameter
    plan     -- value emitted as the plan parameter

    Returns the template call as one line ending in a newline.
    '''
    # plain "+" is used throughout because names/plans may be lxml objects
    opening = "{{/activityMonth|groupname=" + group['name']
    opening = opening + ("|activityname=" + activity['name'])
    return (opening
            + optional_group_params(group)
            + optional_activity_params(activity)
            + ("|month=" + month)
            + ("|plan=" + plan)
            + "}}\n")

def generate_wikitext(roadmap_obj):
    '''Turn the parsed roadmap into the wikitext of the roadmap page.

    roadmap_obj -- dict with 'months' and 'groups', as produced by
                   convert_doc_to_object()

    Returns a string that starts with {{/header}}, ends with {{/footer}}
    and holds, per group, a group header, then per activity an activity
    header plus one activityMonth line for every non-blank plan.
    '''
    chunks = ["{{/header}}\n"]
    for group in roadmap_obj['groups']:
        chunks.append(group_header_template(group))
        for activity in group['activities']:
            chunks.append(activity_header_template(group, activity))
            for column, plan in enumerate(activity['plans']):
                # appending ' ' yields a plain string to strip, whatever
                # kind of object the plan is; blank plans emit nothing
                if not (plan + ' ').strip():
                    continue
                chunks.append(activity_month_template(
                    group,
                    activity,
                    roadmap_obj['months'][column],
                    plan))
    chunks.append("{{/footer}}\n")
    return "".join(chunks)

def publish_to_wiki(pagename, newcontent, summary):
    '''Save newcontent to the page called pagename on the mediawiki site.

    pagename   -- title of the page to write
    newcontent -- full replacement text for the page
    summary    -- edit comment passed along with the save
    '''
    target = pywikibot.Page(
        pywikibot.getSite(code='mediawiki', fam='mediawiki'),
        pagename)
    target.put(newtext=newcontent, comment=summary)

def main():
    '''Loads GDoc Roadmap, converts to wikitext, and publishes

    Command line options:
      -l/--loaddoc  read a local ODS file instead of fetching from
                    Google Docs
      -s/--savedoc  keep a copy of the fetched ODS file at this path
      -p/--publish  write the result to the 'Roadmap' wiki page; without
                    it the wikitext is printed to stdout as UTF-8
      -c/--comment  edit summary used when publishing
    '''
    usage = "usage: %prog [options]"
    parser = optparse.OptionParser(usage)
    parser.add_option("-l", "--loaddoc", dest="loaddoc",
                      default=None,
                      help="Load local doc file instead of pulling from Google Docs")
    parser.add_option("-p", "--publish", dest="publish", action="store_true",
                      default=False,
                      help="Publish result to wiki")
    parser.add_option("-s", "--savedoc", dest="savedoc",
                      default=None,
                      help="Save doc file from Google Docs to given location")
    parser.add_option("-c", "--comment", dest="comment",
                      default=DEFAULTCOMMENT,
                      help='Comment to pass (default: "' + DEFAULTCOMMENT + '")')
    (options, _args) = parser.parse_args()

    if options.loaddoc is None:
        roadmap_doc = load_roadmap_doc("Engineering Projects Roadmap", options.savedoc)
    else:
        tempzip = zipfile.ZipFile(options.loaddoc)
        try:
            roadmap_doc = tempzip.read('content.xml')
        finally:
            tempzip.close()
    roadmap_obj = convert_doc_to_object(roadmap_doc)
    wikitext = generate_wikitext(roadmap_obj)
    if options.publish:
        # an explicitly empty --comment falls back to the default summary;
        # no 'interactive' option is defined, so it must not be consulted
        comment = options.comment or DEFAULTCOMMENT
        publish_to_wiki(u'Roadmap', wikitext, comment)
    else:
        # parenthesized so the line parses as a print statement and as a
        # print() call alike
        print(wikitext.encode('utf-8'))

# Run the publisher only when this file is executed as a script.
if __name__ == "__main__":
    main()
	#!/usr/bin/env python
	#
	# Copyright (c) 2013 Rob Lanphier, Wikimedia Foundation
	#
	# Permission is hereby granted, free of charge, to any person obtaining a copy
	# of this software and associated documentation files (the "Software"), to deal
	# in the Software without restriction, including without limitation the rights
	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	# copies of the Software, and to permit persons to whom the Software is
	# furnished to do so, subject to the following conditions:
	#
	# The above copyright notice and this permission notice shall be included in
	# all copies or substantial portions of the Software.
	#
	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	# THE SOFTWARE.

	import os
	import optparse
	import wikipedia as pywikibot
	import shutil
	import subprocess
	import tempfile
	import lxml.objectify
	import re
	import string
	import zipfile

	# Default value of the --comment option; it ends up as the edit summary
	# passed to the wiki page save when publishing.
	DEFAULTCOMMENT = "Sync with 'Engineering Projects Roadmap' GDoc"

	def load_roadmap_doc(googledocname, savedoc=None):
	    '''Download a Google Docs spreadsheet and return its content.xml.

	    The document is fetched as an ODS file by running the external
	    "google docs get" command, writing into a private temporary directory
	    that is removed again before returning (also when an error occurs).

	    googledocname -- document title handed to "google docs get"
	    savedoc       -- optional path; when not None, the downloaded ODS file
	                     is also copied there before it is unpacked

	    Returns the raw contents of the "content.xml" member of the ODS
	    archive.
	    '''
	    tempdir = tempfile.mkdtemp()
	    try:
	        tempfilename = os.path.join(tempdir, 'roadmap.ods')
	        # NOTE(review): the exit status is not checked here; a failed
	        # download shows up below when the missing file is opened.
	        subprocess.call(["google", "docs", "get", googledocname, tempfilename])
	        if savedoc is not None:
	            shutil.copy(tempfilename, savedoc)
	        tempzip = zipfile.ZipFile(tempfilename)
	        try:
	            return tempzip.read('content.xml')
	        finally:
	            # close the archive before the file underneath it is deleted
	            tempzip.close()
	    finally:
	        # always drop the scratch directory, even if the download, the
	        # copy or the unzip step raised
	        shutil.rmtree(tempdir, ignore_errors=True)

	def convert_doc_to_object(roadmap_doc):
	    '''Parse ODS content.xml into a months/groups/activities structure.

	    The first table row supplies the month labels.  Every later row is
	    either a group header or an activity line; the two are told apart by
	    the style of the row's first cell.

	    roadmap_doc -- XML text of the spreadsheet's content.xml

	    Returns a dict with two keys:
	      'months' -- list of str, one per header cell after the first two
	      'groups' -- list of dicts with 'name', optional 'link' and
	                  'activities'; each activity is a dict with 'name',
	                  optional 'link' and 'plans' (one entry per spreadsheet
	                  column, '' for an empty cell)
	    Names and plans are stored as the lxml objects found in the cells (they
	    are not passed through str()); months and links are plain strings.
	    '''
	    tablens = '{urn:oasis:names:tc:opendocument:xmlns:table:1.0}'
	    celltag = tablens + 'table-cell'
	    styleattr = tablens + 'style-name'
	    repeatattr = tablens + 'number-columns-repeated'

	    obj = lxml.objectify.fromstring(roadmap_doc)
	    table = obj['body']['spreadsheet'][tablens + 'table']

	    retval = {'months': [], 'groups': []}
	    rowiter = table.iterchildren(tag=tablens + 'table-row')

	    # load the header from the top row
	    toprow = next(rowiter)
	    celliter = toprow.iterchildren(tag=celltag)
	    # skip over the first two cells, which as of this writing, contain
	    # "WMF Engineering" and "(links)" respectively
	    next(celliter)
	    next(celliter)
	    for cell in celliter:
	        # NOTE(review): getchildren() is kept on purpose; per the lxml docs,
	        # indexing an objectify element selects same-tag siblings, not
	        # children -- confirm before "simplifying" to cell[0]
	        retval['months'].append(str(cell.getchildren()[0]))

	    # loop through the remaining rows, using styles to figure out if
	    # the row is a group header or an activity line. We'll assume the
	    # second line is a group header line, and whatever lines that have
	    # the same style are also group header lines
	    groupstyle = None
	    for row in rowiter:
	        celliter = row.iterchildren(tag=celltag)
	        try:
	            leftcell = next(celliter)
	        except StopIteration:
	            # a row without any cell has nothing to record
	            continue
	        if groupstyle is None:
	            groupstyle = leftcell.attrib[styleattr]
	        if leftcell.attrib[styleattr] == groupstyle:
	            newgroup = {}
	            newgroup['name'] = leftcell.getchildren()[0]
	            nextcell = next(celliter)
	            if nextcell.countchildren() > 0:
	                newgroup['link'] = str(nextcell.getchildren()[0])
	            newgroup['activities'] = []
	            retval['groups'].append(newgroup)
	        else:
	            activity = {}
	            # if there's no text in the far left cell, assume this row doesn't
	            # contain any useful information
	            try:
	                activity['name'] = leftcell.getchildren()[0]
	            except IndexError:
	                continue
	            linkcell = next(celliter)
	            if linkcell.countchildren() > 0:
	                activity['link'] = str(linkcell.getchildren()[0])
	            activity['plans'] = []
	            for cell in celliter:
	                # one XML cell may stand for several identical columns
	                repeatcount = int(cell.attrib.get(repeatattr, 1))
	                children = cell.getchildren()
	                if children:
	                    plan = children[0]
	                else:
	                    plan = ''
	                activity['plans'].extend([plan] * repeatcount)
	            retval['groups'][-1]['activities'].append(activity)
	    return retval

	def render_link(linkobj):
	    '''Render a group/activity dict as a piece of wikitext.

	    linkobj -- dict with a 'name' entry and an optional 'link' (URL)

	    Returns "[[Page]]" or "[[Page|name]]" when the link points below
	    www.mediawiki.org/wiki/ (underscores in the page title become spaces),
	    "[url name]" for any other URL, and the bare name when there is no
	    'link' entry.
	    '''
	    name = linkobj['name']
	    if 'link' not in linkobj:
	        # no link
	        return name
	    url = linkobj['link']
	    # dots are escaped so that only the real host name matches
	    mobj = re.match(r'https?://www\.mediawiki\.org/wiki/(.*)', url)
	    if mobj is None:
	        # full hyperlink
	        return "[" + url + " " + name + "]"
	    # internal page link
	    wikipage = mobj.group(1).replace('_', ' ')
	    if name == wikipage:
	        return "[[" + wikipage + "]]"
	    # a bare pipe separates target and label; no backslash belongs here
	    return "[[" + wikipage + "|" + name + "]]"

	def item_link(item):
	    '''Classify the optional 'link' entry of a group or activity dict.

	    item -- dict that may carry a 'link' (URL string)

	    Returns {'page': title} for a URL below www.mediawiki.org/wiki/ (with
	    underscores in the title turned into spaces), {'link': url} for any
	    other URL, and {} when the dict has no 'link' entry.
	    '''
	    if 'link' not in item:
	        # no link
	        return {}
	    # dots are escaped so that only the real host name matches
	    mobj = re.match(r'https?://www\.mediawiki\.org/wiki/(.*)', item['link'])
	    if mobj:
	        # internal page link
	        return {'page': mobj.group(1).replace('_', ' ')}
	    # full hyperlink
	    return {'link': item['link']}

	def optional_group_params(group):
	    '''Return the link-related template parameters for a group.

	    group -- group dict, classified through item_link()

	    Returns "|grouppage=<title>" for a wiki page link, "|grouplink=<url>"
	    for an external link, or '' when the group has no link.
	    '''
	    link = item_link(group)
	    retval = ''
	    # template parameters are introduced by a bare pipe, no backslash
	    if 'page' in link:
	        retval += "|grouppage=" + link['page']
	    if 'link' in link:
	        retval += "|grouplink=" + link['link']
	    return retval

	def optional_activity_params(activity):
	    '''Return the link-related template parameters for an activity.

	    activity -- activity dict, classified through item_link()

	    Returns "|activitypage=<title>" for a wiki page link,
	    "|activitylink=<url>" for an external link, or '' when the activity
	    has no link.
	    '''
	    link = item_link(activity)
	    retval = ''
	    # template parameters are introduced by a bare pipe, no backslash
	    if 'page' in link:
	        retval += "|activitypage=" + link['page']
	    if 'link' in link:
	        retval += "|activitylink=" + link['link']
	    return retval

	def group_header_template(group):
	    '''Build the {{/groupHeader}} template call for one group.

	    group -- group dict; its 'name' is emitted as groupname, followed by
	             whatever optional_group_params() yields

	    Returns the template call as one line ending in a newline.
	    '''
	    # bare pipe before the parameter; plain "+" because the name may be
	    # an lxml object
	    retval = "{{/groupHeader|groupname=" + group['name']
	    retval += optional_group_params(group)
	    retval += "}}\n"
	    return retval

	def activity_header_template(group, activity):
	    '''Build the {{/activityHeader}} template call for one activity.

	    group    -- the group dict the activity belongs to
	    activity -- the activity dict

	    Returns the template call (group name, activity name, then the
	    optional link parameters of both) as one line ending in a newline.
	    '''
	    # bare pipes before each parameter; plain "+" because the names may
	    # be lxml objects
	    retval = "{{/activityHeader|groupname=" + group['name']
	    retval += "|activityname=" + activity['name']
	    retval += optional_group_params(group)
	    retval += optional_activity_params(activity)
	    retval += "}}\n"
	    return retval

	def activity_month_template(group, activity, month, plan):
	    '''Build the {{/activityMonth}} template call for one plan entry.

	    group    -- the group dict the activity belongs to
	    activity -- the activity dict
	    month    -- value emitted as the month parameter
	    plan     -- value emitted as the plan parameter

	    Returns the template call as one line ending in a newline.
	    '''
	    # bare pipes before each parameter; plain "+" because names and
	    # plans may be lxml objects
	    retval = "{{/activityMonth|groupname=" + group['name']
	    retval += "|activityname=" + activity['name']
	    retval += optional_group_params(group)
	    retval += optional_activity_params(activity)
	    retval += "|month=" + month
	    retval += "|plan=" + plan
	    retval += "}}\n"
	    return retval

	def generate_wikitext(roadmap_obj):
	    '''Turn the parsed roadmap into the wikitext of the roadmap page.

	    roadmap_obj -- dict with 'months' and 'groups', as produced by
	                   convert_doc_to_object()

	    Returns a string that starts with {{/header}}, ends with {{/footer}}
	    and holds, per group, a group header, then per activity an activity
	    header plus one activityMonth line for every non-blank plan.
	    '''
	    retval = "{{/header}}\n"
	    for group in roadmap_obj['groups']:
	        retval += group_header_template(group)
	        for activity in group['activities']:
	            retval += activity_header_template(group, activity)
	            for i, plan in enumerate(activity['plans']):
	                # appending ' ' yields a plain string to strip, whatever
	                # kind of object the plan is; blank plans emit nothing
	                if (plan + ' ').strip():
	                    retval += activity_month_template(
	                        group,
	                        activity,
	                        roadmap_obj['months'][i],
	                        plan)
	    retval += "{{/footer}}\n"
	    return retval

	def publish_to_wiki(pagename, newcontent, summary):
	    '''Save newcontent to the page called pagename on the mediawiki site.

	    pagename   -- title of the page to write
	    newcontent -- full replacement text for the page
	    summary    -- edit comment passed along with the save
	    '''
	    site = pywikibot.getSite(code='mediawiki', fam='mediawiki')
	    page = pywikibot.Page(site, pagename)
	    page.put(newtext=newcontent, comment=summary)

	def main():
	    '''Loads GDoc Roadmap, converts to wikitext, and publishes

	    Command line options:
	      -l/--loaddoc  read a local ODS file instead of fetching from
	                    Google Docs
	      -s/--savedoc  keep a copy of the fetched ODS file at this path
	      -p/--publish  write the result to the 'Roadmap' wiki page; without
	                    it the wikitext is printed to stdout as UTF-8
	      -c/--comment  edit summary used when publishing
	    '''
	    usage = "usage: %prog [options]"
	    parser = optparse.OptionParser(usage)
	    parser.add_option("-l", "--loaddoc", dest="loaddoc",
	                      default=None,
	                      help="Load local doc file instead of pulling from Google Docs")
	    parser.add_option("-p", "--publish", dest="publish", action="store_true",
	                      default=False,
	                      help="Publish result to wiki")
	    parser.add_option("-s", "--savedoc", dest="savedoc",
	                      default=None,
	                      help="Save doc file from Google Docs to given location")
	    parser.add_option("-c", "--comment", dest="comment",
	                      default=DEFAULTCOMMENT,
	                      help='Comment to pass (default: "' + DEFAULTCOMMENT + '")')
	    (options, _args) = parser.parse_args()

	    if options.loaddoc is None:
	        roadmap_doc = load_roadmap_doc("Engineering Projects Roadmap", options.savedoc)
	    else:
	        tempzip = zipfile.ZipFile(options.loaddoc)
	        try:
	            roadmap_doc = tempzip.read('content.xml')
	        finally:
	            tempzip.close()
	    roadmap_obj = convert_doc_to_object(roadmap_doc)
	    wikitext = generate_wikitext(roadmap_obj)
	    if options.publish:
	        # an explicitly empty --comment falls back to the default summary;
	        # no 'interactive' option is defined, so it must not be consulted
	        comment = options.comment or DEFAULTCOMMENT
	        publish_to_wiki(u'Roadmap', wikitext, comment)
	    else:
	        # parenthesized so the line parses as a print statement and as a
	        # print() call alike
	        print(wikitext.encode('utf-8'))

	# Run the publisher only when this file is executed as a script.
	if __name__ == "__main__":
	    main()