Skip to content

Instantly share code, notes, and snippets.

Last active Sep 23, 2020
What would you like to do?
Convert Jive CMS document to a Git repository
#!/usr/bin/python3 -tt
# -*- coding: UTF-8 -*-
# vim: ts=4 sw=4 et ai si
# SPDX-License-Identifier: GPL-2.0-or-later
import argparse
import dateutil.parser
import errno
import logging
import netrc
import os
import sys
import urllib
from dateutil import parser as dateutil_parser
from lxml import etree
# All third-party libraries below are available via PyPI:
# jiveapi - client for the Jive API
# pygit2 - Python bindings to libgit2
import jiveapi
import pygit2
# Module metadata.
__copyright__ = "Copyright (C) 2020"
__author__ = "Andy Shevchenko <>"
# Module-level logger used for the messages emitted by this script.
LOG = logging.getLogger(__name__)
def get_auth_data(host):
    """Return the ~/.netrc credentials stored for the given host.

    Args:
        host: Host name to look up in the user's ~/.netrc file.

    Returns:
        The (login, account, password) tuple found for the host, or None
        when the file holds no matching (or default) entry.

    Raises:
        SystemExit: If ~/.netrc cannot be read or cannot be parsed.
    """
    # Only loading the file can raise the errors handled here, so the
    # guarded region is limited to the netrc construction.
    try:
        dotnetrc = netrc.netrc()
    except IOError as err:
        raise SystemExit("Can't read ~/.netrc file: %s" % str(err))
    except netrc.NetrcParseError as err:
        raise SystemExit("Can't parse ~/.netrc file: %s" % str(err))
    return dotnetrc.authenticators(host)
def setup_logging(verbosity=0):
    """Set up console logging for this script and the libraries it uses.

    One stream handler is shared by every logger configured here. Messages
    are colorized when the optional ``colorlog`` package can be imported;
    otherwise a plain ``LEVEL: message`` format is used.

    NOTE(review): the statements wiring the handler and choosing levels
    were missing from the reviewed text, so the per-level settings below
    are a reconstruction - confirm against the intended behavior.

    Args:
        verbosity: Number of ``-v`` flags given on the command line:
            0 - informational messages of this script;
            1 - plus debug messages of this script and 'jiveapi.api' info;
            2 - plus 'urllib3' info;
            3 - plus debug messages of both libraries;
            4 - plus the raw HTTP conversation printed by http.client.
    """
    conh = logging.StreamHandler()
    try:
        import colorlog
    except ImportError:
        # colorlog is optional: fall back to an uncolored format.
        fmt = "%(levelname)s: %(message)s"
        formatter = logging.Formatter(fmt)
    else:
        fmt = "%(log_color)s%(levelname)s%(reset)s: %(message)s"
        formatter = colorlog.ColoredFormatter(fmt)
    conh.setFormatter(formatter)

    LOG.addHandler(conh)
    LOG.setLevel(logging.INFO)

    if verbosity >= 1:
        LOG.setLevel(logging.DEBUG)
        jivelog = logging.getLogger('jiveapi.api')
        jivelog.addHandler(conh)
        jivelog.setLevel(logging.INFO)
    if verbosity >= 2:
        urllib3log = logging.getLogger('urllib3')
        urllib3log.addHandler(conh)
        urllib3log.setLevel(logging.INFO)
    if verbosity >= 3:
        # Both library loggers already have the handler attached by the
        # two steps above; only their thresholds are lowered here.
        logging.getLogger('jiveapi.api').setLevel(logging.DEBUG)
        logging.getLogger('urllib3').setLevel(logging.DEBUG)
    if verbosity >= 4:
        # http.client prints the request/response traffic when this
        # class-level debug level is non-zero.
        from http.client import HTTPConnection
        HTTPConnection.debuglevel = 1
def get_version_objs(api, content):
    """Fetch the version objects recorded for the given content.

    Follows the 'versions' resource reference of the content object and
    returns whatever the API yields for it.
    """
    versions_ref = content['resources']['versions']['ref']
    return api._get(versions_ref)
def get_name(api, content):
    """Return the full name of the content author ('Full Name')."""
    author = content['author']
    return author['name']['fullName']
def get_first_email(api, content):
    """Return the first e-mail address listed for the content author."""
    author_emails = content['author']['emails']
    first_entry = author_emails[0]
    return first_entry['value']
def get_updated_datetime(api, content):
    """Parse the 'updated' timestamp of the content with dateutil."""
    updated_stamp = content['content']['updated']
    return dateutil_parser.parse(updated_stamp)
def get_subject_per_version(api, content):
    """Build the 'Subject vN' title for one version of the content."""
    subject = content['content']['subject']
    number = content['versionNumber']
    return "%s v%d" % (subject, number)
def get_content_obj_by_version(api, version):
    """Fetch the content object referenced by a version's 'self' link."""
    self_ref = version['resources']['self']['ref']
    return api._get(self_ref)
def write_data_to_disk(fname, data):
    """Write text to a file, replacing any previous contents.

    Args:
        fname: Path of the file to (over)write.
        data: Text to store in the file.
    """
    # An explicit encoding keeps the output independent of the locale
    # the script happens to run under.
    with open(fname, 'w', encoding='utf-8') as xfile:
        xfile.write(data)
def write_content_to_disk(api, content, fname):
    """Save the content body to a file as pretty-printed markup.

    The body text is parsed with lxml's HTML parser, serialized again
    with pretty printing and written to the given file.
    """
    raw_markup = content['content']['content']['text']
    html_parser = etree.HTMLParser()
    root = etree.fromstring(raw_markup, html_parser)
    serialized = etree.tostring(root, pretty_print=True)
    write_data_to_disk(fname, serialized.decode('utf-8'))
def write_version_to_disk(api, content, fname):
    """Store the content's version number, followed by a newline, in a file."""
    number = content['versionNumber']
    write_data_to_disk(fname, "%d\n" % number)
def add_files_to_repo(repo, *args):
    """Stage the given files and report whether any of them changed.

    Args:
        repo: pygit2 repository whose working directory holds the files.
        *args: Paths of the files to stage; they are converted to paths
            relative to the repository working directory.

    Returns:
        True if, after staging, at least one file has a status other
        than GIT_STATUS_CURRENT; False otherwise (including when no
        files were given).
    """
    files = [os.path.relpath(fname, repo.workdir) for fname in args]

    index = repo.index
    for fname in files:
        index.add(fname)
    # Persist the staged entries so the on-disk index matches what gets
    # committed afterwards.
    index.write()

    return any(repo.status_file(fname) != pygit2.GIT_STATUS_CURRENT
               for fname in files)
def commit_current_version_of_files(repo, name, email, date, title):
    """Commit the currently staged files on top of HEAD.

    Args:
        repo: pygit2 repository to commit into.
        name: Author name recorded in the commit.
        email: Author e-mail recorded in the commit.
        date: datetime used as the author time of the commit.
        title: Subject line; it is also repeated in the message body.
    """
    # The author time is stored with a zero timezone offset.
    author = pygit2.Signature(name, email, int(date.timestamp()), 0)
    committer = repo.default_signature
    tree = repo.index.write_tree()
    msg = '%s\n\nAdd %s.' % (title, title)
    # Chain onto the existing history: only the very first commit of a
    # fresh repository has no parent. With an empty parent list every
    # later commit would be a root commit that cannot advance HEAD.
    parents = [] if repo.head_is_unborn else [repo.head.target]
    repo.create_commit('HEAD', author, committer, msg, tree, parents)
def main(args):
    """MAIN routine.

    Downloads every version of a Jive document that is newer than the one
    recorded in the current directory and commits each of them, oldest
    first, to the Git repository located in the current directory.

    Args:
        args: Unused; the command line is read by argparse from sys.argv.

    Returns:
        errno.EEXIST when a downloaded version produced no changes to
        commit, None otherwise.

    Raises:
        SystemExit: If no credentials for the document's host are found.
    """
    # A bare `import urllib` does not guarantee that the `parse`
    # submodule is loaded, so import it explicitly before use.
    import urllib.parse

    parser = argparse.ArgumentParser(description="Converts Jive document to a Git repository.")
    parser.add_argument('-v', '--verbose', default=0, action="count", help='enable debug messages')
    parser.add_argument('URI', help='URI of the document to convert')
    args = parser.parse_args()

    setup_logging(args.verbose)
    LOG.info('Using %s v%s...', jiveapi.version.PROJECT_URL, jiveapi.version.VERSION)

    # Both output files live in the current directory, which must
    # already be a Git repository.
    repodir = os.getcwd()
    url = urllib.parse.urlparse(args.URI)
    base = os.path.basename(url.path)
    doc = os.path.join(repodir, "%s-%s.html" % (base, 'content'))
    ver = os.path.join(repodir, "%s-%s.txt" % (base, 'version'))
    repo = pygit2.Repository(repodir)

    auth = get_auth_data(url.hostname)
    if auth is None:
        # Without this check the tuple unpacking below would fail with
        # an unhelpful TypeError.
        raise SystemExit("No ~/.netrc entry found for host %s" % url.hostname)
    login, _, passwd = auth
    api = jiveapi.JiveApi('%s://%s/api' % (url.scheme, url.hostname), login, passwd)

    content_id = api._get_content_id_by_html_url(urllib.parse.urlunparse(url))
    content = api.get_content(content_id)
    LOG.info('Found content ID %s for %s', content_id, base)

    versions = get_version_objs(api, content)
    if len(versions) == 1:
        LOG.info('There is %d version of the document', len(versions))
    else:
        LOG.info('There are %d versions of the document', len(versions))

    # The version file remembers the last version committed by an
    # earlier run; its absence means nothing was converted yet.
    try:
        with open(ver, 'r') as verfile:
            last_version = int(verfile.read())
    except FileNotFoundError:
        last_version = 0
    LOG.info('Skipping %d versions...', last_version)

    for version in sorted(versions, key=lambda x: x['versionNumber']):
        current_version = version['versionNumber']
        if last_version >= current_version:
            LOG.debug('Skipping v%d...', current_version)
            continue

        LOG.debug('Retrieving v%d...', current_version)
        content = get_content_obj_by_version(api, version)
        name = get_name(api, content)
        email = get_first_email(api, content)
        updated = get_updated_datetime(api, content)
        subject = get_subject_per_version(api, content)
        LOG.info('%s, uploaded %s by %s <%s>', subject, updated, name, email)

        write_content_to_disk(api, content, doc)
        write_version_to_disk(api, content, ver)
        if not add_files_to_repo(repo, doc, ver):
            LOG.error("No changes to commit! Duplicate version?")
            return errno.EEXIST
        commit_current_version_of_files(repo, name, email, updated, subject)
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment