Last active
September 23, 2020 17:53
-
-
Save andy-shev/53aed29e9075b0a0bf850afa92f9a256 to your computer and use it in GitHub Desktop.
Convert a Jive CMS document to a Git repository
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 -tt | |
# -*- coding: UTF-8 -*- | |
# vim: ts=4 sw=4 et ai si | |
# | |
# SPDX-License-Identifier: GPL-2.0-or-later | |
# | |
import argparse | |
import dateutil.parser | |
import errno | |
import logging | |
import netrc | |
import os | |
import sys | |
import urllib | |
from dateutil import parser as dateutil_parser | |
from lxml import etree | |
# | |
# All libraries are available via PyPi | |
# | |
# Jive API: https://jiveapi.readthedocs.io/en/latest/ | |
# Python bindings to libgit2: https://www.pygit2.org/ | |
# | |
import jiveapi | |
import pygit2 | |
# Logger shared by every function in this script.
LOG = logging.getLogger(__name__)

# Script metadata.
__author__ = "Andy Shevchenko <andy.shevchenko@gmail.com>"
__copyright__ = "Copyright (C) 2020"
def get_auth_data(host):
    """Look up the ~/.netrc credentials stored for *host*.

    Returns whatever ``netrc.authenticators()`` yields for the host: a
    ``(login, account, password)`` tuple, or ``None`` when the file holds
    neither a matching entry nor a default one.

    Terminates the program through ``SystemExit`` when the ~/.netrc file
    cannot be read or cannot be parsed.
    """
    try:
        credentials = netrc.netrc()
    except OSError as exc:  # IOError is an alias of OSError on Python 3
        raise SystemExit("Can't read ~/.netrc file: %s" % str(exc))
    except netrc.NetrcParseError as exc:
        raise SystemExit("Can't parse ~/.netrc file: %s" % str(exc))
    return credentials.authenticators(host)
def setup_logging(verbosity=0):
    """Attach one console handler to the loggers this script cares about.

    The handler uses a coloured formatter when the optional ``colorlog``
    package can be imported and a plain one otherwise.  Each logger is set
    to DEBUG once *verbosity* reaches its threshold and to INFO below it:

    * 1 -- this script's own logger
    * 2 -- the 'jiveapi.api' logger
    * 3 -- the 'urllib3' logger

    From verbosity 4 upwards ``HTTPConnection.debuglevel`` is set to 1 too.
    """
    console = logging.StreamHandler()
    try:
        # colorlog is optional; fall back to plain output without it.
        import colorlog
        formatter = colorlog.ColoredFormatter(
            "%(log_color)s%(levelname)s%(reset)s: %(message)s")
    except ImportError:
        formatter = logging.Formatter("%(levelname)s: %(message)s")
    console.setFormatter(formatter)

    # (logger, minimal verbosity at which it switches to DEBUG)
    targets = (
        (LOG, 1),
        (logging.getLogger('jiveapi.api'), 2),
        (logging.getLogger('urllib3'), 3),
    )
    for logger, threshold in targets:
        logger.addHandler(console)
        if verbosity >= threshold:
            logger.setLevel(logging.DEBUG)
        else:
            logger.setLevel(logging.INFO)

    if verbosity >= 4:
        import http.client
        http.client.HTTPConnection.debuglevel = 1
def get_version_objs(api, content):
    """Fetch what the content's 'versions' resource reference points to.

    Passes ``content['resources']['versions']['ref']`` to ``api._get()``
    and returns the result unchanged.
    """
    versions_ref = content['resources']['versions']['ref']
    return api._get(versions_ref)
def get_name(api, content):
    """Return the author's full name stored in *content* ('Full Name').

    *api* is accepted for signature symmetry with the other getters and is
    not used.
    """
    author_name = content['author']['name']
    return author_name['fullName']
def get_first_email(api, content):
    """Return the first e-mail address listed for the content author.

    The value comes from ``content['author']['emails'][0]['value']``;
    *api* is not used.
    """
    first_entry = content['author']['emails'][0]
    return first_entry['value']
def get_updated_datetime(api, content):
    """Parse the 'updated' stamp of *content* with dateutil and return it.

    The raw value is read from ``content['content']['updated']``; *api* is
    not used.
    """
    updated_stamp = content['content']['updated']
    return dateutil_parser.parse(updated_stamp)
def get_subject_per_version(api, content):
    """Build the '<subject> v<number>' title for one version of a document.

    Uses ``content['content']['subject']`` and ``content['versionNumber']``;
    *api* is not used.
    """
    subject = content['content']['subject']
    number = content['versionNumber']
    return "%s v%d" % (subject, number)
def get_content_obj_by_version(api, version):
    """Fetch the object a version entry refers to through its 'self' link.

    Passes ``version['resources']['self']['ref']`` to ``api._get()`` and
    returns the result unchanged.
    """
    self_ref = version['resources']['self']['ref']
    return api._get(self_ref)
def write_data_to_disk(fname, data):
    """Write the text *data* to the file *fname*, replacing its contents.

    The file is always encoded as UTF-8 so that the bytes stored on disk
    (and later committed) do not depend on the locale of the machine the
    script happens to run on.
    """
    with open(fname, 'w', encoding='utf-8') as xfile:
        xfile.write(data)
def write_content_to_disk(api, content, fname):
    """Save the body of *content* to *fname* as pretty-printed markup.

    The text found at ``content['content']['content']['text']`` is parsed
    with lxml's HTML parser, serialised again with ``pretty_print`` enabled
    and handed to write_data_to_disk().  *api* is not used.
    """
    raw_markup = content['content']['content']['text']
    root = etree.fromstring(raw_markup, etree.HTMLParser())
    serialised = etree.tostring(root, pretty_print=True)
    write_data_to_disk(fname, serialised.decode('utf-8'))
def write_version_to_disk(api, content, fname):
    """Record the version number of *content* in *fname*.

    The file receives ``content['versionNumber']`` as a decimal number
    followed by a newline.  *api* is not used.
    """
    number = content['versionNumber']
    write_data_to_disk(fname, "%d\n" % (number,))
def add_files_to_repo(repo, *args):
    """Stage the given files and tell whether any of them has changes.

    Every path in *args* is made relative to ``repo.workdir``, added to the
    index, and the index is written out.  Returns True as soon as one of
    the files reports a status other than ``GIT_STATUS_CURRENT``, and False
    when all of them are current (or when no files were given).
    """
    relative_paths = [os.path.relpath(path, repo.workdir) for path in args]

    for path in relative_paths:
        repo.index.add(path)
    repo.index.write()

    return any(
        repo.status_file(path) != pygit2.GIT_STATUS_CURRENT
        for path in relative_paths
    )
def commit_current_version_of_files(repo, name, email, date, title):
    """Commit the current index of *repo* on HEAD.

    repo  -- pygit2 repository whose index already holds the staged files
    name  -- author name used for the commit
    email -- author e-mail used for the commit
    date  -- datetime used as the author time of the commit
    title -- commit subject; the message body is 'Add <title>.'

    The committer is the repository's default signature.  Works both on a
    repository that already has commits and on a freshly initialised one.
    """
    # Author time is passed as epoch seconds with a time zone offset of 0.
    author = pygit2.Signature(name, email, int(date.timestamp()), 0)
    committer = repo.default_signature
    tree = repo.index.write_tree()
    msg = '%s\n\nAdd %s.' % (title, title)
    # On a repository without any commit HEAD is unborn and cannot be
    # dereferenced; the very first commit has to be created parentless.
    parents = [] if repo.head_is_unborn else [repo.head.target]
    repo.create_commit('HEAD', author, committer, msg, tree, parents)
def main(args):
    """MAIN routine.

    Import every not yet recorded version of a Jive document into the Git
    repository located in the current working directory, one commit per
    version, oldest first.

    args -- full argument vector including the program name, i.e. the same
            shape as sys.argv; when empty or None, sys.argv is used.

    Returns errno.EEXIST when a retrieved version produces no changes to
    commit, and None otherwise.  Raises SystemExit on invalid command line
    arguments, on an unreadable ~/.netrc file, or when that file holds no
    credentials for the document's host.
    """
    # 'import urllib' alone does not guarantee the 'parse' submodule is loaded.
    import urllib.parse

    parser = argparse.ArgumentParser(description="Converts Jive document to a Git repository.")
    parser.add_argument('-v', '--verbose', default=0, action="count", help='enable debug messages')
    parser.add_argument('URI', help='URI of the document to convert')
    # Parse the vector we were handed (minus the program name) instead of
    # silently falling back to sys.argv.
    opts = parser.parse_args(args[1:] if args else None)

    setup_logging(opts.verbose)

    LOG.info('Using %s v%s...', jiveapi.version.PROJECT_URL, jiveapi.version.VERSION)

    repodir = os.getcwd()
    url = urllib.parse.urlparse(opts.URI)
    base = os.path.basename(url.path)

    # Working tree files: the document body and the last imported version.
    doc = os.path.join(repodir, "%s-%s.html" % (base, 'content'))
    ver = os.path.join(repodir, "%s-%s.txt" % (base, 'version'))

    repo = pygit2.Repository(repodir)

    auth = get_auth_data(url.hostname)
    if auth is None:
        # Without this check the tuple unpacking below fails with TypeError.
        raise SystemExit("No credentials for %s in ~/.netrc file" % url.hostname)
    login, _, passwd = auth

    api = jiveapi.JiveApi('%s://%s/api' % (url.scheme, url.hostname), login, passwd)

    content_id = api._get_content_id_by_html_url(urllib.parse.urlunparse(url))
    content = api.get_content(content_id)
    LOG.info('Found content ID %s for %s', content_id, base)

    versions = get_version_objs(api, content)
    if len(versions) == 1:
        LOG.info('There is %d version of the document', len(versions))
    else:
        LOG.info('There are %d versions of the document', len(versions))

    # Resume after the version recorded by a previous run, if there is one.
    try:
        with open(ver, 'r') as verfile:
            last_version = int(verfile.read())
    except FileNotFoundError:
        last_version = 0
    LOG.info('Skipping %d versions...', last_version)

    for version in sorted(versions, key=lambda x: x['versionNumber']):
        current_version = version['versionNumber']
        if last_version >= current_version:
            LOG.debug('Skipping v%d...', current_version)
            continue

        LOG.debug('Retrieving v%d...', current_version)
        content = get_content_obj_by_version(api, version)

        name = get_name(api, content)
        email = get_first_email(api, content)
        updated = get_updated_datetime(api, content)
        subject = get_subject_per_version(api, content)
        LOG.info('%s, uploaded %s by %s <%s>', subject, updated, name, email)

        write_content_to_disk(api, content, doc)
        write_version_to_disk(api, content, ver)

        if not add_files_to_repo(repo, doc, ver):
            LOG.error("No changes to commit! Duplicate version?")
            return errno.EEXIST

        commit_current_version_of_files(repo, name, email, updated, subject)
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment