Last active July 5, 2023 19:09
The migration script that was used in October 2017 to migrate about 4900 bug reports from the Coq Bugzilla to GitHub issues. For the associated notes, see the link given with the original gist (the link itself is missing from this copy).
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Bugzilla XML File to GitHub Issues Converter
# by Andriy Berestovskyy (
# Adapted for the Coq bug tracker migration by Théo Zimmermann
# This script is licensed under the Apache 2.0 license.
# How to use the script:
# 1. Generate a GitHub access token:
# - on GitHub select "Settings"
# - select "Personal access tokens"
# - click "Generate new token"
# - type a token description, i.e. "bugzilla2github"
# - select "public_repo" to access just public repositories
# - save the generated token into the migration script
# 2. Export Bugzilla issues into an XML file:
# - go to
# - at the very end click the XML icon
# - save the XML into a file: bugzilla.xml
# 3. Run the migration script and check all the warnings:
# bugzilla2github -x bugzilla.xml -o berestovskyy -r test -t beefbeefbeef
# 4. Run the migration script again and force the updates:
# bugzilla2github -x bugzilla.xml -o berestovskyy -r test -t beefbeefbeef -f
# The script depends on the requests package.
# Nix users can get the right environment by running:
# $ nix-shell -p python2 python2Packages.requests
import csv, getopt, json, os, pprint, re, requests, sys, time, xml.etree.ElementTree
# Existing issues means issue numbers already taken on GitHub (by PRs mostly).
# The script can find these by itself but this will spare API requests.
# main() raises this value to the highest issue number found in
# bugzilla2github.log when that log exists.
existingIssues = 1155
# Set to True by the -f command-line flag; while False, github_post() skips
# the request and github_issues_add() does not import anything.
force_update = False
# Overridden by the -x command-line option.
xml_file = "bugzilla.xml"
# Base URL prepended to request paths by github_get() / github_post().
# NOTE(review): empty in this copy and no command-line option sets it;
# presumably it held the GitHub API root -- confirm before running.
github_url = ""
# Overridden by the -o, -r and -t command-line options respectively.
github_owner = ""
github_repo = ""
github_token = ""
# Maps a Bugzilla account e-mail address to the matching GitHub login.
# The "__name__" entry is only used by str2str() in its warning message.
#
# NOTE(review): in this copy almost every key is the empty string (the e-mail
# addresses appear to have been stripped).  Python keeps only the last value
# for a repeated key in a dict display, so all the "" entries collapse into a
# single one.  The real addresses must be restored before the table is usable.
email2login = {
    "__name__": "email to GitHub login",
    "": "aa755",
    "": "aa755",
    "": "achlipala-biz",
    "": "achlipala-biz",
    "": "achlipala-biz",
    "": "akoprow",
    "": "akhirsch",
    "": "brabalan",
    "": "pilki",
    "": "amintimany",
    "": "anderslundstedt",
    "": "andersk",
    "": "andres-erbsen",
    "": "andrejbauer",
    "": "amccreight",
    "": "amccreight",
    "": "anne-pacalet",
    "": "andrew-appel",
    "": "Armael",
    "": "aspiwack",
    "": "arthuraa",
    "": "charguer",
    "": "charguer",
    "": "aspiwack",
    "": "amahboubi",
    "": "amahboubi",
    "": "asr",
    "": "asya-bergal",
    "": "brianaydemir2",
    "": "bcpierce00",
    "": "bschommer",
    "": "fblanqui",
    "": "yakobowski",
    "": "barras",
    "": "barras",
    "": "bastiaanzapf",
    "": "catalin-hritcu",
    "": "ckeller",
    "": "chris-martin",
    "": "dasuxullebt",
    "": "cpitclaudel",
    "": "CohenCyril",
    "": "const",
    "": "Eelis",
    "": "Eelis",
    "": "jbapple",
    "": "robbertkrebbers",
    "": "yforster",
    "": "cmangin",
    "": "roglo",
    "": "monniaux",
    "": "davidnowak",
    "": "davidnowak",
    "": "dredozubov",
    "": "dfoxfranke",
    "": "DanFu09",
    "": "dirk-pattinson",
    "": "didickman",
    "": "daniel-ziegler",
    "": "DanGrayson",
    "": "ejgallego",
    "": "elazarg",
    "": "gares",
    "": "erikmd",
    "": "eternaleye",
    "": "ezyang",
    "": "HuStmpHrrr",
    "": "staffehn",
    "": "backtracking",
    "": "hivert",
    "": "forestjulien",
    "": "huitseeker",
    "": "huitseeker",
    "": "fpottier",
    "": "fajb",
    "": "fblanqui",
    "": "fredokun",
    "": "tuong",
    "": "SkySkimmer",
    "": "SkySkimmer",
    "": "gasche",
    "": "gares",
    "": "gdsfh",
    "": "ggonthier",
    "": "gmalecha",
    "": "gmalecha",
    "": "greenrd",
    "": "gallais",
    "": "gbury",
    "": "silene",
    "": "silene",
    "": "guillomovitch",
    "": "hendriktews",
    "": "herbelin",
    "": "herbelin",
    "": "sayon",
    "": "ichung",
    "": "ilyasergey",
    "": "nobrowser",
    "": "jadephilipoom",
    "": "Janno",
    "": "JasonGross",
    "": "jashug",
    "": "backtracking",
    "": "eponier",
    "": "jesper-bengtson",
    "": "wires",
    "": "jeremie-koenig",
    "": "jeremie-koenig",
    "": "jlottes",
    "": "jonleivent",
    "": "jwiegley",
    "": "jhjourdan",
    "": "JBons",
    "": "jnarboux",
    "": "jnarboux",
    "": "signoles",
    "": "picojulien",
    "": "signoles",
    "": "signoles",
    "": "klara-zielinska",
    "": "zunction",
    "": "larsr",
    "laurent.pubs@free.Fr": "olaure01",
    "": "kwanghoon",
    "": "letouzey",
    "": "letouzey",
    "": "letouzey",
    "": "Lionel-Rieg",
    "": "MarisaKirisame",
    "": "magaud",
    "": "magaud",
    "": "mlasson",
    "": "maggesi",
    "": "matejkosik",
    "": "teto",
    "": "mattam82",
    "": "maximedenes",
    "": "doerrie",
    "": "bluelightning32",
    "": "mhelvens",
    "": "MSoegtropIMC",
    "": "nadeemabdulhamid",
    "": "SamB",
    "": "ntc2",
    "": "zeldovich",
    "": "magaud",
    "": "NicolasOury",
    "": "paul-kline",
    "": "pirbo",
    "": "Matafou",
    "": "Matafou",
    "": "Matafou",
    "": "Matafou",
    "": "pierrecregut",
    "": "pierrecregut",
    "": "pierrecregut",
    "": "letouzey",
    "": "ppedrot",
    "": "proux01",
    "": "vporton",
    "": "RalfJung",
    "": "piyush-kurur",
    "": "psteckler",
    "": "clarus",
    "": "roconnor",
    "": "roconnor",
    "": "roconnor",
    "": "roconnor",
    "": "robrwo",
    "": "samuelgruetter",
    "": "pi8027",
    "": "bmsherman",
    "": "tupelo-schneck",
    "": "mikeshulman",
    "": "sbriais",
    "": "siegebell",
    "": "siegebell",
    "": "sigurdschneider",
    "": "sliverdragon37",
    "": "SimonBoulier",
    "": "shlomif",
    "": "shlomif",
    "": "spitters",
    "": "spitters",
    "": "sorear",
    "": "glondu",
    "": "Zdancewic",
    "": "kevinsullivan",
    "": "boulme",
    "": "boulme",
    "": "tabareau",
    "": "tadeuzagallo",
    "": "gares",
    "": "tchajed",
    "": "thery",
    "": "tbelaire",
    "": "TheoWinterhalter",
    "": "Zimmi48",
    "": "thierry-martinez",
    "": "braibant",
    "": "tomprince",
    "": "treinen",
    "": "tebbi",
    "": "Ptival",
    "": "vsiles",
    "": "vzaliva",
    "": "vgbl",
    "": "wangpengmit",
    "": "eddywestbrook",
    "": "xclerc",
    "": "xavierleroy",
    "": "yurug",
    "": "YaZko",
    "": "yurug",
    "": "ybertot",
    "": "ybertot",
    "": "ybertot",
}
# Maps a Bugzilla bug_status to "is the GitHub issue closed?".
# The "__name__" entry is only used by str2str() in its warning message; a
# status missing from this table makes str2str() warn and return None.
status2state = {
    "__name__": "status to GitHub state",
    "NEW": False,
    "VERIFIED": False,
    "ASSIGNED": False,
    "CLOSED": True,
    "REOPENED": False,
}
# Maps a Bugzilla component to the GitHub labels added to the issue.
# The "__name__" entry is only used by str2list() in its warning message.
component2labels = {
    "__name__": "component to GitHub labels",
    "Main": [],
    "Checker": ["component: checker"],
    "Doc": ["kind: documentation"],
    "Extraction": ["component: extraction"],
    "Funind": ["component: funind"],
    "IDE": ["component: IDE"],
    "Installation": [ "component: installation" ],
    "Kernel": ["component: kernel"],
    "Ltac": ["component: ltac"],
    "Modules": ["component: modules"],
    "Native compiler": ["component: native compiler"],
    "Notations": ["component: notations"],
    "Program": ["component: program"],
    "SSReflect": ["component: ssreflect"],
    "Stdlib": ["component: stdlib"],
    "STM": ["component: STM"],
    "Tactics": ["component: tactics"],
    "Tools": ["component: tools"],
    "Typeclasses": ["component: typeclasses"],
    "VM": ["component: VM"],
    "Website": ["component: website"],
}
# Maps the Bugzilla keywords string (looked up as a whole, hence the combined
# "performance, regression" key) to the GitHub labels added to the issue.
# The "__name__" entry is only used by str2list() in its warning message.
keywords2labels = {
    "__name__": "keywords to GitHub labels",
    "compatibility": ["kind: compatibility"],
    "performance": ["kind: performance"],
    "performance, regression": ["kind: performance", "kind: regression"],
    "regression": ["kind: regression"],
}
# Maps a Bugzilla resolution to the GitHub labels added to the issue.
# The "__name__" entry is only used by str2list() in its warning message.
resolution2labels = {
    "__name__": "resolution to GitHub labels",
    "FIXED": [],
    "DUPLICATE": ["resolved: duplicate"],
    "INVALID": ["resolved: invalid"],
    "MOVED": ["resolved: moved"],
    "WONTFIX": ["resolved: won't fix"],
    "WORKSFORME": ["resolved: works for me"],
}
# Maps a Bugzilla op_sys value to the GitHub labels added to the issue.
# The "__name__" entry is only used by str2list() in its warning message.
op_sys2labels = {
    "__name__": "Operating System to GitHub labels",
    "Mac OS": [ "platform: OS X" ],
    "Windows": [ "platform: Windows" ],
    "Linux": [],
    "Other": [],
    "All": [],
}
# Names of Bugzilla fields that are deliberately dropped (via fields_ignore)
# from bugs, comments and attachments before the "unconverted fields" check.
# NOTE(review): in this copy the list entries and the closing brackets are
# missing, so these three statements are not valid Python as shown; the
# original field names must be restored -- they cannot be inferred from here.
bug_unused_fields = [
comment_unused_fields = [
attachment_unused_fields = [
def usage():
    """Print the command-line synopsis followed by two example invocations.

    Uses the single-argument ``print(...)`` form, which the file already uses
    elsewhere and which prints the same text on Python 2 and Python 3.
    """
    script = os.path.basename(__file__)
    print("Bugzilla XML file to GitHub Issues Converter")
    print("Usage: %s [-h] [-f]\n"
          "\t[-x <src XML file>]\n"
          "\t[-o <dst GitHub owner>] [-r <dst repo>] [-t <dst access token>]\n"
          % script)
    print("Example:")
    print("\t%s -h" % script)
    print("\t%s -x bugzilla.xml -o dst_login -r dst_repo -t dst_token" % script)
def XML2dict(parent):
    """Convert the children of an ElementTree element into a plain dict.

    For each child element:
      * the value is a nested dict when the child has children of its own,
        otherwise the child's text;
      * the value is stored under the child's tag only when the child's
        ``text`` is truthy; a tag seen more than once is collected into a list;
      * every XML attribute is stored under the key ``"<tag>.<attribute>"``.

    Returns the resulting dict.
    """
    ret = {}
    for key in parent:
        # TODO: debug
        # print len(key), key.tag, key.attrib, key.text
        if len(key) > 0:
            val = XML2dict(key)
        else:
            val = key.text
        if key.text:
            if key.tag not in ret:
                ret[key.tag] = val
            elif isinstance(ret[key.tag], list):
                # Third and later occurrence: extend the existing list.
                ret[key.tag].append(val)
            else:
                # Second occurrence: promote the scalar to a list.
                ret[key.tag] = [ret[key.tag], val]
        # Parse attributes
        for name, val in key.items():
            ret["%s.%s" % (key.tag, name)] = val
    return ret
def str2list(map, str):
    """Look up ``str`` in the conversion table ``map`` and return its list.

    When the key is unknown, a warning naming the table (``map["__name__"]``)
    is printed once and an empty list is stored under that key, so the same
    value is not reported again.
    """
    if str not in map:
        print("WARNING: unable to convert %s: %s" % (map["__name__"], str))
        # Suppress further reports
        map[str] = []
    return map[str]
def str2str(map, str):
    """Look up ``str`` in the conversion table ``map`` and return its value.

    When the key is unknown, a warning naming the table (``map["__name__"]``)
    is printed once and ``None`` is stored under that key, so the same value
    is not reported again; ``None`` is then returned.
    """
    if str not in map:
        print("WARNING: unable to convert %s: %s" % (map["__name__"], str))
        # Suppress further reports
        map[str] = None
    return map[str]
def id_convert(id):
    """Return a Markdown link ``[BZ#<id>](...)`` for a Bugzilla bug id.

    The link is a GitHub issue search, in the configured owner/repo, for the
    phrase "Original bug ID: BZ#<id>" (which bug_convert() writes into every
    migrated issue body).  ``id`` must be a string.

    The target is an absolute https://github.com/ URL: a bare
    "owner/repo/issues?..." path would be resolved relative to the page the
    link is rendered on.
    """
    return ("[BZ#" + id + "](https://github.com/" + github_owner + "/" + github_repo
            + "/issues?q=is%3Aissue%20%22Original%20bug%20ID%3A%20BZ%23" + id + "%22)")
def id_convert_from_match(match):
    """Replacement callback for ``re.sub`` that rewrites a bug reference.

    ``match`` is expected to have two groups, as produced by the pattern used
    in comment_convert(): group 1 is the text introducing the reference
    ("bug ", " #", ...) and group 2 is the bug number.  Any "#" is removed
    from the introducing text and the number is replaced by the Markdown link
    built by id_convert().
    """
    return re.sub(r'\#', "", match.group(1)) + id_convert(match.group(2))
def ids_convert(ids):
    """Convert one bug id, or a list of bug ids, to comma-separated links.

    Returns "" for an empty or missing value; otherwise each id is turned
    into a Markdown link by id_convert() and the links are joined with ", ".
    """
    if not ids:
        return ""
    # XML2dict() yields a scalar for a single element and a list for several.
    if not isinstance(ids, list):
        ids = [ids]
    return ", ".join([id_convert(id) for id in ids])
def see_also_convert(see_also):
    """Convert a Bugzilla "see also" value into a link where possible.

    When the value ends with ``id=<digits>`` the digits are taken as a bug id
    and converted with id_convert(); any other value is returned unchanged.
    """
    result = re.search(r'id=(\d+)$', see_also)
    if not result:
        return see_also
    return id_convert(result.group(1))
def email_convert(email, name):
    """Render a Bugzilla user for inclusion in a GitHub issue or comment.

    Returns ``"@<login>"`` when the address has a GitHub login in
    email2login; otherwise ``"<name> &lt;<email>&gt;"`` (with the address in
    angle brackets) when a name is given and does not itself contain "@";
    otherwise the bare address.
    """
    login = str2str(email2login, email)
    if login:
        return "@" + login
    has_plain_name = bool(name) and "@" not in name
    if has_plain_name:
        return "%s &lt;<%s>&gt;" % (name, email)
    return email
def emails_convert(emails):
    """Convert one address, or a list of addresses, with email_convert().

    Empty strings inside a list are skipped.  Always returns a list.
    """
    # XML2dict() yields a scalar for a single element and a list for several;
    # only the list form is filtered for empty entries.
    if isinstance(emails, list):
        return [email_convert(email, None) for email in emails if email != ""]
    return [email_convert(emails, None)]
def fields_ignore(obj, fields):
    """Remove every key listed in ``fields`` from the dict ``obj`` in place.

    Keys that are not present are silently skipped.  Returns None.
    """
    # Ignore some Bugzilla fields
    for name in fields:
        if name in obj:
            del obj[name]
def fields_dump(obj):
    """Print every remaining ``key[len(value)] = value`` pair of ``obj``.

    Used to show fields that were left over after conversion.  Each value
    must support ``len()``.
    """
    # Make sure we have converted all the fields
    for key, val in obj.items():
        print(" " * 8 + "%s[%d] = %s" % (key, len(val), val))
# Render one Bugzilla attachment dict as Markdown quote lines and store the
# joined text in idx under the attachment id.  Consumed fields are popped
# from attach so that leftovers can be reported.
# NOTE(review): this copy has lost its indentation and every line that
# contained no whitespace, so the body is not runnable as shown.
def attachment_convert(idx, attach):
ret = []
id = attach.pop("attachid")
# NOTE(review): the format string below has three %s placeholders but four
# values are supplied, which raises TypeError.  The link target inside
# "](...)" appears to have been stripped -- presumably an attachment URL that
# takes the id; restore it before running.
ret.append("> Attached file: [%s]( (%s, %s bytes)" % (attach.pop("filename"), id, attach.pop("type"), attach.pop("size")))
if "desc" in attach:
ret.append("> Description: " + attach.pop("desc"))
# Ignore some fields
global attachment_unused_fields
fields_ignore(attach, attachment_unused_fields)
# Make sure we have converted all the fields
if attach:
print "WARNING: unconverted attachment fields:"
# NOTE(review): a statement listing the leftover fields seems to be missing
# after the warning (possibly a call to fields_dump) -- confirm.
idx[id] = "\n".join(ret)
def attachments_convert(attachments):
    """Convert one attachment, or a list of attachments, to Markdown text.

    Returns a dict filled in by attachment_convert(), keyed by attachment id.
    """
    ret = {}
    # XML2dict() yields a scalar for a single element and a list for several.
    if isinstance(attachments, list):
        for attachment in attachments:
            attachment_convert(ret, attachment)
    else:
        attachment_convert(ret, attachments)
    return ret
def date_convert(date):
    """Convert ``"YYYY-MM-DD HH:MM:SS +HHMM"`` to ``"YYYY-MM-DDTHH:MM:SS+HH:MM"``.

    Only offsets written with a leading "+" are matched.  When the input does
    not match, a message is printed and the script exits with status 1.
    """
    result = re.match(r'(\d\d\d\d-\d\d-\d\d) (\d\d:\d\d:\d\d) \+(\d\d)(\d\d)', date)
    if not result:
        print("Date %s was not converted!" % date)
        # NOTE(review): the statement after this message is missing from this
        # copy; aborting is assumed, because continuing would fail on
        # ``None.group`` just below -- confirm.
        sys.exit(1)
    return "{a}T{b}+{c}:{d}".format(a = result.group(1), b = result.group(2),
                                    c = result.group(3), d = result.group(4))
# Convert one Bugzilla comment dict into {"body": ..., "created_at": ...}.
# Consumed fields are popped from comment so that leftovers can be reported.
# NOTE(review): this copy has lost its indentation and every line that
# contained no whitespace, so the body is not runnable as shown.
def comment_convert(comment, attachments):
ret = []
id = int(comment.pop("commentid"))
# NOTE(review): the reason for the 1658 threshold is not visible here, and
# any else-branch of this test has been lost.
if id >= 1658:
# NOTE(review): the key of the second pop is empty in this copy; presumably
# it named an attribute key as built by XML2dict ("<tag>.<attribute>") --
# confirm.
ret.append("Comment author: "
+ email_convert(comment.pop("who"), comment.pop("", None)))
# "@" is followed by a space so the text does not produce GitHub mentions.
ret.append(comment.pop("thetext", "*No description provided.*").replace("@", "@ "))
# Convert attachments if any
if "attachid" in comment:
attachid = comment.pop("attachid")
# NOTE(review): the body of the following test is missing from this copy.
if attachid in attachments:
# Syntax: convert "bug id" to "BZ#id"
for i, val in enumerate(ret):
val = re.sub(r"\(In reply to comment \#\d+\)","", val)
ret[i] = re.sub(r"(?i)(bug(?:\s+report)?\s+|feature wish\s+|\s\#)(\d\d?\d?\d?)", id_convert_from_match, val)
created_at = date_convert(comment.pop("bug_when"))
# Ignore some comment fields
global comment_unused_fields
fields_ignore(comment, comment_unused_fields)
# Make sure we have converted all the fields
if comment:
print "WARNING: unconverted comment fields:"
# NOTE(review): a statement listing the leftover fields seems to be missing
# after the warning -- confirm.
return { "body": "\n".join(ret), "created_at": created_at }
def comments_convert(comments, attachments):
    """Convert one comment, or a list of comments, with comment_convert().

    ``attachments`` is the id -> Markdown mapping handed on to
    comment_convert().  Always returns a list.
    """
    # XML2dict() yields a scalar for a single element and a list for several.
    if isinstance(comments, list):
        return [comment_convert(comment, attachments) for comment in comments]
    return [comment_convert(comments, attachments)]
# Convert one Bugzilla bug dict (as produced by XML2dict) into the dict that
# describes the GitHub issue: "number", "title", "body", "labels",
# "comments", "created_at", "closed" and, where applicable, "assignee" and
# "closed_at".  Consumed fields are popped from bug so that leftovers can be
# reported at the end.
# NOTE(review): this copy has lost its indentation and every line that
# contained no whitespace, so the body is not runnable as shown.
def bug_convert(bug):
ret = {}
ret["body"] = []
ret["body"].append("Note: the issue was created automatically with %s tool"
% os.path.basename(__file__))
ret["labels"] = []
ret["comments"] = []
attachments = {}
# Convert bug_id to number
ret["number"] = int(bug.pop("bug_id"))
# Convert attachments if any
if "attachment" in bug:
attachments = attachments_convert(bug.pop("attachment"))
# Convert long_desc and attachment to comments
ret["comments"].extend(comments_convert(bug.pop("long_desc"), attachments))
# Convert short_desc to title
ret["title"] = bug.pop("short_desc")
# Convert creation_ts to created_at
ret["created_at"] = date_convert(bug.pop("creation_ts"))
# Convert component to labels
ret["labels"].extend(str2list(component2labels, bug.pop("component")))
# Convert bug_status to state
ret["closed"] = str2str(status2state, bug.pop("bug_status"))
# We only assign open bug reports
assignee = str2str(email2login, bug.pop("assigned_to"))
if not ret["closed"] and assignee:
ret["assignee"] = assignee
# Approximate closing date with last update date
updated_at = bug.pop("delta_ts")
if ret["closed"]:
ret["closed_at"] = date_convert(updated_at)
# Convert (optional) keywords to labels
ret["labels"].extend(str2list(keywords2labels, bug.pop("keywords","")))
# Convert resolution to labels
if "resolution" in bug:
ret["labels"].extend(str2list(resolution2labels, bug.pop("resolution")))
# Convert op_sys to labels
if "op_sys" in bug:
ret["labels"].extend(str2list(op_sys2labels, bug.pop("op_sys")))
# Create the bug description
ret["body"].append("Original bug ID: BZ#%d" % ret["number"])
# NOTE(review): the key of the second pop is empty in this copy; presumably
# it named an attribute key as built by XML2dict ("<tag>.<attribute>") --
# confirm.
ret["body"].append("From: " + email_convert(bug.pop("reporter"),
bug.pop("", None)))
ret["body"].append("Reported version: " + bug.pop("version"))
if "cc" in bug:
ret["body"].append("CC: " + ", ".join(emails_convert(bug.pop("cc"))))
# Extra information
if "dup_id" in bug:
ret["body"].append("Duplicates: " + ids_convert(bug.pop("dup_id")))
if "dependson" in bug:
ret["body"].append("Depends on: " + ids_convert(bug.pop("dependson")))
if "blocked" in bug:
ret["body"].append("Blocker for: " + ids_convert(bug.pop("blocked")))
if "see_also" in bug:
see_also = bug.pop("see_also")
# NOTE(review): an "else:" between the single-string case and the loop over
# several values appears to be missing here; without it a single string
# would also be iterated character by character.
if isinstance(see_also, basestring):
ret["body"].append("See also: " + see_also_convert(see_also))
for item in see_also:
ret["body"].append("See also: " + see_also_convert(item))
# Put everything together
ret["body"] = "\n".join(ret["body"])
# Ignore some bug fields
global bug_unused_fields
fields_ignore(bug, bug_unused_fields)
# Make sure we have converted all the fields
if bug:
print "WARNING: unconverted bug fields:"
# NOTE(review): a statement listing the leftover fields seems to be missing
# after this warning and after the next one -- confirm.
# Make sure we have converted all the attachments
if attachments:
print "WARNING: unconverted attachments:"
return ret
# Convert every <bug> element below xml_root and return a dict mapping the
# Bugzilla bug number to the issue dict built by bug_convert() (with its
# "number" entry removed).
# NOTE(review): this copy has lost its indentation and every line that
# contained no whitespace, so the body is not runnable as shown.
def bugs_convert(xml_root):
issues = {}
for xml_bug in xml_root.iter("bug"):
bug = XML2dict(xml_bug)
issue = bug_convert(bug)
# Check for duplicates
id = issue.pop("number")
if id in issues:
print("Error checking for duplicates: bug #%d is duplicated in the '%s'"
% (id, xml_file))
# NOTE(review): a statement seems to be missing after the error message
# (presumably the script aborted here) -- confirm.
issues[id] = issue
return issues
def github_get(url, avs = None):
    """Send a GET request to GitHub and return the ``requests`` response.

    ``url`` may be:
      * a path starting with "/", appended to ``github_url``;
      * a full "https://" or "http://" URL, used as is;
      * anything else, taken relative to the configured repository
        (``<github_url>/repos/<owner>/<repo>/<url>``).

    ``avs`` holds extra query parameters.  It is copied before the access
    token is added, so neither the caller's dict nor a shared default object
    is modified.

    NOTE(review): GitHub has deprecated passing tokens through the
    ``access_token`` query parameter; sending an ``Authorization`` header
    would be needed against the current API -- confirm before reuse.
    """
    if url[0] == "/":
        u = "%s%s" % (github_url, url)
    elif url.startswith(("https://", "http://")):
        u = url
    else:
        u = "%s/repos/%s/%s/%s" % (github_url, github_owner, github_repo, url)
    # TODO: debug
    # print "GET: " + u
    params = dict(avs) if avs else {}
    params["access_token"] = github_token
    return requests.get(u, params = params)
# Send a POST request to GitHub with the entries of avs named in fields as a
# JSON body -- but only when force_update is set.  Otherwise a one-time
# "Skipping POST..." notice is printed and True is returned.
# NOTE(review): this copy has lost its indentation and every line that
# contained no whitespace, so the body is not runnable as shown.
def github_post(url, avs = {}, fields = []):
global force_update
global xml_file, github_url, github_owner, github_repo, github_token
# NOTE(review): an "else:" appears to be missing between the next two
# assignments to u; as shown the second one always wins.
if url[0] == "/":
u = "%s%s" % (github_url, url)
u = "%s/repos/%s/%s/%s" % (github_url, github_owner, github_repo, url)
d = {}
# Copy fields into the data
for field in fields:
if field not in avs:
print "Error posting filed %s to %s" % (field, url)
# NOTE(review): a statement seems to be missing after the error message
# (presumably the script aborted here); otherwise the next line raises
# KeyError for the missing field.
d[field] = avs[field]
# TODO: debug
# print "POST: " + u
# print "DATA: " + json.dumps(d)
if force_update:
# NOTE(review): the call after "return" has been stripped from this copy;
# presumably it was a POST to u through the requests package -- confirm.
return, params = { "access_token": github_token },
data = json.dumps(d))
if not github_post.warn:
print "Skipping POST... (use -f to force updates)"
github_post.warn = True
return True
# Function attribute used as a "notice already printed" flag.
github_post.warn = False
# Create the given label on GitHub (colour "000000") unless a GET for it
# already succeeds.
# NOTE(review): this copy has lost its indentation and every line that
# contained no whitespace, so the body is not runnable as shown.
def github_label_create(label):
if not github_get("labels/" + label):
print "\tcreating label '%s' on GitHub..." % label
r = github_post("labels", {
"name": label,
"color": "0"*6,
}, ["name", "color"])
if not r:
print "Error creating label %s: %s" % (label, r.headers)
# NOTE(review): a statement seems to be missing after the error message
# (presumably the script aborted here) -- confirm.
# Check that every label used by the converted issues exists on GitHub.
# NOTE(review): this copy has lost its indentation and every line that
# contained no whitespace, so the body is not runnable as shown.  Missing in
# particular: the statement that fills labels_set inside the first loop, and
# whatever was done under "if force_update:" (presumably the label was
# created there, with the warning in an else-branch) -- confirm.
def github_labels_check(issues):
global force_update
labels_set = set()
for id in issues:
for label in issues[id]["labels"]:
for label in labels_set:
if github_get("labels/" + label):
print "\tlabel '%s' exists on GitHub" % label
if force_update:
print "WARNING: label '%s' does not exist on GitHub" % label
# Check that every assignee used by the converted issues exists on GitHub,
# by requesting /users/<login> for each distinct login.
# NOTE(review): this copy has lost its indentation and every line that
# contained no whitespace, so the body is not runnable as shown.  Missing in
# particular: the statement that fills a_set, and whatever separated the
# error message from the "exists" message (presumably an abort and/or an
# else-branch) -- confirm.
def github_assignees_check(issues):
a_set = set()
for id in issues:
if "assignee" in issues[id]:
for assignee in a_set:
if not github_get("/users/" + assignee):
print "Error checking user '%s' on GitHub" % assignee
print "Assignee '%s' exists" % assignee
def github_issue_exist(number):
    """Return True when a GET for issue ``number`` yields a truthy response.

    The request goes through github_get() relative to the configured
    repository; the response object's truth value decides the result.
    """
    response = github_get("issues/%d" % number)
    return bool(response)
# Fetch issue `number` from the configured repository and return the decoded
# JSON body of the response.
# NOTE(review): this copy has lost its indentation and every line that
# contained no whitespace, so the body is not runnable as shown.
def github_issue_get(number):
req = github_get("issues/%d" % number)
if not req:
print "Error getting GitHub issue #%d: %s" % (number, req.headers)
# NOTE(review): a statement seems to be missing after the error message
# (presumably the script aborted here) -- confirm.
return req.json()
# Import one converted issue (with its comments) on GitHub, poll until the
# import is no longer "pending", append "<bugzilla id>, <issue number>" to
# bugzilla2github.log and return the issue number.
# NOTE(review): this copy has lost its indentation, every line that
# contained no whitespace, and several URL-like tokens, so the body is not
# runnable as shown.
def github_issue_append(bugzilla_id, issue):
global github_owner, github_repo, github_token
params = { "access_token": github_token }
# Preview media type sent with the import request and the status polls.
headers = { "Accept": "application/vnd.github.golden-comet-preview+json" }
print "\timporting BZ#%d on GitHub..." % bugzilla_id
# NOTE(review): the format string is empty in this copy, so applying two
# values to it raises TypeError; presumably it held the import endpoint URL
# with two %s placeholders -- restore before running.
u = "" % (github_owner, github_repo)
comments = issue.pop("comments", [])
# We can't assign people which are not in the organization / collaborators on the repo
if github_owner != "coq":
issue.pop("assignee", None)
# NOTE(review): the call on the right-hand side has been stripped from this
# copy; presumably it was a POST to u through the requests package.
r =, params = params, headers = headers,
data = json.dumps({ "issue": issue, "comments": comments }))
if not r:
print "Error importing issue on GitHub:\n%s" % r.text
print "For the record, here was the request:\n%s" % json.dumps({ "issue": issue, "comments": comments })
# NOTE(review): a statement seems to be missing after the error messages
# (presumably the script aborted here) -- confirm.
# URL of the import status, taken from the response.
u = r.json()["url"]
wait = 1
r = False
# Poll the status URL until the request succeeds and the import is no longer
# pending.
# NOTE(review): wait is doubled on every pass but never used here; presumably
# a sleep of that length was part of the loop -- confirm.
while not r or r.json()["status"] == "pending":
wait = 2 * wait
r = requests.get(u, params = params, headers = headers)
if not r.json()["status"] == "imported":
print "Error importing issue on GitHub:\n%s" % r.text
# The issue_url field of the answer should be of the form .../ISSUE_NUMBER
# So it's easy to get the issue number, to check that it is what was expected
# NOTE(review): the leading part of the pattern is empty in this copy;
# presumably it held the URL prefix preceding the owner.
result = re.match("" + github_owner + "/" + github_repo + "/issues/(\d+)", r.json()["issue_url"])
if not result:
print "Error while parsing issue number:\n%s" % r.text
# NOTE(review): the right-hand side below has been stripped; presumably it
# took the captured issue number from result -- confirm.
issue_number =
# Record the Bugzilla id -> GitHub issue number pair; main() reads this log
# to skip already imported bugs.
with open("bugzilla2github.log", "a") as f:
f.write("%d, %s\n" % (bugzilla_id, issue_number))
return issue_number
# Import the converted issues on GitHub, walking issue numbers upwards from
# 1.  A bug whose number is already taken on GitHub is postponed; postponed
# bugs are used, lowest number first, for issue numbers that have no bug of
# their own.
# NOTE(review): this copy has lost its indentation and every line that
# contained no whitespace (else-branches, loop exits, ...), so the control
# flow below is not runnable as shown and cannot be fully recovered from here.
def github_issues_add(issues):
postponed = {}
id = 0
while True:
id += 1
# Numbers up to existingIssues are assumed taken without asking GitHub.
if id <= existingIssues or github_get("issues/%d" % id):
if id in issues:
print "Issue #%d already exists, postponing..." % id
postponed[id] = issues.pop(id)
if id in issues:
bugzilla_id = id
issue = issues.pop(id)
if len(postponed) == 0:
if len(issues) == 0:
print "===> All done."
print "Error: No more postponed issues."
# Find the first postponed issue
bugzilla_id = sorted(postponed.keys())[0]
issue = postponed.pop(bugzilla_id)
if force_update:
print "Creating issue #%d..." % id
github_issue_append(bugzilla_id, issue)
# Parse the command-line options into the module-level settings:
#   -h help, -f force updates, -o owner, -r repo, -t token, -x XML file.
# NOTE(review): this copy has lost its indentation and every line that
# contained no whitespace, so the body is not runnable as shown.  Missing in
# particular: the "try:" matching the except clause, the bodies of the
# except clause and of the -h branch, any pause announced by the -f warning,
# and whatever followed the final error message.
def args_parse(argv):
global force_update
global xml_file, github_owner, github_repo, github_token
opts, args = getopt.getopt(argv,"hfo:r:t:x:")
except getopt.GetoptError:
for opt, arg in opts:
if opt == '-h':
elif opt == "-f":
print "WARNING: the repo will be UPDATED! No backups, no undos!"
print "Press Ctrl+C within next 5 seconds to cancel the update:"
force_update = True
elif opt == "-o":
github_owner = arg
elif opt == "-r":
github_repo = arg
elif opt == "-t":
github_token = arg
elif opt == "-x":
xml_file = arg
# Check the arguments
if (not xml_file or not github_owner or not github_repo or not github_token):
print("Error parsing arguments: "
"please specify XML file, GitHub owner, repo and token")
# Entry point: convert the Bugzilla XML export, skip bugs already recorded
# in bugzilla2github.log, check the state of the target repository and then
# import the remaining bugs.
# NOTE(review): this copy has lost its indentation and every line that
# contained no whitespace, so the body is not runnable as shown.  Several
# step headings below are no longer followed by the call they announce.
def main(argv):
global xml_file, github_owner, github_repo, existingIssues
# Parse command line arguments
# NOTE(review): the call that parses argv is missing here (presumably
# args_parse) -- confirm.
print "===> Converting Bugzilla reports to GitHub Issues..."
print "\tSource XML file: %s" % xml_file
print "\tDest. GitHub owner: %s" % github_owner
print "\tDest. GitHub repo: %s" % github_repo
xml_tree = xml.etree.ElementTree.parse(xml_file)
xml_root = xml_tree.getroot()
issues = bugs_convert(xml_root)
# NOTE(review): the "try:" matching the "except IOError" below is missing.
# Each log row is "<bugzilla id>, <github issue number>", as written by
# github_issue_append.
with open("bugzilla2github.log", "r") as f:
print "===> Skipping already imported issues (WARNING: this shouldn't happen when you run this script for the first time)..."
imported_bugs = csv.reader(f)
for imported_bug in imported_bugs:
issues.pop(int(imported_bug[0]), None)
existingIssues = max(existingIssues, int(imported_bug[1]))
except IOError:
print "===> No log file found. Not skipping any issue."
print "===> Checking last existing issue actually exists."
if not github_issue_exist(existingIssues):
print "Last existing issue doesn't actually exist. Aborting!"
# NOTE(review): the statement that aborts is missing after the message.
print "===> Checking whether the following issue was created but not saved."
github_issue = github_get("issues/%d" % (existingIssues + 1))
if github_issue:
# NOTE(review): the call on the right-hand side has been stripped from this
# copy; presumably a regular-expression search of the issue body, with the
# captured bug id read from result two lines further down -- confirm.
result ="Original bug ID: BZ#(\d+)", github_issue.json()["body"])
if result:
print "Indeed, this was the case."
bugzilla_id = int(
issues.pop(bugzilla_id, None)
with open("bugzilla2github.log", "a") as f:
f.write("%d, %d\n" % (bugzilla_id, existingIssues + 1))
# NOTE(review): the calls announced by the next two headings are missing
# (presumably github_labels_check and github_assignees_check).
print "===> Checking all the labels exist on GitHub..."
print "===> Checking all the assignees exist on GitHub..."
# fake_issue = { "title": "Fake issue", "body": "Fake issue", "closed": True }
# for i in xrange(1,existingIssues + 1):
# github_issue_append(0, fake_issue)
# NOTE(review): the call announced by this heading is missing (presumably
# github_issues_add).
print "===> Adding Bugzilla reports on GitHub..."
# Run the migration only when executed as a script, passing the command-line
# arguments without the program name (the form getopt expects).
if __name__ == "__main__":
    main(sys.argv[1:])
