Skip to content

Instantly share code, notes, and snippets.

@denilsonsa
Last active July 1, 2020 19:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save denilsonsa/413fa6f414171a0599d3e180a597c894 to your computer and use it in GitHub Desktop.
Save denilsonsa/413fa6f414171a0599d3e180a597c894 to your computer and use it in GitHub Desktop.
Migrate Bitbucket mercurial repositories to GitHub
#!/usr/bin/env python3
#
# migrate_bitbucket_mercurial_to_github.py
#
# This is a single-use script to export all my Bitbucket.org mercurial
# repositories into my GitHub. Why? Because:
# https://bitbucket.org/blog/sunsetting-mercurial-support-in-bitbucket
#
# This script has my username hard-coded in the code.
#
# This script assumes the user has set up an SSH key on both services:
# * https://bitbucket.org/account/settings/ssh-keys/
# * https://github.com/settings/keys
#
# This script assumes no ssh passphrase will be requested (i.e. the
# user should be running an ssh-agent).
#
# This script assumes two files in ~/bitbucket-export (the directory it
# changes into before reading them):
# * cookies.txt - Contains the value of the Cookie HTTP header; used for access
# to private repositories in Bitbucket.org.
# * github_access_token.txt - The GitHub access token, for automating the
# creation of GitHub repositories. See: https://github.com/settings/tokens
#
# This script uses https://github.com/frej/fast-export to convert from hg to
# git. This is needed because GitHub's own import from Bitbucket mangles
# non-ASCII characters.
# https://stackoverflow.com/q/57716968#comment103206517_57716969
import json
import os
import os.path
import re
import sh # pip install sh
import urllib.request
import github # pip install PyGithub
# Hardcoded names of my Bitbucket repositories, grouped by visibility.
_PUBLIC_REPOS = [
    'atmega8-blinking-leds',
    'atmega8-hidkeys-helloworld',
    'atmega8-magnetometer-usb-mouse',
    'autostereogram',
    'browser-selector',
    'fontpreviewer',
    # 'frb',  # I'm ignoring this repository.
    'js_binary_tree',
    'keyboard_led_watcher',
    'noise-generation',
    'pygame-joystick-test',
    'pygtk-matplotlib',
    'pythonlogica',
    'redstone_javascript',
    'retroadapter',
    'servermon',
    'small_scripts',
    'stdio_vs_iostream',
    'trabalho-ad-2010-1',
    'trabalho-pnaol-2009-2',
    'trabalho-so-2010-2',
    'trabalho-tp-2010-1',
]
_PRIVATE_REPOS = [
    'curriculo',
    'lajejs',
    'maratona-eri-2011',
    'maze-solver-from-image',
    'meiamaratona2010',
    'stickynotewall',
]

# Full list of repositories to process: public ones first, then private ones.
REPOS = _PUBLIC_REPOS + _PRIVATE_REPOS
def slurp(filename):
    """Return the whole content of *filename* with surrounding whitespace removed.

    The file is always decoded as UTF-8, so the result does not depend on the
    locale of the machine running the script.

    Args:
        filename: Path of the text file to read; relative paths are resolved
            against the current working directory.

    Returns:
        The file content as a str, stripped of leading/trailing whitespace
        (including the trailing newline most editors add).

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filename, encoding='utf-8') as f:
        return f.read().strip()
def _parse_repo_page(body):
    """Extract repository metadata from the HTML of a Bitbucket repo page.

    The page embeds its state as a one-line JSON assignment to
    ``window.__initial_state__``; the fields of interest live under
    ``section.repository.currentRepository``.

    Args:
        body: The decoded HTML of the repository page.

    Returns:
        A dict with the keys 'website', 'description', 'is_private' and 'raw'
        (the complete decoded state object).

    Raises:
        ValueError: If the page has no ``window.__initial_state__`` assignment,
            or if its value is not valid JSON (json.JSONDecodeError is a
            ValueError subclass).
    """
    match = re.search(r'window\.__initial_state__ *= *([^\n]+);', body)
    if match is None:
        raise ValueError(
            'window.__initial_state__ not found in the Bitbucket page '
            '(page layout changed, or cookies.txt is missing/expired?)'
        )
    data = json.loads(match.group(1))

    # Any level may be absent or JSON null; treat both as an empty object.
    section = data.get('section') or {}
    repository = section.get('repository') or {}
    current = repository.get('currentRepository') or {}

    # A null website/description must become '' so later string handling works.
    description = current.get('description') or ''
    return {
        'website': current.get('website') or '',
        # Collapse line breaks (and the spaces around them) into single spaces.
        'description': re.sub(r' *[\r\n]+ *', ' ', description),
        # Deliberately NOT coerced to a bool: if the visibility is unknown we
        # keep the '' placeholder rather than silently claiming "public".
        # NOTE(review): the GitHub API call presumably needs a real bool here.
        'is_private': current.get('is_private', ''),
        'raw': data,
    }


def fetch_metadata(reponame):
    """Scrape website, description and visibility of a Bitbucket repository.

    Roughly equivalent to:
    curl https://bitbucket.org/denilsonsa/<reponame>/ | fgrep __initial_state__

    Args:
        reponame: Name of the repository under the 'denilsonsa' account.

    Returns:
        A dict with the keys 'website', 'description' (line breaks collapsed
        to spaces), 'is_private' and 'raw' (the full page state).

    Raises:
        ValueError: If the page does not contain the expected embedded state.
        urllib.error.URLError: If the page cannot be fetched.
        OSError: If cookies.txt cannot be read.
    """
    url = 'https://bitbucket.org/denilsonsa/' + reponame + '/'
    req = urllib.request.Request(url)
    # The cookie is required to fetch metadata from private repositories.
    req.add_header('Cookie', slurp('cookies.txt'))
    with urllib.request.urlopen(req) as u:
        body = u.read().decode('utf8')
    return _parse_repo_page(body)
def main():
    """Migrate every repository in REPOS from Bitbucket (hg) to GitHub (git).

    Works inside ~/bitbucket-export. For each repository that does not yet
    exist on GitHub: save the scraped Bitbucket metadata to
    metadata-<name>.txt, clone the mercurial repository into hg-<name>,
    convert it into a fresh git repository in git-<name> using hg-fast-export,
    create the GitHub repository and push all branches and tags to it.
    Repositories already present on GitHub are skipped.
    """
    workdir = os.path.expanduser('~/bitbucket-export')
    os.chdir(workdir)

    exporter_path = os.path.expanduser('~/hg-fast-export/fast-export/hg-fast-export.sh')
    hg_fast_export = sh.Command(exporter_path)

    client = github.Github(slurp('github_access_token.txt'))
    account = client.get_user()

    for reponame in REPOS:
        hg_dir = 'hg-' + reponame
        git_dir = 'git-' + reponame

        print(f'>> START >> {reponame}')
        # The previous iteration ends inside its git-<name> directory.
        os.chdir(workdir)

        try:
            existing = account.get_repo(reponame)
        except github.UnknownObjectException:
            existing = None
        if existing is not None:
            print(f'<< SKIP << repository already exists: {existing.full_name}')
            continue

        print('=== SCRAPE metadata from bitbucket website')
        metadata = fetch_metadata(reponame)
        with open(f'metadata-{reponame}.txt', 'w') as out:
            json.dump(metadata, out, indent='\t')

        print('=== CLONE from bitbucket')
        sh.hg.clone(f'ssh://hg@bitbucket.org/denilsonsa/{reponame}', hg_dir)

        print('=== CONVERT to git')
        os.mkdir(git_dir)
        os.chdir(git_dir)
        sh.git.init()
        hg_fast_export('-r', '../' + hg_dir)
        sh.git.checkout('master')

        print('=== CREATE GitHub repo')
        new_repo = account.create_repo(
            name=reponame,
            description=metadata['description'],
            homepage=metadata['website'],
            private=metadata['is_private'],
            has_issues=False,
            has_wiki=False,
            has_projects=False,
            auto_init=False,
        )

        print('=== PUSH to GitHub repo')
        # Alternative when 'origin' is already configured:
        # sh.git.remote('set-url', 'origin', new_repo.ssh_url)
        sh.git.remote.add('origin', new_repo.ssh_url)
        sh.git.push('--all', '-u', '--tags')

        if os.path.isfile('../' + hg_dir + '/.hgignore'):
            print('!!! NOTE: Please manually migrate .hgignore to .gitignore')

        print(f'<< END << repository migrated: {new_repo.full_name}')
# Run the migration only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment