Last active
July 1, 2020 19:45
-
-
Save denilsonsa/413fa6f414171a0599d3e180a597c894 to your computer and use it in GitHub Desktop.
Migrate Bitbucket mercurial repositories to GitHub
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# | |
# migrate_bitbucket_mercurial_to_github.py | |
# | |
# This is a single-use script to export all my Bitbucket.org mercurial | |
# repositories into my GitHub. Why? Because: | |
# https://bitbucket.org/blog/sunsetting-mercurial-support-in-bitbucket | |
# | |
# This script has my username hard-coded in the code. | |
# | |
# This script assumes the user has set up an SSH key on both services:
# * https://bitbucket.org/account/settings/ssh-keys/ | |
# * https://github.com/settings/keys | |
# | |
# This script assumes no ssh passphrase will be requested (i.e. the | |
# user should be running an ssh-agent). | |
# | |
# This script assumes two files in the same directory: | |
# * cookies.txt - Contains the value of the Cookie HTTP header; used for access | |
# to private repositories in Bitbucket.org. | |
# * github_access_token.txt - The GitHub access token, for automating the | |
# creation of GitHub repositories. See: https://github.com/settings/tokens | |
# | |
# This script uses https://github.com/frej/fast-export to convert from hg to | |
# git. This is needed because GitHub's own import from Bitbucket mangles
# non-ASCII characters.
# https://stackoverflow.com/q/57716968#comment103206517_57716969 | |
import json | |
import os | |
import os.path | |
import re | |
import sh # pip install sh | |
import urllib.request | |
import github # pip install PyGithub | |
# Hardcoded list of my repositories in bitbucket.
# main() iterates over this list in order; each entry is used verbatim as the
# Bitbucket repository name and as the name of the GitHub repository to create.
REPOS = [
    # Public repos:
    'atmega8-blinking-leds',
    'atmega8-hidkeys-helloworld',
    'atmega8-magnetometer-usb-mouse',
    'autostereogram',
    'browser-selector',
    'fontpreviewer',
    # 'frb', # I'm ignoring this repository.
    'js_binary_tree',
    'keyboard_led_watcher',
    'noise-generation',
    'pygame-joystick-test',
    'pygtk-matplotlib',
    'pythonlogica',
    'redstone_javascript',
    'retroadapter',
    'servermon',
    'small_scripts',
    'stdio_vs_iostream',
    'trabalho-ad-2010-1',
    'trabalho-pnaol-2009-2',
    'trabalho-so-2010-2',
    'trabalho-tp-2010-1',
    # Private repos:
    'curriculo',
    'lajejs',
    'maratona-eri-2011',
    'maze-solver-from-image',
    'meiamaratona2010',
    'stickynotewall',
]
def slurp(filename):
    """Return the whole text of *filename* without leading/trailing whitespace.

    The file is opened in text mode with the platform's default encoding.
    """
    with open(filename) as handle:
        content = handle.read()
    return content.strip()
def _parse_initial_state(body):
    """Extract repository metadata from the HTML *body* of a Bitbucket page.

    The page embeds a JSON document in a ``window.__initial_state__ = {...};``
    assignment; the fields of interest live under
    ``section -> repository -> currentRepository``.

    Returns a dict with the keys ``website``, ``description``, ``is_private``
    and ``raw`` (the complete decoded JSON document).

    Raises ValueError if the assignment is not present in *body* or if its
    value is not valid JSON (json.JSONDecodeError is a ValueError subclass).
    """
    match = re.search(r'window\.__initial_state__ *= *([^\n]+);', body)
    if match is None:
        raise ValueError('window.__initial_state__ not found in the page')
    data = json.loads(match.group(1))

    # Walk down to the repository object, treating a missing or null level as
    # an empty dict so that the lookups below fall back to their defaults.
    repo = data
    for key in ('section', 'repository', 'currentRepository'):
        repo = repo.get(key) or {}

    return {
        # `or ''` also covers a JSON null, so these two are always strings.
        'website': repo.get('website') or '',
        # Collapse line breaks (and the spaces around them) into one space.
        'description': re.sub(r' *[\r\n]+ *', ' ', repo.get('description') or ''),
        # Passed through untouched; '' when the page does not provide it.
        'is_private': repo.get('is_private', ''),
        'raw': data,
    }


def fetch_metadata(reponame):
    """Scrape the metadata of the Bitbucket repository *reponame*.

    Downloads ``https://bitbucket.org/denilsonsa/<reponame>/`` sending the
    content of ``cookies.txt`` (read from the current directory) as the Cookie
    header, then parses the state embedded in the page.

    Returns a dict with the keys ``website``, ``description``, ``is_private``
    and ``raw``.

    Raises ValueError (mentioning the URL) if the page does not contain a
    parsable ``window.__initial_state__``. Errors raised by urllib while
    fetching the page are propagated unchanged.
    """
    # Equivalent to:
    # curl https://bitbucket.org/denilsonsa/maze-solver-from-image/ | fgrep __initial_state__
    url = 'https://bitbucket.org/denilsonsa/' + reponame + '/'
    req = urllib.request.Request(url)
    # The cookie is required to fetch metadata from private repositories.
    req.add_header('Cookie', slurp('cookies.txt'))
    with urllib.request.urlopen(req) as u:
        body = u.read().decode('utf8')
    try:
        return _parse_initial_state(body)
    except ValueError as err:
        raise ValueError('Cannot read metadata from {}: {}'.format(url, err)) from err
def main():
    """Migrate every repository listed in REPOS from Bitbucket to GitHub.

    All work happens inside ``~/bitbucket-export``. For each repository name
    that does not exist yet in the authenticated GitHub account, this:

    1. scrapes the Bitbucket metadata and saves it to ``metadata-<name>.txt``;
    2. clones the mercurial repository over SSH into ``hg-<name>``;
    3. converts it to git inside ``git-<name>`` using hg-fast-export;
    4. creates the GitHub repository with the scraped description, homepage
       and privacy flag;
    5. pushes all branches and tags to it.

    Repositories that already exist on GitHub are skipped.
    """
    workdir = os.path.expanduser('~/bitbucket-export')
    os.chdir(workdir)

    exporter_path = os.path.expanduser('~/hg-fast-export/fast-export/hg-fast-export.sh')
    hg_fast_export = sh.Command(exporter_path)

    # The token file is read relative to workdir (we already chdir'ed there).
    token = slurp('github_access_token.txt')
    ghuser = github.Github(token).get_user()

    for reponame in REPOS:
        hg_dir = 'hg-' + reponame
        git_dir = 'git-' + reponame

        print('>> START >> ' + reponame)
        # The previous iteration may have left us inside a git-* directory.
        os.chdir(workdir)

        try:
            existing = ghuser.get_repo(reponame)
        except github.UnknownObjectException:
            existing = None
        if existing is not None:
            print('<< SKIP << repository already exists: ' + existing.full_name)
            continue

        print('=== SCRAPE metadata from bitbucket website')
        metadata = fetch_metadata(reponame)
        with open('metadata-' + reponame + '.txt', 'w') as out:
            json.dump(metadata, out, indent='\t')

        print('=== CLONE from bitbucket')
        sh.hg.clone('ssh://hg@bitbucket.org/denilsonsa/' + reponame, hg_dir)

        print('=== CONVERT to git')
        os.mkdir(git_dir)
        os.chdir(git_dir)
        sh.git.init()
        hg_fast_export('-r', '../' + hg_dir)
        sh.git.checkout('master')

        print('=== CREATE GitHub repo')
        repo_options = {
            'name': reponame,
            'description': metadata['description'],
            'homepage': metadata['website'],
            'private': metadata['is_private'],
            'has_issues': False,
            'has_wiki': False,
            'has_projects': False,
            'auto_init': False,
        }
        ghrepo = ghuser.create_repo(**repo_options)

        print('=== PUSH to GitHub repo')
        sh.git.remote.add('origin', ghrepo.ssh_url)
        # If the remote already exists, use this instead:
        #sh.git.remote('set-url', 'origin', ghrepo.ssh_url)
        sh.git.push('--all', '-u', '--tags')

        hgignore_path = '../' + hg_dir + '/.hgignore'
        if os.path.isfile(hgignore_path):
            print('!!! NOTE: Please manually migrate .hgignore to .gitignore')

        print('<< END << repository migrated: ' + ghrepo.full_name)

        # Uncomment to stop after the first repository while debugging:
        #print("Aborting because I'm debugging")
        #break
# Run the migration only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment