Skip to content

Instantly share code, notes, and snippets.

@dhimmel
Last active February 16, 2018 20:32
Show Gist options
  • Save dhimmel/5cf98acc58f60ede9504422e7a0a9f41 to your computer and use it in GitHub Desktop.
Save dhimmel/5cf98acc58f60ede9504422e7a0a9f41 to your computer and use it in GitHub Desktop.
Convert Manubot gh-pages to use versioned directories
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create versioned gh-pages directories for legacy Manubot repos\n",
"\n",
"Designed to be run from the root of a repository to be upgraded.\n",
"\n",
"See https://github.com/greenelab/manubot-rootstock/pull/101 and https://github.com/greenelab/manubot-rootstock/issues/96.\n",
"\n",
"```\n",
"# First make sure you have a local gh-pages branch\n",
"# Check it out from upstream with\n",
"git checkout --track upstream/gh-pages\n",
"# Pull to make sure it's up to date\n",
"git pull\n",
"```\n",
"\n",
"You must be in the gh-pages branch for this to work. This notebook is online as a [GitHub gist](https://gist.github.com/dhimmel/5cf98acc58f60ede9504422e7a0a9f41) and is released under a [CC0 1.0 License](https://creativecommons.org/publicdomain/zero/1.0/legalcode)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import collections\n",
"import re\n",
"import pathlib\n",
"import subprocess\n",
"\n",
"import git\n",
"import pandas"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set parameters\n",
"\n",
"This notebook was developed for https://github.com/greenelab/meta-review/. Update for your own repository."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"b74e916beb5c5e070b51ca42fc6b32655b3256ee\n"
]
}
],
"source": [
"# Open the repository to upgrade; '.' assumes the notebook runs from the repository root\n",
"repo = git.Repo(path='.')\n",
"\n",
"# Show the commit that the local gh-pages branch currently points to\n",
"gh_pages_head = repo.heads['gh-pages'].commit\n",
"print(gh_pages_head)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# GitHub repository in owner/name form\n",
"slug = 'greenelab/meta-review'\n",
"\n",
"# Endpoints (full hashes) of the gh-pages commit range to convert to versioned directories.\n",
"# NOTE(review): the commit table produced below starts at the child of first_commit,\n",
"# so first_commit itself appears to be excluded from the range -- confirm.\n",
"last_commit = '8b6861af46df7ec2664aca2d2d5e08a0bf3bb238'  # most recent\n",
"first_commit = '3cac19dad403e4a0262c287a05a1b0adce355744'  # oldest"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Identify commits to port to directories"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>commit</th>\n",
" <th>author_name</th>\n",
" <th>author_email</th>\n",
" <th>authored_datetime</th>\n",
" <th>summary</th>\n",
" <th>count</th>\n",
" <th>merge</th>\n",
" <th>parents</th>\n",
" <th>source_commit</th>\n",
" <th>files</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>90f15d32caa3f4de7d23e9e3f58986f72daeaf63</td>\n",
" <td>Daniel</td>\n",
" <td>daniel.himmelstein@gmail.com</td>\n",
" <td>2017-07-16 18:02:55+00:00</td>\n",
" <td>Second attempt for Travis deploy timeout (#23)</td>\n",
" <td>16</td>\n",
" <td>0</td>\n",
" <td>3cac19dad403e4a0262c287a05a1b0adce355744</td>\n",
" <td>123b84b73fde43e2f5df9fc47a62fe3b45295d24</td>\n",
" <td>[github-pandoc.css, index.html, index.html.ots...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>27f6d900eaf7cec3e5a2f40fa92d259d75735e59</td>\n",
" <td>David</td>\n",
" <td>slochower@gmail.com</td>\n",
" <td>2017-07-16 18:54:36+00:00</td>\n",
" <td>Fix typo in 03.figures.md (#22)</td>\n",
" <td>17</td>\n",
" <td>0</td>\n",
" <td>90f15d32caa3f4de7d23e9e3f58986f72daeaf63</td>\n",
" <td>6c732a3a4eebe8a23ef5a0dbc37330746d629ead</td>\n",
" <td>[github-pandoc.css, index.html, index.html.ots...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" commit author_name \\\n",
"0 90f15d32caa3f4de7d23e9e3f58986f72daeaf63 Daniel \n",
"1 27f6d900eaf7cec3e5a2f40fa92d259d75735e59 David \n",
"\n",
" author_email authored_datetime \\\n",
"0 daniel.himmelstein@gmail.com 2017-07-16 18:02:55+00:00 \n",
"1 slochower@gmail.com 2017-07-16 18:54:36+00:00 \n",
"\n",
" summary count merge \\\n",
"0 Second attempt for Travis deploy timeout (#23) 16 0 \n",
"1 Fix typo in 03.figures.md (#22) 17 0 \n",
"\n",
" parents \\\n",
"0 3cac19dad403e4a0262c287a05a1b0adce355744 \n",
"1 90f15d32caa3f4de7d23e9e3f58986f72daeaf63 \n",
"\n",
" source_commit \\\n",
"0 123b84b73fde43e2f5df9fc47a62fe3b45295d24 \n",
"1 6c732a3a4eebe8a23ef5a0dbc37330746d629ead \n",
"\n",
" files \n",
"0 [github-pandoc.css, index.html, index.html.ots... \n",
"1 [github-pandoc.css, index.html, index.html.ots... "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Regex capturing a 40-character hexadecimal commit hash enclosed in backticks\n",
"pattern = re.compile('`([0-9a-f]{40})`')\n",
"\n",
"rows = list()\n",
"for commit in repo.iter_commits(rev=f'{first_commit}...{last_commit}'):\n",
"    # Search the README at this gh-pages commit for a backticked hash, which is\n",
"    # recorded as the source commit (None when the README contains no match).\n",
"    readme = repo.git.show(f'{commit}:README.md')\n",
"    match = pattern.search(readme)\n",
"    # Top-level names in the commit's tree, leaving out the README and its .ots file\n",
"    tree_entries = repo.git.execute(['git', 'ls-tree', '--name-only', commit.hexsha]).splitlines()\n",
"    files = sorted(set(tree_entries) - {'README.md', 'README.md.ots'})\n",
"    row = collections.OrderedDict([\n",
"        ('commit', commit.hexsha),\n",
"        ('author_name', commit.author.name),\n",
"        ('author_email', commit.author.email),\n",
"        ('authored_datetime', commit.authored_datetime),\n",
"        ('summary', commit.summary),\n",
"        ('count', commit.count()),\n",
"        ('merge', int(len(commit.parents) > 1)),\n",
"        ('parents', ', '.join(x.hexsha for x in commit.parents)),\n",
"        ('source_commit', match.group(1) if match else None),\n",
"        ('files', files),\n",
"    ])\n",
"    rows.append(row)\n",
"\n",
"# Flip the iteration order so that the table lists the oldest commit first\n",
"rows.reverse()\n",
"# When several gh-pages commits name the same source commit, keep only the last one listed\n",
"commit_df = pandas.DataFrame(rows).drop_duplicates('source_commit', keep='last')\n",
"commit_df.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"38"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(commit_df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Populate versioned directories"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Refuse to run unless the gh-pages branch is checked out\n",
"assert repo.active_branch.name == 'gh-pages'\n",
"\n",
"for row in commit_df.itertuples():\n",
"    # Without a source commit hash there is no version directory to file this commit under;\n",
"    # skip it rather than silently writing its files to v/None.\n",
"    if pandas.isna(row.source_commit):\n",
"        print(f'Skipping {row.commit}: no source commit found in README.md')\n",
"        continue\n",
"    directory = pathlib.Path(f'v/{row.source_commit}')\n",
"    # parents=True also creates the top-level v directory when it does not exist yet\n",
"    directory.mkdir(parents=True, exist_ok=True)\n",
"    # Check out this commit's files using the versioned directory as the work tree\n",
"    args = [\n",
"        'git',\n",
"        f'--work-tree={directory}',\n",
"        'checkout',\n",
"        row.commit,\n",
"        '--',\n",
"    ] + row.files\n",
"    process = subprocess.run(args, stderr=subprocess.PIPE)\n",
"    # Report the failing command together with whatever git wrote to stderr\n",
"    if process.stderr:\n",
"        print(' '.join(process.args))\n",
"        print(process.stderr.decode())\n",
"    # Stage changes to already-tracked files, run against the default work tree\n",
"    subprocess.run(['git', 'add', '--update'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Commit and upgrade timestamps\n",
"\n",
"```\n",
"git add v\n",
"git commit\n",
"```\n",
"\n",
"Upgrade timestamps\n",
"\n",
"```\n",
"## The following may fail due to https://github.com/opentimestamps/opentimestamps-client/issues/71\n",
"# ots upgrade v/*/*.ots\n",
"## Instead use\n",
"ls -t --reverse v/*/*.ots | xargs ots upgrade\n",
"rm v/*/*.ots.bak\n",
"git add v/*/*.ots\n",
"git commit\n",
"```"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment