qurben/pull.sh

## pull.sh
#! /bin/bash

# Copyright 2019 Gerben Oolbekkink
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

# README
#
# Retrieve a token from https://github.com/settings/tokens. No scopes are
# required; the token is only used to raise the GitHub API quota from
# 60 reqs/h (roughly 10 pull requests) to 5000 reqs/h.
#
# This script requires jq to be installed. If jq is installed in a location
# that is not on your PATH, change the following variable.

# Command used to run jq. Defaults to "jq" looked up on the PATH; export JQ
# in the environment (or edit the default here) to point at another binary.
JQ="${JQ:-jq}"

# ----------------------------------------------------------------------------

# Abort the script as soon as a command fails
set -e

# Check arguments: the third positional parameter must be present and
# non-empty, otherwise print the usage text and stop.
if [[ -z "${3:-}" ]]; then
  {
    printf '%s %s\n' \
      'Usage: ./pull.sh <github access token> <organisation>/<project>' \
      '<pull request id>'
    echo "Downloads a pull request and saves it's comments as: JSON, CSV and MD"
  } >&2
  # A usage error is a failure; a bare `exit` would report the status of the
  # preceding echo, i.e. success.
  exit 1
fi

# Give the positional parameters descriptive names
token="${1}"
project="${2}"
pull_id="${3}"

# Scratch directory for intermediate files (created if it is missing)
mkdir -p .tmp/

# Seed the accumulated result with an empty JSON array
printf '%s\n' '[]' > .tmp/result.json

#######################################
# Download every page of a paginated GitHub API list endpoint and append the
# returned items to .tmp/result.json.
# Globals:   token (read), JQ (read)
# Arguments: $1 - API path appended to https://api.github.com,
#                 e.g. /repos/<org>/<project>/pulls/<id>/comments
# Outputs:   progress messages on stdout, diagnostics on stderr; rewrites
#            .tmp/download.json, .tmp/joined.json and .tmp/result.json
# Returns:   0 once an empty page is received; 1 if a request fails or the
#            response is not a JSON array (e.g. a rate-limit error object)
#######################################
gh_fetch() {
  local slug=$1
  local page_number=1
  local url

  # Run while there is still more info
  while true; do
    # Kept in one quoted string so "?" and "&" are never seen as glob or
    # shell syntax.
    url="https://api.github.com${slug}?per_page=100&page=${page_number}"
    echo "Downloading ${url}"

    if ! curl -sS -o .tmp/download.json \
        -H "Authorization: token ${token}" \
        "${url}"; then
      echo "Request failed: ${url}" >&2
      return 1
    fi

    # Only a list can be merged into the result. Anything else (when the
    # rate limiter is hit, for instance) is reported and aborts the fetch.
    if ! "${JQ}" -e 'type == "array"' .tmp/download.json > /dev/null; then
      echo "Unexpected response from ${url}:" >&2
      cat .tmp/download.json >&2
      return 1
    fi

    # When github returns an empty array, stop processing.
    if [[ "$("${JQ}" 'length' .tmp/download.json)" == "0" ]]; then
      echo "Finished!"
      return 0
    fi

    # Concatenate the accumulated result with the page just downloaded
    "${JQ}" -s '.[0] + .[1]' .tmp/result.json .tmp/download.json \
      > .tmp/joined.json || return 1
    mv .tmp/joined.json .tmp/result.json || return 1

    page_number=$(( page_number + 1 ))
  done
}

# Download pull request issue comments (normal comments)
gh_fetch "/repos/${project}/issues/${pull_id}/comments"
# Download pull request pull comments (inside reviews)
gh_fetch "/repos/${project}/pulls/${pull_id}/comments"
# Download pull request reviews (header for review)
gh_fetch "/repos/${project}/pulls/${pull_id}/reviews"

# Clean up the scratch files gh_fetch leaves behind: the last downloaded page
# and, if a merge was interrupted, the intermediate joined file.
rm -f .tmp/joined.json .tmp/download.json

# Move result to here
mv .tmp/result.json "pull_${pull_id}.json"

# Convert the downloaded JSON into a CSV with one row per comment/review.
#  - Reviews have .submitted_at, comments have .created_at; both end up in
#    the "moment" column.
#  - Newlines in the body become <br>, because they make formatting csv hard.
#  - A missing/null body is treated as empty, and entries with an empty body
#    are dropped: they exist and are not interesting.
# Everything runs in a single jq process so that a jq error fails this
# command (and, under `set -e`, the script) instead of being hidden behind
# later pipeline stages.
"${JQ}" -r '
  ["moment", "user", "author_association", "body", "url"] as $cols
  | map({
      user: ("[" + .user.login + "](" + .user.html_url + ")"),
      moment: (
        if (.created_at | length) > 0 then
          .created_at
        else
          .submitted_at
        end
      ),
      author_association: .author_association,
      body: ((.body // "") | gsub("\r?\n"; "<br>")),
      url: ("[link](" + .html_url + ")")
    })
  | map(select(.body != ""))
  | sort_by(.moment)
  | $cols, (.[] | [.[$cols[]]])
  | @csv
' "pull_${pull_id}.json" > "pull_${pull_id}.csv"

# Convert the CSV to a Markdown document, written in a single pass:
#   1. a title line linking to the pull request,
#   2. the CSV rows rewritten as table rows, with the header separator
#      appended directly after the first (column name) row.
# Writing the file through one redirect avoids `sed -i` without a backup
# suffix and the one-line `i` command, which are GNU sed specific, and keeps
# ${project} out of any sed script.
{
  printf '# Pull request analysis for [#%s](https://github.com/%s/pull/%s)\n' \
    "${pull_id}" "${project}" "${pull_id}"

  # Escape literal pipes, turn the "," field separators and the outer quotes
  # into column delimiters, then undo the CSV doubling of quotes. The
  # separator row is appended after line 1 even when no data rows follow.
  sed \
    -e 's/|/\\|/g' \
    -e 's/","/|/g' \
    -e 's/^"/|/' \
    -e 's/"$/|/' \
    -e 's/""/"/g' \
    -e '1a\' -e '|---|---|---|---|---|' \
    "pull_${pull_id}.csv"
} > "pull_${pull_id}.md"
	#! /bin/bash

	# Copyright 2019 Gerben Oolbekkink
	#
	# Permission is hereby granted, free of charge, to any person obtaining a copy
	# of this software and associated documentation files (the "Software"), to
	# deal in the Software without restriction, including without limitation the
	# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
	# sell copies of the Software, and to permit persons to whom the Software is
	# furnished to do so, subject to the following conditions:
	#
	# The above copyright notice and this permission notice shall be included in
	# all copies or substantial portions of the Software.
	#
	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
	# IN THE SOFTWARE.

	# README
	#
	# Retrieve a token from https://github.com/settings/tokens. No scopes are
	# required; the token is only used to raise the GitHub API quota from
	# 60 reqs/h (roughly 10 pull requests) to 5000 reqs/h.
	#
	# This script requires jq to be installed. If jq is installed in a location
	# that is not on your PATH, change the following variable.

	# Command used to run jq. Defaults to "jq" looked up on the PATH; export JQ
	# in the environment (or edit the default here) to point at another binary.
	JQ="${JQ:-jq}"

	# ----------------------------------------------------------------------------

	# Abort the script as soon as a command fails
	set -e

	# Check arguments: the third positional parameter must be present and
	# non-empty, otherwise print the usage text and stop.
	if [[ -z "${3:-}" ]]; then
	  {
	    printf '%s %s\n' \
	      'Usage: ./pull.sh <github access token> <organisation>/<project>' \
	      '<pull request id>'
	    echo "Downloads a pull request and saves it's comments as: JSON, CSV and MD"
	  } >&2
	  # A usage error is a failure; a bare `exit` would report the status of
	  # the preceding echo, i.e. success.
	  exit 1
	fi

	# Give the positional parameters descriptive names
	token="${1}"
	project="${2}"
	pull_id="${3}"

	# Scratch directory for intermediate files (created if it is missing)
	mkdir -p .tmp/

	# Seed the accumulated result with an empty JSON array
	echo "[]" > .tmp/result.json

	#######################################
	# Download every page of a paginated GitHub API list endpoint and append the
	# returned items to .tmp/result.json.
	# Globals:   token (read), JQ (read)
	# Arguments: $1 - API path appended to https://api.github.com,
	#                 e.g. /repos/<org>/<project>/pulls/<id>/comments
	# Outputs:   progress messages on stdout, diagnostics on stderr; rewrites
	#            .tmp/download.json, .tmp/joined.json and .tmp/result.json
	# Returns:   0 once an empty page is received; 1 if a request fails or the
	#            response is not a JSON array (e.g. a rate-limit error object)
	#######################################
	gh_fetch() {
	  local slug=$1
	  local page_number=1
	  local url

	  # Run while there is still more info
	  while true; do
	    # Kept in one quoted string so "?" and "&" are never seen as glob or
	    # shell syntax.
	    url="https://api.github.com${slug}?per_page=100&page=${page_number}"
	    echo "Downloading ${url}"

	    if ! curl -sS -o .tmp/download.json \
	        -H "Authorization: token ${token}" \
	        "${url}"; then
	      echo "Request failed: ${url}" >&2
	      return 1
	    fi

	    # Only a list can be merged into the result. Anything else (when the
	    # rate limiter is hit, for instance) is reported and aborts the fetch.
	    if ! "${JQ}" -e 'type == "array"' .tmp/download.json > /dev/null; then
	      echo "Unexpected response from ${url}:" >&2
	      cat .tmp/download.json >&2
	      return 1
	    fi

	    # When github returns an empty array, stop processing.
	    if [[ "$("${JQ}" 'length' .tmp/download.json)" == "0" ]]; then
	      echo "Finished!"
	      return 0
	    fi

	    # Concatenate the accumulated result with the page just downloaded
	    "${JQ}" -s '.[0] + .[1]' .tmp/result.json .tmp/download.json \
	      > .tmp/joined.json || return 1
	    mv .tmp/joined.json .tmp/result.json || return 1

	    page_number=$(( page_number + 1 ))
	  done
	}

	# Download pull request issue comments (normal comments)
	gh_fetch "/repos/${project}/issues/${pull_id}/comments"
	# Download pull request pull comments (inside reviews)
	gh_fetch "/repos/${project}/pulls/${pull_id}/comments"
	# Download pull request reviews (header for review)
	gh_fetch "/repos/${project}/pulls/${pull_id}/reviews"

	# Clean up the scratch files gh_fetch leaves behind: the last downloaded
	# page and, if a merge was interrupted, the intermediate joined file.
	rm -f .tmp/joined.json .tmp/download.json

	# Move result to here
	mv .tmp/result.json "pull_${pull_id}.json"

	# Convert the downloaded JSON into a CSV with one row per comment/review.
	#  - Reviews have .submitted_at, comments have .created_at; both end up in
	#    the "moment" column.
	#  - Newlines in the body become <br>, because they make formatting csv hard.
	#  - A missing/null body is treated as empty, and entries with an empty body
	#    are dropped: they exist and are not interesting.
	# The pipes inside the jq program must be plain "|": an escaped "\|" is a
	# jq syntax error, and outside the quotes it is passed as a literal
	# argument instead of connecting two commands. Everything runs in a single
	# jq process so that a jq error fails this command (and, under `set -e`,
	# the script).
	"${JQ}" -r '
	  ["moment", "user", "author_association", "body", "url"] as $cols
	  | map({
	      user: ("[" + .user.login + "](" + .user.html_url + ")"),
	      moment: (
	        if (.created_at | length) > 0 then
	          .created_at
	        else
	          .submitted_at
	        end
	      ),
	      author_association: .author_association,
	      body: ((.body // "") | gsub("\r?\n"; "<br>")),
	      url: ("[link](" + .html_url + ")")
	    })
	  | map(select(.body != ""))
	  | sort_by(.moment)
	  | $cols, (.[] | [.[$cols[]]])
	  | @csv
	' "pull_${pull_id}.json" > "pull_${pull_id}.csv"

	# Convert the CSV to a Markdown document, written in a single pass:
	#   1. a title line linking to the pull request,
	#   2. the CSV rows rewritten as table rows, with the header separator
	#      appended directly after the first (column name) row.
	# Writing the file through one redirect avoids `sed -i` without a backup
	# suffix and the one-line `i` command, which are GNU sed specific, and
	# keeps ${project} out of any sed script.
	{
	  printf '# Pull request analysis for [#%s](https://github.com/%s/pull/%s)\n' \
	    "${pull_id}" "${project}" "${pull_id}"

	  # Escape literal pipes, turn the "," field separators and the outer
	  # quotes into column delimiters, then undo the CSV doubling of quotes.
	  # The patterns use a plain "|": in GNU sed "\|" means alternation, not a
	  # literal pipe. The separator row is appended after line 1 even when no
	  # data rows follow.
	  sed \
	    -e 's/|/\\|/g' \
	    -e 's/","/|/g' \
	    -e 's/^"/|/' \
	    -e 's/"$/|/' \
	    -e 's/""/"/g' \
	    -e '1a\' -e '|---|---|---|---|---|' \
	    "pull_${pull_id}.csv"
	} > "pull_${pull_id}.md"