Download a GitHub repo's pull request history
# Scrape the dates shown on a GitHub project's pull-request list pages and
# collect them, one YYYY-MM-DD date per line, in "<owner>-<repo>.pulls".
#
# Usage:    <script> [owner/repo]      (defaults to paddlepaddle/paddle)
# Requires: bash (uses [[ ]], (( )) and ${var/pat/repl}), curl, sed, tee, wc.
# Progress messages go to stderr so stdout stays clean for later stages.
project="paddlepaddle/paddle"
if [[ $# -ne 0 ]]; then
  project=$1
fi

# "owner/repo" -> "owner-repo": a flat base name for the output file.
file=${project/\//-}

# Start from an empty file; every fetched page is appended to it.
: > "$file.pulls"

#######################################
# Fetch one pull-request list page, append the dates found on it to
# "$file.pulls" and print how many dates were found.
# Globals:   file (read)
# Arguments: $1 - URL of the list page
# Outputs:   number of dates found, on stdout
#######################################
fetch_pull_dates() {
  # sed -n ... p emits only lines where the substitution actually matched,
  # so 'relative-time' lines without a datetime="...T" attribute can no
  # longer leak raw HTML into the .pulls file.
  curl -s "$1" \
    | sed -n '/relative-time/s/^.*datetime="\([^T]*\).*$/\1/p' \
    | tee -a "$file.pulls" \
    | wc -l
}

# NOTE(review): assumes GitHub lists 25 pull requests per page, so the first
# page with fewer than 25 entries is taken to be the last one -- confirm.
pulls_per_page=25
page=1
until (( pulls_per_page < 25 )); do
  echo "Parsing page: $page" >&2
  # Build the query string directly; the shell has no '+' concatenation
  # operator, so appending with '+' would put a literal '+' into the URL.
  url="https://github.com/${project}/pulls?page=${page}&q=is%3Apr"
  echo "$url" >&2

  # $(( ... )) normalizes the padded output of BSD wc (and an empty result)
  # to a plain integer.
  pulls_per_page=$(( $(fetch_pull_dates "$url") ))
  if (( pulls_per_page == 0 )); then
    sleep 61 # work around the GitHub abuse detection mechanism.
    pulls_per_page=$(( $(fetch_pull_dates "$url") ))
  fi
  echo "pulls per page: $pulls_per_page" >&2

  page=$((page + 1))
  sleep 2 # work around the GitHub abuse detection mechanism.
done
#######################################
# Convert "uniq -c" output (lines of "<count> <YYYY-MM>", sorted by month)
# into a cumulative monthly CSV of the form "YYYY-MM-30,<running total>".
# The series starts at 2015-06; counts from earlier months are added to the
# running total without producing a row, and months without any entry repeat
# the previous total. The "-30" day is only a nominal end-of-month label.
# Inputs:    "<count> <YYYY-MM>" lines on stdin
# Outputs:   CSV rows on stdout
#######################################
cumulative_pulls_by_month() {
  # The program is plain POSIX awk; prefer gawk when present, else use awk.
  local awk_cmd=gawk
  command -v gawk >/dev/null 2>&1 || awk_cmd=awk

  "$awk_cmd" '
    # Step the (year, month) cursor forward by one month.
    function advance() {
      month++
      if (month > 12) {
        month = 1
        year++
      }
      current = sprintf("%04d-%02d", year, month)
    }

    BEGIN {
      year = 2015
      month = 6
      current = sprintf("%04d-%02d", year, month)
      cum = 0
    }

    # Skip anything that is not a YYYY-MM key. A stray value such as leaked
    # HTML compares greater than every date string and would otherwise keep
    # the gap-filling loop below running forever.
    $2 !~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]$/ { next }

    {
      # Emit a row for every month between the cursor and this record.
      while (current < $2) {
        printf("%s-30,%d\n", current, cum)
        advance()
      }
      # Here current >= $2: the count is added either way, but a row is
      # printed only when the record is the cursor month itself.
      cum += $1
      if (current == $2) {
        printf("%s-30,%d\n", current, cum)
        advance()
      }
    }
  '
}

# Collapse dates to YYYY-MM, count entries per month, then accumulate.
sort -- "$file.pulls" | cut -d '-' -f 1,2 | uniq -c | cumulative_pulls_by_month
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment