#!/bin/bash
#
# acl2018stats.sh - author statistics for the ACL 2018 accepted-papers list.
#
# Usage: ./acl2018stats.sh
#
# Files written to the current directory:
#   index.html   the lines of the accepted-papers page containing "paper-title"
#   authors.txt  one lower-cased, comma-separated author list per line
#
# Output (stdout): the result of the final `uniq -c`, one line per distinct
# paper count: "<number of authors> <papers per author>", ascending by the
# second column.

# Abort on any failed command, unset variable, or failed pipeline stage so a
# broken download never yields bogus statistics.
set -euo pipefail

readonly URL='http://acl2018.org/conference/accepted-papers/index.html'

# Download into a temporary file: without -O, wget refuses to overwrite an
# existing index.html (it saves index.html.1 instead), so a re-run would
# silently process stale, already-filtered data.
raw_page=$(mktemp)
trap 'rm -f -- "$raw_page"' EXIT

# Get the data
wget -nv -O "$raw_page" "$URL"

# Keep the list of papers only
if ! grep -- 'paper-title' "$raw_page" > index.html; then
  printf 'error: no "paper-title" lines found in %s\n' "$URL" >&2
  exit 1
fi

# Extract author lists to csv
#  - 1st sed: print only lines that actually carry a paper-authors span
#    (without -n/p, non-matching lines would leak through as raw HTML).
#  - 2nd sed: normalise the separators ", and ", " and " and ", " to ",".
#    Plain substitutions are used instead of an alternation because "(", "|"
#    and ")" are literal characters in sed's default (basic) regex syntax.
#    ", and " must be handled first so "a, b, and c" does not become
#    "a,b,and c".
sed -n 's:.*<span class="paper-authors">\([^<]*\)</span>.*:\1:p' index.html \
  | sed -e 's:, and :,:g' -e 's: and :,:g' -e 's:, :,:g' \
  | tr '[:upper:]' '[:lower:]' > authors.txt

# Author frequencies
#  - tr puts one author per line ("\n" in a sed replacement is GNU-only).
#  - the first `uniq -c` yields "<papers> <author>"; awk keeps the count
#    regardless of how wide uniq pads it (cut -d" " -f7 only worked for
#    single-digit counts with GNU's 7-column padding).
#  - the second `uniq -c` counts how many authors share each paper count.
tr ',' '\n' < authors.txt \
  | sort \
  | uniq -c \
  | awk '{ print $1 }' \
  | sort -n \
  | uniq -c