Skip to content

Instantly share code, notes, and snippets.

@Explorer09
Last active December 29, 2015 14:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Explorer09/7687913 to your computer and use it in GitHub Desktop.
Save Explorer09/7687913 to your computer and use it in GitHub Desktop.
簡單抓取伴侶盟與護家盟的連署人數的shell script
#!/bin/bash
# Usage example (re-run every 600 seconds, appending each report to log.txt):
#   watch -n 600 "( ./tapcpr-petition-counter.sh | tee -a log.txt )"
# Download one URL into a local file using wget.
# Arguments:
#   $1  Destination file name
#   $2  URL
# Outputs:
#   A "<file> downloaded." notice on stderr on success, an error on failure.
# Exits the whole script with status 1 when wget fails.
download_file () {
  local dest=$1 url=$2

  if wget -q -O "$dest" "$url"; then
    printf '%s downloaded.\n' "$dest" >&2
  else
    printf 'ERROR: failed to download %s\n' "$url" >&2
    # wget -O creates the output file before transferring anything; remove
    # the leftover so it does not linger after the failed run.
    rm -f -- "$dest"
    exit 1
  fi
}
# Download every petition page into the current directory as <name>.htm.
# Before downloading anything, exits with status 1 (message on stderr) if any
# of the target files already exists -- presumably so that unrelated files
# with the same names are never overwritten.
# Each download goes through download_file.
fetch_data () {
  local page

  for page in tf tapcpr psycho acatw edutw unistu law culture swr health psychia thu ntu; do
    if [[ -e "${page}.htm" ]]; then
      # Diagnostics belong on stderr so they never mix with the report.
      echo "ERROR: ${page}.htm exists." >&2
      exit 1
    fi
  done

  # Taiwan Family alliance (http://taiwanfamily.com/)
  download_file tf.htm "http://taiwanfamily.com/"
  # TAPCPR, the partnership-rights alliance (http://tapcpr.wordpress.com/)
  download_file tapcpr.htm "http://tapcpr-petition.twbbs.org/"
  # Psychology professionals (https://docs.google.com/forms/d/1Os3AGX9EN31RQFbBTJfY-Gg3AJQx-XujZnShca_JC5Q/viewform)
  download_file psycho.htm "http://docs.google.com/forms/d/1Os3AGX9EN31RQFbBTJfY-Gg3AJQx-XujZnShca_JC5Q/viewform"
  # Academics (https://sites.google.com/site/acatwsignformarr/home)
  download_file acatw.htm "http://spreadsheets.google.com/spreadsheet/lv?key=0AqZiS9f8Cm9qdDhsNFJyOWJNM0lNNmxpX0pvTXVGbVE&rowsperpage=1"
  # School teachers (https://sites.google.com/site/edutwformarr/)
  download_file edutw.htm "http://spreadsheets.google.com/spreadsheet/lv?key=0AqZiS9f8Cm9qdHdnUDA0SnE4N2RTNW1CU2tid2cxT1E&rowsperpage=1"
  # University students' front (https://sites.google.com/site/unistusignformarr/)
  download_file unistu.htm "http://spreadsheets.google.com/spreadsheet/lv?key=0AqZiS9f8Cm9qdHdydUVJOUlFNkNrX041b3MzSWlYUnc&rowsperpage=1"
  # Legal scholars (https://sites.google.com/site/marriagequalitytw/home/marriagequalitypetition)
  download_file law.htm "http://spreadsheets.google.com/spreadsheet/lv?key=0Aog8zRm2J3P3dFZlQkVjalNqTy1mbWFLVF9NcVhGQ1E&rowsperpage=1"
  # Cultural sector (https://sites.google.com/site/culturesignformarr/)
  download_file culture.htm "http://spreadsheets.google.com/spreadsheet/lv?key=0AvUpRpAst1MYdFByS0lEMTk3bnpuTmhXTHVUWmZldUE&rowsperpage=1"
  # Social workers (https://sites.google.com/site/swrgoforlgbt/)
  download_file swr.htm "http://spreadsheets.google.com/spreadsheet/lv?key=0AqTjGLK2BCu0dEM2YTRkVS1maG9QNzdQT2dsLWJnSWc&rowsperpage=1"
  # Medical sector (https://sites.google.com/site/healthandmarriageequality/home)
  download_file health.htm "http://spreadsheets.google.com/spreadsheet/lv?key=0AigJhZWFOs8vdFNicG8xZmRzM0VhY1V5UkhNQjVYMWc&rowsperpage=1"
  # Psychiatrists (https://sites.google.com/site/twpsychiatristforequalmarriage/home)
  download_file psychia.htm "http://spreadsheets.google.com/spreadsheet/lv?key=0AigJhZWFOs8vdGtsTjNtZHNwcnB1WUEzV2lETEhCOVE&rowsperpage=1"
  # Tunghai University (https://sites.google.com/site/thuequalmarriage/home)
  download_file thu.htm "http://spreadsheets.google.com/spreadsheet/lv?key=0AiIKbn5JwMM-dGpxdl80N2pTNFE4T2JQaUc1ak15OVE&rowsperpage=1"
  # National Taiwan University, law department (https://docs.google.com/forms/d/1VTAorznQrV05g71yOVsjUuDH7aasRSwxn5wEs26PYEw/viewform)
  download_file ntu.htm "http://spreadsheets.google.com/spreadsheet/lv?key=0Ak6vsYc7YAQydGdWWTM4djk1elZ2R283eUxEWDBaWGc&rowsperpage=1"
}
# Print the signature count found in one downloaded page.
# Arguments:
#   $1  file to search
#   $2  grep basic regular expression whose match contains the number
# Outputs:
#   The first run of digits inside the first match, as a base-10 integer on
#   stdout. Prints 0 (with a warning on stderr) when nothing matches.
_extract_count () {
  local file=$1 pattern=$2 match

  match=$(grep -m 1 -o -e "$pattern" <"$file")
  if [[ $match =~ [0-9]+ ]]; then
    # Force base 10: a leading zero would otherwise be parsed as octal.
    # Unlike stripping up to the first non-zero digit, this handles "0" too.
    printf '%d\n' "$((10#${BASH_REMATCH[0]}))"
  else
    echo "WARNING: no count found in ${file}; assuming 0." >&2
    echo 0
  fi
}

# Extract all signature counts from the downloaded *.htm files in the current
# directory and compute the subtotals.
# Globals written (read later by plot and output_table):
#   oppose_net oppose_paper oppose_total
#   support_net support_paper support_line support_mobile support_tapcpr
#   support_psycho support_acatw support_edutw support_unistu support_law
#   support_culture support_swr support_health support_psychia
#   support_thu support_ntu support_schools support_misc support_total total
grep_data () {
  # Pattern shared by every Google Spreadsheet list view page.
  local total_re='<span class="total">[0-9][0-9]*</span>'

  # TaiwanFamily counter
  oppose_net=$(_extract_count tf.htm '網路連署人數:[0-9][0-9]*')
  oppose_paper=$(_extract_count tf.htm '紙本連署人數:[0-9][0-9]*')
  oppose_total=$((oppose_net + oppose_paper))

  # TAPCPR counter
  support_net=$(_extract_count tapcpr.htm '<strong>網路連署人數: *</strong> *[0-9][0-9]*人')
  support_paper=$(_extract_count tapcpr.htm '<strong>紙本連署人數: *</strong> *[0-9][0-9]*人')
  support_line=$(_extract_count tapcpr.htm '<strong>Line連署人數: *</strong> *[0-9][0-9]*人')
  support_mobile=$(_extract_count tapcpr.htm '<strong>手機連署人數: *</strong> *[0-9][0-9]*人')
  support_tapcpr=$((support_net + support_paper + support_line + support_mobile))

  # Other
  support_psycho=$(_extract_count psycho.htm '共[^0-9]*[0-9][0-9]*[^0-9]*人,包含:')

  # Google Spreadsheet counters
  support_acatw=$(_extract_count acatw.htm "$total_re")
  support_edutw=$(_extract_count edutw.htm "$total_re")
  support_unistu=$(_extract_count unistu.htm "$total_re")
  support_law=$(_extract_count law.htm "$total_re")
  support_culture=$(_extract_count culture.htm "$total_re")
  support_swr=$(_extract_count swr.htm "$total_re")
  support_health=$(_extract_count health.htm "$total_re")
  support_psychia=$(_extract_count psychia.htm "$total_re")
  support_thu=$(_extract_count thu.htm "$total_re")
  support_ntu=$(_extract_count ntu.htm "$total_re")

  support_schools=$((support_thu + support_ntu))
  support_misc=$((support_psycho + support_acatw + support_edutw + support_unistu + support_law + support_culture + support_schools + support_swr + support_health + support_psychia))
  support_total=$((support_tapcpr + support_misc))
  total=$((support_total + oppose_total))
}
# Print a one-line colored bar showing the support/oppose split, followed by
# the grand total.
# Globals read: support_total, oppose_total, total
# Output: '[' + green 'O's + optional yellow '!' + red 'X's + ']' + total.
#   The bar is 66 columns wide; the '!' fills the gap when integer division
#   leaves the two segments shorter than the full width.
plot () {
  local -r width=66
  local num_plus=0 num_dash=0 segment

  # With no signatures at all there is nothing to divide by; draw an empty bar.
  if ((total > 0)); then
    num_plus=$((support_total * width / total))
    num_dash=$((oppose_total * width / total))
  fi

  printf -v segment '%*s' "$num_plus" ''
  printf '[\033[32m%s' "${segment// /O}"
  if ((num_plus + num_dash < width)); then
    printf '\033[33m!'
  fi
  printf -v segment '%*s' "$num_dash" ''
  printf '\033[31m%s' "${segment// /X}"
  printf '\033[m]%10d\n' "${total:-0}"
}
# Print the detailed, colored report table.
# Globals read: total, support_total, oppose_total, support_tapcpr,
#   support_net, support_paper, support_line, support_mobile, oppose_net,
#   oppose_paper, support_misc, support_psycho, support_swr, support_acatw,
#   support_health, support_edutw, support_psychia, support_unistu,
#   support_schools, support_law, support_thu, support_culture, support_ntu
# Output: 21 lines on stdout, ending with a timestamp from `date --rfc-2822`.
output_table () {
  local support_pct=0 oppose_pct=0

  # Guard the percentage computation: a zero total would divide by zero.
  if ((total > 0)); then
    support_pct=$((support_total * 100 / total))
    oppose_pct=$((oppose_total * 100 / total))
  fi

  printf "\033[m連署總數 \033[32m支持修法\033[m | \033[31m反對修法\033[m\n"
  printf " \033[1;32m%6s %10d\033[m | \033[1;31m%6s %10d\033[m\n" \
    "(${support_pct}%)" "$support_total" \
    "(${oppose_pct}%)" "$oppose_total"
  printf -- "-------------------------+-------------------------\n"
  printf "伴侶盟 小計: \033[33m%10d\033[m | 護家盟 小計: \033[33m%10d\033[m\n" \
    "$support_tapcpr" "$oppose_total"
  printf "網路連署 \033[32m%10d\033[m | 網路連署 \033[31m%10d\033[m\n" \
    "$support_net" "$oppose_net"
  printf "紙本連署 \033[32m%10d\033[m | 紙本連署 \033[31m%10d\033[m\n" \
    "$support_paper" "$oppose_paper"
  printf "Line連署 \033[32m%10d\033[m |\n" "$support_line"
  printf "手機連署 \033[32m%10d\033[m |\n" "$support_mobile"
  printf -- "-------------------------+-------------------------\n"
  printf "各界支持婚姻平權連署 小計: \033[33m%10d\033[m\n" "$support_misc"
  printf "(可能會有重複連署)\n"
  printf "心理工作者: \033[32m%10d\033[m 社會工作者: \033[32m%10d\033[m\n" \
    "$support_psycho" "$support_swr"
  printf "學術工作者: \033[32m%10d\033[m 醫療界: \033[32m%10d\033[m\n" \
    "$support_acatw" "$support_health"
  printf "基層教師: \033[32m%10d\033[m 精神科醫師: \033[32m%10d\033[m\n" \
    "$support_edutw" "$support_psychia"
  printf "學生: \033[32m%10d\033[m 【校內連署】 \033[33m%10d\033[m\n" \
    "$support_unistu" "$support_schools"
  printf "法學界: \033[32m%10d\033[m 東海大學: \033[32m%10d\033[m\n" \
    "$support_law" "$support_thu"
  printf "文化界: \033[32m%10d\033[m 台大法律系: \033[32m%10d\033[m\n" \
    "$support_culture" "$support_ntu"
  echo ""
  echo "由於Google Spreadsheet本身不會過濾重複和空白的資料,有些連署的人數並不準確。"
  echo "Source code: https://gist.github.com/Explorer09/7687913"
  echo "最後更新:$(date --rfc-2822)"
}
# Remove the downloaded <name>.htm pages from the current directory.
# Missing files are ignored (rm -f).
del_files () {
  local -a pages=(
    tf tapcpr psycho acatw edutw unistu law
    culture swr health psychia thu ntu
  )
  local page

  for page in "${pages[@]}"; do
    rm -f "${page}.htm"
  done
}
# Entry point: download the pages, extract the counts, print the bar and the
# table, then remove the downloaded files.
main () {
  fetch_data
  grep_data
  plot
  output_table
  del_files
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment