Skip to content

Instantly share code, notes, and snippets.

@hacolab
Last active May 15, 2020 13:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hacolab/f8be9702eb4e0dcfd09363f4d32fb663 to your computer and use it in GitHub Desktop.
Save hacolab/f8be9702eb4e0dcfd09363f4d32fb663 to your computer and use it in GitHub Desktop.
#!/bin/sh -u
################################################################################
#-Get information on COVID-19 infection cases in Japan.
#-
# [VERSION] 0.0.3
#
#+[USAGE]
#+ $0 [-hV]
#+ $0 [-ctu][-m 'cond']
#+ $0 [-tu][-f field][-m 'cond'] [field1 field2...]
#+
#-[OPTIONS]
#- -c print the count of rows that match the condition.
#-
#- -f field print the count of rows that match the condition for each value of the field.
#- for the field names that can be used, see 'field' below.
#-
#- -h print script help.
#-
#- -m cond print only the records that match the condition.
#- any awk expression can be used.
#- e.g.) $0 -m 'age>10 && date~"2020/05"'
#-
#- -t print the total counts in Japan for each date.
#- for the field names that can be used, see 'field' below.
#-
#- -u update database.
#-
#- -V print script version.
#-
#- field print target field. names can be used.
#- - id
#- - date
#- - area1
#- - area2
#- - area3
#- - age
#- - sex
#- - job
#- - note
#- - sts
#-
#- if use '-t'
#- - date
#- - inf
#- - infd
#- - die
#- - leav
#- - leavd
#- - pcr
#- - pcrd
#-
#- if not given field, print all fields.
#-
#-[EXIT-STATUS]
#- 0 no error
#- 1 parameter error
#- 2 process error
#-
#-[DEPENDENCY]
#- - curl
#- - https://dl.dropboxusercontent.com/s/6mztoeb6xf78g5w/COVID-19.csv
#-
#-[REFERENCE]
#- - https://jag-japan.com/covid19map-readme/
#-
################################################################################
# Name this script was invoked as; it is shown in the usage/help text and
# names the per-command data directory.  Parameter expansion replaces the
# unquoted `basename $0`, which broke on paths containing whitespace.
CMD_NAME=${0##*/}
###################################################
# Configs
###################################################
# database
# Download command; expanded unquoted at the call site so that the options
# are split into separate words.
GET_CMD="curl -sSf"
DB_URL=https://dl.dropboxusercontent.com/s/6mztoeb6xf78g5w/COVID-19.csv
# Cache directory: $XDG_DATA_HOME when set and non-empty, otherwise
# ~/.local/share, with one sub-directory per command name.
DB_DIR="${XDG_DATA_HOME:-${HOME}/.local/share/}/${CMD_NAME}"
[ -d "$DB_DIR" ] || mkdir -p "$DB_DIR"
# Raw download and the two databases derived from it.
RAW_DB_FILE="${DB_DIR}/COVID-19.csv"
ITEM_DB_FILE="${DB_DIR}/ITEM_DB_COVID-19.csv"
TOTAL_DB_FILE="${DB_DIR}/TOTAL_DB_COVID-19.csv"
###################################################
# Utilities
###################################################
# Print an error message followed by the usage synopsis, then exit 1.
#   $1 : message printed first, on stderr
# The synopsis is made of the script lines that start with "#" followed by
# "+"; that two-character prefix is removed and every literal "$0" in them
# is replaced by CMD_NAME.  Everything is written to stderr.
usage_exit() {
  # printf instead of echo: a message such as "-n" must be printed verbatim.
  printf '%s\n' "$1" 1>&2
  # Backslash, "/" and "&" are escaped so that CMD_NAME is inserted
  # literally by the sed replacement ("&" would otherwise expand to "$0").
  sed -n 's/^#+//p' "$0" \
    | sed "s/[\$]0/$(printf '%s\n' "$CMD_NAME" | sed 's|[\\/&]|\\&|g')/g" 1>&2
  exit 1
}
# Print the full help text, then exit 0.
# The help text is made of the script lines that start with "#" followed by
# "-" or "+"; that two-character prefix is removed and every literal "$0"
# in them is replaced by CMD_NAME.  Output goes to stderr, as before.
help_exit() {
  # Backslash, "/" and "&" are escaped so that CMD_NAME is inserted
  # literally by the sed replacement ("&" would otherwise expand to "$0").
  sed -n 's/^#[-+]//p' "$0" \
    | sed "s/[\$]0/$(printf '%s\n' "$CMD_NAME" | sed 's|[\\/&]|\\&|g')/g" 1>&2
  exit 0
}
# Report a processing error and terminate the script.
#   $1 : message written to stderr
# Exits with status 2 (the "process error" status of this script).
err_exit() {
  # printf instead of echo: messages that look like echo options ("-n",
  # "-e") or contain backslashes are printed verbatim.
  printf '%s\n' "$1" 1>&2
  exit 2
}
# Print the version recorded in a script file, then exit 0.
#   $1 : path of the file to scan
# The version is whatever follows the bracketed VERSION tag on a "# " comment
# line; the tag and the blanks after it are removed.  Output goes to stderr.
version_exit() {
  sed -n '/^# \[VERSION\]/s/^# \[VERSION\] *//p' "$1" >&2
  exit 0
}
###################################################
# Analyze options & parameters
###################################################
# options
# Option state; an empty value means "option not given".
UPDATE_DB=       # -u : re-download the raw database
PICKUP_COND=     # -m : row filter condition (awk expression)
CNT_IN_FIELD=    # -f : field whose values are counted
CNT_OF_RECODE=   # -c : print only the number of matching rows
TOTAL_COUNT=     # -t : work on the per-date totals database
while getopts cf:hm:tuV OPT; do
  case "$OPT" in
    c) CNT_OF_RECODE=1 ;;
    f) CNT_IN_FIELD=$OPTARG ;;
    h) help_exit "$0" ;;
    m) PICKUP_COND=$OPTARG ;;
    t) TOTAL_COUNT=1 ;;
    u) UPDATE_DB=1 ;;
    V) version_exit "$0" ;;
    # getopts has already printed its own diagnostic; the script path is
    # passed as the message line shown above the usage text.
    \?) usage_exit "$0" ;;
  esac
done
# Drop the parsed options so that "$@" holds only the field names.
# Arithmetic expansion replaces the external, unquoted `expr` call.
shift $((OPTIND - 1))
# params
# Check every positional argument against the field names of the selected
# database (totals when TOTAL_COUNT is set, items otherwise).  All arguments
# are scanned; when several are invalid the message names the last one.
ERR_MSG=
for field in "$@"; do
  if [ -n "$TOTAL_COUNT" ]; then
    case $field in
      date | inf | infd | die | leav | leavd | pcr | pcrd ) continue ;;
    esac
  else
    case $field in
      id | date | area1 | area2 | area3 | age | sex | job | note | sts ) continue ;;
    esac
  fi
  # Reached only when the name matched none of the allowed patterns.
  ERR_MSG="invalid field-name '$field'"
done
if [ -n "$ERR_MSG" ]; then
  usage_exit "$ERR_MSG"
fi
# make output format for awk
# "id date job age" --> "$1,$2,$8,$6"
# Filter: rewrite item-database field names on stdin into awk positional
# references ("id date job age" becomes "$1 $2 $8 $6") and write the result
# to stdout.
# Only whole identifiers are rewritten, and text inside double-quoted string
# literals is copied untouched, so 'job~"guide"' or a name such as "valid"
# is no longer corrupted by the "id" that happens to occur inside it.
# A /regex/ literal is not recognised as a string; a whole word inside one
# that equals a field name is still rewritten.
item_name_to_pos(){
  awk '
    BEGIN {
      pos["id"]    = 1
      pos["date"]  = 2
      pos["area1"] = 3
      pos["area2"] = 4
      pos["area3"] = 5
      pos["age"]   = 6
      pos["sex"]   = 7
      pos["job"]   = 8
      pos["note"]  = 9
      pos["sts"]   = 10
    }
    {
      out = ""
      rest = $0
      while (rest != "") {
        if (match(rest, /^"([^"\\]|\\.)*"/)) {
          # complete string literal: copy verbatim
          n = RLENGTH
          tok = substr(rest, 1, n)
        } else if (match(rest, /^[A-Za-z_][A-Za-z_0-9]*/)) {
          # identifier: translate only on an exact field-name match
          n = RLENGTH
          tok = substr(rest, 1, n)
          if (tok in pos) tok = "$" pos[tok]
        } else {
          # any other character is passed through unchanged
          n = 1
          tok = substr(rest, 1, 1)
        }
        out = out tok
        rest = substr(rest, n + 1)
      }
      print out
    }'
}
# Filter: rewrite totals-database field names on stdin into awk positional
# references ("date inf infd" becomes "$1 $2 $3") and write the result to
# stdout.
# Only whole identifiers are rewritten, and text inside double-quoted string
# literals is copied untouched, so a word such as "info" is no longer turned
# into "$2o" and no replacement ordering (infd before inf, ...) is needed.
# A /regex/ literal is not recognised as a string; a whole word inside one
# that equals a field name is still rewritten.
total_name_to_pos(){
  awk '
    BEGIN {
      pos["date"]  = 1
      pos["inf"]   = 2
      pos["infd"]  = 3
      pos["die"]   = 4
      pos["leav"]  = 5
      pos["leavd"] = 6
      pos["pcr"]   = 7
      pos["pcrd"]  = 8
    }
    {
      out = ""
      rest = $0
      while (rest != "") {
        if (match(rest, /^"([^"\\]|\\.)*"/)) {
          # complete string literal: copy verbatim
          n = RLENGTH
          tok = substr(rest, 1, n)
        } else if (match(rest, /^[A-Za-z_][A-Za-z_0-9]*/)) {
          # identifier: translate only on an exact field-name match
          n = RLENGTH
          tok = substr(rest, 1, n)
          if (tok in pos) tok = "$" pos[tok]
        } else {
          # any other character is passed through unchanged
          n = 1
          tok = substr(rest, 1, 1)
        }
        out = out tok
        rest = substr(rest, n + 1)
      }
      print out
    }'
}
# Choose the name translator and database file for the requested mode:
# the per-date totals when TOTAL_COUNT is set, the item list otherwise.
if [ -n "$TOTAL_COUNT" ]; then
  NAME_TO_POS=total_name_to_pos
  DB_FILE="$TOTAL_DB_FILE"
else
  NAME_TO_POS=item_name_to_pos
  DB_FILE="$ITEM_DB_FILE"
fi
# awk print list: the requested fields as comma-separated positions, or the
# whole record ($0) when no field name was given.
if [ $# -gt 0 ]; then
  OUTPUT_ITEMS=$(echo "$*" | $NAME_TO_POS | sed -e 's/[ ]\{1,\}/,/g')
else
  OUTPUT_ITEMS='$0'
fi
# echo $OUTPUT_ITEMS
###################################################
# Main
###################################################
# get csv & make database
#==================================================
# awk print list that make_database applies to each raw CSV row to build one
# item-database row.  The last item joins raw column 18 with raw column 3 in
# parentheses, giving "<col18>(<col3>)".
PICKUP_DB_ITEMS='$1,$40,$10,$11,$14,$6,$7,$32,$19,$18 "(" $3 ")"'
# Field names accepted on the command line, in the same column order as the
# header below (kept for reference only):
#PICKUP_DB_HEADER='id,date,area1,area2,area3,age,sex,job,note,sts'
# Header row written to the item database.  In English: ID, confirmed date,
# prefecture of consultation, residence, residence (detail), age group, sex,
# occupation, remarks, status.
PICKUP_DB_HEADER='ID,確定日,受診都道府県,居住地,居住地(詳細),年代,性別,職業,備考,状態'
# awk print list that make_total_database applies to each selected raw CSV
# row to build one totals-database row.
TOTAL_DB_ITEMS='$40,$24,$25,$27,$28,$29,$30,$31'
# Field names accepted with -t, in the same column order as the header below
# (kept for reference only):
#TOTAL_DB_HEADER='date,inf,infd,die,leav,leavd,pcr,pcrd'
# Header row written to the totals database.  In English: confirmed date,
# cumulative infected, infected vs. previous day, total deaths, cumulative
# discharged, discharged vs. previous day, persons PCR-tested, PCR-tested vs.
# previous day.
TOTAL_DB_HEADER='確定日,感染者数累計,感染者数前日比,死者合計,退院数累計,退院数前日比,PCR検査実施人数,PCR検査実施人数前日比'
# Filter: reduce the raw CSV on stdin to its data records.
# The first line (header) is dropped, carriage returns are removed and
# lines that begin with a comma (empty first column) are discarded.
get_body_recode(){
  tail -n +2 \
    | tr -d '\r' \
    | grep -v '^,'
}
# Filter: remove blanks that directly precede or follow a comma, and blanks
# at the end of each line.  Blanks inside a field are kept.
# The three substitutions run in order within a single sed process.
# NOTE(review): the original bracket expressions held literal, visually
# indistinguishable whitespace characters (possibly including an ideographic
# space).  [[:blank:]] matches space and tab everywhere and further blank
# characters as defined by the current locale; confirm that this covers the
# characters found in the data on the target platform.
delete_space(){
  sed -e 's/[[:blank:]]\{1,\},/,/g' \
      -e 's/,[[:blank:]]\{1,\}/,/g' \
      -e 's/[[:blank:]]\{1,\}$//'
}
# Filter: zero-pad the month and day of a YYYY/M/D date field.
# Only a date enclosed by commas on both sides is recognised, and only the
# first such date on each line is rewritten (no "g" flag).  The first
# expression pads a one-digit month, the second a one-digit day.
format_date(){
  sed -e 's@,\([0-9]\{4\}\)/\([1-9]\)/\([0-9]\{1,2\}\),@,\1/0\2/\3,@' \
      -e 's@,\([0-9]\{4\}\)/\([0-9][0-9]\)/\([0-9]\),@,\1/\2/0\3,@'
}
make_database(){
get_body_recode \
| awk -F"," \
' BEGIN{ OFS="," }
{
if($19~"100歳以上"){$6=100}
if($6==""){$6="不明"}
if($6=="0-10"){$6="0"}
print '"$PICKUP_DB_ITEMS"'
}' \
| sed 's/()$//' \
| delete_space \
| format_date
}
# Filter: build the totals-database rows (no header) from the raw CSV on
# stdin.
# Rows are ordered by descending id (raw column 1); for every distinct date
# in raw column 40 the first row whose column 24 is neither empty nor
# "#REF!" is kept, i.e. the one with the highest id.  The columns listed in
# TOTAL_DB_ITEMS are printed and the result is sorted in ascending order.
make_total_database(){
  # The numeric key is limited to column 1 (-k 1,1).  A key running to the
  # end of the line lets sort, in locales where "," is the digit group
  # separator, merge the digits of later columns into the id.
  get_body_recode \
    | delete_space \
    | format_date \
    | sort -t ',' -k 1,1 -nr \
    | awk -F"," '
      BEGIN{ OFS="," }
      $24!="" && $24!="#REF!" && !uniq[$40]++ {
        print '"$TOTAL_DB_ITEMS"'
      }' \
    | sort -t ',' -k 1
}
# get raw database from web
# Download the raw CSV when no cached copy exists or when -u was given.
# The data is first written to a temporary file next to the cache and only
# renamed over it after a successful download: a failed transfer can then
# neither destroy a good cached copy nor leave an empty file that later
# runs would accept as valid.
if [ ! -f "$RAW_DB_FILE" ] || [ -n "$UPDATE_DB" ]; then
  RAW_DB_TMP="${RAW_DB_FILE}.$$.tmp"
  # GET_CMD is expanded unquoted on purpose: it holds a command plus options.
  if ! $GET_CMD "$DB_URL" > "$RAW_DB_TMP"; then
    rm -f "$RAW_DB_TMP"
    err_exit "can't db update error from $DB_URL"
  fi
  if ! mv -f "$RAW_DB_TMP" "$RAW_DB_FILE"; then
    rm -f "$RAW_DB_TMP"
    err_exit "can't write database file $RAW_DB_FILE"
  fi
  # Derived databases are stale now; they are rebuilt further below.
  rm -f "$ITEM_DB_FILE" "$TOTAL_DB_FILE"
fi
# make database for this script
# Item database: header row followed by the rows produced by make_database
# from the raw CSV.  Built only when the file does not exist yet.
if [ ! -f "$ITEM_DB_FILE" ]; then
  {
    echo "$PICKUP_DB_HEADER"
    cat "$RAW_DB_FILE" | make_database
  } > "$ITEM_DB_FILE"
fi
# Totals database: header row followed by the rows produced by
# make_total_database from the raw CSV.  Built only when the file does not
# exist yet.
if [ ! -f "$TOTAL_DB_FILE" ]; then
  {
    echo "$TOTAL_DB_HEADER"
    cat "$RAW_DB_FILE" | make_total_database
  } > "${TOTAL_DB_FILE}"
fi
# pickup & output items
#==================================================
# Translate the field names in the user's condition into awk positions, then
# turn a lone "=" into "==" so that e.g. age=20 compares instead of assigns.
# printf with a quoted expansion passes the condition through unchanged; the
# former unquoted echo glob-expanded "*" and collapsed runs of blanks.
PICKUP_COND=$(printf '%s\n' "$PICKUP_COND" | $NAME_TO_POS \
  | sed 's/\([^!~><=]\)=\([^=]\)/\1==\2/g')
if [ -n "$CNT_IN_FIELD" ]; then
  CNT_IN_FIELD=$(printf '%s\n' "$CNT_IN_FIELD" | $NAME_TO_POS)
  # output count each of field
  # One "value,count" line per distinct value of the field (in awk's
  # unspecified array order), followed by a ":TOTAL" line.
  # CNT_IN_FIELD is spliced in quoted so that blanks in it cannot split the
  # awk program into several arguments.
  sed '1d' "$DB_FILE" \
    | awk -F"," '
      BEGIN{ OFS="," }
      '"$PICKUP_COND"'{
        count['"$CNT_IN_FIELD"']++
      }
      END{
        total=0
        for (field in count) {
          print field,count[field]
          total+=count[field]
        }
        print ":TOTAL",total
      }'
elif [ -n "$CNT_OF_RECODE" ]; then
  # output count of pickup records
  sed '1d' "$DB_FILE" \
    | awk -F"," '
      BEGIN{ count=0 }
      '"$PICKUP_COND"'{
        count++
      }
      END{ print count }'
else
  # output pickup header: the first line of the database, reduced to the
  # requested columns
  head -n 1 "$DB_FILE" \
    | awk -F"," '
      BEGIN{ OFS="," }
      {
        print '"${OUTPUT_ITEMS}"'
      }'
  # output pickup records: every data row that satisfies the condition
  sed '1d' "$DB_FILE" \
    | awk -F"," '
      BEGIN{ OFS="," }
      '"$PICKUP_COND"'{
        print '"${OUTPUT_ITEMS}"'
      }'
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment