Last active
May 15, 2020 13:47
-
-
Save hacolab/f8be9702eb4e0dcfd09363f4d32fb663 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh -u | |
################################################################################ | |
#-Get information about COVID-19 infections in Japan.
#- | |
# [VERSION] 0.0.3 | |
# | |
#+[USAGE] | |
#+ $0 [-hV] | |
#+ $0 [-ctu][-m 'cond'] | |
#+ $0 [-tu][-f field][-m 'cond'] [field1 field2...] | |
#+ | |
#-[OPTIONS] | |
#- -c print the count of rows that match the condition.
#- | |
#- -f field print the count of rows that match the condition for each value of the field.
#- for the field names that can be used, see 'field' below.
#- | |
#- -h print script help. | |
#- | |
#- -m cond print only the records that match the condition.
#- awk expressions can be used.
#- e.g. $0 -m 'age>10 && date~"2020/05"'
#- | |
#- -t print the total counts in Japan for each date.
#- for the field names that can be used, see 'field' below.
#- | |
#- -u update database. | |
#- | |
#- -V print script version. | |
#- | |
#- field field to print. the following names can be used.
#- - id | |
#- - date | |
#- - area1 | |
#- - area2 | |
#- - area3 | |
#- - age | |
#- - sex | |
#- - job | |
#- - note | |
#- - sts | |
#- | |
#- if use '-t' | |
#- - date | |
#- - inf | |
#- - infd | |
#- - die | |
#- - leav | |
#- - leavd | |
#- - pcr | |
#- - pcrd | |
#- | |
#- if no field is given, print all fields.
#- | |
#-[EXIT-STATUS] | |
#- 0 no error | |
#- 1 parameter error | |
#- 2 process error | |
#- | |
#-[DEPENDENCY] | |
#- - curl | |
#- - https://dl.dropboxusercontent.com/s/6mztoeb6xf78g5w/COVID-19.csv | |
#- | |
#-[REFERENCE] | |
#- - https://jag-japan.com/covid19map-readme/ | |
#- | |
################################################################################ | |
# Name of this script without its directory part; used in the help text and
# as the name of the per-user data directory.
CMD_NAME=${0##*/}
###################################################
# Configs
###################################################
# database
# GET_CMD holds a command together with its options; it is expanded unquoted
# at the call site so that it is split into separate words.
GET_CMD="curl -sSf"
DB_URL=https://dl.dropboxusercontent.com/s/6mztoeb6xf78g5w/COVID-19.csv
# Data directory: $XDG_DATA_HOME/<script name>, or ~/.local/share/<script name>
# when XDG_DATA_HOME is unset or empty.
DB_DIR="${XDG_DATA_HOME:-${HOME}/.local/share}/${CMD_NAME}"
[ -d "$DB_DIR" ] || mkdir -p "$DB_DIR"
# Downloaded CSV and the two databases derived from it.
RAW_DB_FILE="${DB_DIR}/COVID-19.csv"
ITEM_DB_FILE="${DB_DIR}/ITEM_DB_COVID-19.csv"
TOTAL_DB_FILE="${DB_DIR}/TOTAL_DB_COVID-19.csv"
################################################### | |
# Utilities | |
################################################### | |
#######################################
# Print an error message followed by the usage text, then exit with status 1.
# The usage text is taken from the lines of this script that begin with a
# hash immediately followed by a plus sign; "$0" in them is replaced by
# CMD_NAME.
# Globals:   CMD_NAME (read)
# Arguments: $1 - message printed before the usage text
# Outputs:   message and usage text on stderr
#######################################
usage_exit() {
  printf '%s\n' "${1:-}" >&2
  sed -n '/^#+/s/^#+//p' "$0" | sed "s/\$0/${CMD_NAME}/g" >&2
  exit 1
}
#######################################
# Print the full help text, then exit with status 0.
# The help text is taken from the lines of this script that begin with a
# hash immediately followed by a minus or plus sign; "$0" in them is
# replaced by CMD_NAME.
# Globals:   CMD_NAME (read)
# Arguments: none are used (the option parser passes "$0", which is ignored)
# Outputs:   help text on stderr
#######################################
help_exit() {
  sed -n '/^#[-+]/s/^#[-+]//p' "$0" | sed "s/\$0/${CMD_NAME}/g" >&2
  exit 0
}
#######################################
# Print an error message and exit with status 2 (process error).
# Arguments: $1 - message to print
# Outputs:   message on stderr
#######################################
err_exit() {
  # printf prints the message verbatim, whatever characters it contains.
  printf '%s\n' "${1:-}" >&2
  exit 2
}
#######################################
# Print the version number found in a script header, then exit with status 0.
# The number is whatever follows the "[VERSION]" tag on a comment line of
# the given file.
# Arguments: $1 - path of the file to read (defaults to this script)
# Outputs:   version string on stderr
#######################################
version_exit() {
  sed -n 's/^# \[VERSION\] *//p' "${1:-$0}" >&2
  exit 0
}
################################################### | |
# Analyze options & parameters | |
################################################### | |
# options
# Each flag variable stays empty unless its option is given.
UPDATE_DB=        # -u: download the raw database again
PICKUP_COND=      # -m: awk condition selecting the rows
CNT_IN_FIELD=     # -f: field whose values are counted
CNT_OF_RECODE=    # -c: print only the number of matching rows
TOTAL_COUNT=      # -t: use the per-date totals database
while getopts cf:hm:tuV OPT; do
  case "$OPT" in
    c) CNT_OF_RECODE=1 ;;
    f) CNT_IN_FIELD=$OPTARG ;;
    h) help_exit "$0" ;;
    m) PICKUP_COND=$OPTARG ;;
    t) TOTAL_COUNT=1 ;;
    u) UPDATE_DB=1 ;;
    V) version_exit "$0" ;;
    # getopts has already reported the details on stderr; add a short
    # message (not the script path) before the usage text.
    \?) usage_exit "invalid option or missing option argument" ;;
  esac
done
# Drop the parsed options so that "$@" holds only the field names.
shift $((OPTIND - 1))
# params
# Every positional parameter must be a field name of the selected database:
# the totals database when -t was given, the per-case database otherwise.
# When several names are invalid, the last one is the one reported.
ERR_MSG=
for field in "$@"; do
  if [ -n "$TOTAL_COUNT" ]; then
    case "$field" in
      date | inf | infd | die | leav | leavd | pcr | pcrd) ;;
      *) ERR_MSG="invalid field-name '$field'" ;;
    esac
  else
    case "$field" in
      id | date | area1 | area2 | area3 | age | sex | job | note | sts) ;;
      *) ERR_MSG="invalid field-name '$field'" ;;
    esac
  fi
done
if [ -n "$ERR_MSG" ]; then
  usage_exit "$ERR_MSG"
fi
# make output format for awk | |
# "id date job age" --> "$1,$2,$8,$6" | |
#######################################
# Translate field names of the per-case database into awk field references.
#   id=$1 date=$2 area1=$3 area2=$4 area3=$5 age=$6 sex=$7 job=$8 note=$9
#   sts=$10
# Only whole identifiers are translated: a name that is merely part of a
# longer word, or that appears inside a double-quoted string literal, is
# left untouched.
# Inputs:  text on stdin (field list or awk expression)
# Outputs: translated text on stdout
#######################################
item_name_to_pos() {
  awk '
    BEGIN {
      pos["id"] = 1;    pos["date"] = 2;  pos["area1"] = 3; pos["area2"] = 4
      pos["area3"] = 5; pos["age"] = 6;   pos["sex"] = 7;   pos["job"] = 8
      pos["note"] = 9;  pos["sts"] = 10
    }
    {
      out = ""
      rest = $0
      # Walk through the line token by token: a token is either a complete
      # double-quoted string or an identifier.
      while (match(rest, /"([^"\\]|\\.)*"|[A-Za-z_][A-Za-z0-9_]*/)) {
        token = substr(rest, RSTART, RLENGTH)
        out = out substr(rest, 1, RSTART - 1)
        if (token in pos) {
          out = out "$" pos[token]
        } else {
          out = out token
        }
        rest = substr(rest, RSTART + RLENGTH)
      }
      print out rest
    }'
}
#######################################
# Translate field names of the totals database into awk field references.
#   date=$1 inf=$2 infd=$3 die=$4 leav=$5 leavd=$6 pcr=$7 pcrd=$8
# Only whole identifiers are translated: a name that is merely part of a
# longer word, or that appears inside a double-quoted string literal, is
# left untouched.
# Inputs:  text on stdin (field list or awk expression)
# Outputs: translated text on stdout
#######################################
total_name_to_pos() {
  awk '
    BEGIN {
      pos["date"] = 1; pos["inf"] = 2;   pos["infd"] = 3; pos["die"] = 4
      pos["leav"] = 5; pos["leavd"] = 6; pos["pcr"] = 7;  pos["pcrd"] = 8
    }
    {
      out = ""
      rest = $0
      # Walk through the line token by token: a token is either a complete
      # double-quoted string or an identifier.
      while (match(rest, /"([^"\\]|\\.)*"|[A-Za-z_][A-Za-z0-9_]*/)) {
        token = substr(rest, RSTART, RLENGTH)
        out = out substr(rest, 1, RSTART - 1)
        if (token in pos) {
          out = out "$" pos[token]
        } else {
          out = out token
        }
        rest = substr(rest, RSTART + RLENGTH)
      }
      print out rest
    }'
}
# Choose the name translator and the database that match the mode:
# the per-case database by default, the totals database with -t.
NAME_TO_POS=item_name_to_pos
DB_FILE="$ITEM_DB_FILE"
if [ -n "$TOTAL_COUNT" ]; then
  NAME_TO_POS=total_name_to_pos
  DB_FILE="$TOTAL_DB_FILE"
fi
# awk print list: the whole record by default, otherwise the requested
# fields as comma separated field references.
OUTPUT_ITEMS='$0'
if [ $# -gt 0 ]; then
  OUTPUT_ITEMS=$(printf '%s\n' "$*" | "$NAME_TO_POS" | sed -e 's/[ ]\{1,\}/,/g')
fi
# printf '%s\n' "$OUTPUT_ITEMS"   # debug
################################################### | |
# Main | |
################################################### | |
# get csv & make database | |
#================================================== | |
# awk print list that builds one per-case record from a raw CSV row.
# Output order: id,date,area1,area2,area3,age,sex,job,note,sts
# The last item is raw column 18 followed by raw column 3 in parentheses.
PICKUP_DB_ITEMS='$1,$40,$10,$11,$14,$6,$7,$32,$19,$18 "(" $3 ")"'
#PICKUP_DB_HEADER='id,date,area1,area2,area3,age,sex,job,note,sts'
# Header row written at the top of the per-case database.
PICKUP_DB_HEADER='ID,確定日,受診都道府県,居住地,居住地(詳細),年代,性別,職業,備考,状態'
# awk print list that builds one totals record from a raw CSV row.
# Output order: date,inf,infd,die,leav,leavd,pcr,pcrd
TOTAL_DB_ITEMS='$40,$24,$25,$27,$28,$29,$30,$31'
#TOTAL_DB_HEADER='date,inf,infd,die,leav,leavd,pcr,pcrd'
# Header row written at the top of the totals database.
TOTAL_DB_HEADER='確定日,感染者数累計,感染者数前日比,死者合計,退院数累計,退院数前日比,PCR検査実施人数,PCR検査実施人数前日比'
#######################################
# Extract the data rows of a CSV: strip carriage returns, drop the first
# (header) line and drop every row whose first column is empty.
# Inputs:  CSV text on stdin
# Outputs: remaining rows on stdout
# Returns: status of the final sed, so an input without data rows is not
#          reported as a failure.
#######################################
get_body_recode() {
  tr -d '\r' | sed -e '1d' -e '/^,/d'
}
#######################################
# Remove space characters that surround the comma separators and that
# trail at the end of each line. Only the space character is removed;
# tabs are left alone.
# Inputs:  CSV text on stdin
# Outputs: cleaned text on stdout
#######################################
delete_space() {
  sed -e 's/[ ]\{1,\},/,/g' \
      -e 's/,[ ]\{1,\}/,/g' \
      -e 's/[ ]\{1,\}$//'
}
#######################################
# Zero-pad the month and day of every date column written as YYYY/M/D so
# that it becomes YYYY/MM/DD. Each comma separated column that consists of
# nothing but such a date is converted, wherever it sits in the row; all
# other columns are passed through unchanged.
# Inputs:  CSV text on stdin
# Outputs: converted text on stdout
#######################################
format_date() {
  awk -F',' '
    BEGIN { OFS = "," }
    {
      for (i = 1; i <= NF; i++) {
        if ($i ~ /^[0-9][0-9][0-9][0-9]\/[0-9][0-9]?\/[0-9][0-9]?$/) {
          split($i, ymd, "/")
          $i = sprintf("%s/%02d/%02d", ymd[1], ymd[2], ymd[3])
        }
      }
      print
    }'
}
#######################################
# Build the per-case database records from the raw CSV.
# The age column (raw column 6) is normalised before the columns listed in
# PICKUP_DB_ITEMS are printed:
#   - 100 when the note column (raw column 19) contains "100歳以上"
#     ("100 years or older"),
#   - "不明" ("unknown") when it is empty,
#   - "0" when it is "0-10".
# An empty "()" left at the end of a record is removed afterwards.
# Globals:   PICKUP_DB_ITEMS (read; spliced into the awk program)
# Inputs:    raw CSV, including its header line, on stdin
# Outputs:   records without a header on stdout
#######################################
make_database() {
  get_body_recode \
    | awk -F',' '
      BEGIN { OFS = "," }
      {
        if ($19 ~ "100歳以上") { $6 = 100 }
        if ($6 == "") { $6 = "不明" }
        if ($6 == "0-10") { $6 = "0" }
        print '"$PICKUP_DB_ITEMS"'
      }' \
    | sed 's/()$//' \
    | delete_space \
    | format_date
}
#######################################
# Build the totals database records from the raw CSV.
# Rows are sorted numerically in descending order, so for every date
# (raw column 40) the first row met is the one with the highest leading
# number. Of the rows whose raw column 24 is neither empty nor "#REF!",
# only that first row per date is kept, and the columns listed in
# TOTAL_DB_ITEMS are printed. The result is sorted in ascending order.
# Globals:   TOTAL_DB_ITEMS (read; spliced into the awk program)
# Inputs:    raw CSV, including its header line, on stdin
# Outputs:   records without a header on stdout
#######################################
make_total_database() {
  get_body_recode \
    | delete_space \
    | format_date \
    | sort -t ',' -k 1 -nr \
    | awk -F',' '
      BEGIN { OFS = "," }
      $24 != "" && $24 != "#REF!" && !uniq[$40]++ {
        print '"$TOTAL_DB_ITEMS"'
      }' \
    | sort -t ',' -k 1
}
# get raw database from web
# Runs when no raw file has been downloaded yet or when -u was given.
if [ ! -f "$RAW_DB_FILE" ] || [ -n "$UPDATE_DB" ]; then
  # Download into a temporary file next to the target, so that a failed
  # transfer never truncates or replaces a previously downloaded database.
  TMP_DB_FILE="${RAW_DB_FILE}.$$"
  # GET_CMD holds a command plus its options and is split on purpose.
  if ! $GET_CMD "$DB_URL" > "$TMP_DB_FILE"; then
    rm -f "$TMP_DB_FILE"
    err_exit "can't db update error from $DB_URL"
  fi
  if ! mv -f "$TMP_DB_FILE" "$RAW_DB_FILE"; then
    rm -f "$TMP_DB_FILE"
    err_exit "can't write $RAW_DB_FILE"
  fi
  # The derived databases are stale now; they are rebuilt below.
  rm -f "$ITEM_DB_FILE" "$TOTAL_DB_FILE"
fi
# make database for this script
if [ ! -f "$ITEM_DB_FILE" ]; then
  {
    printf '%s\n' "$PICKUP_DB_HEADER"
    make_database < "$RAW_DB_FILE"
  } > "$ITEM_DB_FILE"
fi
# make database for total count
if [ ! -f "$TOTAL_DB_FILE" ]; then
  {
    printf '%s\n' "$TOTAL_DB_HEADER"
    make_total_database < "$RAW_DB_FILE"
  } > "$TOTAL_DB_FILE"
fi
# pickup & output items
#==================================================
# Turn the user's condition into an awk pattern: field names become field
# references and a lone "=" becomes "==". The condition is passed with
# printf and quoted so that glob characters, backslashes and runs of spaces
# in it reach awk unchanged.
# NOTE: the condition is spliced into the awk program on purpose; -m
# accepts arbitrary awk expressions.
PICKUP_COND=$(printf '%s\n' "$PICKUP_COND" | "$NAME_TO_POS" \
  | sed 's/\([^!~><=]\)=\([^=]\)/\1==\2/g')
if [ -n "$CNT_IN_FIELD" ]; then
  CNT_IN_FIELD=$(printf '%s\n' "$CNT_IN_FIELD" | "$NAME_TO_POS")
  # output count each of field
  # The first line of the database is its header and is skipped.
  sed '1d' "$DB_FILE" \
    | awk -F',' '
      BEGIN { OFS = "," }
      '"$PICKUP_COND"' {
        count['"$CNT_IN_FIELD"']++
      }
      END {
        total = 0
        for (field in count) {
          print field, count[field]
          total += count[field]
        }
        print ":TOTAL", total
      }'
elif [ -n "$CNT_OF_RECODE" ]; then
  # output count of pickup records
  sed '1d' "$DB_FILE" \
    | awk -F',' '
      BEGIN { count = 0 }
      '"$PICKUP_COND"' {
        count++
      }
      END { print count }'
else
  # output pickup header
  head -n 1 "$DB_FILE" \
    | awk -F',' '
      BEGIN { OFS = "," }
      {
        print '"${OUTPUT_ITEMS}"'
      }'
  # output pickup records
  sed '1d' "$DB_FILE" \
    | awk -F',' '
      BEGIN { OFS = "," }
      '"$PICKUP_COND"' {
        print '"${OUTPUT_ITEMS}"'
      }'
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment