Last active
August 29, 2015 14:07
-
-
Save oldergod/54eacf5ce90affe5e851 to your computer and use it in GitHub Desktop.
アクセスログから月単位でユーザID(整数5桁)毎にアクセス数をCSV形式提出
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Access-count extraction script.
#
# Scans bzip2-compressed access logs and writes, for every user ID (a 5-digit
# integer), the number of accesses per month to a CSV file.
#
# Input : ./<year>/<MM>/access.<year><MM>*.log.bz2, relative to the current
#         directory (no other layout is supported).
# Output: "$output" - header "CLIENT_ID,<month>,..." followed by one row per
#         user ID holding that user's count for each month (0 when the user
#         has no access in that month), sorted by user ID.
#
# Strict mode (set -e / pipefail) is deliberately not enabled: grep exits
# non-zero when a month has no matching access, which is not an error here.

# Set of user IDs seen in any month (user_id -> 1).
declare -A USERS
# Access counts keyed by "<user_id>,<month>".
declare -A COUNTS

year=2014
# June through August
months=( 6 7 8 )
output=~/access_count_per_user.csv

#######################################
# Prints "<count> <user_id>" lines for one month's logs.
# Globals:   year (read)
# Arguments: $1 - month number (e.g. 6 or 12)
# Outputs:   uniq -c style lines on stdout; a warning on stderr when the
#            month has no log files
# Returns:   0 when no log files exist, otherwise the pipeline's status
#######################################
extract_month_counts() {
  local mm
  local -a logs

  # Force base 10 and zero-pad so both "6" and "12" map to the right directory.
  printf -v mm '%02d' "$((10#$1))"

  logs=( ./"$year"/"$mm"/access."$year$mm"*.log.bz2 )
  # An unmatched glob stays literal, so test the first entry for existence.
  if [[ ! -e ${logs[0]} ]]; then
    printf 'warning: no access logs found for %s/%s\n' "$year" "$mm" >&2
    return 0
  fi

  # A user ID is a 5-digit run directly followed by "/v1". The second grep
  # keeps only the trailing 5 digits of each output line, which drops any
  # "file:" prefix added when several files are searched.
  bzgrep -oP '\d{5}(?=/v1)' "${logs[@]}" \
    | grep -oP '\d{5}\z' \
    | sort -n \
    | uniq -c
}

#######################################
# Aggregates the counts of every configured month and writes the CSV.
# Globals:   months, output (read); USERS, COUNTS (reset and written)
# Outputs:   the CSV file at "$output"; a completion message on stdout
# Returns:   0 on success, 1 when the CSV file cannot be written
#######################################
main() {
  local month count user_id key row

  USERS=()
  COUNTS=()

  for month in "${months[@]}"; do
    # read splits on whitespace, which also discards uniq -c's left padding.
    while read -r count user_id; do
      [[ -n $user_id ]] || continue
      key="$user_id,$month"
      USERS[$user_id]=1
      COUNTS[$key]=$count
    done < <(extract_month_counts "$month")
  done

  {
    # header
    ( IFS=,; printf 'CLIENT_ID,%s\n' "${months[*]}" )

    for user_id in "${!USERS[@]}"; do
      row=$user_id
      for month in "${months[@]}"; do
        key="$user_id,$month"
        # Exact key lookup: month 2 can never pick up month 12's count.
        row+=",${COUNTS[$key]:-0}"
      done
      printf '%s\n' "$row"
    done | sort -t, -k1,1n  # associative-array order is unspecified
  } > "$output" || {
    printf 'error: could not write %s\n' "$output" >&2
    return 1
  }

  printf 'finished, everything is in %s\n' "$output"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment