Skip to content

Instantly share code, notes, and snippets.

@pavelch
Last active June 23, 2018 15:29
Show Gist options
  • Save pavelch/c4a087f2824b3eadab41acaa865a323d to your computer and use it in GitHub Desktop.
Save pavelch/c4a087f2824b3eadab41acaa865a323d to your computer and use it in GitHub Desktop.
Please provide source file and destination path
#!/usr/bin/env bash
#
# Summarise a web-server access log such as NASA_access_log_Jul95.
#
# Usage:   ./log_to_list.sh <access_log> <output_dir>
# Example: ./log_to_list.sh ~/Downloads/NASA_access_log_Jul95 ~/temp
#
# Every log line is split on whitespace and interpreted as:
#   $1       client host
#   $6       request method (in Common Log Format it still carries the
#            opening double quote of the request string, e.g. "GET)
#   $7       requested URL
#   $(NF-1)  HTTP status code
#   $NF      response size in bytes ("-" when no body was sent)
#
# Printed to stdout, in this order:
#   1. distinct field counts found in the log (one space-separated line)
#   2. distinct status codes (one line)
#   3. distinct request methods of broken/relocated requests (one line)
#   4. "#### Length views:<hits> uniq:<hosts> sorted:<hosts>"
#   5. the last 10 lines of <output_dir>/sorted_unique_views
#   6. the last 10 lines of <output_dir>/urls_broken_relocated
#
# Files written:
#   <output_dir>/sorted_unique_views    "host:total_bytes" for successful
#                                       (status 200) requests, ascending by
#                                       total bytes
#   <output_dir>/urls_broken_relocated  distinct URLs answered with one of
#                                       the statuses in ERROR_STATUS_RE

# A failing stage anywhere in a pipeline must fail the whole pipeline.
set -o pipefail

# LC_ALL (not just LC_CTYPE) because LC_ALL overrides every other LC_*
# variable: this guarantees byte-wise sort/awk that never chokes on bytes that
# are invalid in the caller's locale, and makes the ordering reproducible.
export LC_ALL=C

# Status codes that mark a request as broken or relocated. Anchored so the
# status must match exactly rather than merely contain one of the codes.
readonly ERROR_STATUS_RE='^(302|304|400|403|404|500|501)$'

# Print a diagnostic on stderr.
err() {
  printf '%s\n' "$*" >&2
}

# Print the usage text on stderr.
usage() {
  err "Usage: ${0##*/} <access_log> <output_dir>"
  err "Please provide source file and destination path"
}

# Join the lines on stdin into a single space-separated line. A newline is
# always emitted, even for empty input.
join_lines() {
  awk 'NR > 1 { printf " " } { printf "%s", $0 } END { print "" }'
}

# Print the distinct number of fields per line found in log file $1.
print_field_counts() {
  awk '{ print NF }' < "$1" | sort -u | join_lines
}

# Print the distinct status codes found in log file $1.
# Lines with fewer than two fields have no status field: $(NF-1) would be the
# whole line (NF == 1) or a fatal awk error (NF == 0), so they are skipped.
print_status_codes() {
  awk 'NF >= 2 { print $(NF - 1) }' < "$1" | sort -u | join_lines
}

# Print the distinct request methods of broken/relocated requests in log $1.
# NF >= 8 ensures the method field ($6) precedes the status field.
print_error_methods() {
  awk -v status_re="${ERROR_STATUS_RE}" \
    'NF >= 8 && $(NF - 1) ~ status_re { print $6 }' < "$1" \
    | sort -u | join_lines
}

#######################################
# Aggregate the bytes served per host for successful requests.
# Arguments: $1 - access log to read
#            $2 - file to write, "host:total_bytes", ascending by bytes
# Outputs:   the "#### Length ..." summary line on stdout
# Returns:   0 on success, non-zero if awk or sort failed
#######################################
write_sorted_views() {
  local input_file=$1
  local output_file=$2

  # awk emits the number of matching requests as the FIRST line, followed by
  # one "host:bytes" line per host. The consumer peels that first line off
  # with `read` (which reads a pipe byte by byte, so nothing past the newline
  # is consumed) and hands the remainder of the same stream to sort. This
  # gives both counters in a single pass without holding the log in a shell
  # array. %.0f keeps large totals in plain integer notation.
  awk '
    NF >= 2 && $(NF - 1) == 200 && $NF ~ /^[0-9]+$/ {
      hits++
      bytes[$1] += $NF
    }
    END {
      print hits + 0
      for (host in bytes) {
        printf "%s:%.0f\n", host, bytes[host]
      }
    }
  ' < "${input_file}" | {
    IFS= read -r view_count
    sort -t : -k 2,2n > "${output_file}" || exit 1
    unique_count=$(awk 'END { print NR }' < "${output_file}") || exit 1
    printf '#### Length views:%s uniq:%s sorted:%s\n' \
      "${view_count:-0}" "${unique_count}" "${unique_count}"
  }
}

#######################################
# Collect the distinct URLs that were answered with an error/redirect status.
# Arguments: $1 - access log to read
#            $2 - file to write, one URL per line, sorted
# Returns:   0 on success, non-zero if awk or sort failed
#######################################
write_broken_urls() {
  # NF >= 9 ensures the URL field ($7) precedes the status field.
  awk -v status_re="${ERROR_STATUS_RE}" \
    'NF >= 9 && $(NF - 1) ~ status_re { print $7 }' < "$1" \
    | sort -u > "$2"
}

#######################################
# Entry point.
# Arguments: $1 - access log, $2 - output directory (created if missing)
# Returns:   0 on success, 2 on bad usage, 1 on any other failure
#######################################
main() {
  if (( $# < 2 )); then
    usage
    return 2
  fi

  local input_file=$1
  local output_path=$2
  local sorted_uniq_views="${output_path}/sorted_unique_views"
  local urls_broken_relocated="${output_path}/urls_broken_relocated"

  if [[ ! -f "${input_file}" || ! -r "${input_file}" ]]; then
    err "cannot read source file: ${input_file}"
    return 1
  fi
  if ! mkdir -p -- "${output_path}"; then
    err "cannot create destination path: ${output_path}"
    return 1
  fi

  print_field_counts "${input_file}" || return 1
  print_status_codes "${input_file}" || return 1
  print_error_methods "${input_file}" || return 1

  write_sorted_views "${input_file}" "${sorted_uniq_views}" || return 1
  printf '\n\n################ Uniq views\n'
  tail -n 10 < "${sorted_uniq_views}" || return 1

  write_broken_urls "${input_file}" "${urls_broken_relocated}" || return 1
  printf '\n\n################ Broken or relocated\n'
  tail -n 10 < "${urls_broken_relocated}" || return 1
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment