Last active
April 11, 2024 11:10
-
-
Save Cybso/77b8f7bdec78e0a8df8f8435e3ccb8fc to your computer and use it in GitHub Desktop.
Sort apache access logs and convert 'common' into 'combined'. The output of this script can be used as input for awffull or webalizer.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
###
# Sorts the rows of one or multiple apache access_log files
# by date (ascending) and converts them from 'common' log format
# into 'combined' (if you switched the log file format meanwhile).
#
# This is useful when you want to merge multiple log files, e.g. when
# you have different files for HTTP and HTTPS.
#
# Usage:
#   sort-access-log [file [file...]]
#
# If an argument ends in .gz it is decompressed using 'gunzip'.
# Arguments that are not regular files are skipped with a warning on
# STDERR. If no argument has been given the script reads STDIN.
#
# The date is expected to have the following format, which may occur
# *anywhere* within the line (if it occurs more than once, the last
# occurrence is used):
#
#   [20/Mar/2017:12:00:37 +0100]
#
# The timezone argument is ignored, so this might be inaccurate when
# changing from summer time to winter time - for me this doesn't matter.
# Lines with identical timestamps keep their input order.
#
# The output of the program can be used as input for awffull or
# webalizer.
#
# Author:  Roland Tapken <roland@dau-sicher.de>
# Date:    2017-06-23
# License: Beerware
###

# Writes the raw log lines to stdout: the content of every file given as
# argument (transparently decompressing *.gz), or stdin if there are none.
read_logs() {
  local file

  if (( $# == 0 )); then
    cat
    return
  fi

  # "$@" keeps file names containing whitespace or glob characters intact.
  for file in "$@"; do
    if [[ ! -f "$file" ]]; then
      printf '%s\n' "Ignoring missing file '$file'" >&2
    elif [[ "$file" == *.gz ]]; then
      gunzip -c -- "$file"
    else
      cat -- "$file"
    fi
  done
}

# Prefixes every line that contains a timestamp with a fixed-width sort key
# of the form "YYYY-MM-DDhhmmss " (e.g. "2017-03-20120037 ").
add_sort_key() {
  local -a months=(jan feb mar apr may jun jul aug sep oct nov dec)
  local -a script
  local idx num

  # Step 1: build the key with the month still spelled out ("2017-Mar-20...").
  script=(-e 's|^.*\[([0-9]{2})/([a-z]+)/([0-9]{4}):([0-9]{2}):([0-9]{2}):([0-9]{2})|\3-\2-\1\4\5\6 &|i')

  # Step 2: turn the month name into its number. The pattern is anchored to
  # the key at the start of the line so that text such as "-may-" inside the
  # request URL or the referrer is never touched.
  for idx in "${!months[@]}"; do
    printf -v num '%02d' "$((idx + 1))"
    script+=(-e "s/^([0-9]{4})-${months[idx]}-([0-9]{8} )/\\1-${num}-\\2/i")
  done

  sed -E "${script[@]}"
}

# Removes the sort key again and appends empty referrer and user-agent
# fields ("-" "-") to lines that are still in 'common' format.
finish_lines() {
  # A 'common' line ends with: "request" status bytes - where bytes is "-"
  # when no body was sent. 'combined' lines end with a quote and are left
  # unchanged.
  sed -E \
    -e 's/^[0-9]{4}-[0-9a-z]+-[0-9]{8} //i' \
    -e 's/" [0-9]+ ([0-9]+|-)$/& "-" "-"/'
}

# Entry point: read, key, sort and clean up the log lines.
sort_access_log() {
  # LC_ALL=C makes the comparison byte-wise and therefore independent of the
  # caller's locale; -s -k1,1 sorts on the key only and keeps the input order
  # of entries that share a timestamp.
  read_logs "$@" \
    | add_sort_key \
    | LC_ALL=C sort -s -k1,1 \
    | finish_lines
}

sort_access_log "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Works beautifully. Thank you!