OVH Apache Access log periodic download
#!/bin/bash
# Author: Felipe Molina (@felmoltor)
# Date: 05/03/2015
# Summary:
# This script analyzes the Apache logs previously downloaded with "download.ovh.logs.sh"
# It compares yesterday's requests against the whitelist of website files contained in "whitelist.files.list".
# If a request is not present in this whitelist, the script stores it as suspicious along with the server's response
# to the request, and finally a summary is sent to your email.
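# Note: the whitelist file is assumed to be a plain text file with one legitimate URL path per line
# (the paths below are just made-up examples), e.g.:
#   /index.php
#   /en/contact.html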
NEWLOGS="logs/new"
ANALYZED="logs/analyzed"
DSTMAIL="<YOUR EMAIL@EMAIL.COM>"
DOMAIN="riveraguitar.com"
WHITELIST="whitelist.files.list"
LEARNING=0
NOTINWLFILE=".notinwl.txt"
SUMMARYFILE=".summary.txt"
ATTACHEMENT="accesses.list.txt"
TODAY=$(date +%m/%d/%Y)
SUSPICIOUSHISTORY="suspicious.history.csv"
extensionwl=("js" "css" "jpg" "jpeg" "gif" "png")
GEOLITEDB="/<path>/maxmind/GeoIPCountryWhois.db"
# Convert a dotted-quad IPv4 address into its decimal representation
function ip2dec()
{
local IFS=. ip num e
ip=($1)
for e in 3 2 1
do
(( num += ip[3-e] * 256 ** e ))
done
(( num += ip[3] ))
echo "$num"
}
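# Worked example of the conversion above:
#   ip2dec 192.168.1.10  =>  3232235786  (192*256^3 + 168*256^2 + 1*256 + 10)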
# Look up the country of an IPv4 address in the local GeoLite SQLite database
function getIPCountry()
{
local country ipdec query
ipdec=$(ip2dec $1)
query="select cty_name from GeoIPCountryWhois where $ipdec between CAST(initipdec AS INTEGER) and CAST(endipdec AS INTEGER) limit 1;"
country=$(sqlite3 $GEOLITEDB "$query")
echo $country
}
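# Note (assumption): GEOLITEDB is expected to be MaxMind's legacy GeoIPCountryWhois CSV imported into
# a SQLite table exposing at least the initipdec, endipdec and cty_name columns used by the query above.
# A rough sketch of how such a database could be built (the other column names are just placeholders):
#   sqlite3 GeoIPCountryWhois.db "CREATE TABLE GeoIPCountryWhois(initip TEXT, endip TEXT, initipdec TEXT, endipdec TEXT, cty_code TEXT, cty_name TEXT);"
#   echo -e ".mode csv\n.import GeoIPCountryWhois.csv GeoIPCountryWhois" | sqlite3 GeoIPCountryWhois.db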
echo "" > $NOTINWLFILE
echo "" > $SUMMARYFILE
echo "" > $ATTACHEMENT
# Process every new log file for the domain, both gzipped and plain
for newlog in `ls $NEWLOGS/$DOMAIN*.log.gz $NEWLOGS/$DOMAIN*.log`; do
if [[ $newlog =~ \.log\.gz$ ]];then
gunzip $newlog
newlog=${newlog%.gz}
fi
echo "======================"
echo "= $newlog ="
echo "======================"
# echo $cuentaaccesos
# TODO: Count the number of occurrences of each request with sort | uniq -c | sort -k1 -n
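# Build unique "ip:status:url" tuples from the access log; with Apache's default log format,
# awk field 1 is the client IP, field 9 the HTTP status code and field 7 the requested URL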
for access in $(awk '{print $1":"$9":"$7}' $newlog | sort -u);do
# For each of these accesses, check whether it is in the whitelist
ip=$(echo $access | awk -F':' '{print $1}')
url=$(echo $access | awk -F':' '{print $3}')
response=$(echo $access | awk -F':' '{print $2}')
# path=$(echo $url | egrep -o "\/?[^?]+")
path=$(echo $url | sed 's/\?.*$//g')
f=$(basename $path)
extension=${f##*.}
# If the extension is in the whitelist of extensions, skip further checks
exinwl=0
for wlex in ${extensionwl[@]};do
if [[ "$wlex" == "$extension" ]];then
exinwl=1
break
fi
done
# If the path is in the whitelist of files, skip further checks
inwl=$(grep -i $path $WHITELIST | wc -l)
if [[ ! "$inwl" -gt 0 && "$exinwl" -eq 0 ]];then
echo "$ip => $url (Response $response) not in whitelist"
echo "$access" >> $NOTINWLFILE
echo "$TODAY:$access" >> $SUSPICIOUSHISTORY
if [[ "$LEARNING" -gt 0 ]];then
echo "Add to whitelist?: [y/N]"
read add
if [[ "$add" -eq "y" || "$add" -eq "Y" ]];then
echo $path >> $WHITELIST
fi
fi
fi
done
mv $newlog $ANALYZED
done
echo "Summary of IP address:" > $SUMMARYFILE
for iptimes in $(awk -F':' '{print $1}' $NOTINWLFILE | sort | uniq -c | sort -k1 -nr | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]([[:digit:]].*)/\1:\2/');do
if [[ ! "$iptimes" == "" ]];then
ip=$(echo $iptimes | awk -F':' '{print $2}')
hits=$(echo $iptimes | awk -F':' '{print $1}')
country=$(getIPCountry $ip)
echo "$ip ($country): $hits" >> $SUMMARYFILE
fi
done
# Sort the email attachment to group the entries by response code
existentfiles=$(grep ":200:" $NOTINWLFILE)
nonexistenfiles=$(grep ":404:" $NOTINWLFILE)
forbiddenfiles=$(grep ":403:" $NOTINWLFILE)
authfiles=$(grep ":401:" $NOTINWLFILE)
otherfiles=$(grep -vE ":200:|:404:|:403:|:401:" $NOTINWLFILE)
if [[ -n "$existentfiles" ]];then
echo "==========================================" >> $ATTACHEMENT
echo "= WARNING: Existent files (response 200) =" >> $ATTACHEMENT
echo "==========================================" >> $ATTACHEMENT
for f in $existentfiles;do echo $f >> $ATTACHEMENT ; done
fi
if [[ -n "$nonexistenfiles" ]];then
echo "=============================================" >> $ATTACHEMENT
echo "= Non existent files. Scans? (response 404) =" >> $ATTACHEMENT
echo "=============================================" >> $ATTACHEMENT
for f in $nonexistenfiles; do echo $f >> $ATTACHEMENT ; done
fi
if [[ -n "$forbiddenfiles" ]];then
echo "==============================================" >> $ATTACHEMENT
echo "= Forbidden files. Banned IPs (response 403) =" >> $ATTACHEMENT
echo "==============================================" >> $ATTACHEMENT
for f in $forbiddenfiles;do echo $f >> $ATTACHEMENT ; done
fi
if [[ -n "$authfiles" ]];then
echo "================================================" >> $ATTACHEMENT
echo "= Authentication requests files (response 401) =" >> $ATTACHEMENT
echo "================================================" >> $ATTACHEMENT
for f in $authfiles;do echo $f >> $ATTACHEMENT ; done
fi
if [[ -n "$otherfiles" ]];then
echo "==============================================" >> $ATTACHEMENT
echo "= Other files (response not 200,404,403,401) =" >> $ATTACHEMENT
echo "==============================================" >> $ATTACHEMENT
for f in $otherfiles;do echo $f >> $ATTACHEMENT; done
fi
# Email the requests to the website that were not found in the whitelist
mutt -s "[IDS] Requests not found in whitelist" -a $ATTACHEMENT -- $DSTMAIL < $SUMMARYFILE
#!/bin/bash
# Author: Felipe Molina (@felmoltor)
# Date: 04/03/2015
# Summary:
# If you manage a webpage hosted at OVH, this script may be for you.
# It automatically accesses https://logs.ovh.net/ and downloads the access logs of your Apache server.
# You can filter the downloaded logs by start and stop date.
# Set a cron job to download the OVH logs every day or week, then analyze them on your own box.
# TODO: Also download the "error" and "ftp" logs
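# Example invocation (assuming this file is saved as "download.ovh.logs.sh",
# the name referenced by the analysis script):
#   ./download.ovh.logs.sh 03/01/2015 03/04/2015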
usr="<YOUR OVH USER>"
pwd="<YOUR OVH PASSWORD>"
domain="<YOUR OVH DOMAIN>"
function printUsage() {
echo "Usage: $0 <from date> [<to date>]"
echo "* Format of the dates: (month/day/year MM/DD/YYYY)"
echo "* from date: Is mandatory "
echo "* to date: Is optional. Default value is today $(date +%M/%d/%Y)"
}
if [[ "$#" -gt 2 || "$#" -lt 1 ]];then
printUsage
exit 1
fi
fromdate=$1
todate=$2
if [[ "$fromdate" != "" ]];then
if [[ ! $fromdate =~ ^[[:digit:]]{2}/[[:digit:]]{2}/[[:digit:]]{4}$ ]];then
printUsage
exit 2
fi
fi
if [[ "$todate" != "" ]];then
if [[ ! $todate =~ ^[[:digit:]]{2}/[[:digit:]]{2}/[[:digit:]]{4}$ ]];then
printUsage
exit 3
fi
else
todate=$(date +%m/%d/%Y)
fi
echo "Retrieving log files from $fromdate to $todate"
frommonth=$(echo $fromdate | cut -d'/' -f1)
fromyear=$(echo $fromdate | cut -d'/' -f3)
fromday=$(echo $fromdate | cut -d'/' -f2)
tomonth=$(echo $todate | cut -d'/' -f1)
toyear=$(echo $todate | cut -d'/' -f3)
today=$(echo $todate | cut -d'/' -f2)
fromepoch=$(date -d "$fromdate" +%s)
toepoch=$(date -d "$todate" +%s)
if [[ ! -d tmp ]];then
mkdir tmp
fi
if [[ ! -d lists ]];then
mkdir lists
fi
if [[ ! -d logs ]];then
mkdir logs
fi
# Get the available logs dates
echo "Obtaining the list of available folders for domain $domain"
wget --quiet --http-user=$usr --http-password=$pwd https://logs.ovh.net/$domain/ -O tmp/$domain.available.logs.html
# Get only the folder names and filter them by month and year
folderlist=$(egrep -o "<a href=\"logs-.*\">" tmp/$domain.available.logs.html | cut -f2 -d'"' | cut -f1 -d'"' | sort -u)
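# The OVH folder names are expected to follow the pattern "logs-MM-YYYY",
# so field 2 (split on '-') is the month and field 3 the year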
for folder in $(echo $folderlist);do
fmonth=$(echo $folder | cut -d'-' -f2)
fyear=$(echo $folder | cut -d'-' -f3)
download=0
if [[ "10#$year" -lt "10#$toyear" && "10#$fyear" -gt "10#$fromyear" ]];then
download=1
elif [[ "10#$fyear" -eq "10#$toyear" || "10#$fyear" -eq "10#$fromyear" ]];then
if [[ "10#$fmonth" -le "10#$tomonth" && "10#$fmonth" -ge "10#$frommonth" ]];then
download=1
fi
fi
if [[ "$download" -eq "1" ]];then
echo "Will visit folder $folder"
echo $folder >> "lists/$domain.available.logs.list"
fi
done
rm "tmp/$domain.available.logs.html"
for folder in $(cat lists/$domain.available.logs.list); do
wget --quiet --http-user=$usr --http-password=$pwd https://logs.ovh.net/$domain/$folder/ -O tmp/$domain.$folder.available.logs.html
filelist=$(egrep -o "<a href=\".*.log.gz\">" tmp/$domain.$folder.available.logs.html | cut -f2 -d'"' | cut -f1 -d'"' | sort -u)
echo "======================="
echo "==== $folder ==="
echo "======================="
for file in $(echo $filelist);do
ffile=$(echo $file | egrep -o "[[:digit:]]{2}-[[:digit:]]{2}-[[:digit:]]{4}" | sed 's/\(.*\)-\(.*\)-\(.*\)/\2\/\1\/\3/g' )
fyear=$(echo $ffile|cut -d'/' -f3)
fmonth=$(echo $ffile|cut -d'/' -f1)
fday=$(echo $ffile|cut -d'/' -f2)
fepoch=$(date -d "$fmonth/$fday/$fyear" +%s)
if [[ "10#$fepoch" -ge "10#$fromepoch" && "10#$fepoch" -le "10#$toepoch" ]];then
echo " * Downloading file $file..."
wget --quiet --http-user=$usr --http-password=$pwd https://logs.ovh.net/$domain/$folder/$file -O logs/$file
fi
done
rm "tmp/$domain.$folder.available.logs.html"
done
rm -rf tmp
rm -rf lists
# Add this line to your crontab to run both scripts daily at 07:00:
0 7 * * * cd /home/<user>/<path>/ && ./download.yesterday.logs.sh && ./analyze.new.logs.sh