Last active
September 24, 2020 09:37
-
-
Save felmoltor/29760bc6194ef541be3d to your computer and use it in GitHub Desktop.
OVH Apache Access log periodic download
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Author: Felipe Molina (@felmoltor)
# Date: 05/03/2015
# Summary:
# This script analyzes the Apache logs previously downloaded with "download.ovh.logs.sh".
# It compares the requests done yesterday with the whitelist of files of the website
# contained in "whitelist.files.list".
# If one of the requests is not present in this whitelist, the script stores it as
# suspicious along with the server response of the request, and finally a summary is
# sent to your email.

# Directory scanned for logs of $DOMAIN that still have to be analyzed
NEWLOGS="logs/new"
# Directory the logs are moved to once they have been analyzed
ANALYZED="logs/analyzed"
# Recipient of the summary email
DSTMAIL="<YOUR EMAIL@EMAIL.COM>"
# Prefix of the log file names to analyze
DOMAIN="riveraguitar.com"
# File with the known-good paths of the website
WHITELIST="whitelist.files.list"
# When greater than 0, ask interactively whether each unknown path must be whitelisted
LEARNING=0
# Work file: one "ip:response:url" entry per request not found in the whitelist
NOTINWLFILE=".notinwl.txt"
# Work file: body of the email (hits per IP address)
SUMMARYFILE=".summary.txt"
# Work file: email attachment (suspicious requests grouped by response code)
ATTACHEMENT="accesses.list.txt"
# Date stamp (MM/DD/YYYY) prefixed to every entry appended to $SUSPICIOUSHISTORY
TODAY=$(date +%m/%d/%Y)
# Cumulative record of every suspicious request ever reported
SUSPICIOUSHISTORY="suspicious.history.csv"
# Requests for files with these extensions are never reported
extensionwl=("js" "css" "jpg" "jpeg" "gif" "png")
# SQLite database queried by getIPCountry (table GeoIPCountryWhois)
GEOLITEDB="/<path>/maxmind/GeoIPCountryWhois.db"
# Convert a dotted-quad IPv4 address into its decimal (32-bit integer) value.
# Arguments: $1 - IPv4 address, e.g. 192.168.1.1
# Outputs:   the decimal value on stdout
# Returns:   0 on success, 1 (with a message on stderr) if $1 is not a valid
#            IPv4 address
function ip2dec()
{
    local IFS=.
    local -a octets
    local octet num=0
    # read -a splits on the dots without exposing the input to glob expansion
    read -r -a octets <<< "$1"
    if (( ${#octets[@]} != 4 )); then
        echo "ip2dec: invalid IPv4 address: $1" >&2
        return 1
    fi
    for octet in "${octets[@]}"
    do
        # 10# forces base 10 so an octet such as "08" is not parsed as octal
        if [[ ! $octet =~ ^[0-9]{1,3}$ ]] || (( 10#$octet > 255 )); then
            echo "ip2dec: invalid IPv4 address: $1" >&2
            return 1
        fi
        num=$(( num * 256 + 10#$octet ))
    done
    echo "$num"
}
# Look up the country name of an IPv4 address in the GeoIP SQLite database.
# Globals:   GEOLITEDB (read) - path of the SQLite database
#            ip (read)        - used only when no argument is given, which is
#                               how this function originally obtained the address
# Arguments: $1 - IPv4 address to look up
# Outputs:   the value of cty_name returned by the query (may be empty) on stdout
# Returns:   1 without querying when the address cannot be converted to a number
function getIPCountry()
{
    local addr=${1:-$ip}
    local ipdec query country
    ipdec=$(ip2dec "$addr") || return 1
    # Only a plain number may be interpolated into the SQL statement
    [[ "$ipdec" =~ ^[0-9]+$ ]] || return 1
    query="select cty_name from GeoIPCountryWhois where $ipdec between CAST(initipdec AS INTEGER) and CAST(endipdec AS INTEGER) limit 1;"
    country=$(sqlite3 "$GEOLITEDB" "$query")
    printf '%s\n' "$country"
}
# Start every run with truly empty work files. ":" writes nothing, whereas
# echo "" would leave a blank first line that the later per-line processing
# of $NOTINWLFILE treats as an entry with an empty IP address.
: > "$NOTINWLFILE"
: > "$SUMMARYFILE"
: > "$ATTACHEMENT"
# Analyze every new log of $DOMAIN, gzipped or plain.
# Make sure the destination is a directory: otherwise mv would rename each log
# to a file called "$ANALYZED", overwriting the previous one.
mkdir -p -- "$ANALYZED"
for newlog in "$NEWLOGS/$DOMAIN"*.log.gz "$NEWLOGS/$DOMAIN"*.log; do
    # A glob without matches is left as the literal pattern: skip it
    [[ -f "$newlog" ]] || continue
    if [[ "$newlog" == *.log.gz ]]; then
        # Leave the archive in place for a later run if it cannot be decompressed
        gunzip -- "$newlog" || continue
        newlog=${newlog%.gz}
    fi
    echo "======================"
    echo "= $newlog ="
    echo "======================"
    # TODO: count how many times each request appears (sort | uniq -c | sort -k1 -n)
    # Every entry is "ip:response:url" (fields 1, 9 and 7 of the access log).
    # The entries are read through file descriptor 3 so that the interactive
    # "read" of the learning mode below still reads from standard input.
    while IFS= read -r access <&3; do
        # Split on the first two colons only: the URL itself may contain ":".
        # NOTE: this layout assumes the client address contains no ":" (IPv4).
        ip=${access%%:*}
        rest=${access#*:}
        response=${rest%%:*}
        url=${rest#*:}
        # Drop the query string
        path=${url%%\?*}
        f=$(basename -- "$path")
        extension=${f##*.}
        # If it is in the whitelist of extensions skip further checks
        exinwl=0
        for wlex in "${extensionwl[@]}"; do
            if [[ "$wlex" == "$extension" ]]; then
                exinwl=1
                break
            fi
        done
        # If it is in the whitelist of files skip further checks.
        # The path is matched as a fixed, case-insensitive substring of a
        # whitelist line; an empty path is never considered whitelisted.
        inwl=0
        if [[ -n "$path" && -f "$WHITELIST" ]] && grep -qiF -- "$path" "$WHITELIST"; then
            inwl=1
        fi
        if (( inwl == 0 && exinwl == 0 )); then
            echo "$ip => $url (Response $response) not in whitelist"
            printf '%s\n' "$access" >> "$NOTINWLFILE"
            printf '%s\n' "$TODAY:$access" >> "$SUSPICIOUSHISTORY"
            if [[ "$LEARNING" -gt 0 ]]; then
                echo "Add to whitelist?: [y/N]"
                read -r add
                # String comparison: -eq would evaluate both sides arithmetically
                # and accept any answer
                if [[ "$add" == [yY] ]]; then
                    printf '%s\n' "$path" >> "$WHITELIST"
                fi
            fi
        fi
    done 3< <(awk '{print $1":"$9":"$7}' "$newlog" | sort -u)
    mv -- "$newlog" "$ANALYZED/"
done
# Build the body of the email: number of suspicious requests per IP address,
# most active address first.
echo "Summary of IP address:" > "$SUMMARYFILE"
# uniq -c prints "<hits> <ip>"; read splits the two columns and strips the
# padding. File descriptor 3 keeps standard input untouched for the commands
# run inside the loop.
while read -r hits ip <&3; do
    # Ignore entries without an IP address (e.g. blank lines in the work file)
    [[ -n "$ip" ]] || continue
    country=$(getIPCountry "$ip")
    echo "$ip ($country): $hits" >> "$SUMMARYFILE"
done 3< <(awk -F':' '$1 != "" {print $1}' "$NOTINWLFILE" | sort | uniq -c | sort -k1,1nr)
# Sort the email attachment to group it by response

# Append one titled section to $ATTACHEMENT.
# Globals:   ATTACHEMENT (appended to)
# Arguments: $1 - title line of the section
#            $2 - newline-separated "ip:response:url" entries
# Nothing is written when $2 is empty, so empty groups produce no header.
function appendSection()
{
    local title=$1 entries=$2
    [[ -n "$entries" ]] || return 0
    # Frame made of "=" with the same width as the title
    local border=${title//?/=}
    {
        echo "$border"
        echo "$title"
        echo "$border"
        printf '%s\n' "$entries"
    } >> "$ATTACHEMENT"
}

# Select on the response field itself (second ":"-separated field) so that a
# URL that happens to contain ":200:" cannot end up in the wrong group.
existentfiles=$(awk -F':' '$2 == "200"' "$NOTINWLFILE")
nonexistenfiles=$(awk -F':' '$2 == "404"' "$NOTINWLFILE")
forbiddenfiles=$(awk -F':' '$2 == "403"' "$NOTINWLFILE")
authfiles=$(awk -F':' '$2 == "401"' "$NOTINWLFILE")
otherfiles=$(awk -F':' 'NF && $2 !~ /^(200|404|403|401)$/' "$NOTINWLFILE")

appendSection "= WARNING: Existent files (response 200) =" "$existentfiles"
appendSection "= Non existent files. Scans? (response 404) =" "$nonexistenfiles"
appendSection "= Forbidden files. Banned IPs (response 403) =" "$forbiddenfiles"
appendSection "= Authentication requests files (response 401) =" "$authfiles"
appendSection "= Other files (response not 200,404,403,401) =" "$otherfiles"
# Send email with the requests done to the web not found in the whitelist.
# mutt expects -a as the last option, followed by "--" and the recipient.
# The subject text is kept exactly as it always was so existing mail filters
# keep matching.
mutt -s "[IDS] Not found in whitelist rerquests" -a "$ATTACHEMENT" -- "$DSTMAIL" < "$SUMMARYFILE"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Author: Felipe Molina (@felmoltor)
# Date: 04/03/2015
# Summary:
# If you manage a hosted webpage in OVH maybe this script is for you.
# This script automatically accesses https://logs.ovh.net/ and downloads the access
# logs of your Apache server.
# You can filter the downloaded logs by start and stop date.
# Set a cron to download the OVH logs every day or week. Then analyze them in your own box.
# TODO: Download also the "error" logs and "ftp" logs

# Credentials and domain used for HTTP authentication against logs.ovh.net.
# Each value can be supplied through the environment (OVH_USER, OVH_PASSWORD,
# OVH_DOMAIN) instead of being written into this file; the placeholders below
# are used when the corresponding variable is unset or empty.
usr=${OVH_USER:-"<YOUR OVH USER>"}
pwd=${OVH_PASSWORD:-"<YOUR OVH PASSWORD>"}
domain=${OVH_DOMAIN:-"<YOUR OVH DOMAIN>"}
# Print the command line help on stdout.
# The default "to date" shown is today's date as MM/DD/YYYY
# (%m is the month; %M would print the minutes).
function printUsage() {
    echo "Usage: $0 <from date> [<to date>]"
    echo "* Format of the dates: (month/day/year MM/DD/YYYY)"
    echo "* from date: Is mandatory "
    echo "* to date: Is optional. Default value is today $(date +%m/%d/%Y)"
}
# One or two arguments are accepted: <from date> [<to date>]
if [[ "$#" -gt 2 || "$#" -lt 1 ]]; then
    printUsage
    exit 1
fi
fromdate=$1
todate=$2
# Both dates must look like MM/DD/YYYY
datere='^[[:digit:]]{2}/[[:digit:]]{2}/[[:digit:]]{4}$'
# The from date is mandatory, so an empty value is rejected as well
if [[ ! $fromdate =~ $datere ]]; then
    printUsage
    exit 2
fi
if [[ "$todate" != "" ]]; then
    if [[ ! $todate =~ $datere ]]; then
        printUsage
        exit 3
    fi
else
    # Default to today (%m is the month; %M would be the minutes)
    todate=$(date +%m/%d/%Y)
fi
echo "Retrieving log files from $fromdate to $todate"
# Split both dates into month, day and year
IFS=/ read -r frommonth fromday fromyear <<< "$fromdate"
IFS=/ read -r tomonth today toyear <<< "$todate"
# Epoch seconds of both dates; date -d also rejects impossible dates such as 13/45/2015
if ! fromepoch=$(date -d "$fromdate" +%s); then
    echo "Invalid from date: $fromdate" >&2
    exit 2
fi
if ! toepoch=$(date -d "$todate" +%s); then
    echo "Invalid to date: $todate" >&2
    exit 3
fi
# Working directories
mkdir -p tmp lists logs
# Get the available logs dates
echo "Obtaining the list of available folders for domain $domain"
wget --quiet --http-user="$usr" --http-password="$pwd" "https://logs.ovh.net/$domain/" -O "tmp/$domain.available.logs.html"
# Get only the name of the folders (href values starting with "logs-")
folderlist=$(grep -Eo '<a href="logs-[^"]*">' "tmp/$domain.available.logs.html" | cut -f2 -d'"' | sort -u)
# Compare year and month as a single YYYYMM number so that ranges spanning
# several years are handled; 10# keeps months such as "08" from being read as octal
fromym=$(( 10#$fromyear * 100 + 10#$frommonth ))
toym=$(( 10#$toyear * 100 + 10#$tomonth ))
# Start from an empty list so folders of an earlier, interrupted run are not reused
: > "lists/$domain.available.logs.list"
while IFS= read -r folder; do
    [[ -n "$folder" ]] || continue
    # Folder names are read as logs-<month>-<year>
    IFS=- read -r _ fmonth fyear _ <<< "$folder"
    # Tolerate trailing non-digits after the year (e.g. a "/" in the link)
    fyear=${fyear%%[!0-9]*}
    if [[ ! $fmonth =~ ^[0-9]+$ || ! $fyear =~ ^[0-9]+$ ]]; then
        continue
    fi
    fym=$(( 10#$fyear * 100 + 10#$fmonth ))
    if (( fym >= fromym && fym <= toym )); then
        echo "Will visit folder $folder"
        printf '%s\n' "$folder" >> "lists/$domain.available.logs.list"
    fi
done <<< "$folderlist"
rm -f -- "tmp/$domain.available.logs.html"
# Visit every selected folder and download the logs dated within the range
folderfile="lists/$domain.available.logs.list"
# File names carry their date as DD-MM-YYYY
filedatere='([0-9]{2})-([0-9]{2})-([0-9]{4})'
if [[ -r "$folderfile" ]]; then
    # The lists are read through file descriptors 3 and 4 so that standard
    # input stays untouched for the commands run inside the loops
    while IFS= read -r folder <&3; do
        [[ -n "$folder" ]] || continue
        listing="tmp/$domain.$folder.available.logs.html"
        wget --quiet --http-user="$usr" --http-password="$pwd" "https://logs.ovh.net/$domain/$folder/" -O "$listing"
        filelist=$(grep -Eo '<a href="[^"]*\.log\.gz">' "$listing" | cut -f2 -d'"' | sort -u)
        echo "======================="
        echo "==== $folder ==="
        echo "======================="
        while IFS= read -r file <&4; do
            # Skip empty entries and names without a date
            [[ $file =~ $filedatere ]] || continue
            fday=${BASH_REMATCH[1]}
            fmonth=${BASH_REMATCH[2]}
            fyear=${BASH_REMATCH[3]}
            # Skip names whose date does not exist
            fepoch=$(date -d "$fmonth/$fday/$fyear" +%s 2>/dev/null) || continue
            if (( fepoch >= fromepoch && fepoch <= toepoch )); then
                echo " * Downloading file $file..."
                if ! wget --quiet --http-user="$usr" --http-password="$pwd" "https://logs.ovh.net/$domain/$folder/$file" -O "logs/$file"; then
                    # wget -O creates the output file even when the transfer
                    # fails: do not leave an empty or partial log behind
                    echo "   ! Failed to download $file" >&2
                    rm -f -- "logs/$file"
                fi
            fi
        done 4<<< "$filelist"
        rm -f -- "$listing"
    done 3< "$folderfile"
fi
rm -rf tmp
rm -rf lists
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add to crontab
0 7 * * * cd /home/<user>/<path>/ && ./download.yesterday.logs.sh && ./analyze.new.logs.sh
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment