Created
June 18, 2020 09:55
-
-
Save Ian-Gabaraev/ced6df75940330c20e48034c9fa33084 to your computer and use it in GitHub Desktop.
Parse server access logs, and know where the request originated from, all organised in an Excel worksheet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import os | |
import requests | |
import xlsxwriter | |
# create a Workbook object by passing | |
# your Excel worksheet filename | |
workbook = xlsxwriter.Workbook('IPADDR.xlsx') | |
worksheet = workbook.add_worksheet() | |
# Your free ipstack access token | |
TOKEN = None | |
api_call = "http://api.ipstack.com/%s?access_key=" + TOKEN | |
# regular expression to filter out ipv4 addresses | |
# like 255.255.255.255 | |
regex = r'\b(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|' \ | |
r'[1-9]?[0-9])\.(25[0-5]|2[0-4][0-9]|' \ | |
r'1[0-9][0-9]|[1-9]?[0-9])\.(25[0-5]|' \ | |
r'2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])' \ | |
r'\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|' \ | |
r'[1-9]?[0-9])\b' | |
# sets do not allow duplicates | |
all_ips = set() | |
# starting with row 0 and column 0 | |
row = 0 | |
col = 0 | |
# read logs from the /data directory in the current folder | |
# filter out IPs and add them to all_ips | |
for fname in os.listdir(os.getcwd()+'/data'): | |
with open('data/'+fname) as current_file: | |
for line in current_file: | |
all_ips.add(re.match(regex, line).group(0)) | |
# iterate over the set and query the api | |
# extract the data from JSON | |
for element in list(all_ips): | |
response = requests.get(api_call % element).json() | |
worksheet.write(row, col, response['country_name']) | |
worksheet.write(row, col + 1, response['ip']) | |
row += 1 | |
workbook.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment