Skip to content

Instantly share code, notes, and snippets.

@Ian-Gabaraev
Created June 18, 2020 09:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Ian-Gabaraev/ced6df75940330c20e48034c9fa33084 to your computer and use it in GitHub Desktop.
Save Ian-Gabaraev/ced6df75940330c20e48034c9fa33084 to your computer and use it in GitHub Desktop.
Parse server access logs, and know where the request originated from, all organised in an Excel worksheet
import re
import os
import requests
import xlsxwriter
# create a Workbook object by passing
# your Excel worksheet filename
workbook = xlsxwriter.Workbook('IPADDR.xlsx')
worksheet = workbook.add_worksheet()
# Your free ipstack access token
TOKEN = None
api_call = "http://api.ipstack.com/%s?access_key=" + TOKEN
# regular expression to filter out ipv4 addresses
# like 255.255.255.255
regex = r'\b(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|' \
r'[1-9]?[0-9])\.(25[0-5]|2[0-4][0-9]|' \
r'1[0-9][0-9]|[1-9]?[0-9])\.(25[0-5]|' \
r'2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])' \
r'\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|' \
r'[1-9]?[0-9])\b'
# sets do not allow duplicates
all_ips = set()
# starting with row 0 and column 0
row = 0
col = 0
# read logs from the /data directory in the current folder
# filter out IPs and add them to all_ips
for fname in os.listdir(os.getcwd()+'/data'):
with open('data/'+fname) as current_file:
for line in current_file:
all_ips.add(re.match(regex, line).group(0))
# iterate over the set and query the api
# extract the data from JSON
for element in list(all_ips):
response = requests.get(api_call % element).json()
worksheet.write(row, col, response['country_name'])
worksheet.write(row, col + 1, response['ip'])
row += 1
workbook.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment