Skip to content

Instantly share code, notes, and snippets.

@Programmer-Lily
Created May 28, 2025 20:42
Show Gist options
  • Select an option

  • Save Programmer-Lily/8965d247806bd2c5fe29ff10ad5b17b4 to your computer and use it in GitHub Desktop.

Select an option

Save Programmer-Lily/8965d247806bd2c5fe29ff10ad5b17b4 to your computer and use it in GitHub Desktop.
IIS Log Parser UI -- Investigative inquiry
import os
import re
import pandas as pd
import subprocess
from PyQt5.QtWidgets import (
QApplication, QWidget, QVBoxLayout, QLabel, QComboBox,
QTextEdit, QPushButton, QHBoxLayout, QFileDialog,
QTabWidget, QLineEdit, QListWidget, QDateEdit, QCheckBox
)
from PyQt5.QtCore import Qt, QDate
import pyqtgraph as pg
# Root folder under which each site's W3SVC<n> log directory is looked up.
IIS_LOG_DIR = r"C:\inetpub\logs\LogFiles"

# User-Agent prefixes treated as ordinary browsers by the bot scan; a
# User-Agent that starts with none of these is reported as suspicious.
USER_AGENT_WHITELIST = [
    "Mozilla/5.0",
    "Chrome/",
    "Safari/",
    "Edge/",
    "Firefox/",
    "Opera/",
    "Trident/",
]
def _parse_service_state(output):
    """Turn ``netsh http show servicestate`` text into a site mapping.

    The text is split on ``URL group ID:``; every section that names a
    ``Log directory`` ending in ``W3SVC<n>`` contributes one entry.

    Args:
        output: Full text printed by the netsh command.

    Returns:
        dict mapping a display name (the section's URLs joined with ", ",
        or the W3SVC folder name when no URL was found) to the W3SVC folder
        name. Sections that produce the same display name overwrite each
        other, last one wins.
    """
    # Compiled once here instead of once per section / per line.
    log_dir_re = re.compile(r'Log directory:\s+(.*?W3SVC\d+)', re.IGNORECASE)
    url_re = re.compile(r'(https?://[^\s/]+(:\d+)?(/[^\s]*)?)', re.IGNORECASE)

    mapping = {}
    for block in re.split(r'URL group ID:', output):
        log_match = log_dir_re.search(block)
        if not log_match:
            continue
        svc_name = os.path.basename(log_match.group(1).strip())

        urls = []
        if 'Registered URLs:' in block:
            for line in block.splitlines():
                if m := url_re.search(line.strip()):
                    urls.append(m.group(1))

        display_name = ', '.join(urls) if urls else svc_name
        mapping[display_name] = svc_name
    return mapping


def get_site_mapping():
    """Discover IIS sites by querying ``netsh http show servicestate``.

    Best-effort: if the command cannot be started or exits with an error,
    an empty mapping is returned instead of raising.

    Returns:
        dict mapping a human-readable site label to its W3SVC log folder
        name (see ``_parse_service_state``).
    """
    try:
        output = subprocess.check_output(
            ['netsh', 'http', 'show', 'servicestate'],
            encoding='utf-8',
            # Console output is not guaranteed to be valid UTF-8; replace
            # undecodable bytes rather than losing the whole mapping.
            errors='replace',
            stderr=subprocess.DEVNULL,
        )
    except (OSError, subprocess.SubprocessError):
        return {}
    return _parse_service_state(output)
def parse_log_file(file_path):
    """Parse one W3C-style IIS log file into a DataFrame.

    A ``#Fields:`` directive defines the column names for the data lines
    that follow it (a later directive replaces the earlier one). Other
    ``#`` lines are ignored, as are data lines seen before any directive
    and data lines whose token count differs from the current field count.

    Args:
        file_path: Path of the log file to read.

    Returns:
        pandas.DataFrame with one row per accepted line, all values as
        strings. When both ``date`` and ``time`` columns exist, a
        ``datetime`` column is added (unparseable values become NaT).
        The frame is empty, with no columns, if no line was accepted.

    Raises:
        OSError: If the file cannot be opened.
    """
    records = []
    fields = []
    # utf-8-sig drops a leading BOM, which would otherwise hide a
    # "#Fields:" directive on the very first line of the file.
    with open(file_path, encoding="utf-8-sig", errors="ignore") as f:
        for line in f:
            if line.startswith("#Fields:"):
                fields = line.split()[1:]
            elif fields and not line.startswith("#"):
                parts = line.split()
                if len(parts) == len(fields):
                    records.append(dict(zip(fields, parts)))

    df = pd.DataFrame(records)
    if 'date' in df.columns and 'time' in df.columns:
        df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'], errors='coerce')
    return df
class IISAnalyzer(QWidget):
    """Main window of the IIS log analyzer.

    Five tabs share one DataFrame (``self.df``) holding the parsed logs of
    the currently selected site:

    * Overview - summary text and a requests-per-day bar chart
    * IP Lookup - incremental IP search with a per-IP request report
    * Bot/User-Agent Scan - User-Agents not matching USER_AGENT_WHITELIST
    * URL Request Lookup - requests whose URL stem contains a substring
    * Detailed Filter View - date-range and User-Agent substring filter

    Attributes:
        df: Parsed log rows for the selected site, or None if nothing is
            loaded.
        sites: Mapping of site label -> W3SVC folder name.
        all_ips: Sorted unique client IPs found in ``df``.
    """

    def __init__(self):
        """Build all tabs, then fill the site selector."""
        super().__init__()
        self.setWindowTitle("IIS Log Analyzer")
        self.resize(1000, 700)
        self.df = None
        self.sites = {}
        self.all_ips = []
        # NOTE: this attribute hides the inherited QWidget.layout() method on
        # the instance; the name is kept so existing users of it still work.
        self.layout = QVBoxLayout(self)
        self.tabs = QTabWidget()
        self.layout.addWidget(self.tabs)
        self.init_overview_tab()
        self.init_ip_lookup_tab()
        self.init_bot_tab()
        self.init_url_search_tab()
        self.init_detailed_filter_tab()
        # Must run last: adding sites triggers load_site_logs, which writes
        # to widgets created by the init_* calls above.
        self.populate_sites()

    def init_overview_tab(self):
        """Create the Overview tab: site selector, export button, summary, chart."""
        self.overview_tab = QWidget()
        self.tabs.addTab(self.overview_tab, "Overview")
        layout = QVBoxLayout(self.overview_tab)

        controls = QHBoxLayout()
        self.site_selector = QComboBox()
        self.export_button = QPushButton("Export Summary")
        controls.addWidget(QLabel("Select IIS Site:"))
        controls.addWidget(self.site_selector)
        controls.addWidget(self.export_button)
        layout.addLayout(controls)

        self.output = QTextEdit()
        self.output.setReadOnly(True)
        layout.addWidget(self.output, 2)

        self.graph_widget = pg.PlotWidget(title="Requests Per Day")
        self.graph_widget.setLabel('left', 'Requests')
        self.graph_widget.setLabel('bottom', 'Day')
        self.graph_widget.showGrid(x=True, y=True)
        layout.addWidget(self.graph_widget, 3)

        self.site_selector.currentTextChanged.connect(self.load_site_logs)
        self.export_button.clicked.connect(self.export_summary)

    def init_ip_lookup_tab(self):
        """Create the IP Lookup tab: search box, match list and report pane."""
        self.ip_tab = QWidget()
        self.tabs.addTab(self.ip_tab, "IP Lookup")
        layout = QVBoxLayout(self.ip_tab)

        controls = QHBoxLayout()
        self.ip_search_input = QLineEdit()
        self.ip_search_input.setPlaceholderText("Type to search IPs...")
        self.ip_button = QPushButton("Clear Search")
        controls.addWidget(QLabel("Search IP:"))
        controls.addWidget(self.ip_search_input)
        controls.addWidget(self.ip_button)
        layout.addLayout(controls)

        self.ip_result_list = QListWidget()
        self.ip_output = QTextEdit()
        self.ip_output.setReadOnly(True)
        layout.addWidget(self.ip_result_list)
        layout.addWidget(self.ip_output)

        self.ip_search_input.textChanged.connect(self.filter_ip_list)
        self.ip_result_list.itemClicked.connect(self.display_ip_report)
        self.ip_button.clicked.connect(lambda: self.ip_search_input.clear())

    def init_bot_tab(self):
        """Create the Bot/User-Agent Scan tab: result pane and scan button."""
        self.bot_tab = QWidget()
        self.tabs.addTab(self.bot_tab, "Bot/User-Agent Scan")
        layout = QVBoxLayout(self.bot_tab)

        self.bot_output = QTextEdit()
        self.bot_output.setReadOnly(True)
        layout.addWidget(self.bot_output)

        self.scan_bots_button = QPushButton("Scan for Non-Standard User-Agents")
        self.scan_bots_button.clicked.connect(self.scan_for_bots)
        layout.addWidget(self.scan_bots_button)

    def init_url_search_tab(self):
        """Create the URL Request Lookup tab: search box, button and result pane."""
        self.url_tab = QWidget()
        self.tabs.addTab(self.url_tab, "URL Request Lookup")
        layout = QVBoxLayout(self.url_tab)

        controls = QHBoxLayout()
        self.url_search_input = QLineEdit()
        self.url_search_input.setPlaceholderText("Enter part of a URL to search")
        self.url_search_button = QPushButton("Search")
        controls.addWidget(QLabel("Search URL:"))
        controls.addWidget(self.url_search_input)
        controls.addWidget(self.url_search_button)
        layout.addLayout(controls)

        self.url_output = QTextEdit()
        self.url_output.setReadOnly(True)
        layout.addWidget(self.url_output)

        self.url_search_button.clicked.connect(self.search_url_requests)

    def init_detailed_filter_tab(self):
        """Create the Detailed Filter View tab: date range, UA filter, results."""
        self.detailed_tab = QWidget()
        self.tabs.addTab(self.detailed_tab, "Detailed Filter View")
        layout = QVBoxLayout(self.detailed_tab)

        control_layout = QHBoxLayout()
        self.start_date = QDateEdit()
        self.end_date = QDateEdit()
        self.ua_input = QLineEdit()
        self.filter_button = QPushButton("Apply Filter")
        self.ua_input.setPlaceholderText("User-Agent contains...")
        self.start_date.setDate(QDate.currentDate().addMonths(-1))
        self.end_date.setDate(QDate.currentDate().addDays(1))
        control_layout.addWidget(QLabel("Start Date:"))
        control_layout.addWidget(self.start_date)
        control_layout.addWidget(QLabel("End Date:"))
        control_layout.addWidget(self.end_date)
        control_layout.addWidget(QLabel("User-Agent Filter:"))
        control_layout.addWidget(self.ua_input)
        control_layout.addWidget(self.filter_button)

        self.detailed_output = QTextEdit()
        self.detailed_output.setReadOnly(True)
        layout.addLayout(control_layout)
        layout.addWidget(self.detailed_output)

        self.filter_button.clicked.connect(self.run_detailed_filter)

    def run_detailed_filter(self):
        """List every request inside the chosen date range, oldest first.

        Both the start and the end date are inclusive (the whole end day is
        covered). If the User-Agent box is non-empty, only rows whose
        User-Agent contains that text as a literal substring are kept.
        """
        if self.df is None or 'datetime' not in self.df.columns:
            self.detailed_output.setPlainText("No log data loaded.")
            return

        df = self.df[self.df['datetime'].notna()]
        start = pd.Timestamp(self.start_date.date().toString("yyyy-MM-dd"))
        # The date edit yields midnight of the end day; compare against the
        # next midnight so requests made during the end day are included.
        end_exclusive = (
            pd.Timestamp(self.end_date.date().toString("yyyy-MM-dd"))
            + pd.Timedelta(days=1)
        )
        df = df[(df['datetime'] >= start) & (df['datetime'] < end_exclusive)]

        ua_substr = self.ua_input.text().strip()
        if ua_substr:
            if 'cs(User-Agent)' not in df.columns:
                self.detailed_output.setPlainText("No matching entries found.")
                return
            # regex=False: the user types plain text, not a pattern.
            df = df[df['cs(User-Agent)'].str.contains(ua_substr, na=False, regex=False)]

        if df.empty:
            self.detailed_output.setPlainText("No matching entries found.")
            return

        df_sorted = df.sort_values(by='datetime')
        lines = [
            f"[{row.get('datetime', '')}] IP: {row.get('c-ip', '')} | {row.get('cs-method', '')} {row.get('cs-uri-stem', '')} | Status: {row.get('sc-status', '')} | UA: {row.get('cs(User-Agent)', '')}"
            for _, row in df_sorted.iterrows()
        ]
        # Plain text: log content is untrusted and must not be parsed as markup.
        self.detailed_output.setPlainText("\n".join(lines))

    def search_url_requests(self):
        """Show request counts per (URL stem, client IP) for matching URLs.

        The search text is matched as a literal, case-sensitive substring of
        ``cs-uri-stem``. Results are sorted by count, highest first. An
        empty search box leaves the output untouched.
        """
        if self.df is None or 'cs-uri-stem' not in self.df.columns:
            self.url_output.setPlainText("No log data or URL field not found.")
            return
        query = self.url_search_input.text().strip()
        if not query:
            return

        # regex=False so characters such as "?", "(" or "." match themselves.
        matches = self.df['cs-uri-stem'].str.contains(query, na=False, regex=False)
        filtered = self.df[matches]
        if filtered.empty:
            self.url_output.setPlainText("No matching URLs found.")
            return

        # Group by client IP too when that column was logged.
        group_cols = [col for col in ('cs-uri-stem', 'c-ip') if col in filtered.columns]
        grouped = filtered.groupby(group_cols).size().reset_index(name='count')
        grouped = grouped.sort_values(by='count', ascending=False)
        lines = [
            " | ".join([str(row[col]) for col in group_cols] + [f"{row['count']} requests"])
            for _, row in grouped.iterrows()
        ]
        self.url_output.setPlainText("\n".join(lines) if lines else "No matching URLs found.")

    def scan_for_bots(self):
        """Report User-Agents that start with none of USER_AGENT_WHITELIST.

        Empty or missing User-Agents count as suspicious. The output lists
        counts per User-Agent and, when client IPs were logged, counts per
        (IP, User-Agent) pair.
        """
        if self.df is None or 'cs(User-Agent)' not in self.df.columns:
            self.bot_output.setPlainText("No log data or User-Agent field not found.")
            return

        allowed_prefixes = tuple(USER_AGENT_WHITELIST)

        def is_suspect(ua):
            # Non-string values (NaN from logs without this field) have no
            # startswith(); treat them like an empty User-Agent.
            if not isinstance(ua, str) or not ua:
                return True
            return not ua.startswith(allowed_prefixes)

        suspicious = self.df[self.df['cs(User-Agent)'].apply(is_suspect)]
        grouped = suspicious.groupby(['cs(User-Agent)']).size().reset_index(name='count')
        grouped = grouped.sort_values(by='count', ascending=False)
        if grouped.empty:
            self.bot_output.setPlainText("No suspicious user-agents found.")
            return

        lines = ["Suspicious User-Agent Count:"]
        lines.extend(f"{row['cs(User-Agent)']}: {row['count']}" for _, row in grouped.iterrows())

        if 'c-ip' in suspicious.columns:
            ip_grouped = suspicious.groupby(['c-ip', 'cs(User-Agent)']).size().reset_index(name='count')
            ip_grouped = ip_grouped.sort_values(by='count', ascending=False)
            details = [
                f"{row['c-ip']} | {row['cs(User-Agent)']} | {row['count']} requests"
                for _, row in ip_grouped.iterrows()
            ]
            if details:
                lines.append("\nDetails:")
                lines.extend(details)

        self.bot_output.setPlainText("\n".join(lines))

    def populate_sites(self):
        """Refresh the site selector from get_site_mapping()."""
        self.site_selector.clear()
        self.sites = get_site_mapping()
        for label in self.sites:
            self.site_selector.addItem(label)

    def _reset_loaded_data(self, message):
        """Drop any loaded logs and show *message* on the Overview tab."""
        self.df = None
        self.all_ips = []
        self.ip_result_list.clear()
        self.output.setPlainText(message)
        self.graph_widget.clear()

    def load_site_logs(self):
        """Load every ``.log`` file of the selected site and refresh the views.

        Connected to the site selector's ``currentTextChanged`` signal; the
        selection is read from the combo box. If the site is unknown, its
        folder cannot be listed, or no usable rows are found, the loaded
        data is cleared and a message is shown instead.
        """
        site = self.site_selector.currentText()
        log_folder = self.sites.get(site)
        if not log_folder:
            # Happens e.g. when the selector is cleared and the text is "".
            self._reset_loaded_data("No logs found.")
            return

        site_path = os.path.join(IIS_LOG_DIR, log_folder)
        try:
            log_names = sorted(name for name in os.listdir(site_path) if name.endswith(".log"))
        except OSError as exc:
            self._reset_loaded_data(f"Could not read log folder {site_path}: {exc}")
            return

        frames = []
        for name in log_names:
            try:
                frame = parse_log_file(os.path.join(site_path, name))
            except OSError:
                # One unreadable file should not block the remaining ones.
                continue
            if not frame.empty:
                frames.append(frame)

        if not frames:
            self._reset_loaded_data("No logs found.")
            return

        combined = pd.concat(frames, ignore_index=True)
        if 'datetime' not in combined.columns:
            # No file logged both date and time; nothing can be charted or
            # filtered by date.
            self._reset_loaded_data("No logs found.")
            return

        self.df = combined[combined['datetime'].notna()]
        self.analyze_logs()
        self.populate_ips()

    def analyze_logs(self):
        """Write the summary of ``self.df`` to the Overview pane and redraw the chart."""
        df = self.df
        summary = [f"Total Requests: {len(df)}"]
        if 'c-ip' in df.columns:
            summary.append(f"Unique IPs: {df['c-ip'].nunique()}")
        if 'sc-status' in df.columns:
            summary.append("\nStatus Code Breakdown:")
            for code, count in df['sc-status'].value_counts().sort_index().items():
                summary.append(f"{code}: {count}")
        if 'cs-method' in df.columns:
            summary.append("\nMethod Breakdown:")
            for method, count in df['cs-method'].value_counts().items():
                summary.append(f"{method}: {count}")
        if 'cs-uri-stem' in df.columns:
            summary.append("\nTop Requested URLs:")
            for url, count in df['cs-uri-stem'].value_counts().head(50).items():
                summary.append(f"{url}: {count}")
        if 'c-ip' in df.columns:
            summary.append("\nTop IPs:")
            for ip, count in df['c-ip'].value_counts().head(50).items():
                summary.append(f"{ip}: {count}")
        self.output.setPlainText("\n".join(summary))
        self.update_graph(df)

    def update_graph(self, df):
        """Redraw the requests-per-day bar chart from ``df['datetime']``.

        Args:
            df: DataFrame with a datetime-typed ``datetime`` column.
        """
        date_counts = df['datetime'].dt.date.value_counts().sort_index()
        labels = list(date_counts.index.astype(str))
        heights = list(date_counts.values)

        self.graph_widget.clear()
        if heights:
            bars = pg.BarGraphItem(x=list(range(len(labels))), height=heights, width=0.6, brush='c')
            self.graph_widget.addItem(bars)
        # Bars sit at integer positions; label each position with its date.
        self.graph_widget.getAxis('bottom').setTicks([list(enumerate(labels))])

    def export_summary(self):
        """Save the Overview summary text to a file chosen by the user."""
        if self.df is None:
            return
        filename, _ = QFileDialog.getSaveFileName(self, "Export Summary", "", "Text Files (*.txt)")
        if filename:
            with open(filename, "w", encoding="utf-8") as f:
                f.write(self.output.toPlainText())

    def populate_ips(self):
        """Rebuild ``self.all_ips`` from the loaded data and refresh the match list."""
        if self.df is not None and 'c-ip' in self.df.columns:
            self.all_ips = sorted(set(self.df['c-ip'].dropna()))
        else:
            # Do not keep offering IPs that belong to previously loaded data.
            self.all_ips = []
        self.filter_ip_list()

    def filter_ip_list(self):
        """Show up to 100 known IPs that contain the search text."""
        text = self.ip_search_input.text().strip()
        self.ip_result_list.clear()
        if not text:
            return
        matches = [ip for ip in self.all_ips if text in ip]
        self.ip_result_list.addItems(matches[:100])

    def display_ip_report(self, item):
        """Show every logged request of the IP in the clicked list item.

        Args:
            item: The clicked list item; its text is the IP address.
        """
        selected_ip = item.text()
        if self.df is None or 'c-ip' not in self.df.columns:
            self.ip_output.setPlainText("No log data loaded.")
            return

        df_ip = self.df[self.df['c-ip'] == selected_ip]
        report = [f"Activity for IP: {selected_ip}", f"Total Requests: {len(df_ip)}"]
        if not df_ip.empty:
            report.append("\nAll Requests:")
            for _, row in df_ip.iterrows():
                row_summary = f"[{row.get('datetime', '')}] {row.get('cs-method', '')} {row.get('cs-uri-stem', '')} {row.get('sc-status', '')} UA: {row.get('cs(User-Agent)', '')}"
                report.append(row_summary)
        self.ip_output.setPlainText("\n".join(report))
if __name__ == "__main__":
    import sys

    # Start the Qt application, show the analyzer window, and exit the
    # process with the event loop's return code.
    app = QApplication(sys.argv)
    analyzer = IISAnalyzer()
    analyzer.show()
    sys.exit(app.exec_())
@Programmer-Lily
Copy link
Copy Markdown
Author

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment