Created
May 28, 2025 20:42
-
-
Save Programmer-Lily/8965d247806bd2c5fe29ff10ad5b17b4 to your computer and use it in GitHub Desktop.
IIS Log Parser UI -- Investigative inquiry
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import re | |
| import pandas as pd | |
| import subprocess | |
| from PyQt5.QtWidgets import ( | |
| QApplication, QWidget, QVBoxLayout, QLabel, QComboBox, | |
| QTextEdit, QPushButton, QHBoxLayout, QFileDialog, | |
| QTabWidget, QLineEdit, QListWidget, QDateEdit, QCheckBox | |
| ) | |
| from PyQt5.QtCore import Qt, QDate | |
| import pyqtgraph as pg | |
# Root folder under which IIS keeps one "W3SVC<n>" sub-folder of logs per site.
IIS_LOG_DIR = r"C:\inetpub\logs\LogFiles"

# User-Agent prefixes regarded as ordinary browser traffic; a request whose
# User-Agent starts with none of these is reported by the bot scan.
USER_AGENT_WHITELIST = [
    "Mozilla/5.0",
    "Chrome/", "Safari/", "Edge/",
    "Firefox/", "Opera/", "Trident/",
]
def get_site_mapping(output=None):
    """Map a human-readable site label to its IIS log folder name.

    The label is the comma-separated list of URLs registered for a URL group
    in ``netsh http show servicestate`` output; when a group lists no URLs the
    folder name (``W3SVC<n>``) is used as the label instead.

    Args:
        output: Optional text of a ``netsh http show servicestate`` run.  When
            omitted, the command is executed on the local machine.

    Returns:
        dict mapping display label -> ``W3SVC<n>`` folder name.  Empty when
        the command cannot be run or no URL group reports a log directory.
    """
    # Log directories are Windows paths no matter which host parses the text,
    # so split them with ntpath rather than the host-dependent os.path.
    import ntpath

    if output is None:
        try:
            output = subprocess.check_output(
                ['netsh', 'http', 'show', 'servicestate'],
                encoding='utf-8',
                # netsh may emit bytes that are not valid UTF-8; replace them
                # instead of failing the whole lookup on a decode error.
                errors='replace',
                stderr=subprocess.DEVNULL,
            )
        except (OSError, subprocess.SubprocessError):
            # netsh missing (non-Windows host) or exited with an error.
            return {}

    log_dir_re = re.compile(r'Log directory:\s+(.*?W3SVC\d+)', re.IGNORECASE)
    url_re = re.compile(r'https?://[^\s/]+(?:/\S*)?', re.IGNORECASE)

    mapping = {}
    for block in output.split('URL group ID:'):
        log_match = log_dir_re.search(block)
        if not log_match:
            continue
        svc_name = ntpath.basename(log_match.group(1).strip())

        urls = []
        if 'Registered URLs:' in block:
            for line in block.splitlines():
                url_match = url_re.search(line)
                # Keep first-seen order but list each URL only once.
                if url_match and url_match.group(0) not in urls:
                    urls.append(url_match.group(0))

        display_name = ', '.join(urls) if urls else svc_name
        mapping[display_name] = svc_name
    return mapping
def parse_log_file(file_path):
    """Parse one IIS W3C extended log file into a DataFrame.

    Column names come from the most recent ``#Fields:`` directive; every
    other ``#`` line is skipped.  A data line is kept only when it has exactly
    as many whitespace-separated values as there are declared fields.

    Args:
        file_path: Path of the ``.log`` file to read.

    Returns:
        pandas.DataFrame with one string column per field.  When both ``date``
        and ``time`` fields are present a ``datetime`` column is added, with
        unparseable values set to NaT.  A file without usable rows yields an
        empty frame that has no ``datetime`` column.

    Raises:
        OSError: If the file cannot be opened.
    """
    records = []
    fields = []
    # utf-8-sig drops a leading byte-order mark, which would otherwise hide
    # a "#Fields:" directive on the first line and discard every row.
    with open(file_path, encoding="utf-8-sig", errors="ignore") as f:
        for line in f:
            if line.startswith("#Fields:"):
                fields = line.split()[1:]
            elif fields and not line.startswith("#"):
                parts = line.split()
                if len(parts) == len(fields):
                    records.append(dict(zip(fields, parts)))

    df = pd.DataFrame(records)
    if 'date' in df.columns and 'time' in df.columns:
        # W3C logs use a fixed "YYYY-MM-DD HH:MM:SS" layout; pinning it avoids
        # format inference and makes malformed rows coerce to NaT predictably.
        df['datetime'] = pd.to_datetime(
            df['date'] + ' ' + df['time'],
            format='%Y-%m-%d %H:%M:%S',
            errors='coerce',
        )
    return df
class IISAnalyzer(QWidget):
    """Tabbed window for exploring the IIS logs of one selected site.

    Tabs: an overview (text summary plus requests-per-day bar chart), an IP
    lookup, a scan for User-Agents outside ``USER_AGENT_WHITELIST``, a URL
    substring search, and a date / User-Agent filter view.  All tabs read the
    combined DataFrame in ``self.df``, which ``load_site_logs`` rebuilds every
    time the site selection changes.
    """

    def __init__(self):
        """Build all tabs, then fill the site selector."""
        super().__init__()
        self.setWindowTitle("IIS Log Analyzer")
        self.resize(1000, 700)
        self.df = None      # combined log rows of the selected site, or None
        self.sites = {}     # display label -> log folder name
        self.all_ips = []   # sorted distinct client IPs of the loaded logs
        # NOTE(review): this attribute shadows QWidget.layout(); the name is
        # kept so existing users of the attribute keep working.
        self.layout = QVBoxLayout(self)
        self.tabs = QTabWidget()
        self.layout.addWidget(self.tabs)
        self.init_overview_tab()
        self.init_ip_lookup_tab()
        self.init_bot_tab()
        self.init_url_search_tab()
        self.init_detailed_filter_tab()
        # Must run last: adding the first site fires currentTextChanged,
        # and load_site_logs touches widgets from the tabs built above.
        self.populate_sites()

    def init_overview_tab(self):
        """Create the Overview tab: site selector, export button, summary, chart."""
        self.overview_tab = QWidget()
        self.tabs.addTab(self.overview_tab, "Overview")
        layout = QVBoxLayout(self.overview_tab)

        controls = QHBoxLayout()
        self.site_selector = QComboBox()
        self.export_button = QPushButton("Export Summary")
        controls.addWidget(QLabel("Select IIS Site:"))
        controls.addWidget(self.site_selector)
        controls.addWidget(self.export_button)
        layout.addLayout(controls)

        self.output = QTextEdit()
        self.output.setReadOnly(True)
        layout.addWidget(self.output, 2)

        self.graph_widget = pg.PlotWidget(title="Requests Per Day")
        self.graph_widget.setLabel('left', 'Requests')
        self.graph_widget.setLabel('bottom', 'Day')
        self.graph_widget.showGrid(x=True, y=True)
        layout.addWidget(self.graph_widget, 3)

        self.site_selector.currentTextChanged.connect(self.load_site_logs)
        self.export_button.clicked.connect(self.export_summary)

    def init_ip_lookup_tab(self):
        """Create the IP Lookup tab: search box, match list, per-IP report."""
        self.ip_tab = QWidget()
        self.tabs.addTab(self.ip_tab, "IP Lookup")
        layout = QVBoxLayout(self.ip_tab)

        controls = QHBoxLayout()
        self.ip_search_input = QLineEdit()
        self.ip_search_input.setPlaceholderText("Type to search IPs...")
        self.ip_button = QPushButton("Clear Search")
        controls.addWidget(QLabel("Search IP:"))
        controls.addWidget(self.ip_search_input)
        controls.addWidget(self.ip_button)
        layout.addLayout(controls)

        self.ip_result_list = QListWidget()
        self.ip_output = QTextEdit()
        self.ip_output.setReadOnly(True)
        layout.addWidget(self.ip_result_list)
        layout.addWidget(self.ip_output)

        self.ip_search_input.textChanged.connect(self.filter_ip_list)
        self.ip_result_list.itemClicked.connect(self.display_ip_report)
        self.ip_button.clicked.connect(self.ip_search_input.clear)

    def init_bot_tab(self):
        """Create the Bot/User-Agent Scan tab: result pane and scan button."""
        self.bot_tab = QWidget()
        self.tabs.addTab(self.bot_tab, "Bot/User-Agent Scan")
        layout = QVBoxLayout(self.bot_tab)

        self.bot_output = QTextEdit()
        self.bot_output.setReadOnly(True)
        layout.addWidget(self.bot_output)

        self.scan_bots_button = QPushButton("Scan for Non-Standard User-Agents")
        self.scan_bots_button.clicked.connect(self.scan_for_bots)
        layout.addWidget(self.scan_bots_button)

    def init_url_search_tab(self):
        """Create the URL Request Lookup tab: query box, button, result pane."""
        self.url_tab = QWidget()
        self.tabs.addTab(self.url_tab, "URL Request Lookup")
        layout = QVBoxLayout(self.url_tab)

        controls = QHBoxLayout()
        self.url_search_input = QLineEdit()
        self.url_search_input.setPlaceholderText("Enter part of a URL to search")
        self.url_search_button = QPushButton("Search")
        controls.addWidget(QLabel("Search URL:"))
        controls.addWidget(self.url_search_input)
        controls.addWidget(self.url_search_button)
        layout.addLayout(controls)

        self.url_output = QTextEdit()
        self.url_output.setReadOnly(True)
        layout.addWidget(self.url_output)

        self.url_search_button.clicked.connect(self.search_url_requests)

    def init_detailed_filter_tab(self):
        """Create the Detailed Filter View tab: date range, UA filter, output."""
        self.detailed_tab = QWidget()
        self.tabs.addTab(self.detailed_tab, "Detailed Filter View")
        layout = QVBoxLayout(self.detailed_tab)

        control_layout = QHBoxLayout()
        self.start_date = QDateEdit()
        self.end_date = QDateEdit()
        self.ua_input = QLineEdit()
        self.filter_button = QPushButton("Apply Filter")
        self.ua_input.setPlaceholderText("User-Agent contains...")
        self.start_date.setDate(QDate.currentDate().addMonths(-1))
        self.end_date.setDate(QDate.currentDate().addDays(1))
        control_layout.addWidget(QLabel("Start Date:"))
        control_layout.addWidget(self.start_date)
        control_layout.addWidget(QLabel("End Date:"))
        control_layout.addWidget(self.end_date)
        control_layout.addWidget(QLabel("User-Agent Filter:"))
        control_layout.addWidget(self.ua_input)
        control_layout.addWidget(self.filter_button)

        self.detailed_output = QTextEdit()
        self.detailed_output.setReadOnly(True)
        layout.addLayout(control_layout)
        layout.addWidget(self.detailed_output)

        self.filter_button.clicked.connect(self.run_detailed_filter)

    def run_detailed_filter(self):
        """List every request between the two dates (both days inclusive).

        When the User-Agent box is non-empty, only rows whose User-Agent
        contains that text literally (case-sensitive) are shown.
        """
        if self.df is None or 'datetime' not in self.df.columns:
            self.detailed_output.setText("No log data loaded.")
            return

        df = self.df[self.df['datetime'].notna()]
        start = pd.Timestamp(self.start_date.date().toString("yyyy-MM-dd"))
        end = pd.Timestamp(self.end_date.date().toString("yyyy-MM-dd"))
        # Both pickers yield midnight timestamps, so compare against the start
        # of the following day to include the whole selected end date.
        end_exclusive = end + pd.Timedelta(days=1)
        df = df[(df['datetime'] >= start) & (df['datetime'] < end_exclusive)]

        ua_substr = self.ua_input.text().strip()
        if ua_substr:
            if 'cs(User-Agent)' not in df.columns:
                self.detailed_output.setText("No matching entries found.")
                return
            # regex=False: the typed text is a plain substring, so characters
            # such as "(" or "." must not be interpreted as a pattern.
            agents = df['cs(User-Agent)'].fillna("").astype(str)
            df = df[agents.str.contains(ua_substr, regex=False)]

        if df.empty:
            self.detailed_output.setText("No matching entries found.")
            return

        df_sorted = df.sort_values(by='datetime')
        lines = [
            f"[{row.get('datetime', '')}] IP: {row.get('c-ip', '')} | {row.get('cs-method', '')} {row.get('cs-uri-stem', '')} | Status: {row.get('sc-status', '')} | UA: {row.get('cs(User-Agent)', '')}"
            for _, row in df_sorted.iterrows()
        ]
        self.detailed_output.setText("\n".join(lines))

    def search_url_requests(self):
        """Show request counts per (URL, client IP) for URLs containing the query."""
        if (
            self.df is None
            or 'cs-uri-stem' not in self.df.columns
            or 'c-ip' not in self.df.columns
        ):
            self.url_output.setText("No log data or URL field not found.")
            return
        query = self.url_search_input.text().strip()
        if not query:
            return

        # Literal match: URLs routinely contain regex metacharacters.
        stems = self.df['cs-uri-stem'].fillna("").astype(str)
        filtered = self.df[stems.str.contains(query, regex=False)]
        grouped = filtered.groupby(['cs-uri-stem', 'c-ip']).size().reset_index(name='count')
        grouped = grouped.sort_values(by='count', ascending=False)
        lines = [
            f"{row['cs-uri-stem']} | {row['c-ip']} | {row['count']} requests"
            for _, row in grouped.iterrows()
        ]
        self.url_output.setText("\n".join(lines) if lines else "No matching URLs found.")

    def scan_for_bots(self):
        """Report User-Agents that start with none of the whitelisted prefixes.

        Output is a per-User-Agent count followed, when client IPs are
        available, by a per-(IP, User-Agent) breakdown, both sorted by
        descending request count.
        """
        ua_col = 'cs(User-Agent)'
        if self.df is None or ua_col not in self.df.columns:
            self.bot_output.setText("No log data or User-Agent field not found.")
            return

        allowed = tuple(USER_AGENT_WHITELIST)
        # Combining logs with different field sets leaves NaN in this column;
        # show those rows with the "-" placeholder so they are counted rather
        # than crashing the prefix check or being dropped by groupby.
        frame = self.df.assign(**{ua_col: self.df[ua_col].fillna("-").astype(str)})
        suspect_mask = frame[ua_col].apply(lambda ua: not ua.startswith(allowed)).astype(bool)
        suspicious = frame[suspect_mask]
        if suspicious.empty:
            self.bot_output.setText("No suspicious user-agents found.")
            return

        grouped = suspicious.groupby([ua_col]).size().reset_index(name='count')
        grouped = grouped.sort_values(by='count', ascending=False)
        report = ["Suspicious User-Agent Count:"]
        report.extend(f"{row[ua_col]}: {row['count']}" for _, row in grouped.iterrows())

        if 'c-ip' in suspicious.columns:
            ip_grouped = suspicious.groupby(['c-ip', ua_col]).size().reset_index(name='count')
            ip_grouped = ip_grouped.sort_values(by='count', ascending=False)
            report.append("\nDetails:")
            report.extend(
                f"{row['c-ip']} | {row[ua_col]} | {row['count']} requests"
                for _, row in ip_grouped.iterrows()
            )
        self.bot_output.setText("\n".join(report))

    def populate_sites(self):
        """Refill the site selector from the current site mapping."""
        self.site_selector.clear()
        self.sites = get_site_mapping()
        self.site_selector.addItems(list(self.sites))

    def _clear_loaded_data(self, message):
        """Drop any loaded log data and show *message* on the overview tab."""
        self.df = None
        self.all_ips = []
        self.ip_result_list.clear()
        self.output.setText(message)
        self.graph_widget.clear()

    def load_site_logs(self):
        """Load every ``.log`` file of the selected site and refresh the views.

        Files are read in name order.  Files that cannot be opened are skipped
        and named at the end of the summary; rows without a parseable
        timestamp are discarded.
        """
        site = self.site_selector.currentText()
        log_folder = self.sites.get(site)
        if not log_folder:
            # Fired with an empty or unknown label, e.g. while the selector
            # is being cleared.
            self._clear_loaded_data("No site selected.")
            return

        site_path = os.path.join(IIS_LOG_DIR, log_folder)
        try:
            log_names = sorted(
                name for name in os.listdir(site_path) if name.endswith(".log")
            )
        except OSError as exc:
            self._clear_loaded_data(f"Cannot read log folder {site_path}: {exc}")
            return

        frames = []
        skipped = []
        for name in log_names:
            try:
                frame = parse_log_file(os.path.join(site_path, name))
            except OSError:
                skipped.append(name)
                continue
            # A frame without 'datetime' has no usable rows for any view.
            if 'datetime' in frame.columns:
                frames.append(frame)

        if not frames:
            self._clear_loaded_data("No logs found.")
            return

        combined = pd.concat(frames, ignore_index=True)
        combined = combined[combined['datetime'].notna()]
        if combined.empty:
            self._clear_loaded_data("No logs found.")
            return

        self.df = combined
        self.analyze_logs()
        if skipped:
            self.output.append("\nSkipped unreadable files: " + ", ".join(skipped))
        self.populate_ips()

    def analyze_logs(self):
        """Write the textual summary of ``self.df`` and redraw the chart."""
        df = self.df
        if df is None:
            return
        columns = df.columns

        summary = [f"Total Requests: {len(df)}"]
        if 'c-ip' in columns:
            summary.append(f"Unique IPs: {df['c-ip'].nunique()}")
        if 'sc-status' in columns:
            summary.append("\nStatus Code Breakdown:")
            status_counts = df['sc-status'].value_counts().sort_index()
            summary.extend(f"{code}: {count}" for code, count in status_counts.items())
        if 'cs-method' in columns:
            summary.append("\nMethod Breakdown:")
            method_counts = df['cs-method'].value_counts()
            summary.extend(f"{method}: {count}" for method, count in method_counts.items())
        if 'cs-uri-stem' in columns:
            summary.append("\nTop Requested URLs:")
            url_counts = df['cs-uri-stem'].value_counts().head(50)
            summary.extend(f"{url}: {count}" for url, count in url_counts.items())
        if 'c-ip' in columns:
            summary.append("\nTop IPs:")
            ip_counts = df['c-ip'].value_counts().head(50)
            summary.extend(f"{ip}: {count}" for ip, count in ip_counts.items())

        self.output.setText("\n".join(summary))
        self.update_graph(df)

    def update_graph(self, df):
        """Draw one bar per calendar day showing that day's request count.

        Args:
            df: DataFrame with a ``datetime`` column; without one the chart
                is simply cleared.
        """
        self.graph_widget.clear()
        if 'datetime' not in df.columns:
            return
        date_counts = df['datetime'].dt.date.value_counts().sort_index()
        x = list(date_counts.index.astype(str))
        y = list(date_counts.values)
        bars = pg.BarGraphItem(x=list(range(len(x))), height=y, width=0.6, brush='c')
        self.graph_widget.addItem(bars)
        # Bars sit at integer positions; label each position with its date.
        self.graph_widget.getAxis('bottom').setTicks([list(enumerate(x))])

    def export_summary(self):
        """Save the overview text to a file chosen by the user."""
        if self.df is None:
            return
        filename, _ = QFileDialog.getSaveFileName(self, "Export Summary", "", "Text Files (*.txt)")
        if filename:
            with open(filename, "w", encoding="utf-8") as f:
                f.write(self.output.toPlainText())

    def populate_ips(self):
        """Rebuild the sorted list of distinct client IPs and refresh the matches."""
        if self.df is not None and 'c-ip' in self.df.columns:
            self.all_ips = sorted(set(self.df['c-ip'].dropna()))
        else:
            self.all_ips = []
        self.filter_ip_list()

    def filter_ip_list(self):
        """Show up to 100 known IPs containing the search text (none when empty)."""
        text = self.ip_search_input.text().strip()
        self.ip_result_list.clear()
        if not text:
            return
        matches = [ip for ip in self.all_ips if text in ip]
        self.ip_result_list.addItems(matches[:100])

    def display_ip_report(self, item):
        """Show every logged request made by the IP of the clicked list item.

        Args:
            item: The clicked list item; its text is the client IP.
        """
        if self.df is None or 'c-ip' not in self.df.columns:
            self.ip_output.setText("No log data loaded.")
            return
        selected_ip = item.text()
        df_ip = self.df[self.df['c-ip'] == selected_ip]
        report = [f"Activity for IP: {selected_ip}", f"Total Requests: {len(df_ip)}"]
        if not df_ip.empty:
            report.append("\nAll Requests:")
            report.extend(
                f"[{row.get('datetime', '')}] {row.get('cs-method', '')} {row.get('cs-uri-stem', '')} {row.get('sc-status', '')} UA: {row.get('cs(User-Agent)', '')}"
                for _, row in df_ip.iterrows()
            )
        self.ip_output.setText("\n".join(report))
# Script entry point: open the analyzer window and run the Qt event loop
# until the window is closed, then exit with the loop's return code.
if __name__ == "__main__":
    import sys

    qt_app = QApplication(sys.argv)
    window = IISAnalyzer()
    window.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
Author
Programmer-Lily
commented
May 28, 2025

Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment