-
-
Save designer2k2/417b040d1110c24967e4ef9b0895e039 to your computer and use it in GitHub Desktop.
Wigle result file comparison
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compare Wigle logfiles from different devices
# Logfiles should be made at the same time, so ideally they contain the same data
#
# This will check:
# - Networks seen by all / only some / only one device
# - Performance of device based on this
# - Performance then also based on frequency ?
# - same for bluetooth and cells ?
#
#
# by Stephan Martin
# https://www.designer2k2.at
import pandas as pd | |
import matplotlib.pyplot as plt | |
import numpy as np | |
def doit():
    """Load Wigle/Kismet scan logs from four devices and compare them.

    Reads three gzipped WigleWifi CSVs (S4 / S7 / S20 phones) and one
    Kismet CSV (RPI) from the ``raw`` folder, then prints and plots:

      - unique network (MAC) counts per device
      - RSSI value histograms
      - timestamp coverage over the run
      - networks seen by all four devices / by exactly one device

    NOTE(review): paths are hard-coded Windows-style relative paths;
    run from the directory that contains ``raw``.
    """
    print("let do this")

    # skiprows=1 skips the Wigle/Kismet pre-header line; encoding_errors
    # tolerates stray bytes in SSID fields.
    data_S4 = pd.read_csv(
        r"raw\WigleWifi_20211205105037.csv.gz",
        skiprows=1,
        compression="gzip",
        encoding_errors="ignore",
    )
    data_S7 = pd.read_csv(
        r"raw\WigleWifi_20211205105146.csv.gz",
        skiprows=1,
        compression="gzip",
        encoding_errors="ignore",
    )
    data_S20 = pd.read_csv(
        r"raw\WigleWifi_20211205105202.csv.gz",
        skiprows=1,
        compression="gzip",
        encoding_errors="ignore",
    )
    data_RPI = pd.read_csv(
        r"raw\Kismet-20211205-08-18-44-1.CSV", skiprows=1, encoding_errors="ignore"
    )

    # For now, wifi only. .copy() avoids SettingWithCopyWarning on the
    # column assignments just below.
    data_S4 = data_S4[data_S4["Type"] == "WIFI"].copy()
    data_S7 = data_S7[data_S7["Type"] == "WIFI"].copy()
    data_S20 = data_S20[data_S20["Type"] == "WIFI"].copy()
    data_RPI = data_RPI[data_RPI["Type"] == "WIFI"].copy()

    # Datetime for easier calc
    data_S4["FirstSeen"] = pd.to_datetime(data_S4["FirstSeen"])
    data_S7["FirstSeen"] = pd.to_datetime(data_S7["FirstSeen"])
    data_S20["FirstSeen"] = pd.to_datetime(data_S20["FirstSeen"])
    data_RPI["FirstSeen"] = pd.to_datetime(data_RPI["FirstSeen"])
    data_RPI = data_RPI.sort_values(by="FirstSeen")  # somehow this needs a sort

    print("Data loaded")

    # Input stats: unique networks per device.
    print(f" S4: {len(data_S4['MAC'].unique())}")
    print(f" S7: {len(data_S7['MAC'].unique())}")
    print(f"S20: {len(data_S20['MAC'].unique())}")
    print(f"RPI: {len(data_RPI['MAC'].unique())}")

    # RSSI histogram: count of rows per RSSI value, one scatter per device.
    vcs4 = data_S4["RSSI"].value_counts()
    vcs7 = data_S7["RSSI"].value_counts()
    vcs20 = data_S20["RSSI"].value_counts()
    vcRPI = data_RPI["RSSI"].value_counts()
    plt.scatter(x=vcs4.index, y=vcs4, label="S4")
    plt.scatter(x=vcs7.index, y=vcs7, label="S7")
    plt.scatter(x=vcs20.index, y=vcs20, label="S20")
    plt.scatter(x=vcRPI.index, y=vcRPI, label="RPI")
    plt.legend()
    plt.xlabel("RSSI")
    plt.ylabel("Count")
    plt.title("RSSI histogram")
    plt.show()

    # Unique timestamp series per device.
    # BUG FIX: the original swapped the S7 and S20 assignments
    # (dfS7 was built from data_S20 and vice versa).
    dfS4 = data_S4["FirstSeen"].map(pd.Timestamp).unique()  # this is garbage, big timeoffset and jumps
    dfS7 = data_S7["FirstSeen"].map(pd.Timestamp).unique()
    dfS20 = data_S20["FirstSeen"].map(pd.Timestamp).unique()
    dfRPI = data_RPI["FirstSeen"].map(pd.Timestamp).unique()

    # Time offset correction for the S4 data: shift its series so its
    # second timestamp lines up with the S7's.
    diff = dfS7[1] - dfS4[1]
    dfS4 = dfS4 + diff

    plt.plot(dfS4, label="S4")
    plt.plot(dfS7, label="S7")
    plt.plot(dfS20, label="S20")
    plt.plot(dfRPI, label="RPI")
    plt.legend()
    plt.grid()
    plt.xlabel("Count")
    plt.ylabel("Time")
    plt.title("Time / Counts")
    # BUG FIX: the closing bracket was misplaced, adding a timedelta to a
    # *list* (TypeError). Pad the y-range by 5 minutes on each side.
    plt.ylim([dfS20[0] - np.timedelta64(5, "m"), dfS7[-1] + np.timedelta64(5, "m")])
    plt.show()

    runtime = (dfS20[-1] - dfS20[0]) / np.timedelta64(1, "s")
    print(f"total time: {runtime:.0f}s")
    print(f"unique S4 timestamps: {len(dfS4):5.0f}, average intervall: {runtime / len(dfS4):.2f}s")
    print(f"unique S7 timestamps: {len(dfS7):5.0f}, average intervall: {runtime / len(dfS7):.2f}s")
    print(f"unique S20 timestamps: {len(dfS20):5.0f}, average intervall: {runtime / len(dfS20):.2f}s")
    print(f"unique RPI timestamps: {len(dfRPI):5.0f}, average intervall: {runtime / len(dfRPI):.2f}s")

    # Membership sets, built once. The original recomputed .unique() inside
    # every loop iteration (quadratic), and — worse — tested
    # `mac not in data_RPI["MAC"].str.lower()`, which on a pandas Series
    # checks the *index* (row numbers), not the values, so that condition
    # was effectively always true.
    s4_macs = set(data_S4["MAC"].unique())
    s7_macs = set(data_S7["MAC"].unique())
    s20_macs = set(data_S20["MAC"].unique())
    # Lower-case the RPI MACs so they compare against the Wigle logs.
    rpi_macs_lower = data_RPI["MAC"].str.lower()
    rpi_macs = set(rpi_macs_lower)

    # Found by all four devices:
    allmac = [
        mac
        for mac in rpi_macs_lower
        if mac in s4_macs and mac in s7_macs and mac in s20_macs
    ]
    print(f"Found by all: {len(allmac)}")

    # The ones found by all — how often by each device? Give top 10.
    temp = []
    for mac in allmac:
        rpi = 1  # len(data_RPI[data_RPI["MAC"].str.lower() == mac]) # always 1...
        s4 = len(data_S4[data_S4["MAC"] == mac])
        s7 = len(data_S7[data_S7["MAC"] == mac])
        s20 = len(data_S20[data_S20["MAC"] == mac])
        total = rpi + s4 + s7 + s20
        temp.append([mac, total, s4, s7, s20, rpi])
    macount = pd.DataFrame(temp, columns=["MAC", "total", "S4", "S7", "S20", "RPI"])
    # BUG FIX: sort by 'total' to match the printed claim (was sorted by 'S7').
    macount = macount.sort_values(by="total", ascending=False)
    print("Top 10 sorted by total seen count:")
    print(macount.head(10))

    # Get all the hits for the top MAC.
    # BUG FIX: the original printed s4hit[""] (empty column name -> KeyError).
    tophitmac = macount.head(1)["MAC"].values[0]
    s4hit = data_S4[data_S4["MAC"] == tophitmac]
    print(s4hit)

    # Individual finds: networks seen by exactly one device.
    macs4 = [
        mac
        for mac in s4_macs
        if mac not in s7_macs and mac not in s20_macs and mac not in rpi_macs
    ]
    print(f"Found only by S4: {len(macs4)}")
    macs7 = [
        mac
        for mac in s7_macs
        if mac not in s4_macs and mac not in s20_macs and mac not in rpi_macs
    ]
    print(f"Found only by S7: {len(macs7)}")
    macs20 = [
        mac
        for mac in s20_macs
        if mac not in s4_macs and mac not in s7_macs and mac not in rpi_macs
    ]
    print(f"Found only by S20: {len(macs20)}")
    macrpi = [
        mac
        for mac in rpi_macs_lower
        if mac not in s4_macs and mac not in s7_macs and mac not in s20_macs
    ]
    print(f"Found only by RPI: {len(macrpi)}")
# Run the comparison when executed as a script (not on import).
if __name__ == "__main__":
    doit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment