Skip to content

Instantly share code, notes, and snippets.

@designer2k2
Last active December 17, 2021 17:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save designer2k2/417b040d1110c24967e4ef9b0895e039 to your computer and use it in GitHub Desktop.
Save designer2k2/417b040d1110c24967e4ef9b0895e039 to your computer and use it in GitHub Desktop.
Wigle result file comparison
# Compare Wigle logfiles from different devices
# Logfiles should be made at the same time, so ideally they contain the same data
#
# This will check:
# - Networks seen by all / only some / only one device
# - Performance of device based on this
# - Performance then also based on frequency ?
# - same for bluetooth and cells ?
#
#
# by Stephan Martin
# https://www.designer2k2.at
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def _load_wifi(path, **read_kwargs):
    """Read one CSV log and return only its rows whose ``Type`` is ``"WIFI"``.

    The first line of the file is skipped (``skiprows=1``), so the second
    line supplies the column names. ``FirstSeen`` is converted to datetime.
    Extra keyword arguments are forwarded to ``pandas.read_csv``.
    """
    frame = pd.read_csv(path, skiprows=1, encoding_errors="ignore", **read_kwargs)
    # Copy the filtered rows so the column assignment below writes to an
    # independent frame instead of a slice of the unfiltered data.
    wifi = frame[frame["Type"] == "WIFI"].copy()
    wifi["FirstSeen"] = pd.to_datetime(wifi["FirstSeen"])
    return wifi


def _unique_timestamps(frame):
    """Return the distinct ``FirstSeen`` values as an array, in order of first appearance."""
    return frame["FirstSeen"].drop_duplicates().to_numpy()


def doit():
    """Compare the WiFi networks logged by the devices S4, S7, S20 and RPI.

    Reads the four log files from the ``raw`` directory, then:

    * prints the number of unique MACs per device,
    * shows a scatter plot of how often each RSSI value occurs per device,
    * shows the unique ``FirstSeen`` timestamps per device and prints the
      average interval between them,
    * prints how many MACs were seen by all devices, the ten most frequently
      seen ones, and every S4 row of the most frequently seen MAC,
    * prints how many MACs were seen by exactly one device.

    MACs from the RPI log are lower-cased before being compared with the
    other logs. Rows without a MAC are ignored in the set comparisons.
    """
    print("let do this")

    raw_dir = Path("raw")
    data_S4 = _load_wifi(raw_dir / "WigleWifi_20211205105037.csv.gz", compression="gzip")
    data_S7 = _load_wifi(raw_dir / "WigleWifi_20211205105146.csv.gz", compression="gzip")
    data_S20 = _load_wifi(raw_dir / "WigleWifi_20211205105202.csv.gz", compression="gzip")
    data_RPI = _load_wifi(raw_dir / "Kismet-20211205-08-18-44-1.CSV")
    # The RPI log is not in chronological order, so sort it explicitly.
    data_RPI = data_RPI.sort_values(by="FirstSeen")
    print("Data loaded")

    devices = {"S4": data_S4, "S7": data_S7, "S20": data_S20, "RPI": data_RPI}

    # Input stats: unique MACs per device.
    for name, frame in devices.items():
        print(f"{name:>3}: {len(frame['MAC'].unique())}")

    # RSSI histogram: one point per distinct RSSI value and its row count.
    for name, frame in devices.items():
        rssi_counts = frame["RSSI"].value_counts()
        plt.scatter(x=rssi_counts.index, y=rssi_counts, label=name)
    plt.legend()
    plt.xlabel("RSSI")
    plt.ylabel("Count")
    plt.title("RSSI histogram")
    plt.show()

    # Unique timestamp series, each taken from the device it is named after.
    stamps = {name: _unique_timestamps(frame) for name, frame in devices.items()}
    # The S4 timestamps carry a large offset; shift the series so that its
    # second timestamp coincides with the second timestamp of the S7.
    stamps["S4"] = stamps["S4"] + (stamps["S7"][1] - stamps["S4"][1])

    for name, series in stamps.items():
        plt.plot(series, label=name)
    plt.legend()
    plt.grid()
    plt.xlabel("Count")
    plt.ylabel("Time")
    plt.title("Time / Counts")
    # Five minutes of margin below the first S20 and above the last S7 stamp.
    margin = np.timedelta64(5, "m")
    plt.ylim(stamps["S20"][0] - margin, stamps["S7"][-1] + margin)
    plt.show()

    runtime = (stamps["S20"][-1] - stamps["S20"][0]) / np.timedelta64(1, "s")
    print(f"total time: {runtime:.0f}s")
    for name, series in stamps.items():
        print(
            f"unique {name} timestamps: {len(series):5.0f}, "
            f"average intervall: {runtime / len(series):.2f}s"
        )

    # Build the per-device MAC sets and row counts once; membership tests on
    # a set compare values (``in`` on a Series would test its index instead).
    mac_columns = {
        "S4": data_S4["MAC"],
        "S7": data_S7["MAC"],
        "S20": data_S20["MAC"],
        "RPI": data_RPI["MAC"].str.lower(),
    }
    mac_sets = {name: set(column.dropna()) for name, column in mac_columns.items()}
    mac_counts = {name: column.value_counts() for name, column in mac_columns.items()}

    # Found by all: walk the RPI MACs in log order, each MAC at most once.
    allmac = [
        mac
        for mac in mac_columns["RPI"].dropna().drop_duplicates()
        if mac in mac_sets["S4"] and mac in mac_sets["S7"] and mac in mac_sets["S20"]
    ]
    print(f"Found by all: {len(allmac)}")

    # For the MACs found by all: number of rows per device, top 10 by total.
    rows = []
    for mac in allmac:
        s4 = int(mac_counts["S4"].get(mac, 0))
        s7 = int(mac_counts["S7"].get(mac, 0))
        s20 = int(mac_counts["S20"].get(mac, 0))
        rpi = int(mac_counts["RPI"].get(mac, 0))
        rows.append([mac, rpi + s4 + s7 + s20, s4, s7, s20, rpi])
    macount = pd.DataFrame(rows, columns=["MAC", "total", "S4", "S7", "S20", "RPI"])
    macount = macount.sort_values(by="total", ascending=False)
    print("Top 10 sorted by total seen count:")
    print(macount.head(10))

    # All S4 rows for the most frequently seen MAC (skipped if none is shared).
    if not macount.empty:
        tophitmac = macount.iloc[0]["MAC"]
        s4hit = data_S4[data_S4["MAC"] == tophitmac]
        print(s4hit)

    # Individual finds: MACs present in one device's log and in no other.
    for name, own_macs in mac_sets.items():
        seen_elsewhere = set().union(
            *(macs for other, macs in mac_sets.items() if other != name)
        )
        print(f"Found only by {name}: {len(own_macs - seen_elsewhere)}")
# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    doit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment