Skip to content

Instantly share code, notes, and snippets.

@peterbe
Created October 10, 2023 12:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save peterbe/4a5a78470703071b473806320f699f4f to your computer and use it in GitHub Desktop.
Save peterbe/4a5a78470703071b473806320f699f4f to your computer and use it in GitHub Desktop.
import csv
from urllib.parse import parse_qs
import sys
from collections import Counter, defaultdict
# Tally query-string parameters from a CSV of requests and print, for the
# "platform" and "tool" parameters, the most common values and their counts.
#
# Usage: python <this script> <csv-file>
# The CSV is read as: one header row (skipped), then rows whose first two
# columns are the request URI and the response status code.


def count_query_params(rows):
    """Count query-string values per parameter name for successful requests.

    Args:
        rows: Iterable of CSV rows (sequences of strings). The first column
            is the URI and the second the status code.

    Returns:
        A ``defaultdict(Counter)`` mapping each query parameter name to a
        ``Counter`` of the values seen for it. Only rows whose status code
        is exactly ``"200"`` contribute.
    """
    counter = defaultdict(Counter)
    for row in rows:
        # csv.reader yields [] for blank lines; skip those and any row that
        # lacks a status column instead of failing on tuple unpacking.
        if len(row) < 2:
            continue
        uri, code = row[0], row[1]
        if code != "200":
            continue
        # partition() yields "" when there is no "?", so URIs without a
        # query string contribute nothing rather than raising IndexError.
        # Everything after the first "?" is treated as the query string.
        query = uri.partition("?")[2]
        for key, values in parse_qs(query).items():
            counter[key].update(values)
    return counter


def report_lines(counter, keys=("platform", "tool"), top=20):
    """Build the report lines for the selected parameter names.

    Args:
        counter: Mapping of parameter name to ``Counter`` of values.
        keys: Parameter names to include in the report.
        top: Number of most common values listed individually per key; the
            counts of all remaining values are summed into one line.

    Returns:
        A list of strings, one per output line. Keys appear in the order
        they occur in ``counter``.
    """
    lines = []
    for key in counter:
        if key not in keys:
            continue
        lines.append(f"KEY: {key}")
        ranked = counter[key].most_common()
        for value, count in ranked[:top]:
            lines.append(f"  {value!r} {count}")
        # The summary line is emitted even when nothing is left over.
        rest = sum(count for _, count in ranked[top:])
        lines.append(f" ALL OTHERS COMBINED {rest}")
    return lines


def main(argv=None):
    """Read the CSV named by the first argument and print the report.

    Args:
        argv: Argument list including the program name; defaults to
            ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 2 when the CSV path is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print(f"usage: {argv[0] if argv else 'script'} CSV_FILE", file=sys.stderr)
        return 2
    # newline="" lets the csv module handle line endings itself.
    with open(argv[1], newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row; tolerate an empty file
        counter = count_query_params(reader)
    for line in report_lines(counter):
        print(line)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment