Skip to content

Instantly share code, notes, and snippets.

@ranedk
Created November 27, 2017 09:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ranedk/f23e51cc5521f76a9cb9de7b44a383fd to your computer and use it in GitHub Desktop.
Save ranedk/f23e51cc5521f76a9cb9de7b44a383fd to your computer and use it in GitHub Desktop.
File read and parse in Python
import datetime
from collections import defaultdict
# Path of the log file analysed when the script is run directly.
LOG_PATH = "sampledata"
# Timestamp layout of the second field on each input line.
TIMESTAMP_FORMAT = "%d/%b/%Y:%H:%M:%S"
# Key used to bucket records: the timestamp truncated to the minute.
MINUTE_KEY_FORMAT = "%d/%m/%Y:%H:%M"
# A minute is reported when its count exceeds the average by this factor.
OFFENDER_FACTOR = 4.2


def parse_lines(lines):
    """Parse "<ip> <timestamp>" lines into a list of (ip, datetime) tuples.

    Args:
        lines: Iterable of strings (for example an open text file). A
            trailing newline on each line is ignored.

    Returns:
        A list of ``(ip, datetime.datetime)`` tuples, in input order.

    Blank lines are skipped silently. Any other line that does not consist
    of exactly two space-separated fields, or whose timestamp does not match
    ``TIMESTAMP_FORMAT``, is printed and skipped.
    """
    records = []
    for raw in lines:
        line = raw.rstrip("\n")
        if not line:
            continue
        try:
            ip, stamp = line.split(" ")
            attime = datetime.datetime.strptime(stamp, TIMESTAMP_FORMAT)
        except ValueError:
            # Report the offending line and move on; without this `continue`
            # the previous record would be appended a second time.
            print(line)
            continue
        records.append((ip, attime))
    return records


def cluster_by_minute(records):
    """Group (ip, datetime) records by their timestamp truncated to the minute.

    Returns:
        A ``defaultdict(list)`` mapping a ``MINUTE_KEY_FORMAT`` string to the
        list of records that fall inside that minute.
    """
    clusters = defaultdict(list)
    for record in records:
        clusters[record[1].strftime(MINUTE_KEY_FORMAT)].append(record)
    return clusters


def find_offenders(per_minute_count, average, factor=OFFENDER_FACTOR):
    """Return the (minute, count) pairs whose count exceeds ``average * factor``."""
    return [
        (minute, count)
        for minute, count in per_minute_count
        if count > average * factor
    ]


def main(path=LOG_PATH):
    """Read the log at *path* and print per-minute statistics and offenders.

    Prints the total number of parsed records, the number of distinct
    minutes, the minute keys, the average number of records per minute and
    finally every minute whose count exceeds the average by
    ``OFFENDER_FACTOR``.
    """
    # The context manager guarantees the file is closed after parsing.
    with open(path, "r") as handle:
        data = parse_lines(handle)
    print("Total records", len(data))

    per_minute_cluster = cluster_by_minute(data)
    print("Per minute cluster:", len(per_minute_cluster))
    print(per_minute_cluster.keys())

    per_minute_count = [(k, len(v)) for k, v in per_minute_cluster.items()]
    print("Per minute count:", len(per_minute_count))
    if not per_minute_count:
        # No parsed records: there is no average to compute (and dividing
        # by zero would crash), so there is nothing further to report.
        return

    per_min_count_avg = sum(count for _, count in per_minute_count) / float(
        len(per_minute_count)
    )
    print("Per min average: %s" % per_min_count_avg)

    offenders = find_offenders(per_minute_count, per_min_count_avg)
    print("\n".join(["%s %s" % i for i in offenders]))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment