Last active
October 22, 2020 05:34
-
-
Save jayswan/96df3f0b9606f2ce84f2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter,defaultdict | |
import re | |
import sys | |
""" | |
Counterpart to this blog post: | |
http://unroutable.blogspot.com/2014/07/simple-python-syslog-counter.html | |
Summarize counts of typical Cisco syslog messages. Most syslog servers produce lines that look something like this: | |
TIMESTAMP FACILITY.SEVERITY REPORTER_IP %SOME-X-MESSAGE_TYPE: message details | |
However, the format may vary depending on server. | |
This script extracts the first IPv4 address from the line and assumes that's the reporting device. | |
It then extracts the MESSAGE_TYPE field, if present. | |
It summarizes the count of each unique message type in the input, then summarizes the counts | |
of each message type, per reporting IPv4 address: | |
10 SOME-5-MESSAGE | |
4 SOME-4-OTHERMESSAGE | |
10.1.1.1 | |
4 SOME-5-MESSAGE | |
2 SOME-4-OTHERMESSAGE | |
10.1.1.2 | |
6 SOME-5-MESSAGE | |
2 SOME-4-OTHERMESSAGE | |
""" | |
# Input file is the first argument on the CLI.
# You could easily make this a list or implement CLI switches with argparse.
# Falls back to None when no argument is given, so that merely importing this
# module (e.g. to reuse the patterns below) does not raise IndexError.
FILENAME = sys.argv[1] if len(sys.argv) > 1 else None

# One dotted-quad octet (0-255, leading zeros tolerated).
_OCTET = r'(?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})'

# Monster named regex to match IPv4 addresses, courtesy of Logstash grok
# pattern code. The look-behind / look-ahead stop a match from starting or
# ending in the middle of a longer run of digits. The address is exposed as
# the named group 'ipv4'.
IPV4 = re.compile(
    r'(?P<ipv4>(?<![0-9])(?:' + '[.]'.join([_OCTET] * 4) + r')(?![0-9]))'
)

# Named regex to match Cisco syslog message types: the shortest run of text
# between a '%' and the next ':' is exposed as the named group 'msg'.
CISCO_MSG = re.compile(r'%(?P<msg>.*?):')

# Counter object for counting all message types in the file.
msg_count = Counter()

# Dictionary of per-message-type counts, keyed by syslog-sending IP address.
per_reporter_counts = defaultdict(Counter)
def print_counter(counter, indent=0):
    """Print the contents of a Counter object, with or without indent.

    Entries are written one per line in ``most_common()`` order (highest
    count first), formatted as ``<pad> <count> <item>`` where ``<pad>`` is
    ``indent`` spaces and the count is left-justified in a 5-character column.
    An empty counter prints nothing.

    Args:
        counter: object providing ``most_common()`` that yields
            ``(item, count)`` pairs, e.g. a ``collections.Counter``.
        indent: number of spaces to put in front of every line.
    """
    pad = ' ' * indent
    for item, count in counter.most_common():
        # A single pre-formatted argument in parentheses prints the same text
        # whether ``print`` is the Python 2 statement or the Python 3 function.
        print('%s %-5i %s' % (pad, count, item))
def main():
    """Count Cisco-style syslog message types in FILENAME and print a summary.

    For every line of the input file, the first IPv4 address is taken to be
    the reporting device and the first ``%...:`` block is taken to be the
    message type. Lines missing either one are skipped. Totals accumulate in
    the module-level ``msg_count`` and ``per_reporter_counts``; afterwards the
    overall counts are printed, followed by each reporter's address and its
    own counts indented by four spaces.

    Exits with a usage message when no input file name is available.
    """
    if not FILENAME:
        sys.exit('usage: %s SYSLOG_FILE' % sys.argv[0])

    with open(FILENAME) as f:
        for line in f:
            # Match first IPv4 address and first Cisco-like message-type block.
            reporter_match = IPV4.search(line)
            msg_match = CISCO_MSG.search(line)
            if reporter_match is None or msg_match is None:
                # Not a typical Cisco syslog message. Testing the match
                # objects directly (instead of a bare ``except``) keeps
                # KeyboardInterrupt and genuine errors from being swallowed.
                continue
            reporter = reporter_match.group('ipv4')
            msg = msg_match.group('msg')
            # Increment message counter.
            msg_count[msg] += 1
            # Increment message counter per reporting IP address.
            per_reporter_counts[reporter][msg] += 1

    # Code below here just prints the results.
    print_counter(msg_count)
    for reporter, counter in per_reporter_counts.items():
        print(reporter)
        print_counter(counter, indent=4)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment