Created
December 10, 2014 09:40
-
-
Save bigeagle/009f2dc798b1d008edea to your computer and use it in GitHub Desktop.
loganalysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2
# -*- coding:utf-8 -*-
import logging
import re

import ipaddr

from settings import LOG_FILE_FORMAT, THU_IP
# Module-level logger shared by the classes and functions in this file.
logger = logging.getLogger(__name__)

# Layout applied to the root handler: timestamp, level name, then the message.
FORMAT = '%(asctime)-15s [%(levelname)s] %(message)s'
logging.basicConfig(format=FORMAT)

# Only warnings and errors from this module are emitted.
logger.setLevel(logging.WARNING)
class AccessHit(object):
    """Collect the unique client addresses seen in an access log.

    Each address is filed into one of four sets in ``uniq_ip``, keyed by
    whether it falls inside one of the networks listed in ``THU_IP`` and by
    its IP version: ``thu_ipv4``, ``thu_ipv6``, ``other_ipv4`` and
    ``other_ipv6``.
    """

    def __init__(self):
        # Compiled once here so process_line() does not recompile per line.
        self.p = re.compile(LOG_FILE_FORMAT['log_format'])
        self.thu_ipv4 = [ipaddr.IPNetwork(ip) for ip in THU_IP['ipv4']]
        self.thu_ipv6 = [ipaddr.IPNetwork(ip) for ip in THU_IP['ipv6']]
        self.uniq_ip = {
            'thu_ipv4': set(),
            'thu_ipv6': set(),
            'other_ipv4': set(),
            'other_ipv6': set(),
        }

    def process_line(self, line):
        """Parse one log line and record its remote address.

        Lines that do not match the configured log format, and lines whose
        remote address cannot be parsed as an IP address, are logged as
        errors and skipped.

        :param line: a single line of the access log.
        """
        matched = self.p.match(line)
        if not matched:
            logger.error("unable to parse line '%s'", line)
            return

        ip_s = matched.group('remote_addr')
        try:
            ip = ipaddr.IPAddress(ip_s)
        except ValueError:
            # The log pattern also accepts empty or malformed address
            # fields; skip those instead of aborting the whole run.
            logger.error("invalid remote address '%s'", ip_s)
            return

        if ip.version == 4:
            networks, thu_key, other_key = (
                self.thu_ipv4, 'thu_ipv4', 'other_ipv4')
        elif ip.version == 6:
            networks, thu_key, other_key = (
                self.thu_ipv6, 'thu_ipv6', 'other_ipv6')
        else:
            return

        # An address belongs to exactly one bucket: the "thu" one when any
        # listed network contains it, the "other" one only when none does.
        if any(n.Contains(ip) for n in networks):
            self.uniq_ip[thu_key].add(ip_s)
        else:
            self.uniq_ip[other_key].add(ip_s)

    def summary(self):
        """Print the number of unique addresses in each of the four sets."""
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print("Tsinghua IPv4 IP: %d" % len(self.uniq_ip['thu_ipv4']))
        print("Tsinghua IPv6 IP: %d" % len(self.uniq_ip['thu_ipv6']))
        print("Other IPv4 IP: %d" % len(self.uniq_ip['other_ipv4']))
        print("Other IPv6 IP: %d" % len(self.uniq_ip['other_ipv6']))
def main(log_path='access.log'):
    """Feed every line of an access log through AccessHit and print totals.

    :param log_path: path of the log file to read; defaults to
        ``access.log`` in the current working directory.
    """
    a = AccessHit()
    with open(log_path) as f:
        # Iterate the file object directly so the log is streamed line by
        # line rather than loaded into memory at once.
        for line in f:
            a.process_line(line)
    a.summary()
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

# vim: ts=4 sw=4 sts=4 expandtab
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Describes the layout of one access-log line.
LOG_FILE_FORMAT = {
    # Regular expression for a whole line. Named groups, in order:
    # remote_addr, remote_user, time_local, request, status,
    # body_bytes_sent, http_referer, http_user_agent, http_x_forwarded_for.
    # remote_addr accepts only lowercase hex digits, '.' and ':'.
    'log_format': r'^(?P<remote_addr>[0-9a-f.:]*) - (?P<remote_user>[^ ]*) \[(?P<time_local>[^]]*)\] "(?P<request>[^"]*)" (?P<status>\d*) (?P<body_bytes_sent>\d*) "(?P<http_referer>[^"]*)" "(?P<http_user_agent>[^"]*)" "(?P<http_x_forwarded_for>[^"]*)"$',
    # strftime/strptime-style layout of the time_local field,
    # e.g 09/Dec/2014:06:30:20 +0800
    # NOTE(review): Python 2's strptime is believed to reject %z - confirm
    # before parsing timestamps with this format under python2.
    'time_format': '%d/%b/%Y:%H:%M:%S %z',
}
# Networks treated as campus ("thu") addresses, in CIDR notation and
# grouped by IP version.
THU_IP = {
    'ipv4': [
        '166.111.0.0/16',
        '59.66.0.0/16',
        '101.5.0.0/16',
        '101.6.0.0/16',
        '183.172.0.0/16',
        '183.173.0.0/16',
        '118.229.0.0/20',
    ],
    'ipv6': [
        '2001:da8:200::/48',
        '2001:da8:225::/48',
        '2001:da8:226::/48',
        '2001:250:0200::/48',
        '2402:f000::/32',
        # Already covered by the /32 above; kept as listed.
        '2402:f000:5::/48',
    ],
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment