Skip to content

Instantly share code, notes, and snippets.

@airtoxin
Created May 12, 2013 15:25
Show Gist options
  • Save airtoxin/5563915 to your computer and use it in GitHub Desktop.
Save airtoxin/5563915 to your computer and use it in GitHub Desktop.
アクセスログ解析
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import time
import urllib

from BeautifulSoup import BeautifulSoup
# Summarise web-server accesses by geographic location.
#
# Input  "place.txt":   one "<access count> <ip address>" record per line.
# Output "ipPlace.txt": one "<total access count> <location>" record per
#                       distinct location returned by the lookup service.
#
# Every usable input record triggers one HTTP request to
# www.iplocationfinder.com, followed by a one-second pause.

# A dotted-quad address, optionally wrapped in double quotes; group 1 is the
# bare address.
r_ip = re.compile(r"\"?((\d{1,3}\.){3}\d{1,3})\"?")
# The location is whatever follows "whose location is" (up to the last full
# stop) in the "content" attribute of the lookup page's first <meta> tag.
r_content = re.compile(r".*whose location is (.*)\.")

# Maps location string -> summed access count over all addresses there.
placeDictionary = {}

with open("place.txt", "r") as placefile:
    for line in placefile:
        # Split on any run of whitespace so blank lines, leading padding and
        # repeated spaces do not break the two-field unpacking.
        fields = line.split(None, 1)
        if len(fields) != 2:
            continue
        numAccess, dirtyIp = fields

        searchIp = r_ip.search(dirtyIp)
        if not searchIp:
            continue
        cleanIp = searchIp.group(1)

        # Validate the count before spending a network round trip on the
        # record; a non-numeric count raises ValueError here.
        accessCount = int(numAccess)
        # To ignore low-traffic addresses, `continue` here when accessCount
        # is below a chosen threshold (e.g. 100).

        openUrl = urllib.urlopen("http://www.iplocationfinder.com/" + cleanIp)
        try:
            soup = BeautifulSoup(openUrl)
        finally:
            # Release the connection even if parsing the page fails.
            openUrl.close()
        # Throttle: pause after every request made to the lookup service.
        time.sleep(1)

        # The page may lack a <meta> tag or its "content" attribute; treat
        # that the same as "no location found" instead of crashing.
        metaTag = soup.find("meta")
        urlContent = metaTag.get("content") if metaTag is not None else None
        if not urlContent:
            continue

        searchContent = r_content.search(urlContent)
        if not searchContent:
            continue
        ipPlace = searchContent.group(1)

        # Progress output: "<count> <location>" for each resolved address.
        print("%s %s" % (numAccess, ipPlace))
        placeDictionary[ipPlace] = placeDictionary.get(ipPlace, 0) + accessCount

# NOTE(review): location strings come from the parsed page and may contain
# non-ASCII characters; confirm the default encoding is acceptable here.
with open("ipPlace.txt", "w+") as savefile:
    for key in placeDictionary:
        savefile.write(str(placeDictionary[key]) + " " + key + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment