Skip to content

Instantly share code, notes, and snippets.

@nfisher
Created September 13, 2013 18:52
Show Gist options
  • Save nfisher/6554585 to your computer and use it in GitHub Desktop.
Save nfisher/6554585 to your computer and use it in GitHub Desktop.
Look up request IPs and categorise them into a per-country distribution using the MaxMind DB.
#!/usr/bin/env python
# Usage:
#
# ip_locations.py LOCATIONS BLOCKS RESERVOIR
#
# Dependencies:
#
# - reservoir sample set with 'ips' column.
# - MaxMind IP and Location CSV.
# - python 2.6+ - pandas, numpy, scipy, matplotlib
#
import numbers
import socket
import sys

import matplotlib.pyplot as plt
import pandas as pd
# Input files are taken positionally from the command line
# (see the usage note at the top of the file).
maxmind_locations_filename = sys.argv[1]  # e.g. 'GeoIPCity-134-Location.csv'
maxmind_blocks_filename = sys.argv[2]  # e.g. 'GeoIPCity-134-Blocks.csv'
reservoir_filename = sys.argv[3]  # e.g. 'reservoir_sample.txt'

reservoir = pd.read_csv(reservoir_filename)
# skiprows=1 discards the first line of each MaxMind CSV so that the second
# line is used as the header (presumably the first line is a banner -- verify
# against the actual files).
blocks = pd.read_csv(maxmind_blocks_filename, skiprows=1)
locations = pd.read_csv(maxmind_locations_filename, skiprows=1)
# Attach the location columns to every IP block through the shared 'locId' key.
location_blocks = pd.merge(blocks, locations, on='locId')


def _ip_to_int(ip):
    """Convert a dotted-quad IPv4 string into its unsigned integer value."""
    value = 0
    # bytearray yields integer octets on both Python 2 and Python 3, whereas
    # the previous ``.encode('hex')`` trick only exists on Python 2.
    for octet in bytearray(socket.inet_aton(ip)):
        value = (value << 8) | octet
    return value


# Integer form of each sampled address, comparable with the
# startIpNum/endIpNum block bounds used by the country lookup.
reservoir['ipAsInt'] = reservoir['ips'].apply(_ip_to_int)
def country_by_ip(ip, blocks=None):
    """Return the country of the MaxMind block that contains *ip*.

    Args:
        ip: IPv4 address as an integer. Any integral type is accepted, i.e.
            plain ``int`` as well as NumPy integers read out of a DataFrame.
        blocks: Optional DataFrame with ``startIpNum``, ``endIpNum`` and
            ``country`` columns. Defaults to the module-level
            ``location_blocks`` table.

    Returns:
        The ``country`` value of the single block whose inclusive
        ``[startIpNum, endIpNum]`` range contains *ip*, or ``'unknown'`` when
        *ip* is not an integer or does not match exactly one block.
    """
    # numbers.Integral also covers NumPy integer scalars, which are not
    # subclasses of the built-in int on Python 3.
    if not isinstance(ip, numbers.Integral):
        return 'unknown'
    if blocks is None:
        blocks = location_blocks
    in_range = (blocks['startIpNum'] <= ip) & (blocks['endIpNum'] >= ip)
    matches = blocks[in_range]
    # Zero matches or several overlapping matches are both treated as unknown.
    if len(matches) != 1:
        return 'unknown'
    # .iloc replaces the removed Series/DataFrame.irow accessor.
    return matches.iloc[0].get('country', 'unknown')
# Tag every sampled request with the country returned by country_by_ip for
# its integer address.
reservoir['country'] = reservoir['ipAsInt'].apply(country_by_ip)

# Number of sampled requests per country.
s = reservoir.groupby('country').size()
# Series.sort() and Series.order() were removed from pandas; sort_values()
# returns a new, sorted Series instead of sorting in place. Ascending order
# places the largest count at the top of the horizontal bar chart.
s = s.sort_values()
s.plot(kind='barh', figsize=(10, 12))
plt.savefig('regional-distrib.png')
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(s.sort_values(ascending=False))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment