Created
November 17, 2017 23:37
-
-
Save chrisdlangton/a10979c7e30f227a9f396dc4e4edc6c3 to your computer and use it in GitHub Desktop.
Python script to enrich data about IP hosts from csv file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#########################################
# pip install pandas ipwhois requests netaddr
# usage:
#   ./collect-ip-info.py
# Place a CSV file in the same directory and set the file_in variable to its name.
# The first CSV row is a header, followed by a single IP address per row.
# Make sure you put your own Google Maps API key below.
import csv | |
import requests | |
import pandas as pd | |
from datetime import datetime | |
from ipwhois import IPWhois | |
from ipwhois.utils import get_countries | |
# Key sent as the `key` query parameter on every maps.googleapis.com request.
API_KEY = ""  # your maps.googleapis.com API key here

# Input CSV (header row, then one IP address per row) and the enriched output CSV.
file_in = "./internet-connected.csv"
file_out = "./enriched-ip-list.csv"
def get_coords(ip, timeout=10):
    """Fetch geolocation data for an IP address from freegeoip.net.

    Args:
        ip: Value interpolated into the request path with ``%s``; expected
            to be an IP address string.
        timeout: Seconds to wait for the server before giving up. Without
            it ``requests`` waits indefinitely, so one stalled connection
            would hang the whole run.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout.
        ValueError: If the response body is not valid JSON.
    """
    url = "https://freegeoip.net/json/%s" % ip
    response = requests.get(url, timeout=timeout)
    return response.json()
def get_geo(lat, lon, timeout=10):
    """Query the Google Places "nearby search" endpoint for a coordinate.

    The request uses the module-level ``API_KEY`` and a hard-coded
    ``radius=1`` around ``lat,lon``.

    Args:
        lat: Latitude placed first in the ``location`` query parameter.
        lon: Longitude placed second in the ``location`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without
            it ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout.
        ValueError: If the response body is not valid JSON.
    """
    url = (
        "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
        "?key={}&radius=1&location={},{}"
    ).format(API_KEY, lat, lon)
    response = requests.get(url, timeout=timeout)
    return response.json()
def get_place(id, timeout=10):
    """Fetch the Google Places "details" record for a place id.

    Args:
        id: Place identifier sent as the ``placeid`` query parameter. The
            name shadows the ``id`` builtin but is kept so existing
            keyword callers continue to work.
        timeout: Seconds to wait for the server before giving up. Without
            it ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout.
        ValueError: If the response body is not valid JSON.
    """
    url = (
        "https://maps.googleapis.com/maps/api/place/details/json"
        "?key={}&placeid={}"
    ).format(API_KEY, id)
    response = requests.get(url, timeout=timeout)
    return response.json()
def whois(ip):
    """Summarise the RDAP registration record for an IP address.

    Performs ``IPWhois(ip).lookup_rdap`` with ``depth=1`` and the ASN
    methods ``dns``, ``whois`` and ``http``, then picks out the fields the
    script reports.

    Args:
        ip: Address handed to ``IPWhois``.

    Returns:
        A 6-tuple ``(country, type, name, description, registry, entities)``:
        ``country`` is the name looked up from the ASN country code (or the
        raw code, possibly ``None``, when the lookup table has no entry);
        ``type`` and ``name`` come from the ``network`` section;
        ``description`` and ``registry`` are the ASN description and
        registry; ``entities`` is the entity list joined with ``", "``
        (empty string when there are none).
    """
    countries = get_countries()
    result = IPWhois(ip).lookup_rdap(
        depth=1, asn_methods=['dns', 'whois', 'http'])

    # An unknown or missing country code must not abort the whole run with
    # a KeyError; report the raw code instead.
    country_code = result.get('asn_country_code')
    country = countries.get(country_code, country_code)

    network = result.get('network') or {}
    net_type = network.get('type')  # not named `type`: avoids shadowing the builtin
    name = network.get('name')

    description = result.get('asn_description')
    registry = result.get('asn_registry')

    # `entities` can be absent/None, which str.join cannot iterate.
    entities = ', '.join(result.get('entities') or [])

    return country, net_type, name, description, registry, entities
def seconds_to_gmt_offset_str(secs):
    """Format an integer offset as a signed, zero-padded ``+HH:MM`` string.

    The absolute value is split into its quotient and remainder on division
    by 60; the quotient fills the first field and the remainder the second.

    NOTE(review): despite the function and parameter names, that arithmetic
    treats the input as a count of minutes (600 -> "+10:00"), not seconds.
    Confirm the unit callers actually pass.

    Args:
        secs: Integer offset; negative values produce a ``-`` prefix.

    Returns:
        The formatted string, or ``None`` when ``secs`` is not an ``int``.
    """
    if not isinstance(secs, int):
        return None
    sign = '-' if secs < 0 else '+'
    whole, remainder = divmod(abs(secs), 60)
    return "%s%02d:%02d" % (sign, whole, remainder)
from netaddr import IPAddress

# Explicit column order for the output file, so it does not depend on dict
# ordering of the interpreter in use.
OUTPUT_COLUMNS = [
    'ipaddr', 'reversedns', 'type', 'name', 'description', 'registry',
    'entities', 'country', 'city', 'region', 'address', 'zip', 'placeid',
    'url', 'lat', 'lon', 'tz', 'utc', 'gmt', 'checked',
]


def _enrich_ip(ip):
    """Collect whois, geolocation and nearest-place data for one IP.

    Args:
        ip: IP address string taken from the input CSV.

    Returns:
        A dict with one value per entry in ``OUTPUT_COLUMNS``.
    """
    checked_at = datetime.now()
    country, net_type, name, description, registry, entities = whois(ip)

    coords = get_coords(ip)
    lat = float(coords['latitude'])
    lon = float(coords['longitude'])

    placeid = placeurl = address = offset = gmt = None
    # A response without a `results` list is treated as "nothing found"
    # rather than crashing the run.
    results = get_geo(lat, lon).get('results') or []
    if results:
        placeid = results[0]['place_id']
        place = get_place(placeid).get('result') or {}
        address = place.get('formatted_address')
        placeurl = place.get('url')
        if 'utc_offset' in place:
            offset = int(place['utc_offset'])
            # Formatted whenever an offset is present: an offset of 0 is a
            # real value and must not be dropped by a truthiness test.
            gmt = seconds_to_gmt_offset_str(offset)

    return {
        'ipaddr': ip,
        'reversedns': IPAddress(ip).reverse_dns,
        'type': net_type,
        'name': name,
        'description': description,
        'registry': registry,
        'entities': entities,
        'country': country,
        'city': coords['city'],
        'region': coords['region_name'],
        'address': address,
        'zip': coords['zip_code'],
        'placeid': placeid,
        'url': placeurl,
        'lat': lat,
        'lon': lon,
        'tz': coords['time_zone'],
        'utc': offset,
        'gmt': gmt,
        'checked': checked_at.isoformat(),
    }


rows = []
with open(file_in) as csv_file:
    fileReader = csv.reader(csv_file, delimiter=",")
    # First row is the header; next(..., None) works on Python 2 and 3 and
    # tolerates an empty file.
    header = next(fileReader, None)
    for row in fileReader:
        # csv.reader yields a list per row; the IP is its first cell.
        # Blank lines are skipped.
        if not row or not row[0].strip():
            continue
        rows.append(_enrich_ip(row[0].strip()))

# Build the frame from every collected row and write the file once, so the
# output holds all IPs instead of only the last one processed.
df = pd.DataFrame(rows, columns=OUTPUT_COLUMNS)
c = {
    'quotechar': '"',
    'quoting': csv.QUOTE_MINIMAL,
    # NOTE(review): with doublequote disabled and no escapechar set, the csv
    # writer raises an error if a field contains the quote character.
    'doublequote': False,
    'index': False,
    'sep': ',',
}
df.to_csv(file_out, **c)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment