Skip to content

Instantly share code, notes, and snippets.

@ogarrett
Created December 23, 2015 15:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ogarrett/1a0473f40b6dddbfbf55 to your computer and use it in GitHub Desktop.
Save ogarrett/1a0473f40b6dddbfbf55 to your computer and use it in GitHub Desktop.
Convert GeoIPv2 to GeoIPv1
#!/usr/bin/perl -w
use strict;
use warnings FATAL => 'all';
use Text::CSV;
# ./convert.pl GeoLite2-City-CSV_20151201/GeoLite2-City-Locations-en.csv GeoLite2-City-CSV_20151201/GeoLite2-City-Blocks-IPv4.csv
my ( $locfile, $blocksfile ) = @ARGV;
my @l = ();
open LOC, $locfile or die "Can't open locations file '$locfile': $_";
# geoname_id,locale_code,continent_code,continent_name,country_iso_code,country_name,subdivision_1_iso_code,subdivision_1_name,subdivision_2_iso_code,subdivision_2_name,city_name,metro_code,time_zone
# 107290,en,NA,"North America",US,"United States",NY,"New York",,,Arcade,514,America/New_York
# 5107311,en,NA,"North America",US,"United States",NY,"New York",,,Ardsley,501,America/New_York
# 5107322,en,NA,"North America",US,"United States",NY,"New York",,,Argyle,532,America/New_York
# 5107329,en,NA,"North America",US,"United States",NY,"New York",,,Arkport,538,America/New_York
# 5142090,en,NA,"North America",US,"United States",NY,"New York",,,Valhalla,501,America/New_York
my $header = <LOC>; # skip first line
while (<LOC>) {
chomp;
my( $id, $data ) = split /,/, $_, 2;
$l[$id] = $data;
}
close LOC;
my %b;
open BLOCKS, $blocksfile or die "Can't open blocks file '$blocksfile': $_";
# network,geoname_id,registered_country_geoname_id,represented_country_geoname_id,is_anonymous_proxy,is_satellite_provider,postal_code,latitude,longitude
# 1.0.118.0/23,1862415,1861060,,0,0,730-0011,34.3963,132.4594
# 2.32.122.0/24,3171152,3175395,,0,0,37019,45.4414,10.6844
# 2.32.123.0/24,3175395,3175395,,0,0,,42.8333,12.8333
# 216.221.144.0/24,5128581,6252001,,0,0,10022,40.7588,-73.9680
my $csv = Text::CSV->new ({ sep_char => ',', quote_char => '"' , binary => 1});
$header = <BLOCKS>; # skip first line
while (<BLOCKS>) {
chomp;
$csv->parse( $_ ) or die "Can't parse block line '$_'";
my( $network, $geoname_id, $registered_country_geoname_id, $represented_country_geoname_id, $is_anonymous_proxy, $is_satellite_provider, $postal_code, $latitude, $longitude) = $csv->fields();
# construct the GEOIPv1 startipnum and endipnum from $network
my( $ip, $cidr ) = split '/', $network;
my $addr = pack "C4", split /\./, $ip;
my $nmsk = pack 'N', (0xffffffff << ( 32-$cidr ));
my $startipnum = unpack 'N', $addr & $nmsk;
my $endipnum = unpack 'N', $addr | ~$nmsk;
# construct the corresponding GeoIP1 location for this IP range
# http://dev.maxmind.com/geoip/legacy/csv/
# country,region,city,postalCode,latitude,longitude,metroCode,areaCode
my $id = $geoname_id || $registered_country_geoname_id;
next if !$id; # A very small number of entries have no geoname id - skip them
$csv->parse( $l[$id] ) or die "Can't parse location '$l[$id]'";
my @data = $csv->fields();
my $location = join ',',
'"' . substr( $data[3], 0, 2 ) . '"', # country (2-character country code)
'"' . $data[5] . '"', # region (subdivision_1_iso_code)
'"' . "$data[5]:$data[6]:$data[7]:$data[8]:$data[9]" . '"', # city (subdivision_1_iso_code:subdivision_1_name:subdivision_2_iso_code:subdivision_2_name:city_name)
'"' . $postal_code . '"', # postalCode
$latitude, # latitude
$longitude, # longitude
$data[10], # metroCode
''; # areaCode
# We're going to prefix the block with an easy-to-sort version of the startipnum using %012d
my $block = sprintf "%012d:\"%s\",\"%s\"", $startipnum, $startipnum, $endipnum;
# $b{$location} will contain a tab-separated list of blocks at that location
if( $b{$location} ) {
$b{$location} .= "\t$block";
#print "FYI: found additional IP range $block at location $location\n";
} else {
$b{$location} = $block;
}
}
close BLOCKS;
# now write the GeoIPv1 data as a CSV
my $v1locfile = "GeoCity-Location.csv";
my $v1blocksfile = "GeoCity-Blocks.csv";
open LOCATIONS, ">$v1locfile" or die "Can't write to $v1locfile: $!";
binmode(LOCATIONS, ":utf8");
print LOCATIONS <<EOM;
Copyright (c) 2012 MaxMind LLC. All Rights Reserved.
locId,country,region,city,postalCode,latitude,longitude,metroCode,areaCode
EOM
open BLOCKS, ">$v1blocksfile" or die "Can't write to $v1blocksfile: $!";
binmode(BLOCKS, ":utf8");
print BLOCKS <<EOM;
Copyright (c) 2011 MaxMind Inc. All Rights Reserved.
startIpNum,endIpNum,locId
EOM
my @blocks = (); # accumulate the blocks list and sort it before writing
my $i = 1;
foreach my $k ( sort keys %b ) {
print LOCATIONS "$i,$k\n";
foreach my $block ( split /\t/, $b{$k} ) {
push @blocks, "$block,\"$i\"";
}
$i++;
}
close LOCATIONS;
# sort blocks and strip the prefix
foreach my $block ( sort @blocks ) {
my( $a, $b ) = split /:/, $block, 2;
print BLOCKS "$b\n";
}
close BLOCKS;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment