Skip to content

Instantly share code, notes, and snippets.

@andrewharvey
Last active September 30, 2015 22:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andrewharvey/1876009 to your computer and use it in GitHub Desktop.
Save andrewharvey/1876009 to your computer and use it in GitHub Desktop.
Convert Geoscience Australia Gazetteer 2010 into the OSM XML format
#!/usr/bin/perl -w
# Info: This script converts the Geoscience Australia Gazetteer of Australia
# 2010 Release CSV file into an OSM file.
# Author: Andrew Harvey (http://andrewharvey4.wordpress.com/)
#
# To the extent possible under law, the person who associated CC0
# with this work has waived all copyright and related or neighboring
# rights to this work.
# http://creativecommons.org/publicdomain/zero/1.0/
use strict;
use Encode;
use Geo::Proj4;
use IO::File;
use Text::CSV;
use XML::Writer;
# Schema handling switch:
#   1 -> source schema is transformed into OSM schema
#   0 -> source schema is converted as is to .osm format
my $transform_schema = 1;

# Both the input CSV and the output OSM file name are required. Fail early
# with a usage message instead of stumbling over an undefined file name later.
die "Usage: $0 <gazetteer.csv> <output.osm>\n" unless @ARGV >= 2;
my $csvfile = $ARGV[0];
my $osmfile = $ARGV[1];

# empty_is_undef makes empty CSV fields come back as undef; the main loop
# relies on that to drop tags that have no value.
my $csv = Text::CSV->new({ binary => 1, empty_is_undef => 1 })
    or die "Cannot use CSV: " . Text::CSV->error_diag();

open my $csvfh, "<", $csvfile or die "$csvfile: $!";

# Pass the mode separately so characters in the file name are never
# interpreted as part of an open mode, and stop if the file cannot be created
# rather than handing an undefined handle to XML::Writer.
my $xmlout = IO::File->new($osmfile, "w") or die "$osmfile: $!";
my $xmlwriter = XML::Writer->new(OUTPUT => $xmlout, NEWLINES => 'true');
# Column names for the CSV file, listed in file order. They were taken from
# the User Guide; getline_hr() keys every row hash by these names.
$csv->column_names(qw(
    CSV_ID RECORD_ID AUTHORITY_ID STATE_ID NAME FEATURE_CODE STATUS POSTCODE
    CONCISE_CAZETTEER LONGITUDE LON_DEG LON_MIN LON_SEC LATITUDE LAT_DEG
    LAT_MIN LAT_SEC 100K_MAP CGDN CSV_ID2
));

# Fields that are carried over into the OSM file; every other column is
# discarded.
# NOTE(review): RECORD_ID is not listed here, so the RECORD_ID -> "ref"
# mapping in the conversion loop never fires -- confirm whether that is
# intended.
my @RETAIN_LIST = qw(
    AUTHORITY_ID STATE_ID NAME FEATURE_CODE STATUS POSTCODE
    CONCISE_CAZETTEER 100K_MAP CGDN
);
# Lookup tables used to expand the abbreviations found in the source data.

# AUTHORITY_ID code -> name of the naming authority.
my %AUTHORITY_LOOKUP = (
    'AAD' => 'Australian Antarctic Division',
    'ACT' => 'Australian Capital Territory',
    'AHO' => 'Australia Hydrographic Service',
    'GA'  => 'Geoscience Australia',
    'NSW' => 'New South Wales',
    'NT'  => 'Northern Territory',
    'QLD' => 'Queensland',
    'SA'  => 'South Australia',
    'TAS' => 'Tasmania',
    'VIC' => 'Victoria',
    'WA'  => 'Western Australia',
);

# STATE_ID code -> state or territory name.
# NOTE(review): this table is not referenced by the conversion loop in this
# file; STATE_ID values are written out unexpanded.
my %STATE_LOOKUP = (
    'ACT' => 'Australian Capital Territory',
    'NSW' => 'New South Wales',
    'NT'  => 'Northern Territory',
    'QLD' => 'Queensland',
    'SA'  => 'South Australia',
    'TAS' => 'Tasmania',
    'VIC' => 'Victoria',
    'WA'  => 'Western Australia',
    'JBT' => 'Jervis Bay Territory',
    'NFK' => 'Norfolk Island',
    'HRD' => 'Heard',
    'MCD' => 'McDonald Islands',
    'N/A' => '*Not applicable*',
    'AAT' => 'Australian Antarctic Territory',
);

# STATUS code -> description of the name's status.
my %STATUS_LOOKUP = (
    'H' => 'Historical name',
    'O' => 'Official status',
    'U' => 'Unofficial status',
);

# Y/N flag of the concise gazetteer column -> yes/no.
my %CONCISE_GAZ_LOOKUP = (
    'Y' => 'yes',
    'N' => 'no',
);

# Y/N flag of the CGDN column -> yes/no.
my %CGDN_LOOKUP = (
    'Y' => 'yes',
    'N' => 'no',
);
# Gazetteer FEATURE_CODE -> OSM tags.
# Each value is a flat list of key/value pairs (k1 v1 k2 v2 ...); the
# conversion loop shifts them off two at a time and writes one <tag> per pair,
# so every list must have an even number of elements.
my %FEATURE_LOOKUP = (
    "AF"   => [qw(aeroway aerodrome)],
    "ANCH" => [qw(seamark:type anchorage)],
    "ARCH" => [qw(place archipelago)],
    "BANK" => [qw(natural shoal)],
    "BATH" => [qw(natural reef)],
    "BAY"  => [qw(natural bay)],
    "BCH"  => [qw(natural beach)],
    "BCST" => [qw(man_made communications_tower)],
    "BEND" => [qw(waterway stream)],
    "BGHT" => [qw(place sea)],
    "BLDG" => [qw(building yes)],
    "BORE" => [qw(man_made water_well)],
    "BRDG" => [qw(bridge yes)],
    "BRK"  => [qw(natural breaker)],
    "BRKW" => [qw(natural groyne)],
    "CAPE" => [qw(natural cape)],
    "CAVE" => [qw(natural cave_entrance)],    # was misspelt "cave_entrace"
    "CEM"  => [qw(landuse cemetery)],
    "CHAN" => [qw(place channel)],
    "CLAY" => [qw(resource clay)],
    "CLIF" => [qw(natural cliff)],
    "CNAL" => [qw(waterway canal)],
    "CNTY" => [qw(place county)],
    "COMM" => [qw(amenity community_centre)],
    "CONT" => [qw(place continent)],
    "COVE" => [qw(natural water water cove)],
    "CP"   => [qw(tourism camp_site)],
    "CRTR" => [qw(natural crater)],
    "DAM"  => [qw(waterway dam)],
    "DEPR" => [qw(landuse basin)],
    "DI"   => [qw(place district)],
    "DOCK" => [qw(waterway dock)],
    "DRN"  => [qw(waterway drain)],
    "DSRT" => [qw(natural desert)],
    "DUNE" => [qw(natural dune)],
    "ENTR" => [qw(entrance yes)],
    "ESTY" => [qw(estuary yes)],
    "FARM" => [qw(place farm)],
    "FORD" => [qw(highway ford)],
    "FRNG" => [qw(sport shooting)],
    "FRST" => [qw(landuse forest)],
    "GASF" => [qw(man_made oil_well)],
    "GATE" => [qw(historic city_gate)],
    "GLCR" => [qw(natural glacier)],
    "GORG" => [qw(natural valley)],
    "GRDN" => [qw(leisure garden)],
    "GULF" => [qw(natural bay)],
    "GOLF" => [qw(leisure golf_course)],    # not in source schema definition but found in data
    "HBR"  => [qw(leisure marina)],
    "HILL" => [qw(natural peak)],
    "HMSD" => [qw(landuse homestead)],
    "HLPT" => [qw(aeroway helipad)],        # not in source schema definition but found in data
    "HWY"  => [qw(highway motorway)],
    # Intermittent lake: uses the same natural=water + water=lake pair as
    # LAKE (this entry previously said "waterway lake").
    "INTL" => [qw(natural water water lake intermittent yes)],
    "IS"   => [qw(place island)],
    "ISTH" => [qw(natural isthmus)],
    "LAGN" => [qw(natural water water lagoon)],
    "LAKE" => [qw(natural water water lake)],
    "LDGE" => [qw(natural cliff)],
    "LH"   => [qw(man_made lighthouse)],
    "LOCB" => [qw(place town)],
    "LOCK" => [qw(lock yes)],
    "LOCU" => [qw(place locality)],
    "MINE" => [qw(man_made adit)],
    "MONU" => [qw(historic monument)],
    "MT"   => [qw(natural peak)],
    "NAVB" => [qw(man_made beacon)],
    "OCEN" => [qw(place ocean)],
    "PASS" => [qw(mountain_pass yes)],
    "PEAK" => [qw(natural peak)],
    "PEN"  => [qw(place locality)],
    "PIER" => [qw(man_made pier)],
    "PIPE" => [qw(man_made pipeline)],      # not in source schema
    "PL"   => [qw(landform plateau)],
    "PLAN" => [qw(landuse plantation)],
    "PLCE" => [qw(place locality)],         # not in source schema definition but found in data
    "PLN"  => [qw(landform plain)],
    "PORT" => [qw(landuse port)],
    "PRSH" => [qw(place parish)],
    "PT"   => [qw(natural point)],
    "QUAR" => [qw(landuse quarry)],
    "RCH"  => [qw(waterway stream)],
    "RDGE" => [qw(natural ridge)],
    "REEF" => [qw(natural reef)],
    "RES"  => [qw(landuse reservoir)],
    "RESV" => [qw(leisure park)],           # was misspelt "leisue"
    "RH"   => [qw(natural rockhole)],
    "RLWY" => [qw(railway rail)],           # not in source schema
    "RNGE" => [qw(natural mountain_range)],
    "ROAD" => [qw(highway road)],
    "ROCK" => [qw(natural rock)],
    "RSTA" => [qw(railway station)],
    "RTRK" => [qw(highway raceway)],
    "RUIN" => [qw(historic ruins)],
    "SCHL" => [qw(amenity school)],
    "SEA"  => [qw(place sea)],
    "SHOL" => [qw(natural shoal)],
    "SITE" => [qw(historic yes)],
    "SLP"  => [qw(landform slope)],
    "SND"  => [qw(natural bay)],
    "SOAK" => [qw(man_made water_well)],
    "SPAN" => [qw(natural desert surface saltpan)],
    "SPIT" => [qw(natural shoal)],
    "SPRG" => [qw(natural spring)],
    "STAT" => [qw(place state)],
    "STOK" => [qw(highway track)],
    "STR"  => [qw(natural strait)],
    "STRM" => [qw(waterway stream)],
    "SUB"  => [qw(place suburb)],
    "SWP"  => [qw(natural wetland)],
    "TANK" => [qw(man_made storage_tank)],
    "TOWR" => [qw(man_made tower)],
    "TREE" => [qw(natural tree)],
    "TRIG" => [qw(man_made survey_point)],
    "TRK"  => [qw(highway path)],
    "TUNN" => [qw(tunnel yes)],
    "URBN" => [qw(place city)],
    "VAL"  => [qw(natural valley)],
    "WRCK" => [qw(historic wreck)],
    "WRFL" => [qw(waterway waterfall)],
    "WTRH" => [qw(natural water)],
    "YD"   => [qw(place yard)]
);
# Main conversion: stream the CSV and write an <osm> document containing one
# <node> for every record that has both a longitude and a latitude. Each node
# gets an "attribution" tag plus one tag per retained, non-empty CSV field,
# renamed to OSM keys when $transform_schema is set.
$xmlwriter->xmlDecl("UTF-8");
$xmlwriter->startTag("osm", "version" => "0.6");

# keep a node count for use as the osm node id
my $node_id = 0;

# Writes a single <tag k="..." v="..."/> element, or reports on STDERR when
# there is no value to write.
my $emit_tag = sub {
    my ($k, $v) = @_;
    if (defined $v) {
        $xmlwriter->emptyTag('tag', 'k' => encode('utf8', $k), 'v' => encode('utf8', $v));
    }
    else {
        print STDERR "Value is not defined for key $k\n";
    }
    return;
};

# read through the input CSV file line by line
ROW:
while (my $row_hash = $csv->getline_hr($csvfh)) {
    # Convert the source GDA94 LL coordinates to WGS84 LL as required by the .OSM format.
    # ...however if you check up on GDA94 http://www.icsm.gov.au/gda/wgs84fact.pdf
    # it is actually the same datum (well at least for my purposes they are)
    # so the values you get back will be the same as the source ones.
    # The transformation is therefore left disabled:
    #my $wgs84_ll = Geo::Proj4->new("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs ");
    #my $gda94_ll = Geo::Proj4->new("+proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs ");
    #my $lonlat = $wgs84_ll->transform($gda94_ll, [ $row_hash->{'LONGITUDE'}, $row_hash->{'LATITUDE'} ]);
    #my $lon = $lonlat->[0];
    #my $lat = $lonlat->[1];
    my $lon = $row_hash->{'LONGITUDE'};
    my $lat = $row_hash->{'LATITUDE'};

    # A record without coordinates cannot become a node. Skip it completely so
    # that no </node> is written for a <node> that was never opened (which
    # makes XML::Writer abort the whole run).
    unless (defined $lon && defined $lat) {
        print STDERR "Skipping record without coordinates\n";
        next ROW;
    }

    $node_id++;
    $xmlwriter->startTag('node', 'id' => $node_id, 'lat' => $lat, 'lon' => $lon, 'visible' => 'true', 'version' => '1');
    $xmlwriter->emptyTag('tag', 'k' => encode('utf8', 'attribution'), 'v' => encode('utf8', 'Commonwealth of Australia (Geoscience Australia)'));

    # Walk the retained fields in list order so the tag order is the same on
    # every run (hash key order is not).
    FIELD:
    for my $field (@RETAIN_LIST) {
        my $raw = $row_hash->{$field};

        # value isn't defined then drop this tag for this object
        next FIELD unless defined $raw;

        # $key/$value are what finally gets written. $key starts as a copy of
        # the field name (so @RETAIN_LIST itself is never modified) and is set
        # to undef when the field must not produce a tag.
        my $key   = $field;
        my $value = $raw;

        if ($field eq "RECORD_ID") {
            $key = "ref" if $transform_schema;
        }
        elsif ($field eq "AUTHORITY_ID") {
            # replace values with those from the hash tables defined earlier
            $key   = "attribution:name" if $transform_schema;
            $value = $AUTHORITY_LOOKUP{$raw};
        }
        elsif ($field eq "STATE_ID") {
            if ($transform_schema) {
                $key = ($raw eq "N/A") ? undef : 'is_in:state';
            }
        }
        elsif ($field eq "NAME") {
            if ($transform_schema) {
                # STATUS may be empty (undef); treat that like any other
                # status that is neither historical nor unofficial.
                my $status = defined $row_hash->{'STATUS'} ? $row_hash->{'STATUS'} : '';
                if ($status eq "H") {
                    $key = 'old_name';
                }
                elsif ($status eq "U") {
                    $key = 'loc_name';
                }
                else {
                    $key = 'name';
                }
            }
        }
        elsif ($field eq "FEATURE_CODE") {
            if (exists $FEATURE_LOOKUP{$raw}) {
                # The lookup holds a flat k1 v1 k2 v2 ... list; write one tag
                # per pair. The code itself is not written.
                my @keyvalues = @{ $FEATURE_LOOKUP{$raw} };
                while (scalar @keyvalues > 0) {
                    my $tag_key   = shift @keyvalues;
                    my $tag_value = shift @keyvalues;
                    $emit_tag->($tag_key, $tag_value);
                }
                $key = undef;
            }
            else {
                # Unknown code: report it and fall through so the raw code is
                # written as a feature_code tag.
                print STDERR "$raw not found in FEATURE_LOOKUP\n";
            }
        }
        elsif ($field eq "STATUS") {
            $value = $STATUS_LOOKUP{$raw};
            # don't include when using OSM schema
            $key = undef if $transform_schema;
        }
        elsif ($field eq "VARIANT_NAME") {
            $key = 'alt_name' if $transform_schema;
        }
        elsif ($field eq "POSTCODE") {
            if ($transform_schema) {
                # 9999 = No assigned postcode
                $key = ($raw eq "9999") ? undef : 'is_in:postcode';
            }
        }
        elsif ($field eq "CONCISE_CAZETTEER") {
            # don't include when using OSM schema
            $key   = undef if $transform_schema;
            $value = $CONCISE_GAZ_LOOKUP{$raw};
        }
        elsif ($field eq "100K_MAP") {
            # don't include when using OSM schema
            $key = undef if $transform_schema;
        }
        elsif ($field eq "CGDN") {
            # don't include when using OSM schema
            $key   = undef if $transform_schema;
            $value = $CGDN_LOOKUP{$raw};
        }

        next FIELD unless defined $key;

        # if the hash didn't contain the value from the CSV file, add the
        # original back in. This uses the raw field value directly: looking it
        # up again under $key fails once $key has been renamed.
        $value = $raw unless defined $value;

        # use lower case keys
        $emit_tag->(lc $key, $value);
    }

    $xmlwriter->endTag('node');
}

# getline_hr() returns false at end of file and also on a parse error; tell
# the two apart so a truncated conversion does not go unnoticed.
if (!$csv->eof) {
    my ($err_code, $err_text) = $csv->error_diag();
    print STDERR "CSV parse error $err_code ($err_text) in $csvfile; output is incomplete\n";
}
close $csvfh;

#finish the XML file and exit
$xmlwriter->endTag("osm");
$xmlwriter->end();
# Buffered write errors only surface when the handle is closed.
$xmlout->close() or die "Cannot close output file: $!";
<!--
  OGR virtual data source describing the gazetteer text file as a point layer.
  The Makefile's convert target copies this file next to Gazetteer2010_txt.txt
  and passes it to ogr2ogr. Point geometry is built from the LONGITUDE (x) and
  LATITUDE (y) columns, and the layer's spatial reference is declared as GDA94.
-->
<OGRVRTDataSource>
<OGRVRTLayer name="Gazetteer2010_txt">
<SrcDataSource>Gazetteer2010_txt.txt</SrcDataSource>
<GeometryType>wkbPoint</GeometryType>
<LayerSRS>GDA94</LayerSRS>
<GeometryField encoding="PointFromColumns" x="LONGITUDE" y="LATITUDE"/>
</OGRVRTLayer>
</OGRVRTDataSource>
# Author: Andrew Harvey (http://andrewharvey4.wordpress.com/)
#
# To the extent possible under law, the person who associated CC0
# with this work has waived all copyright and related or neighboring
# rights to this work.
# http://creativecommons.org/publicdomain/zero/1.0/
# Base name shared by the downloaded archive (${FILE}.zip), the directory it
# is unpacked into (${FILE}/) and the generated OSM file (${FILE}.osm).
FILE=Gazetteer_ASCII

# None of these targets creates a file named after itself, so they must always
# run even if a file called e.g. "convert" happens to exist.
.PHONY : all clean download convert

all : clean download convert

# Remove everything that download and convert create, including the shapefile
# (ogr2ogr will not overwrite an existing one on the next run).
clean :
	rm -fr ${FILE}
	rm -f ${FILE}.zip*
	rm -f ${FILE}.osm
	rm -f Gazetteer2010.shp Gazetteer2010.shx Gazetteer2010.dbf Gazetteer2010.prj

# Fetch the gazetteer archive and unpack it into ${FILE}/.
download :
	wget -O ${FILE}.zip 'http://www-a.ga.gov.au/products/servlet/controller?event=DOWNLOAD_FILE&dn=1168290&id=29822650'
	unzip ${FILE}.zip -d ${FILE}

# Produce a shapefile (via the VRT descriptor) and the .osm file.
# The ogr2ogr format name for shapefiles is "ESRI Shapefile", not "SHP".
convert :
	cp Gazetteer2010_txt.vrt ${FILE}/
	ogr2ogr -f "ESRI Shapefile" Gazetteer2010.shp ${FILE}/Gazetteer2010_txt.vrt
	./ga_gaz_2010_to_osm.pl ${FILE}/Gazetteer2010_txt.txt ${FILE}.osm
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment