Last active
September 30, 2015 22:57
-
-
Save andrewharvey/1876009 to your computer and use it in GitHub Desktop.
Convert Geoscience Australia Gazetteer 2010 into the OSM XML format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
# Info: This script converts the Geoscience Australia Gazetteer of Australia | |
# 2010 Release CSV file into an OSM file. | |
# Author: Andrew Harvey (http://andrewharvey4.wordpress.com/) | |
# | |
# To the extent possible under law, the person who associated CC0 | |
# with this work has waived all copyright and related or neighboring | |
# rights to this work. | |
# http://creativecommons.org/publicdomain/zero/1.0/ | |
use strict; | |
use Text::CSV; | |
use XML::Writer; | |
use Encode; | |
use Geo::Proj4; | |
# Schema mode for the generated tags:
#   1 -> source schema is transformed into OSM schema
#   0 -> source schema is converted as is to .osm format
my $transform_schema = 1;

# IO::File provides the output handle below; load it explicitly instead of
# relying on another module having pulled it in.
use IO::File;

# Usage: <script> INPUT.csv OUTPUT.osm
@ARGV >= 2
    or die "Usage: $0 <gazetteer-csv-file> <output-osm-file>\n";
my $csvfile = $ARGV[0];

# empty_is_undef => 1 makes empty CSV fields come back as undef, which the
# rest of the script relies on to drop missing values.
my $csv = Text::CSV->new({ binary => 1, empty_is_undef => 1 })
    or die "Cannot use CSV: " . Text::CSV->error_diag();
open my $csvfh, "<", $csvfile or die "$csvfile: $!";

# Pass the mode as a separate argument (rather than ">$path") so that odd
# characters in the file name cannot be interpreted as part of the open mode,
# and fail loudly if the output file cannot be created.
my $xmlout = IO::File->new($ARGV[1], ">")
    or die "$ARGV[1]: $!";
my $xmlwriter = XML::Writer->new(OUTPUT => $xmlout, NEWLINES => 'true');
# Column names for the CSV input, in file order (taken from the User Guide).
# NOTE: "CONCISE_CAZETTEER" is the spelling this script uses as a hash key
# when it processes that column, so it must stay in sync with that code.
$csv->column_names(qw(
    CSV_ID RECORD_ID AUTHORITY_ID STATE_ID NAME FEATURE_CODE STATUS POSTCODE
    CONCISE_CAZETTEER LONGITUDE LON_DEG LON_MIN LON_SEC LATITUDE LAT_DEG
    LAT_MIN LAT_SEC 100K_MAP CGDN CSV_ID2
));

# Only these columns are considered for output tags; every other column is
# discarded.
my @RETAIN_LIST = qw(
    AUTHORITY_ID STATE_ID NAME FEATURE_CODE STATUS POSTCODE
    CONCISE_CAZETTEER 100K_MAP CGDN
);
# We use these hash tables to expand abbreviations.

# AUTHORITY_ID code -> full name of the naming authority. The expanded name is
# what gets written as the tag value for the AUTHORITY_ID column.
my %AUTHORITY_LOOKUP = (
    "AAD" => "Australian Antarctic Division",
    "ACT" => "Australian Capital Territory",
    "AHO" => "Australian Hydrographic Service",   # was misspelt "Australia ..."
    "GA"  => "Geoscience Australia",
    "NSW" => "New South Wales",
    "NT"  => "Northern Territory",
    "QLD" => "Queensland",
    "SA"  => "South Australia",
    "TAS" => "Tasmania",
    "VIC" => "Victoria",
    "WA"  => "Western Australia",
);
# STATE_ID code -> full state/territory name.
# NOTE(review): nothing in the visible script reads this table; the STATE_ID
# value is currently written out unexpanded.
my %STATE_LOOKUP = (
    ACT   => 'Australian Capital Territory',
    NSW   => 'New South Wales',
    NT    => 'Northern Territory',
    QLD   => 'Queensland',
    SA    => 'South Australia',
    TAS   => 'Tasmania',
    VIC   => 'Victoria',
    WA    => 'Western Australia',
    JBT   => 'Jervis Bay Territory',
    NFK   => 'Norfolk Island',
    HRD   => 'Heard',
    MCD   => 'McDonald Islands',
    'N/A' => '*Not applicable*',
    AAT   => 'Australian Antarctic Territory',
);
# STATUS code -> human readable description of the name's status.
my %STATUS_LOOKUP = (
    H => 'Historical name',
    O => 'Official status',
    U => 'Unofficial status',
);

# Y/N flag expansion for the CONCISE_CAZETTEER column.
my %CONCISE_GAZ_LOOKUP = (Y => 'yes', N => 'no');

# Y/N flag expansion for the CGDN column.
my %CGDN_LOOKUP = (Y => 'yes', N => 'no');
# FEATURE_CODE -> OSM tags for that kind of feature.
# Each value is a flat list of alternating tag keys and tag values
# (k1 v1 k2 v2 ...); the conversion loop shifts them off in pairs and writes
# one <tag> per pair, so every list must have an even number of words.
my %FEATURE_LOOKUP = (
    "AF"   => [qw(aeroway aerodrome)],
    "ANCH" => [qw(seamark:type anchorage)],
    "ARCH" => [qw(place archipelago)],
    "BANK" => [qw(natural shoal)],
    "BATH" => [qw(natural reef)],
    "BAY"  => [qw(natural bay)],
    "BCH"  => [qw(natural beach)],
    "BCST" => [qw(man_made communications_tower)],
    "BEND" => [qw(waterway stream)],
    "BGHT" => [qw(place sea)],
    "BLDG" => [qw(building yes)],
    "BORE" => [qw(man_made water_well)],
    "BRDG" => [qw(bridge yes)],
    "BRK"  => [qw(natural breaker)],
    "BRKW" => [qw(natural groyne)],
    "CAPE" => [qw(natural cape)],
    "CAVE" => [qw(natural cave_entrance)],   # fixed typo: was "cave_entrace"
    "CEM"  => [qw(landuse cemetery)],
    "CHAN" => [qw(place channel)],
    "CLAY" => [qw(resource clay)],
    "CLIF" => [qw(natural cliff)],
    "CNAL" => [qw(waterway canal)],
    "CNTY" => [qw(place county)],
    "COMM" => [qw(amenity community_centre)],
    "CONT" => [qw(place continent)],
    "COVE" => [qw(natural water water cove)],
    "CP"   => [qw(tourism camp_site)],
    "CRTR" => [qw(natural crater)],
    "DAM"  => [qw(waterway dam)],
    "DEPR" => [qw(landuse basin)],
    "DI"   => [qw(place district)],
    "DOCK" => [qw(waterway dock)],
    "DRN"  => [qw(waterway drain)],
    "DSRT" => [qw(natural desert)],
    "DUNE" => [qw(natural dune)],
    "ENTR" => [qw(entrance yes)],
    "ESTY" => [qw(estuary yes)],
    "FARM" => [qw(place farm)],
    "FORD" => [qw(highway ford)],
    "FRNG" => [qw(sport shooting)],
    "FRST" => [qw(landuse forest)],
    "GASF" => [qw(man_made oil_well)],
    "GATE" => [qw(historic city_gate)],
    "GLCR" => [qw(natural glacier)],
    "GORG" => [qw(natural valley)],
    "GRDN" => [qw(leisure garden)],
    "GULF" => [qw(natural bay)],
    "GOLF" => [qw(leisure golf_course)], # not in source schema definition but found in data
    "HBR"  => [qw(leisure marina)],
    "HILL" => [qw(natural peak)],
    "HMSD" => [qw(landuse homestead)],
    "HLPT" => [qw(aeroway helipad)], # not in source schema definition but found in data
    "HWY"  => [qw(highway motorway)],
    "INTL" => [qw(natural water waterway lake intermittent yes)],
    "IS"   => [qw(place island)],
    "ISTH" => [qw(natural isthmus)],
    "LAGN" => [qw(natural water water lagoon)],
    "LAKE" => [qw(natural water water lake)],
    "LDGE" => [qw(natural cliff)],
    "LH"   => [qw(man_made lighthouse)],
    "LOCB" => [qw(place town)],
    "LOCK" => [qw(lock yes)],
    "LOCU" => [qw(place locality)],
    "MINE" => [qw(man_made adit)],
    "MONU" => [qw(historic monument)],
    "MT"   => [qw(natural peak)],
    "NAVB" => [qw(man_made beacon)],
    "OCEN" => [qw(place ocean)],
    "PASS" => [qw(mountain_pass yes)],
    "PEAK" => [qw(natural peak)],
    "PEN"  => [qw(place locality)],
    "PIER" => [qw(man_made pier)],
    "PIPE" => [qw(man_made pipeline)], # not in source schema
    "PL"   => [qw(landform plateau)],
    "PLAN" => [qw(landuse plantation)],
    "PLCE" => [qw(place locality)], # not in source schema definition but found in data
    "PLN"  => [qw(landform plain)],
    "PORT" => [qw(landuse port)],
    "PRSH" => [qw(place parish)],
    "PT"   => [qw(natural point)],
    "QUAR" => [qw(landuse quarry)],
    "RCH"  => [qw(waterway stream)],
    "RDGE" => [qw(natural ridge)],
    "REEF" => [qw(natural reef)],
    "RES"  => [qw(landuse reservoir)],
    "RESV" => [qw(leisure park)],            # fixed typo: key was "leisue"
    "RH"   => [qw(natural rockhole)],
    "RLWY" => [qw(railway rail)], # not in source schema
    "RNGE" => [qw(natural mountain_range)],
    "ROAD" => [qw(highway road)],
    "ROCK" => [qw(natural rock)],
    "RSTA" => [qw(railway station)],
    "RTRK" => [qw(highway raceway)],
    "RUIN" => [qw(historic ruins)],
    "SCHL" => [qw(amenity school)],
    "SEA"  => [qw(place sea)],
    "SHOL" => [qw(natural shoal)],
    "SITE" => [qw(historic yes)],
    "SLP"  => [qw(landform slope)],
    "SND"  => [qw(natural bay)],
    "SOAK" => [qw(man_made water_well)],
    "SPAN" => [qw(natural desert surface saltpan)],
    "SPIT" => [qw(natural shoal)],
    "SPRG" => [qw(natural spring)],
    "STAT" => [qw(place state)],
    "STOK" => [qw(highway track)],
    "STR"  => [qw(natural strait)],
    "STRM" => [qw(waterway stream)],
    "SUB"  => [qw(place suburb)],
    "SWP"  => [qw(natural wetland)],
    "TANK" => [qw(man_made storage_tank)],
    "TOWR" => [qw(man_made tower)],
    "TREE" => [qw(natural tree)],
    "TRIG" => [qw(man_made survey_point)],
    "TRK"  => [qw(highway path)],
    "TUNN" => [qw(tunnel yes)],
    "URBN" => [qw(place city)],
    "VAL"  => [qw(natural valley)],
    "WRCK" => [qw(historic wreck)],
    "WRFL" => [qw(waterway waterfall)],
    "WTRH" => [qw(natural water)],
    "YD"   => [qw(place yard)],
);
# ---------------------------------------------------------------------------
# Main conversion: every CSV row that has both a longitude and a latitude
# becomes one OSM <node>, carrying an attribution tag plus the tags derived
# from the columns named in @RETAIN_LIST.
# ---------------------------------------------------------------------------

# Write one <tag k="..." v="..."/> element inside the currently open node.
sub _emit_tag {
    my ($k, $v) = @_;
    $xmlwriter->emptyTag('tag', 'k' => encode('utf8', $k), 'v' => encode('utf8', $v));
    return;
}

# Expand an abbreviated code through one of the lookup tables, falling back to
# the raw CSV value when the table has no entry for it.
sub _expand_code {
    my ($lookup, $code) = @_;
    return defined $lookup->{$code} ? $lookup->{$code} : $code;
}

# Translate one retained CSV field into zero or more OSM tags.
#   $field - column name (one of @RETAIN_LIST)
#   $value - the defined value of that column in this row
#   $row   - hash ref of the whole row (NAME needs the row's STATUS)
# Returns a list of [key, value] array refs; an empty list means the field is
# dropped for this node.
sub _osm_tags_for_field {
    my ($field, $value, $row) = @_;

    if ($field eq 'FEATURE_CODE') {
        if (exists $FEATURE_LOOKUP{$value}) {
            # The table stores alternating keys and values; take them in pairs.
            my @keyvalues = @{ $FEATURE_LOOKUP{$value} };
            my @tags;
            while (@keyvalues) {
                my $k = shift @keyvalues;
                my $v = shift @keyvalues;
                if (defined $v) {
                    push @tags, [$k, $v];
                }
                else {
                    print STDERR "Value is not defined for key $k\n";
                }
            }
            return @tags;
        }
        # Unknown code: report it and keep the raw code as a feature_code tag.
        print STDERR "$value not found in FEATURE_LOOKUP\n";
        return [lc $field, $value];
    }

    my $key = $field;   # tag key to emit (lower-cased on return)
    my $out = $value;   # tag value to emit

    if ($field eq 'RECORD_ID') {
        $key = 'ref' if $transform_schema;
    }
    elsif ($field eq 'AUTHORITY_ID') {
        $key = 'attribution:name' if $transform_schema;
        # Falls back to the raw code for authorities missing from the table.
        $out = _expand_code(\%AUTHORITY_LOOKUP, $value);
    }
    elsif ($field eq 'STATE_ID') {
        if ($transform_schema) {
            return if $value eq 'N/A';
            $key = 'is_in:state';
        }
    }
    elsif ($field eq 'NAME') {
        if ($transform_schema) {
            # STATUS may be undef (empty CSV field); treat that as "no status".
            my $status = defined $row->{'STATUS'} ? $row->{'STATUS'} : '';
            $key = $status eq 'H' ? 'old_name'
                 : $status eq 'U' ? 'loc_name'
                 :                  'name';
        }
    }
    elsif ($field eq 'STATUS') {
        # don't include when using OSM schema
        return if $transform_schema;
        $out = _expand_code(\%STATUS_LOOKUP, $value);
    }
    elsif ($field eq 'VARIANT_NAME') {
        $key = 'alt_name' if $transform_schema;
    }
    elsif ($field eq 'POSTCODE') {
        if ($transform_schema) {
            # 9999 = No assigned postcode
            return if $value eq '9999';
            $key = 'is_in:postcode';
        }
    }
    elsif ($field eq 'CONCISE_CAZETTEER') {
        # don't include when using OSM schema
        return if $transform_schema;
        $out = _expand_code(\%CONCISE_GAZ_LOOKUP, $value);
    }
    elsif ($field eq '100K_MAP') {
        # don't include when using OSM schema
        return if $transform_schema;
    }
    elsif ($field eq 'CGDN') {
        # don't include when using OSM schema
        return if $transform_schema;
        $out = _expand_code(\%CGDN_LOOKUP, $value);
    }

    # use lower case keys
    return [lc $key, $out];
}

$xmlwriter->xmlDecl("UTF-8");
$xmlwriter->startTag("osm", "version" => "0.6");

# keep a node count for use as the osm node id
my $node_id = 0;

# read through the input CSV file line by line
while ( my $row_hash = $csv->getline_hr( $csvfh ) ) {
    # The source coordinates are GDA94 lat/lon while .osm wants WGS84 lat/lon.
    # Per http://www.icsm.gov.au/gda/wgs84fact.pdf the two are treated as the
    # same datum for this script's purposes, so the source values are written
    # out unchanged. If a real transformation is ever needed, Geo::Proj4 can do it:
    #   my $wgs84_ll = Geo::Proj4->new("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs ");
    #   my $gda94_ll = Geo::Proj4->new("+proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs ");
    #   my $lonlat   = $wgs84_ll->transform($gda94_ll, [ $lon, $lat ]);
    my $lon = $row_hash->{'LONGITUDE'};
    my $lat = $row_hash->{'LATITUDE'};

    # A row without coordinates cannot become a node. Skip it entirely so the
    # node's start and end tags always stay paired.
    next unless defined $lon && defined $lat;

    $node_id++;
    $xmlwriter->startTag('node', 'id' => $node_id, 'lat' => $lat, 'lon' => $lon, 'visible' => 'true', 'version' => '1');
    _emit_tag('attribution', 'Commonwealth of Australia (Geoscience Australia)');

    # Walk @RETAIN_LIST itself so tags come out in a stable order from run to run.
    foreach my $field (@RETAIN_LIST) {
        my $value = $row_hash->{$field};
        # value isn't defined then drop this tag for this object
        next unless defined $value;

        foreach my $tag (_osm_tags_for_field($field, $value, $row_hash)) {
            _emit_tag(@{$tag});
        }
    }

    $xmlwriter->endTag('node');
}

# getline_hr returns false both at end of file and on a parse error; note
# which one it was before the parser state is disturbed.
my $parse_failed = !$csv->eof;
my $parse_diag   = $parse_failed ? "" . $csv->error_diag() : "";
close $csvfh;

# finish the XML file; the document is closed properly even after a parse
# error so that whatever was written is still well formed
$xmlwriter->endTag("osm");
$xmlwriter->end();
$xmlout->close() or die "$ARGV[1]: $!";

die "$csvfile: CSV parse error: $parse_diag\n" if $parse_failed;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<OGRVRTDataSource>
    <!-- Exposes the Gazetteer text file as a point layer built from its
         LONGITUDE and LATITUDE columns. -->
    <OGRVRTLayer name="Gazetteer2010_txt">
        <!-- relativeToVRT="1": resolve the data file next to this .vrt rather
             than relative to the directory ogr2ogr is started from. -->
        <SrcDataSource relativeToVRT="1">Gazetteer2010_txt.txt</SrcDataSource>
        <GeometryType>wkbPoint</GeometryType>
        <!-- EPSG:4283 is the geographic GDA94 coordinate system. -->
        <LayerSRS>EPSG:4283</LayerSRS>
        <GeometryField encoding="PointFromColumns" x="LONGITUDE" y="LATITUDE"/>
    </OGRVRTLayer>
</OGRVRTDataSource>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Author: Andrew Harvey (http://andrewharvey4.wordpress.com/) | |
# | |
# To the extent possible under law, the person who associated CC0 | |
# with this work has waived all copyright and related or neighboring | |
# rights to this work. | |
# http://creativecommons.org/publicdomain/zero/1.0/ | |
# Base name of the downloaded archive, the directory it is unpacked into and
# the generated .osm file.
FILE=Gazetteer_ASCII

# None of these targets produce a file with the target's own name.
.PHONY : all clean download convert

all : clean download convert

# Remove everything a previous run produced, including the shapefile parts
# written by ogr2ogr (it refuses to overwrite an existing layer).
clean :
	rm -fr ${FILE}
	rm -f ${FILE}.zip*
	rm -f ${FILE}.osm
	rm -f Gazetteer2010.shp Gazetteer2010.shx Gazetteer2010.dbf Gazetteer2010.prj

# Fetch the Gazetteer archive and unpack it into ${FILE}/.
download :
	wget -O ${FILE}.zip 'http://www-a.ga.gov.au/products/servlet/controller?event=DOWNLOAD_FILE&dn=1168290&id=29822650'
	unzip ${FILE}.zip -d ${FILE}

# Produce a shapefile (via the VRT description) and the .osm file.
# "ESRI Shapefile" is the OGR driver name for shapefile output.
convert :
	cp Gazetteer2010_txt.vrt ${FILE}/
	ogr2ogr -f "ESRI Shapefile" Gazetteer2010.shp ${FILE}/Gazetteer2010_txt.vrt
	./ga_gaz_2010_to_osm.pl ${FILE}/Gazetteer2010_txt.txt ${FILE}.osm
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment