Skip to content

Instantly share code, notes, and snippets.

@e-kotov
Created December 3, 2015 23:39
Show Gist options
  • Save e-kotov/73f78ae387862fed5841 to your computer and use it in GitHub Desktop.
Save e-kotov/73f78ae387862fed5841 to your computer and use it in GitHub Desktop.
R code for downloading the Moscow BTI dataset (building addresses and geometry) and saving it to a QGIS-compatible CSV file with WKT-encoded geometry
# See https://goo.gl/KHQnlk for a video walkthrough.
# Packages this script needs; each one is installed (if missing) and attached
# by the bootstrap loop further down.
p <- "data.table"
#' Install a package if it is missing, then attach it
#'
#' Unlike the earlier version, this does not call `update.packages()`:
#' silently upgrading every installed package is a surprising side effect for
#' a helper whose job is to make one package available.
#'
#' @param pack Name of the package, as a length-one character vector.
#' @return `TRUE`, invisibly, once the package is attached. An error is raised
#'   (by `library()`) if the package still cannot be loaded after the
#'   installation attempt, instead of continuing with a missing dependency.
install_package <- function(pack) {
  stopifnot(is.character(pack), length(pack) == 1L, !is.na(pack))

  # requireNamespace() checks availability without attaching anything and is
  # much cheaper than scanning installed.packages().
  if (!requireNamespace(pack, quietly = TRUE)) {
    install.packages(pack, dependencies = TRUE)
  }

  # library() fails loudly when the package is unavailable; require() would
  # only warn and return FALSE, hiding the problem until later.
  library(pack, character.only = TRUE)
  invisible(TRUE)
}
# Make sure every package named in `p` is installed and attached.
for (pack in p) {
  install_package(pack)
}
#' Drop rows that have missing values in selected columns
#'
#' @param data A data frame (or similar two-dimensional object).
#' @param desiredCols Columns of `data` that must not contain `NA`.
#' @return `data` restricted to the rows where every one of `desiredCols` is
#'   non-missing; all columns are kept.
completeFun <- function(data, desiredCols) {
  rows_without_na <- stats::complete.cases(data[, desiredCols])
  data[rows_without_na, ]
}
# Clean up temporary variables and helper functions, then run garbage
# collection.
rm(list = c("p", "pack", "completeFun", "install_package"))
gc()
#----------------------------------------------------------------------------
# Print usage instructions when the script is sourced: which function to run,
# which output file it creates, and where to find the video walkthrough.
message("\n\nRun convert_geometry_to_wkt() function to download csv file with BTI data from data.mos.ru. The script will then convert the geometry field of the csv file to WKT-style (Well-known text) geometry, which can be easily opened in open source QGIS and then exported to any of the popular GIS formats like ESRI Shapefiles, MapInfo MIF or TAB and many others.\n\nThe function will create a CSV file 'bti_wkt.csv' in the root of your current R working directory.\n\nThis script could be extended to create a spatial object and save it to one of the GIS formats, but the readWKT() function from 'rgeos' package is too slow - you are better off using QGIS.\n\nSee https://goo.gl/KHQnlk for a video walkthrough.")
# Convert GeoJSON "Polygon" strings (in the exact spacing used by the
# data.mos.ru export) to WKT POLYGON strings. Vectorised over `geo`.
.bti_polygon_to_wkt <- function(geo) {
  geo <- as.character(geo)
  geo <- gsub(
    '\\{ "type": "Polygon", "coordinates": \\[ \\[ \\[ ',
    "POLYGON ((", geo
  )
  # "x, y" -> "x y"; the lookahead also accepts a minus sign so that negative
  # coordinates are handled.
  geo <- gsub(", (?=[-0-9])", " ", geo, perl = TRUE)
  geo <- gsub("\\] \\] \\] \\}", "))", geo)
  # Two passes over the same pattern, on purpose: the first pass turns point
  # separators " ], [ " into ", " and, as a side effect, shortens ring
  # separators " ] ], [ [ " to " ], [ "; the second pass then turns those
  # remaining ring separators into "), (".
  geo <- gsub(" \\], \\[ ", ", ", geo)
  gsub(" \\], \\[ ", "), (", geo)
}

# Convert GeoJSON "MultiPolygon" strings (same spacing convention) to WKT
# MULTIPOLYGON strings. Vectorised over `geo`.
.bti_multipolygon_to_wkt <- function(geo) {
  geo <- as.character(geo)
  geo <- gsub(
    '\\{ "type": "MultiPolygon", "coordinates": \\[ \\[ \\[ \\[ ',
    "MULTIPOLYGON (((", geo
  )
  geo <- gsub(", (?=[-0-9])", " ", geo, perl = TRUE)
  geo <- gsub("\\] \\] \\] \\] \\}", ")))", geo)
  # Separator between member polygons. It has to close and reopen TWO levels
  # of parentheses; using a single level would turn every following polygon
  # into an interior ring (hole) of the first one.
  geo <- gsub(" \\] \\] \\], \\[ \\[ \\[ ", ")), ((", geo)
  # Same two-pass trick as for plain polygons: points first, then rings.
  geo <- gsub(" \\], \\[ ", ", ", geo)
  gsub(" \\], \\[ ", "), (", geo)
}

#' Convert the BTI dataset's GeoJSON geometry column to WKT
#'
#' Reads the BTI CSV export, rewrites its `geoData` column from GeoJSON to
#' WKT (Well-known text) and writes the result as a semicolon-separated CSV.
#' Rows whose `geoData` is neither a Polygon nor a MultiPolygon are dropped.
#' In the output, polygons come first, followed by multipolygons.
#'
#' @param csv_file Optional path to an already extracted CSV export. When
#'   `NULL` (the default) the function works as before: it downloads
#'   `bti.zip` from data.mos.ru unless that file already exists in the working
#'   directory, extracts it, renames the extracted file to `bti.csv`, reads
#'   it, and deletes `bti.zip`.
#' @param out_file Path of the CSV file to write. Defaults to `"bti_wkt.csv"`
#'   in the current working directory.
#' @return `NULL`, invisibly; called for its side effect of writing
#'   `out_file`.
convert_geometry_to_wkt <- function(csv_file = NULL,
                                    out_file = "bti_wkt.csv") {
  downloaded <- is.null(csv_file)

  if (downloaded) {
    # download the dataset unless a previous download is still around
    if (!file.exists("bti.zip")) {
      download.file(
        url = "http://data.mos.ru/datasets/export/1927/csv",
        destfile = "bti.zip", mode = "wb"
      )
    }
    # Extract everything first and rename afterwards instead of asking unzip()
    # for a specific file: extracting files with Cyrillic names from zip
    # archives by name can fail on Windows.
    unzip(zipfile = "bti.zip", list = FALSE)
    entries <- unzip(zipfile = "bti.zip", list = TRUE)[[1]]
    if (length(entries) < 1L) {
      stop("Archive 'bti.zip' contains no files.", call. = FALSE)
    }
    # Only the first entry is used; file.rename() would otherwise fail on an
    # archive listing several files.
    if (!isTRUE(file.rename(from = entries[1], to = "bti.csv"))) {
      stop("Could not rename the extracted file to 'bti.csv'.", call. = FALSE)
    }
    csv_file <- "bti.csv"
  }

  x <- read.csv2(csv_file, encoding = "UTF-8", stringsAsFactors = FALSE)

  if (downloaded) {
    # clean up zip file
    unlink("bti.zip")
  }

  if (!("geoData" %in% names(x))) {
    stop("Column 'geoData' not found in '", csv_file, "'.", call. = FALSE)
  }

  # Split the dataset into polygons and multipolygons; objects with empty or
  # other geometry match neither pattern and are left out.
  geo <- as.character(x[["geoData"]])
  is_poly <- grepl('type": "Polygon", "coord', geo, fixed = TRUE)
  is_mpoly <- grepl('type": "MultiPolygon", "coord', geo, fixed = TRUE)

  x_poly <- x[is_poly, , drop = FALSE]
  x_mpoly <- x[is_mpoly, , drop = FALSE]
  x_poly[["geoData"]] <- .bti_polygon_to_wkt(geo[is_poly])
  x_mpoly[["geoData"]] <- .bti_multipolygon_to_wkt(geo[is_mpoly])

  # bind the polygons and multipolygons back together and write the csv file
  xx <- rbind(x_poly, x_mpoly)
  write.csv2(xx, out_file, row.names = FALSE)
}
#' Download the BTI dataset as JSON and as GeoJSON
#'
#' Saves the zipped JSON export as `bti_json.zip` in the working directory,
#' extracts it and renames the extracted file to `bti.json`, then downloads
#' the GeoJSON features to `bti.geojson`.
#'
#' @return The value of the final `download.file()` call.
download_json_and_geojson <- function() {
  json_archive <- "bti_json.zip"

  # fetch the zipped JSON export
  download.file(
    url = "http://data.mos.ru/datasets/export/1927/json",
    destfile = json_archive,
    mode = "wb"
  )

  # Extract the whole archive first and look up the file name afterwards:
  # extracting files with Cyrillic names from zip archives can fail on
  # Windows, so the file is renamed to an ASCII name after extraction.
  unzip(zipfile = json_archive, list = FALSE)
  archive_listing <- unzip(zipfile = json_archive, list = TRUE)
  file.rename(from = archive_listing[[1]], to = "bti.json")

  # fetch the GeoJSON features
  download.file(
    "http://api.data.mos.ru/v1/datasets/1927/features",
    destfile = "bti.geojson",
    mode = "wb"
  )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment