# danthemango/ukMap.R

## ukMap.R
# generic place name maps in great britain (not northern ireland, my dataset didn't include it)

# I used the place name data from https://www.ordnancesurvey.co.uk
# it doesn't include latitude or longitude, rather "geometry_x", "geometry_y" values which correspond
# to the British Ordnance Survey National Grid
# where 'geometry_x' is the number of metres east of the southwest corner of the grid
# and 'geometry_y' is the number of metres north of the southwest corner of the grid

# I am using the sgo library to convert BNG to WGS84
# (note: WGS84 is the standard, google maps location format)
# for more information, read:
#  - https://www.ordnancesurvey.co.uk/documents/resources/guide-coordinate-systems-great-britain.pdf
#  - https://www.ordnancesurvey.co.uk/documents/resources/guide-to-nationalgrid.pdf

# I am filtering the ordnance survey data for only populated places, however it doesn't include
# population sizes which may be interesting to filter for in future

# heavily inspired by the Map Men youtube video (and associated maps):
#  - https://www.youtube.com/watch?v=uYNzqgU7na4

# larger list of generic place names with more alternate spellings:
#  - https://en.wikipedia.org/wiki/List_of_generic_forms_in_place_names_in_the_British_Isles


if(system.file(package='sgo') == '') install.packages("sgo")
if(system.file(package='dplyr') == '') install.packages("dplyr")

library(sgo)
library(ggplot2)
library(dplyr)

# TODO
# using dynamic pattern list
# read all of the comma-separated patterns from the "Term" column
# fix regex to search for prefix, suffix, standalone, or interfix values specified in the "Position" column
# combine into bar-separated values for the regex (stripping whitespace)

# all_patterns <- read.csv('all_patterns_fixed.csv')

# Load the populated-places table; its GEOMETRY_X / GEOMETRY_Y columns hold
# the grid coordinates that are converted below.
cities <- read.csv("OSDH_populated_places2.csv")

# Wrap the grid coordinates in an sgo_points object (EPSG:27700)
DT1_sgo <- sgo_points(
  cities,
  coords = c("GEOMETRY_X", "GEOMETRY_Y"),
  epsg = 27700
)

# Convert BNG to WGS84 (EPSG:4326) and attach the resulting x / y values to
# the places table as long / lat.
DT1_sgo_4326 <- as.data.frame(sgo_bng_lonlat(DT1_sgo, to = 4326))
cities[["long"]] <- DT1_sgo_4326[["x"]]
cities[["lat"]] <- DT1_sgo_4326[["y"]]

# test map made from these suffixes I found: https://pronunciationstudio.com/english-place-name-suffixes/
# One row per name element, written row-wise as:
#   pattern (regex matched against the place name), label (legend text), explanation
# Row order matters: plotCities() labels a place with the first row that matches.
orig_patterns <- as.data.frame(
  matrix(
    c(
      'caster|cester|chester|cetter|xeter', '-caster, -cester, -chester', 'camp or fortification, from the latin "castrum"',
      'borough|brough|burgh|burg$|bury$', '-borough, -brough, -burg(h), -bury', 'fort or fortification, related to the German "-berg"',
      'by$', '-by', 'From Old Norse, meaning "settlement" or "village"',
      'ham$', '-ham', 'farm or homestead, related to the German "heim"',
      'mouth', '-mouth', 'Mouth (of a river), bay',
      'stead', '-stead', 'place or enclosed pasture, related to the German "Stadt"',
      'tun$|ton$', '-ton, -tun', 'enclosure, estate, related to the modern word "town"',
      'worth|worthy|wardine', '-worth(y), -wardine', 'Old English: enclosure; related to the Dutch "-waard"'
    ),
    ncol = 3,
    byrow = TRUE,
    dimnames = list(NULL, c('pattern', 'label', 'explanation'))
  ),
  stringsAsFactors = FALSE
)

# celtic names
# One row per name element, written row-wise as:
#   pattern (regex matched against the place name), label (legend text), explanation
# Row order matters: plotCities() labels a place with the first row that matches.
celtic_patterns <- as.data.frame(
  matrix(
    c(
      '^tre|^tra', 'Tre-, Tra-', 'celtic: settlement',
      'loch|lough', 'Loch, Lough', 'celtic: lake',
      'bryn|brin|bren', 'Bryn, Brin, Bren', 'celtic: hill',
      'aber', 'Aber', 'celtic: mouth (of a river) or meeting of waters'
    ),
    ncol = 3,
    byrow = TRUE,
    dimnames = list(NULL, c('pattern', 'label', 'explanation'))
  ),
  stringsAsFactors = FALSE
)

# saxon names
# One row per name element, written row-wise as:
#   pattern (regex matched against the place name), label (legend text), explanation
# Row order matters: plotCities() labels a place with the first row that matches.
saxon_patterns <- as.data.frame(
  matrix(
    c(
      'hurst|hirst', 'hurst,hirst', 'hill',
      'borough|brough|burgh|burg$|bury$', '-borough, -brough, -burg(h), -bury', 'fort or fortification, similar to the German "berg"',
      'port', 'port', 'port or harbour',
      'mere$|mer$', '-mere, -mer', 'lake or pond, similar to the German "Meer"',
      'stead', '-stead', 'place, enclosure, pasture, related to the German "Stadt"',
      'stow', 'stow', '(holy) place (of assembly)',
      'wick|wich|wych|wyke', '-wick, -wich, -wych, -wyke', 'place, settlement, similar to the German "-weig" in "Braunschweig"'
    ),
    ncol = 3,
    byrow = TRUE,
    dimnames = list(NULL, c('pattern', 'label', 'explanation'))
  ),
  stringsAsFactors = FALSE
)

# norse/danish/viking patterns
# One row per name element, written row-wise as:
#   pattern (regex matched against the place name), label (legend text), explanation
# Row order matters: plotCities() labels a place with the first row that matches.
dane_patterns <- as.data.frame(
  matrix(
    c(
      'by$', '-by', 'settlement or village',
      'thwaite|twatt', 'thwaite, twatt', 'a forest clearing with a dwelling, or parcel of land',
      'thorp|thorpe', 'thorp, thorpe', 'an outlier of an earlier settlement, related to the German "Dorf"',
      'kirk', 'kirk', 'church, similar to the German "-kirch" in "Altkirch"'
    ),
    ncol = 3,
    byrow = TRUE,
    dimnames = list(NULL, c('pattern', 'label', 'explanation'))
  ),
  stringsAsFactors = FALSE
)

# let me check all patterns that are relative to a lake, river, bay, port
# One row per name element, written row-wise as:
#   pattern (regex matched against the place name), label (legend text), explanation
# Row order matters: plotCities() labels a place with the first row that matches.
water_patterns <- as.data.frame(
  matrix(
    c(
      # 'ford|forth|ffordd', 'ford, forth, ffordd', 'river crossing or road, like German "-furt" like in "Frankfurt"',
      'port', 'port', 'port or harbour',
      'loch|lough', 'loch, lough', 'celtic: lake',
      'aber', 'aber', 'celtic: mouth (of a river) or meeting of waters',
      'mouth', '-mouth', 'Mouth (of a river), bay'
    ),
    ncol = 3,
    byrow = TRUE,
    dimnames = list(NULL, c('pattern', 'label', 'explanation'))
  ),
  stringsAsFactors = FALSE
)

# view patterns comparing celtic, saxon, norse/danish
# One row per language group, written row-wise as:
#   pattern (regex matched against the place name), label (legend text), explanation
# Row order matters: plotCities() labels a place with the first row that matches.
combined_patterns <- as.data.frame(
  matrix(
    c(
      '^tre|^tra|loch|lough|bryn|brin|bren|aber', 'Celtic Patterns (Tre-, Loch, Bryn, Aber)', 'Celtic Patterns',
      'hurst|hirst|borough|brough|burgh|burg$|bury$|port|mere$|mer$|stead|stow|wick|wich|wych|wyke', 'Saxon Patterns (hurst, -borough, port, -mere, -stead, stow, -wick)', 'Saxon Patterns',
      'by$|thwaite|twatt|thorp|thorpe|kirk', 'Dane/Norse Patterns (-by, thwaite, thorp, kirk)', 'Dane/Norse Patterns'
    ),
    ncol = 3,
    byrow = TRUE,
    dimnames = list(NULL, c('pattern', 'label', 'explanation'))
  ),
  stringsAsFactors = FALSE
)

# Plot every place whose name matches one of the given patterns on an outline
# map of the UK (Northern Ireland excluded), coloured by the matched label.
#
# Arguments:
#   pattern_types: data frame with columns 'pattern' (regular expression,
#                  matched case-insensitively against the place name) and
#                  'label' (legend entry for that pattern). Rows are tried in
#                  order and the first matching row wins.
#   title:         title of the colour legend.
#
# Reads the global `cities` data frame (columns NAME, long, lat). Only a local
# copy is modified; the global is left untouched.
#
# Returns the ggplot object. Nothing is drawn until the object is printed.
plotCities <- function(pattern_types, title) {

  # Label each place name with the first pattern it matches, or 'none'.
  # grepl() is vectorised over the names, so every regex runs once over the
  # whole column instead of once per place.
  getPattern <- function(place_names) {
    patterns <- as.character(pattern_types[["pattern"]])
    labels <- as.character(pattern_types[["label"]])
    result <- rep('none', length(place_names))
    # Walk the rows last-to-first so that earlier rows overwrite later ones,
    # which leaves each name with the label of its first matching row.
    for (i in rev(seq_along(patterns))) {
      hit <- grepl(patterns[i], place_names, ignore.case = TRUE)
      result[hit] <- labels[i]
    }
    result
  }

  # add a pattern name based on city name
  cities$pattern <- getPattern(cities$NAME)

  # drop the places that matched no pattern
  cities <- cities[cities$pattern != 'none', ]

  # outline polygons for the UK without Northern Ireland
  uk_map_data <- map_data(map = "world") %>%
    filter(region == "UK" & subregion != 'Northern Ireland')

  # hide axes and panel rectangles so only the outline and points remain
  blank_theme <- theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    axis.ticks = element_blank(),
    rect = element_blank(),
    text = element_text(size = 20)
  )

  uk_plot <- ggplot(data = uk_map_data, aes(long, lat)) +
    coord_map() +
    geom_polygon(aes(group = group), fill = "white", colour = "black") +
    geom_point(data = cities, aes(x = long, y = lat, colour = pattern), size = 2) +
    blank_theme +
    scale_color_discrete(name = title)

  uk_plot
}

# Make sure the directory for the rendered maps exists.
if (!dir.exists("output")) {
  dir.create("output")
}

# Render one pattern map to output/<file> as a 1080x1080 PNG.
# The plot is print()ed explicitly: a ggplot object is only drawn when it is
# printed, and top-level values are not auto-printed when this script is run
# through source(). on.exit() closes the device even if plotting fails.
save_pattern_map <- function(file, pattern_types, title) {
  png(file.path("output", file), width = 1080, height = 1080)
  on.exit(dev.off(), add = TRUE)
  print(plotCities(pattern_types, title))
  invisible(file)
}

save_pattern_map("saxon_patterns.png", saxon_patterns, 'Saxon Patterns')
save_pattern_map("celtic_patterns.png", celtic_patterns, 'Celtic Patterns')
save_pattern_map("dane_patterns.png", dane_patterns, 'Dane/Norse Patterns')
save_pattern_map("water_patterns.png", water_patterns, 'Patterns Related To Water')

# ultra common saxon name particles, which overwhelm the map if included:
save_pattern_map(
  "leigh_patterns.png",
  data.frame(
    pattern = c('ley$|lea$|leigh'),
    label = c('-ley, -lea, -leigh'),
    explanation = c('woodland clearing')
  ),
  'Generic Patterns'
)

save_pattern_map(
  "ford_patterns.png",
  data.frame(
    pattern = c('ford|forth|ffordd'),
    label = c('ford, forth, ffordd'),
    explanation = c('river crossing or road, like German "-furt" in "Frankfurt"')
  ),
  'Generic Patterns'
)

save_pattern_map(
  "ton_patterns.png",
  data.frame(
    pattern = c('ton$'),
    label = c('-ton'),
    explanation = c('enclosure, estate, related to the modern word "town"')
  ),
  'Generic Patterns'
)

save_pattern_map("combined_patterns.png", combined_patterns, 'Generic Place Name Patterns by Etymology')
	# generic place name maps in great britain (not northern ireland, my dataset didn't include it)

	# I used the place name data from https://www.ordnancesurvey.co.uk
	# it doesn't include latitude or longitude, rather "geometry_x", "geometry_y" values which correspond
	# to the British Ordnance Survey National Grid
	# where 'geometry_x' is the number of metres east of the southwest corner of the grid
	# and 'geometry_y' is the number of metres north of the southwest corner of the grid

	# I am using the sgo library to convert BNG to WGS84
	# (note: WGS84 is the standard, google maps location format)
	# for more information, read:
	# - https://www.ordnancesurvey.co.uk/documents/resources/guide-coordinate-systems-great-britain.pdf
	# - https://www.ordnancesurvey.co.uk/documents/resources/guide-to-nationalgrid.pdf

	# I am filtering the ordnance survey data for only populated places, however it doesn't include
	# population sizes which may be interesting to filter for in future

	# heavily inspired by the Map Men youtube video (and associated maps):
	# - https://www.youtube.com/watch?v=uYNzqgU7na4

	# larger list of generic place names with more alternate spellings:
	# - https://en.wikipedia.org/wiki/List_of_generic_forms_in_place_names_in_the_British_Isles


	if(system.file(package='sgo') == '') install.packages("sgo")
	if(system.file(package='dplyr') == '') install.packages("dplyr")

	library(sgo)
	library(ggplot2)
	library(dplyr)

	# TODO
	# using dynamic pattern list
	# read all of the comma-separated patterns from the "Term" column
	# fix regex to search for prefix, suffix, standalone, or interfix values specified in the "Position" column
	# combine into bar-separated values for the regex (stripping whitespace)

	# all_patterns <- read.csv('all_patterns_fixed.csv')

	# Load the populated-places table; its GEOMETRY_X / GEOMETRY_Y columns hold
	# the grid coordinates that are converted below.
	cities <- read.csv("OSDH_populated_places2.csv")

	# Wrap the grid coordinates in an sgo_points object (EPSG:27700)
	DT1_sgo <- sgo_points(
	  cities,
	  coords = c("GEOMETRY_X", "GEOMETRY_Y"),
	  epsg = 27700
	)

	# Convert BNG to WGS84 (EPSG:4326) and attach the resulting x / y values to
	# the places table as long / lat.
	DT1_sgo_4326 <- as.data.frame(sgo_bng_lonlat(DT1_sgo, to = 4326))
	cities[["long"]] <- DT1_sgo_4326[["x"]]
	cities[["lat"]] <- DT1_sgo_4326[["y"]]

	# test map made from these suffixes I found: https://pronunciationstudio.com/english-place-name-suffixes/
	# One row per name element, written row-wise as:
	#   pattern (regex matched against the place name), label (legend text), explanation
	# Alternatives inside a pattern are separated by a plain '|'; '\|' is not a
	# valid escape in an R string literal.
	# Row order matters: plotCities() labels a place with the first row that matches.
	orig_patterns <- as.data.frame(
	  matrix(
	    c(
	      'caster|cester|chester|cetter|xeter', '-caster, -cester, -chester', 'camp or fortification, from the latin "castrum"',
	      'borough|brough|burgh|burg$|bury$', '-borough, -brough, -burg(h), -bury', 'fort or fortification, related to the German "-berg"',
	      'by$', '-by', 'From Old Norse, meaning "settlement" or "village"',
	      'ham$', '-ham', 'farm or homestead, related to the German "heim"',
	      'mouth', '-mouth', 'Mouth (of a river), bay',
	      'stead', '-stead', 'place or enclosed pasture, related to the German "Stadt"',
	      'tun$|ton$', '-ton, -tun', 'enclosure, estate, related to the modern word "town"',
	      'worth|worthy|wardine', '-worth(y), -wardine', 'Old English: enclosure; related to the Dutch "-waard"'
	    ),
	    ncol = 3,
	    byrow = TRUE,
	    dimnames = list(NULL, c('pattern', 'label', 'explanation'))
	  ),
	  stringsAsFactors = FALSE
	)

	# celtic names
	# One row per name element, written row-wise as:
	#   pattern (regex matched against the place name), label (legend text), explanation
	# Alternatives inside a pattern are separated by a plain '|'; '\|' is not a
	# valid escape in an R string literal.
	# Row order matters: plotCities() labels a place with the first row that matches.
	celtic_patterns <- as.data.frame(
	  matrix(
	    c(
	      '^tre|^tra', 'Tre-, Tra-', 'celtic: settlement',
	      'loch|lough', 'Loch, Lough', 'celtic: lake',
	      'bryn|brin|bren', 'Bryn, Brin, Bren', 'celtic: hill',
	      'aber', 'Aber', 'celtic: mouth (of a river) or meeting of waters'
	    ),
	    ncol = 3,
	    byrow = TRUE,
	    dimnames = list(NULL, c('pattern', 'label', 'explanation'))
	  ),
	  stringsAsFactors = FALSE
	)

	# saxon names
	# One row per name element, written row-wise as:
	#   pattern (regex matched against the place name), label (legend text), explanation
	# Alternatives inside a pattern are separated by a plain '|'; '\|' is not a
	# valid escape in an R string literal.
	# Row order matters: plotCities() labels a place with the first row that matches.
	saxon_patterns <- as.data.frame(
	  matrix(
	    c(
	      'hurst|hirst', 'hurst,hirst', 'hill',
	      'borough|brough|burgh|burg$|bury$', '-borough, -brough, -burg(h), -bury', 'fort or fortification, similar to the German "berg"',
	      'port', 'port', 'port or harbour',
	      'mere$|mer$', '-mere, -mer', 'lake or pond, similar to the German "Meer"',
	      'stead', '-stead', 'place, enclosure, pasture, related to the German "Stadt"',
	      'stow', 'stow', '(holy) place (of assembly)',
	      'wick|wich|wych|wyke', '-wick, -wich, -wych, -wyke', 'place, settlement, similar to the German "-weig" in "Braunschweig"'
	    ),
	    ncol = 3,
	    byrow = TRUE,
	    dimnames = list(NULL, c('pattern', 'label', 'explanation'))
	  ),
	  stringsAsFactors = FALSE
	)

	# norse/danish/viking patterns
	# One row per name element, written row-wise as:
	#   pattern (regex matched against the place name), label (legend text), explanation
	# Alternatives inside a pattern are separated by a plain '|'; '\|' is not a
	# valid escape in an R string literal.
	# Row order matters: plotCities() labels a place with the first row that matches.
	dane_patterns <- as.data.frame(
	  matrix(
	    c(
	      'by$', '-by', 'settlement or village',
	      'thwaite|twatt', 'thwaite, twatt', 'a forest clearing with a dwelling, or parcel of land',
	      'thorp|thorpe', 'thorp, thorpe', 'an outlier of an earlier settlement, related to the German "Dorf"',
	      'kirk', 'kirk', 'church, similar to the German "-kirch" in "Altkirch"'
	    ),
	    ncol = 3,
	    byrow = TRUE,
	    dimnames = list(NULL, c('pattern', 'label', 'explanation'))
	  ),
	  stringsAsFactors = FALSE
	)

	# let me check all patterns that are relative to a lake, river, bay, port
	# One row per name element, written row-wise as:
	#   pattern (regex matched against the place name), label (legend text), explanation
	# Alternatives inside a pattern are separated by a plain '|'; '\|' is not a
	# valid escape in an R string literal.
	# Row order matters: plotCities() labels a place with the first row that matches.
	water_patterns <- as.data.frame(
	  matrix(
	    c(
	      # 'ford|forth|ffordd', 'ford, forth, ffordd', 'river crossing or road, like German "-furt" like in "Frankfurt"',
	      'port', 'port', 'port or harbour',
	      'loch|lough', 'loch, lough', 'celtic: lake',
	      'aber', 'aber', 'celtic: mouth (of a river) or meeting of waters',
	      'mouth', '-mouth', 'Mouth (of a river), bay'
	    ),
	    ncol = 3,
	    byrow = TRUE,
	    dimnames = list(NULL, c('pattern', 'label', 'explanation'))
	  ),
	  stringsAsFactors = FALSE
	)

	# view patterns comparing celtic, saxon, norse/danish
	# One row per language group, written row-wise as:
	#   pattern (regex matched against the place name), label (legend text), explanation
	# Alternatives inside a pattern are separated by a plain '|'; '\|' is not a
	# valid escape in an R string literal.
	# Row order matters: plotCities() labels a place with the first row that matches.
	combined_patterns <- as.data.frame(
	  matrix(
	    c(
	      '^tre|^tra|loch|lough|bryn|brin|bren|aber', 'Celtic Patterns (Tre-, Loch, Bryn, Aber)', 'Celtic Patterns',
	      'hurst|hirst|borough|brough|burgh|burg$|bury$|port|mere$|mer$|stead|stow|wick|wich|wych|wyke', 'Saxon Patterns (hurst, -borough, port, -mere, -stead, stow, -wick)', 'Saxon Patterns',
	      'by$|thwaite|twatt|thorp|thorpe|kirk', 'Dane/Norse Patterns (-by, thwaite, thorp, kirk)', 'Dane/Norse Patterns'
	    ),
	    ncol = 3,
	    byrow = TRUE,
	    dimnames = list(NULL, c('pattern', 'label', 'explanation'))
	  ),
	  stringsAsFactors = FALSE
	)

	# Plot every place whose name matches one of the given patterns on an outline
	# map of the UK (Northern Ireland excluded), coloured by the matched label.
	#
	# Arguments:
	#   pattern_types: data frame with columns 'pattern' (regular expression,
	#                  matched case-insensitively against the place name) and
	#                  'label' (legend entry for that pattern). Rows are tried in
	#                  order and the first matching row wins.
	#   title:         title of the colour legend.
	#
	# Reads the global `cities` data frame (columns NAME, long, lat). Only a local
	# copy is modified; the global is left untouched.
	#
	# Returns the ggplot object. Nothing is drawn until the object is printed.
	plotCities <- function(pattern_types, title) {

	  # Label each place name with the first pattern it matches, or 'none'.
	  # grepl() is vectorised over the names, so every regex runs once over the
	  # whole column instead of once per place.
	  getPattern <- function(place_names) {
	    patterns <- as.character(pattern_types[["pattern"]])
	    labels <- as.character(pattern_types[["label"]])
	    result <- rep('none', length(place_names))
	    # Walk the rows last-to-first so that earlier rows overwrite later ones,
	    # which leaves each name with the label of its first matching row.
	    for (i in rev(seq_along(patterns))) {
	      hit <- grepl(patterns[i], place_names, ignore.case = TRUE)
	      result[hit] <- labels[i]
	    }
	    result
	  }

	  # add a pattern name based on city name
	  cities$pattern <- getPattern(cities$NAME)

	  # drop the places that matched no pattern
	  cities <- cities[cities$pattern != 'none', ]

	  # outline polygons for the UK without Northern Ireland
	  uk_map_data <- map_data(map = "world") %>%
	    filter(region == "UK" & subregion != 'Northern Ireland')

	  # hide axes and panel rectangles so only the outline and points remain
	  blank_theme <- theme(
	    axis.text.x = element_blank(),
	    axis.text.y = element_blank(),
	    axis.title.y = element_blank(),
	    axis.title.x = element_blank(),
	    axis.ticks = element_blank(),
	    rect = element_blank(),
	    text = element_text(size = 20)
	  )

	  uk_plot <- ggplot(data = uk_map_data, aes(long, lat)) +
	    coord_map() +
	    geom_polygon(aes(group = group), fill = "white", colour = "black") +
	    geom_point(data = cities, aes(x = long, y = lat, colour = pattern), size = 2) +
	    blank_theme +
	    scale_color_discrete(name = title)

	  uk_plot
	}

	# Make sure the directory for the rendered maps exists.
	if (!dir.exists("output")) {
	  dir.create("output")
	}

	# Render one pattern map to output/<file> as a 1080x1080 PNG.
	# The plot is print()ed explicitly: a ggplot object is only drawn when it is
	# printed, and top-level values are not auto-printed when this script is run
	# through source(). on.exit() closes the device even if plotting fails.
	save_pattern_map <- function(file, pattern_types, title) {
	  png(file.path("output", file), width = 1080, height = 1080)
	  on.exit(dev.off(), add = TRUE)
	  print(plotCities(pattern_types, title))
	  invisible(file)
	}

	save_pattern_map("saxon_patterns.png", saxon_patterns, 'Saxon Patterns')
	save_pattern_map("celtic_patterns.png", celtic_patterns, 'Celtic Patterns')
	save_pattern_map("dane_patterns.png", dane_patterns, 'Dane/Norse Patterns')
	save_pattern_map("water_patterns.png", water_patterns, 'Patterns Related To Water')

	# ultra common saxon name particles, which overwhelm the map if included:
	# (alternatives are separated by a plain '|'; '\|' is not a valid escape in
	# an R string literal)
	save_pattern_map(
	  "leigh_patterns.png",
	  data.frame(
	    pattern = c('ley$|lea$|leigh'),
	    label = c('-ley, -lea, -leigh'),
	    explanation = c('woodland clearing')
	  ),
	  'Generic Patterns'
	)

	save_pattern_map(
	  "ford_patterns.png",
	  data.frame(
	    pattern = c('ford|forth|ffordd'),
	    label = c('ford, forth, ffordd'),
	    explanation = c('river crossing or road, like German "-furt" in "Frankfurt"')
	  ),
	  'Generic Patterns'
	)

	save_pattern_map(
	  "ton_patterns.png",
	  data.frame(
	    pattern = c('ton$'),
	    label = c('-ton'),
	    explanation = c('enclosure, estate, related to the modern word "town"')
	  ),
	  'Generic Patterns'
	)

	save_pattern_map("combined_patterns.png", combined_patterns, 'Generic Place Name Patterns by Etymology')