Last active
November 16, 2024 21:52
-
-
Save benmarwick/0fe946d173b9e9e2d47373df9650712a to your computer and use it in GitHub Desktop.
Simple tasks with the SignBase dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(rcarbon) | |
signbase <- read_csv("signBase_Version1.0.csv") | |
# clean up and separate the data in the radiocarbon age columns | |
# so we can use it for plotting and analyses. This will generate some | |
# warnings, that's ok, nothing to worry about | |
signbase_years <- | |
signbase %>% | |
drop_na(date_bp_max_min) %>% | |
mutate(date_bp_max_min = str_replace_all(date_bp_max_min, "\\+\\/\\-", "±")) %>% | |
mutate(date_bp_max_min = str_replace_all(date_bp_max_min, "\\+", "±")) %>% | |
separate(date_bp_max_min, | |
sep = " - ", | |
c("date_bp_max", "date_bp_min"), | |
remove = FALSE) %>% | |
mutate(date_bp_max = ifelse(str_detect(date_bp_max, "\\/"), | |
str_extract(date_bp_max, ".*?(?=\\/)"), | |
date_bp_max)) %>% | |
mutate(date_bp_min = ifelse(str_detect(date_bp_min, "\\/"), | |
str_extract(date_bp_min, ".*?(?=\\/)"), | |
date_bp_min)) %>% | |
separate(date_bp_max, | |
sep = "±", | |
c("date_bp_max_age", | |
"date_bp_max_error")) %>% | |
separate(date_bp_min, | |
sep = "±", | |
c("date_bp_min_age", | |
"date_bp_min_error")) %>% | |
drop_na(date_bp_max_age, | |
date_bp_max_error) %>% | |
mutate(date_bp_max_age = parse_number(date_bp_max_age), | |
date_bp_max_error = parse_number(date_bp_max_error)) | |
# calibrate the radiocarbon ages and join the calibration results back | |
# to the main data frame of all the object, location, and sign data. | |
# Use this signbase_years_cal data frame for all your anlyses now. | |
# Use the column "MedianBP" for visualising and analysing ages. That | |
# is calendar years Before the Present | |
signbase_years_cal <- | |
calibrate(signbase_years$date_bp_max_age, | |
signbase_years$date_bp_max_error, | |
ids = signbase_years$object_id, | |
verbose = FALSE) %>% | |
summary() %>% | |
tibble() %>% | |
left_join(signbase, | |
join_by("DateID" == "object_id")) | |
# for example, we can make a histogram to see which time periods are most | |
# frequently represented in the data | |
ggplot(signbase_years_cal) + | |
aes(MedianBP) + | |
geom_histogram() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(rnaturalearth) | |
library(ggrepel) | |
# import the data from the paper https://www.nature.com/articles/s41597-020-00704-x | |
signbase <- read_csv("signBase_Version1.0.csv") | |
# prepare simple features objects for a base map of the coastlines | |
# and country boundaries of Europe | |
world <- ne_countries(scale = "medium", returnclass = "sf") | |
Europe <- world[which(world$continent == "Europe"),] | |
# make a map of the signbase sites! | |
ggplot() + | |
# plot the base map of European countries and coastlines | |
geom_sf(data = Europe) + | |
# crop the map to zoom in on the parts of Europe where the sites are | |
coord_sf(xlim = c(-10,30), | |
ylim = c(35,53), | |
expand = FALSE) + | |
# add the layer of signbase sites, here I'm using the raw data, but you can | |
# also use a data frame that you have done some calculations on. You just need | |
# the longitude and latitude columns to be present for this to work | |
geom_point(data = signbase, | |
# point colour indicates 'country', feel free to explore and | |
# change this to other columns in the signbase_sf data frame | |
aes(x = longitude, | |
y = latitude, | |
# point colour indicates 'country', feel free to explore and | |
# change this to other columns in the signbase data frame | |
colour = country)) + | |
# add labels for the site names | |
geom_text_repel(data = signbase %>% | |
# only one label per site, otherwise it's too croweded to read | |
distinct(site_name, | |
.keep_all = TRUE), | |
aes(x = longitude , | |
y = latitude, | |
label = site_name), | |
max.overlaps = 100, | |
size = 2) + | |
theme_minimal() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here's the map that this code produces