Skip to content

Instantly share code, notes, and snippets.

@benmarwick
Last active November 16, 2024 21:52
Show Gist options
  • Save benmarwick/0fe946d173b9e9e2d47373df9650712a to your computer and use it in GitHub Desktop.
Save benmarwick/0fe946d173b9e9e2d47373df9650712a to your computer and use it in GitHub Desktop.
Simple tasks with the SignBase dataset
library(tidyverse)
library(rcarbon)
signbase <- read_csv("signBase_Version1.0.csv")
# clean up and separate the data in the radiocarbon age columns
# so we can use it for plotting and analyses. This will generate some
# warnings, that's ok, nothing to worry about
signbase_years <-
signbase %>%
drop_na(date_bp_max_min) %>%
mutate(date_bp_max_min = str_replace_all(date_bp_max_min, "\\+\\/\\-", "±")) %>%
mutate(date_bp_max_min = str_replace_all(date_bp_max_min, "\\+", "±")) %>%
separate(date_bp_max_min,
sep = " - ",
c("date_bp_max", "date_bp_min"),
remove = FALSE) %>%
mutate(date_bp_max = ifelse(str_detect(date_bp_max, "\\/"),
str_extract(date_bp_max, ".*?(?=\\/)"),
date_bp_max)) %>%
mutate(date_bp_min = ifelse(str_detect(date_bp_min, "\\/"),
str_extract(date_bp_min, ".*?(?=\\/)"),
date_bp_min)) %>%
separate(date_bp_max,
sep = "±",
c("date_bp_max_age",
"date_bp_max_error")) %>%
separate(date_bp_min,
sep = "±",
c("date_bp_min_age",
"date_bp_min_error")) %>%
drop_na(date_bp_max_age,
date_bp_max_error) %>%
mutate(date_bp_max_age = parse_number(date_bp_max_age),
date_bp_max_error = parse_number(date_bp_max_error))
# calibrate the radiocarbon ages and join the calibration results back
# to the main data frame of all the object, location, and sign data.
# Use this signbase_years_cal data frame for all your anlyses now.
# Use the column "MedianBP" for visualising and analysing ages. That
# is calendar years Before the Present
signbase_years_cal <-
calibrate(signbase_years$date_bp_max_age,
signbase_years$date_bp_max_error,
ids = signbase_years$object_id,
verbose = FALSE) %>%
summary() %>%
tibble() %>%
left_join(signbase,
join_by("DateID" == "object_id"))
# for example, we can make a histogram to see which time periods are most
# frequently represented in the data
ggplot(signbase_years_cal) +
aes(MedianBP) +
geom_histogram()
library(tidyverse)
library(rnaturalearth)
library(ggrepel)
# import the data from the paper https://www.nature.com/articles/s41597-020-00704-x
signbase <- read_csv("signBase_Version1.0.csv")
# prepare simple features objects for a base map of the coastlines
# and country boundaries of Europe
world <- ne_countries(scale = "medium", returnclass = "sf")
Europe <- world[which(world$continent == "Europe"),]
# make a map of the signbase sites!
ggplot() +
# plot the base map of European countries and coastlines
geom_sf(data = Europe) +
# crop the map to zoom in on the parts of Europe where the sites are
coord_sf(xlim = c(-10,30),
ylim = c(35,53),
expand = FALSE) +
# add the layer of signbase sites, here I'm using the raw data, but you can
# also use a data frame that you have done some calculations on. You just need
# the longitude and latitude columns to be present for this to work
geom_point(data = signbase,
# point colour indicates 'country', feel free to explore and
# change this to other columns in the signbase_sf data frame
aes(x = longitude,
y = latitude,
# point colour indicates 'country', feel free to explore and
# change this to other columns in the signbase data frame
colour = country)) +
# add labels for the site names
geom_text_repel(data = signbase %>%
# only one label per site, otherwise it's too croweded to read
distinct(site_name,
.keep_all = TRUE),
aes(x = longitude ,
y = latitude,
label = site_name),
max.overlaps = 100,
size = 2) +
theme_minimal()
@benmarwick
Copy link
Author

Here's the map that this code produces

image

@benmarwick
Copy link
Author

Here's the histogram of the radiocarbon data:

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment