Skip to content

Instantly share code, notes, and snippets.

@tmalsburg
Created September 27, 2023 10:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tmalsburg/18b3475107b4ebd7f4e23d55eb577265 to your computer and use it in GitHub Desktop.
Save tmalsburg/18b3475107b4ebd7f4e23d55eb577265 to your computer and use it in GitHub Desktop.
Analysis of onset patters of basic Mandarin Chinese color terms
library(ggplot2)
library(dplyr)
options(scipen=999)
N <- 100000 # Number of simulations
# Data:
onsets <- tibble(
onset = c("b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h",
"j", "q", "x", "zh", "ch", "sh", "r", "z", "c", "s"),
count = c(16, 16, 18, 9, 21, 19, 23, 24, 18, 18, 19, 13, 14, 14, 20,
18, 19, 14, 17, 16, 16),
prob = count / sum(count))
# Assuming that color terms have random onsets
# (H0), how likely is it that warm colors (yellow,
# red) share the same onset and cold colors (blue,
# green) share the same onset?
sapply(1:N, function(i) {
with(onsets,
sample(onset, 4, replace=TRUE, prob=prob))
}) |>
t() |>
data.frame() |>
dplyr::rename(yellow=X1, red=X2, blue=X3, green=X4) -> sim
# Count cases where “yellow” and “red”, and “blue”
# and “green” share the same onset:
sim |>
filter(
yellow == red,
blue == green,
yellow != green) |>
nrow() -> i
# p-value:
i/N -> p1
# Assuming that “black” and “white” have random
# onsets (H0), how likely is it that they start
# with onsets that are extremely dissimilar with
# respect to place and mode of articulation?
# Assign random onsets to black and white and
# simulate N time:
sapply(1:N, function(i) {
with(onsets,
sample(onset, 2, replace=TRUE, prob=prob))
}) |>
t() |>
data.frame() |>
dplyr::rename(black=X1, white=X2) -> sim
# Count cases with extremely dissimilar onsets:
sim |>
filter(
case_when(
black %in% c("p", "b") & white %in% c("h") ~ TRUE,
black %in% c("m") & white %in% c("g", "k", "h") ~ TRUE,
black %in% c("g", "k") & white %in% c("m", "f") ~ TRUE,
black %in% c("f") & white %in% c("g", "k") ~ TRUE,
white %in% c("p", "b") & black %in% c("h") ~ TRUE,
white %in% c("m") & black %in% c("g", "k", "h") ~ TRUE,
white %in% c("g", "k") & black %in% c("m", "f") ~ TRUE,
white %in% c("f") & black %in% c("g", "k") ~ TRUE,
.default = FALSE)
) |>
nrow() -> i
# p-value:
i/N -> p2
p1 * p2 -> p3
message("p1: ", p1)
message("p2: ", p2)
message("p3: ", p3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment