Skip to content

Instantly share code, notes, and snippets.

@valentinitnelav
Last active November 18, 2016 10:10
Show Gist options
  • Save valentinitnelav/1b4111666a034d964b389f79ad47ec95 to your computer and use it in GitHub Desktop.
Save valentinitnelav/1b4111666a034d964b389f79ad47ec95 to your computer and use it in GitHub Desktop.
#' Extract the first author's surname from a string of author names
#'
#' Returns the first whitespace-delimited name in `strg`, upper-cased. A
#' leading "nobiliary" particle is kept together with the name that follows
#' it, e.g. "von Frisch" or "van der Waals".
#'
#' Particles are recognised only as whole words (case-insensitively), so a
#' surname that merely starts with the same letters ("Vanderbilt", "Dennis",
#' "d'Artagnan") is returned as a single name.
#'
#' NOTE: The function still needs further testing. Use with care!
#'
#' @param strg Character vector; each element is a string of author names
#'   such as "von Frisch, GJ; Johnson, A". Other types are coerced with
#'   `as.character()`.
#' @return Character vector of the same length as `strg` holding the
#'   upper-cased first name, including any leading particle(s).
#'   `NA_character_` is returned for elements that are `NA` or contain no
#'   name at all.
get_author_name <- function(strg) {
  # Known particles, lower-cased; tokens are compared against these as
  # whole words.
  two_word_particles <- c("van der", "von der")
  one_word_particles <- c("von", "van", "de", "der", "di", "d'", "lord")

  # Replace everything except letters, combining marks, digits, whitespace,
  # apostrophes and hyphens with a space. Unicode property classes are used
  # so that accented letters are kept as part of the name.
  cleaned <- gsub(
    pattern = "[^\\p{L}\\p{M}\\p{N}\\s'-]",
    replacement = " ",
    x = as.character(strg),
    perl = TRUE
  )

  # Split on runs of whitespace so repeated spaces (including those left
  # behind by removed punctuation) never produce empty tokens.
  token_list <- strsplit(trimws(cleaned), split = "\\s+", perl = TRUE)

  first_name <- function(tokens) {
    if (anyNA(tokens)) {
      return(NA_character_)
    }
    tokens <- tokens[nzchar(tokens)]
    if (length(tokens) == 0L) {
      return(NA_character_)
    }

    lowered <- tolower(tokens)
    has_two_word_particle <- length(tokens) >= 2L &&
      paste(lowered[1:2], collapse = " ") %in% two_word_particles

    # Number of tokens forming the name: particle word(s) plus the name.
    n_wanted <- if (has_two_word_particle) {
      3L
    } else if (lowered[1] %in% one_word_particles) {
      2L
    } else {
      1L
    }

    # Never index past the available tokens; doing so would paste a
    # literal "NA" into the result.
    n_taken <- min(n_wanted, length(tokens))
    toupper(paste(tokens[seq_len(n_taken)], collapse = " "))
  }

  vapply(token_list, first_name, character(1), USE.NAMES = FALSE)
}
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# EXAMPLES
# Each call is followed by its printed result, kept as a `#>` comment so
# the examples remain valid R when the file is sourced.
get_author_name("Anders-Frisch, GJ; Johnson,")
#> [1] "ANDERS-FRISCH"
get_author_name("von Frisch, GJ; Johnson,")
#> [1] "VON FRISCH"
get_author_name("von Anders-Frisch, GJ; Johnson,")
#> [1] "VON ANDERS-FRISCH"
get_author_name("van der Anders-Frisch, GJ; Johnson,")
#> [1] "VAN DER ANDERS-FRISCH"
get_author_name("lord Anders-Frisch, GJ; Johnson,")
#> [1] "LORD ANDERS-FRISCH"
get_author_name("De Exemplu, GJ; Johnson,")
#> [1] "DE EXEMPLU"
get_author_name("An'der-son, GJ; Johnson,")
#> [1] "AN'DER-SON"
# A space after the apostrophe ends the name early:
get_author_name("An' ders-on, GJ; Johnson,")
#> [1] "AN'"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment