Skip to content

Instantly share code, notes, and snippets.

@b-rodrigues
Created January 4, 2017 15:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save b-rodrigues/77293a15c5915e41583554219f2cdc46 to your computer and use it in GitHub Desktop.
Save b-rodrigues/77293a15c5915e41583554219f2cdc46 to your computer and use it in GitHub Desktop.
> library(janitor)
> get_dupes
function (dat, ...) {
  # Find the rows of `dat` whose combination of values in the chosen columns
  # occurs more than once, and report how often each combination occurs.
  #
  # Arguments:
  #   dat  A data frame; it is handed to dplyr verbs, names() and nrow().
  #   ...  Unquoted column expressions to check. When none are supplied,
  #        every column of `dat` is used and a message says so.
  #
  # Returns:
  #   The rows whose combination count is greater than 1, ordered by the
  #   checked columns, with the count stored in a `dupe_count` column.
  #   A message is emitted when no such rows exist.

  # Capture the `...` arguments unevaluated; [-1L] drops the leading `list`
  # symbol of the substituted call.
  col_exprs <- as.list(substitute(list(...)))[-1L]
  df_label <- deparse(substitute(dat))
  col_labels <- lapply(col_exprs, deparse)

  # NOTE(review): check_vars_in_df() is defined elsewhere; presumably it
  # stops when a requested column is not present in `dat` - confirm.
  check_vars_in_df(dat, df_label, unlist(col_labels))

  # Never read below; presumably declared so R CMD check does not flag the
  # `dupe_count` name used in rename() - confirm.
  dupe_count <- NULL

  if (length(col_exprs) == 0) {
    # Fall back to all columns, backtick-quoted so that non-syntactic
    # column names can still be handed to the standard-evaluation verbs.
    all_cols <- names(dat)
    col_labels <- all_cols
    col_exprs <- paste0("`", as.list(all_cols), "`")
    message("No variable names specified - using all columns.\n")
  }

  # Count each combination, then join the counts back onto the original rows.
  # inner_join() is given no `by`, so it joins on all shared columns; its
  # messages are suppressed.
  counts <- dat %>% dplyr::count_(vars = col_exprs)
  matched <- suppressMessages(dplyr::inner_join(counts, dat))
  dupes <- matched %>%
    dplyr::filter(n > 1) %>%
    dplyr::ungroup() %>%
    dplyr::arrange_(.dots = col_exprs) %>%
    dplyr::rename(dupe_count = n)

  if (nrow(dupes) == 0) {
    # The labels are only needed for this message; cap the listing at nine
    # names plus a summary entry when more than ten columns were checked.
    n_labels <- length(col_labels)
    if (n_labels > 10) {
      col_labels <- c(
        col_labels[1:9],
        paste("... and", n_labels - 9, "other variables")
      )
    }
    message(paste0("No duplicate combinations found of: ",
                   paste(col_labels, collapse = ", ")))
  }

  dupes
}
<environment: namespace:janitor>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment