# benjamin-chan/commonIDs.R

## commonIDs.R
library(magrittr)
library(dplyr)
# Build one simulated sample as a data frame with three columns:
#   hosp - the label passed in `hosp` (kept as character, not factor)
#   id   - `samp` values drawn with replacement via sample(pop, ...);
#          for a single number `pop` that means draws from 1..pop
#   z    - one standard-normal draw per row
# The ids are drawn before the normal noise, so results are reproducible
# under set.seed().
buildSample <- function(hosp, pop = 500, samp = 100) {
  out <- data.frame(
    hosp = hosp,
    id = sample(pop, samp, replace = TRUE),
    stringsAsFactors = FALSE
  )
  out[["z"]] <- rnorm(nrow(out))
  out
}
# Simulate five hospitals. D draws its ids from a pool of 1000 instead of the
# default 500; E draws 200 ids instead of the default 100. The calls stay in
# this order so the random-number stream is consumed the same way.
df1 <- buildSample("A")
df2 <- buildSample("B")
df3 <- buildSample("C")
df4 <- buildSample("D", pop = 1000)
df5 <- buildSample("E", samp = 200)

# One row per (hospital pair, shared id).
# The distinct (hosp, id) rows are self-joined on id, so every hospital is
# matched with every hospital holding the same id. Keeping hosp.x < hosp.y
# drops self-matches and leaves each unordered pair exactly once.
df <-
  bind_rows(df1, df2, df3, df4, df5) %>%
  select(hosp, id) %>%
  distinct() %>%
  inner_join(., ., by = "id") %>%
  filter(hosp.x < hosp.y) %>%
  select(hosp.x, hosp.y, id)

# Count the shared ids per hospital pair; pairs with no shared id have no
# rows in df and therefore do not appear in the printed summary.
df %>%
  group_by(pair = sprintf("%s compared to %s", hosp.x, hosp.y)) %>%
  summarize(commonIDs = n())
	library(magrittr)
	library(dplyr)
	# Build one simulated sample as a data frame with three columns:
	#   hosp - the label passed in `hosp` (kept as character, not factor)
	#   id   - `samp` values drawn with replacement via sample(pop, ...);
	#          for a single number `pop` that means draws from 1..pop
	#   z    - one standard-normal draw per row
	# The ids are drawn before the normal noise, so results are reproducible
	# under set.seed().
	buildSample <- function(hosp, pop = 500, samp = 100) {
	  out <- data.frame(
	    hosp = hosp,
	    id = sample(pop, samp, replace = TRUE),
	    stringsAsFactors = FALSE
	  )
	  out[["z"]] <- rnorm(nrow(out))
	  out
	}
	# Simulate five hospitals. D draws its ids from a pool of 1000 instead of
	# the default 500; E draws 200 ids instead of the default 100. The calls
	# stay in this order so the random-number stream is consumed the same way.
	df1 <- buildSample("A")
	df2 <- buildSample("B")
	df3 <- buildSample("C")
	df4 <- buildSample("D", pop = 1000)
	df5 <- buildSample("E", samp = 200)

	# One row per (hospital pair, shared id).
	# The distinct (hosp, id) rows are self-joined on id, so every hospital is
	# matched with every hospital holding the same id. Keeping hosp.x < hosp.y
	# drops self-matches and leaves each unordered pair exactly once.
	df <-
	  bind_rows(df1, df2, df3, df4, df5) %>%
	  select(hosp, id) %>%
	  distinct() %>%
	  inner_join(., ., by = "id") %>%
	  filter(hosp.x < hosp.y) %>%
	  select(hosp.x, hosp.y, id)

	# Count the shared ids per hospital pair; pairs with no shared id have no
	# rows in df and therefore do not appear in the printed summary.
	df %>%
	  group_by(pair = sprintf("%s compared to %s", hosp.x, hosp.y)) %>%
	  summarize(commonIDs = n())