# lwaldron/knn-matching.R

## knn-matching.R
library(nabor)

# Toy example: propensity scores for two groups of unequal size.
propensity_scores1 <- c(0.1, 0.2, 0.3, 0.4, 0.5) # controls (the larger group)
propensity_scores2 <- c(0.15, 0.25, 0.35) # cases (the smaller group)

# nabor::knn() searches `data` for the nearest neighbour of every row of
# `query`. The cases are the data and the controls are the queries, so
# `matches` holds, for each control, the index (into propensity_scores2) of
# its closest case.
matches <- nabor::knn(
  data = matrix(propensity_scores2),
  query = matrix(propensity_scores1),
  k = 1
)[["nn.idx"]]

# show which case each control was paired with
print(matches)

# Several controls can share the same closest case; keep only the first
# control (in input order) that points at each distinct case.
propensity_scores1[which(!duplicated(matches))]

# A more realistic example: simulated ages for two groups of unequal size.
# Fix the RNG seed so the sampled ages, the matches and both plots are
# reproducible from one run to the next.
set.seed(1)
ages1 <- sample(30:80, 100, replace = TRUE) # larger group, ages 30-80
ages2 <- sample(50:80, 30, replace = TRUE) # smaller group, ages 50-80

# Overlapping density plot of ages1 (red) and ages2 (black) before matching.
# The x-axis is taken from the sampled data so that it covers every age drawn;
# a fixed upper limit of 70 would hide the 70-80 range both groups come from.
plot(
  density(ages1),
  col = "red", xlim = range(ages1, ages2), ylim = c(0, 0.05),
  main = "Age Distribution", xlab = "Age", ylab = "Density"
)
lines(density(ages2), col = "black")

# For each age in ages1 (the query), find the index of the closest age in
# ages2 (the data) with nabor::knn().
matches <- nabor::knn(matrix(ages2), matrix(ages1), k = 1)$nn.idx

# Keep only the first ages1 entry that points at each distinct ages2 index.
ages1_matched <- ages1[!duplicated(matches)]

# Repeat in the other direction: for each age in ages2, find the closest
# retained ages1 value, then keep the first ages2 entry per distinct match.
matches <- nabor::knn(matrix(ages1_matched), matrix(ages2), k = 1)$nn.idx
ages2_matched <- ages2[!duplicated(matches)]

# Density plot after matching, drawn on the same axes as the first plot so
# the two figures can be compared directly.
plot(
  density(ages1_matched),
  col = "red", xlim = range(ages1, ages2), ylim = c(0, 0.05),
  main = "Age Distribution", xlab = "Age", ylab = "Density"
)
lines(density(ages2_matched), col = "black")
	# NOTE(review): this tab-indented section repeats the un-indented
	# propensity-score example earlier in the file statement for statement;
	# confirm whether both copies are intended.
	library(nabor)

	# Toy example: propensity scores for two groups of unequal size.
	propensity_scores1 <- c(0.1, 0.2, 0.3, 0.4, 0.5) # controls (the larger group)
	propensity_scores2 <- c(0.15, 0.25, 0.35) # cases (the smaller group)

	# nabor::knn() searches `data` for the nearest neighbour of every row of
	# `query`. The cases are the data and the controls are the queries, so
	# `matches` holds, for each control, the index (into propensity_scores2)
	# of its closest case.
	matches <- nabor::knn(
	  data = matrix(propensity_scores2),
	  query = matrix(propensity_scores1),
	  k = 1
	)[["nn.idx"]]

	# show which case each control was paired with
	print(matches)

	# Several controls can share the same closest case; keep only the first
	# control (in input order) that points at each distinct case.
	propensity_scores1[which(!duplicated(matches))]

	# NOTE(review): this tab-indented section repeats the un-indented ages
	# example earlier in the file; confirm whether both copies are intended.
	#
	# A more realistic example: simulated ages for two groups of unequal size.
	# Fix the RNG seed so the sampled ages, the matches and both plots are
	# reproducible from one run to the next.
	set.seed(1)
	ages1 <- sample(30:80, 100, replace = TRUE) # larger group, ages 30-80
	ages2 <- sample(50:80, 30, replace = TRUE) # smaller group, ages 50-80

	# Overlapping density plot of ages1 (red) and ages2 (black) before
	# matching. The x-axis is taken from the sampled data so that it covers
	# every age drawn; a fixed upper limit of 70 would hide the 70-80 range
	# both groups come from.
	plot(
	  density(ages1),
	  col = "red", xlim = range(ages1, ages2), ylim = c(0, 0.05),
	  main = "Age Distribution", xlab = "Age", ylab = "Density"
	)
	lines(density(ages2), col = "black")

	# For each age in ages1 (the query), find the index of the closest age in
	# ages2 (the data) with nabor::knn().
	matches <- nabor::knn(matrix(ages2), matrix(ages1), k = 1)$nn.idx

	# Keep only the first ages1 entry that points at each distinct ages2 index.
	ages1_matched <- ages1[!duplicated(matches)]

	# Repeat in the other direction: for each age in ages2, find the closest
	# retained ages1 value, then keep the first ages2 entry per distinct match.
	matches <- nabor::knn(matrix(ages1_matched), matrix(ages2), k = 1)$nn.idx
	ages2_matched <- ages2[!duplicated(matches)]

	# Density plot after matching, drawn on the same axes as the first plot so
	# the two figures can be compared directly.
	plot(
	  density(ages1_matched),
	  col = "red", xlim = range(ages1, ages2), ylim = c(0, 0.05),
	  main = "Age Distribution", xlab = "Age", ylab = "Density"
	)
	lines(density(ages2_matched), col = "black")