Skip to content

Instantly share code, notes, and snippets.

@lwaldron
Created June 25, 2024 18:45
Show Gist options
  • Save lwaldron/4c135b4461a9b50931b5a20b4da89e43 to your computer and use it in GitHub Desktop.
Save lwaldron/4c135b4461a9b50931b5a20b4da89e43 to your computer and use it in GitHub Desktop.
One way to age match using k-nearest neighbors
library(nabor)
# Toy example: two vectors of propensity scores.
propensity_scores1 <- c(0.1, 0.2, 0.3, 0.4, 0.5) # more controls
propensity_scores2 <- c(0.15, 0.25, 0.35) # fewer cases

# For each score in propensity_scores1 (second argument, the query), find the
# row index of the closest score in propensity_scores2 (first argument).
matches <- nabor::knn(
  matrix(propensity_scores2), matrix(propensity_scores1),
  k = 1
)$nn.idx

# One row per control; the value is the index of its nearest case.
print(matches)

# Several controls can share the same nearest case. !duplicated() keeps the
# first control listed for each case, which is not necessarily the closest.
propensity_scores1[!duplicated(matches)]

# A more realistic example with randomly drawn ages.
# Fix the seed so the plots and the matched sets are the same on every run.
set.seed(1)
ages1 <- sample(30:80, 100, replace = TRUE)
ages2 <- sample(50:80, 30, replace = TRUE)

# Overlapping density plot of ages1 (red) and ages2 (black) before matching.
# xlim spans the full sampled range (30 to 80) so the upper ages stay visible.
plot(
  density(ages1),
  col = "red", xlim = c(30, 80), ylim = c(0, 0.05),
  main = "Age Distribution", xlab = "Age", ylab = "Density"
)
lines(density(ages2), col = "black")

# For each age in ages1, find the index of the closest age in ages2.
matches <- nabor::knn(matrix(ages2), matrix(ages1), k = 1)$nn.idx

# Keep the first element of ages1 that maps to each distinct element of ages2.
ages1_matched <- ages1[!duplicated(matches)]

# Reverse direction: for each age in ages2, find the closest retained age in
# ages1_matched, then keep the first element of ages2 per distinct match.
matches <- nabor::knn(matrix(ages1_matched), matrix(ages2), k = 1)$nn.idx
ages2_matched <- ages2[!duplicated(matches)]

# Same plot after matching, on the same axes for a direct comparison.
plot(
  density(ages1_matched),
  col = "red", xlim = c(30, 80), ylim = c(0, 0.05),
  main = "Age Distribution", xlab = "Age", ylab = "Density"
)
lines(density(ages2_matched), col = "black")
@lwaldron
Copy link
Author

library(nabor)

# Toy example: two vectors of propensity scores.
propensity_scores1 <- c(0.1, 0.2, 0.3, 0.4, 0.5) # more controls
propensity_scores2 <- c(0.15, 0.25, 0.35) # fewer cases

# For each score in propensity_scores1 (second argument, the query), find the
# row index of the closest score in propensity_scores2 (first argument).
matches <- nabor::knn(
  matrix(propensity_scores2), matrix(propensity_scores1),
  k = 1
)$nn.idx

# One row per control; the value is the index of its nearest case.
print(matches)
#>      [,1]
#> [1,]    1
#> [2,]    2
#> [3,]    2
#> [4,]    3
#> [5,]    3

# Several controls can share the same nearest case. !duplicated() keeps the
# first control listed for each case, which is not necessarily the closest.
propensity_scores1[!duplicated(matches)]
#> [1] 0.1 0.2 0.4

# A more realistic example with randomly drawn ages.
# Fix the seed so the plots and the matched sets are the same on every run.
set.seed(1)
ages1 <- sample(30:80, 100, replace = TRUE)
ages2 <- sample(50:80, 30, replace = TRUE)

# Overlapping density plot of ages1 (red) and ages2 (black) before matching.
# xlim spans the full sampled range (30 to 80) so the upper ages stay visible.
plot(
  density(ages1),
  col = "red", xlim = c(30, 80), ylim = c(0, 0.05),
  main = "Age Distribution", xlab = "Age", ylab = "Density"
)
lines(density(ages2), col = "black")

# For each age in ages1, find the index of the closest age in ages2.
matches <- nabor::knn(matrix(ages2), matrix(ages1), k = 1)$nn.idx

# Keep the first element of ages1 that maps to each distinct element of ages2.
ages1_matched <- ages1[!duplicated(matches)]

# Reverse direction: for each age in ages2, find the closest retained age in
# ages1_matched, then keep the first element of ages2 per distinct match.
matches <- nabor::knn(matrix(ages1_matched), matrix(ages2), k = 1)$nn.idx
ages2_matched <- ages2[!duplicated(matches)]

# Same plot after matching, on the same axes for a direct comparison.
plot(
  density(ages1_matched),
  col = "red", xlim = c(30, 80), ylim = c(0, 0.05),
  main = "Age Distribution", xlab = "Age", ylab = "Density"
)
lines(density(ages2_matched), col = "black")

Created on 2024-06-25 with reprex v2.1.0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment