Created
September 30, 2022 19:06
-
-
Save fdefalco/2aca8656804cd1b3618f4a64c5900c88 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
options(connectionObserver = NULL) | |
library("dplyr") | |
library("tibble") | |
# This script leverages an existing WebAPI installation to compare two versions of the standard vocabulary and their impact on concept set resolution across a set of cohort definitions. | |
# base url of the WebAPI installation | |
baseUrl <- "https://epi.jnj.com:8443/WebAPI" | |
# if security is enabled authorize use of the webapi | |
ROhdsiWebApi::authorizeWebApi(baseUrl, "windows") | |
# establish result frames | |
results <- tibble( | |
cohort_definition_id = numeric(), | |
concept_set_index = numeric(), | |
count_concepts_added = numeric(), | |
count_concepts_removed = numeric() | |
) | |
conceptSetConceptDetails <- tibble( | |
cohortDefinitionId = numeric(), | |
conceptSetIndex = numeric(), | |
action = character(), | |
conceptId = numeric(), | |
conceptName = character(), | |
standardConcept = character(), | |
standardConceptCaption = character(), | |
invalidReason = character(), | |
invalidReasonCaption = character(), | |
conceptCode = character(), | |
domainId = character(), | |
vocabularyId = character(), | |
conceptClassId = character() | |
) | |
# A dynamic load of all available cohort definitions can be performed using the following code | |
# cohortDefinitionListUrl <- paste(baseUrl, "/cohortdefinition/", sep = "") | |
# cohortMetadata <- ROhdsiWebApi::getDefinitionsMetadata(baseUrl = baseUrl, category = "cohort") | |
# | |
# It may be desireable to limit the time frame of the cohorts you would like to analyze. | |
# filterDate <- as.Date('2022-09-27') | |
# selectedCohortDefinitionList <- cohortMetadata[ | |
# (as.Date(cohortMetadata$createdDate) >= filterDate|(as.Date(cohortMetadata$modifiedDate) >= filterDate) & !is.na(cohortMetadata$modifiedDate)),] | |
# In this case we load a prespecified list of cohort defintions to evaluate | |
selectedCohortDefinitionList <- readr::read_csv("phenotypes.csv") | |
totalCohortCount <- nrow(selectedCohortDefinitionList) | |
currentCohortIndex <- 0 | |
# Evaluate each cohort definition in our selectedCohortDefinitionList and build results | |
for (cohortDefinitionId in selectedCohortDefinitionList$cohort_definition_id) { | |
currentCohortIndex <- currentCohortIndex +1 | |
tryCatch({ | |
cohortDefinition <- ROhdsiWebApi::getCohortDefinition(cohortDefinitionId, baseUrl) | |
cohortDefinitionExpression <- cohortDefinition$expression | |
conceptsetList <- cohortDefinitionExpression$ConceptSets | |
# skip any cohort definition that has no concept sets | |
if (length(conceptsetList) == 0) { | |
next | |
} | |
for (conceptSetIndex in 1:length(conceptsetList)) { | |
print(paste("processing cohort ", currentCohortIndex, "/", totalCohortCount," - cohort id: ", cohortDefinitionId, " concept set: ", conceptSetIndex, "/", length(conceptsetList))) | |
baseVocabularySourceKey <- "v20210617" | |
baseConcepsetResolution <- ROhdsiWebApi::resolveConceptSet(conceptsetList[[conceptSetIndex]], | |
baseUrl = baseUrl, | |
vocabularySourceKey = baseVocabularySourceKey) | |
updateVocabularySourceKey <- "v20220409" | |
updateConceptsetResolution <- ROhdsiWebApi::resolveConceptSet(conceptsetList[[conceptSetIndex]], | |
baseUrl = baseUrl, | |
vocabularySourceKey = updateVocabularySourceKey) | |
removedConceptIds <- setdiff(baseConcepsetResolution, updateConceptsetResolution) | |
addedConceptIds <- setdiff(updateConceptsetResolution, baseConcepsetResolution) | |
if ((length(removedConceptIds) + length(addedConceptIds)) > 0) { | |
results <- results %>% add_row( | |
cohort_definition_id = cohortDefinitionId, | |
concept_set_index = conceptSetIndex, | |
count_concepts_added = length(addedConceptIds), | |
count_concepts_removed = length(removedConceptIds) | |
) | |
if (length(removedConceptIds)>0) { | |
removedConcepts <- ROhdsiWebApi::getConcepts(removedConceptIds, baseUrl = baseUrl, vocabularySourceKey = baseVocabularySourceKey) | |
removedConcepts$cohortDefinitionId <- cohortDefinitionId | |
removedConcepts$conceptSetIndex <- conceptSetIndex | |
removedConcepts$action <- "Removed" | |
conceptSetConceptDetails <- bind_rows(conceptSetConceptDetails,removedConcepts) | |
} | |
if (length(addedConceptIds)>0) { | |
addedConcepts <- ROhdsiWebApi::getConcepts(addedConceptIds, baseUrl = baseUrl, vocabularySourceKey = updateVocabularySourceKey) | |
addedConcepts$cohortDefinitionId <- cohortDefinitionId | |
addedConcepts$conceptSetIndex <- conceptSetIndex | |
addedConcepts$action <- "Added" | |
conceptSetConceptDetails <- bind_rows(conceptSetConceptDetails, addedConcepts) | |
} | |
} | |
} | |
}, error=function(err) { | |
print(err) | |
print(paste("cohort not found:",cohortDefinitionId)) | |
}) | |
} | |
# This function can be used to evaluate an individual concept set in a cohort definition | |
investigateDifference <- function(cohortDefinitionId, conceptSetIndex) { | |
cohortDefinition <- | |
ROhdsiWebApi::getCohortDefinition(cohortDefinitionId, baseUrl) | |
cohortDefinitionExpression <- cohortDefinition$expression | |
conceptsetList <- cohortDefinitionExpression$ConceptSets | |
baseVocabularySourceKey <- "v20210617" | |
baseConcepsetResolution <- ROhdsiWebApi::resolveConceptSet(conceptsetList[[conceptSetIndex]], | |
baseUrl = baseUrl, | |
vocabularySourceKey = baseVocabularySourceKey) | |
updateVocabularySourceKey <- "v20220409" | |
updateConceptsetResolution <- ROhdsiWebApi::resolveConceptSet(conceptsetList[[conceptSetIndex]], | |
baseUrl = baseUrl, | |
vocabularySourceKey = updateVocabularySourceKey) | |
removedConceptIds <- setdiff(baseConcepsetResolution, updateConceptsetResolution) | |
addedConceptIds <- setdiff(updateConceptsetResolution, baseConcepsetResolution) | |
removedConcepts <- ROhdsiWebApi::getConcepts(removedConceptIds, baseUrl = baseUrl, vocabularySourceKey = baseVocabularySourceKey) | |
addedConcepts <- ROhdsiWebApi::getConcepts(addedConceptIds, baseUrl = baseUrl, vocabularySourceKey = updateVocabularySourceKey) | |
results <- list(removed=removedConcepts, added=addedConcepts) | |
return(results) | |
} | |
# Sample of investigating a single concept set in a cohort definition | |
# cohortDefinitionConceptSetDifferences <- investigateDifference(1,1) | |
# View(cohortDefinitionConceptSetDifferences$removed) | |
# View(cohortDefinitionConceptSetDifferences$added) | |
# Create summary results | |
summarized_results <- results %>% | |
group_by(results$cohort_definition_id) %>% | |
summarise( | |
count_concepts_added = sum(count_concepts_added ), | |
count_concepts_removed = sum(count_concepts_removed) | |
) | |
domain_action_summary <- conceptSetConceptDetails %>% | |
group_by(action, domainId) %>% | |
summarise( | |
count_concepts = n() | |
) | |
vocabulary_action_summary <- conceptSetConceptDetails %>% | |
group_by(action, vocabularyId) %>% | |
summarise( | |
count_concepts = n() | |
) | |
action_summary <- conceptSetConceptDetails %>% | |
group_by(action) %>% | |
summarise( | |
count_concepts = n() | |
) | |
readr::write_csv(vocabulary_action_summary,"vocabulary_action_summary.csv") | |
readr::write_csv(domain_action_summary,"domain_action_summary.csv") | |
totalCohortDefinitionsEvaluatedCount <- length(selectedCohortDefinitionList$cohort_definition_id) | |
impactedCohortDefinitionsCount <- length(unique(results$cohort_definition_id)) | |
writeLines(paste(impactedCohortDefinitionsCount,"of",totalCohortDefinitionsEvaluatedCount,"(",impactedCohortDefinitionsCount/totalCohortDefinitionsEvaluatedCount,")", "cohort defintions were impacted.")) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment