Skip to content

Instantly share code, notes, and snippets.

@fdefalco
Created September 30, 2022 19:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save fdefalco/2aca8656804cd1b3618f4a64c5900c88 to your computer and use it in GitHub Desktop.
Save fdefalco/2aca8656804cd1b3618f4a64c5900c88 to your computer and use it in GitHub Desktop.
options(connectionObserver = NULL)
library("dplyr")
library("tibble")
# This script leverages an existing WebAPI installation to compare two versions of the standard vocabulary and their impact on concept set resolution across a set of cohort definitions.
# base url of the WebAPI installation
baseUrl <- "https://epi.jnj.com:8443/WebAPI"
# if security is enabled authorize use of the webapi
ROhdsiWebApi::authorizeWebApi(baseUrl, "windows")
# establish result frames
results <- tibble(
cohort_definition_id = numeric(),
concept_set_index = numeric(),
count_concepts_added = numeric(),
count_concepts_removed = numeric()
)
conceptSetConceptDetails <- tibble(
cohortDefinitionId = numeric(),
conceptSetIndex = numeric(),
action = character(),
conceptId = numeric(),
conceptName = character(),
standardConcept = character(),
standardConceptCaption = character(),
invalidReason = character(),
invalidReasonCaption = character(),
conceptCode = character(),
domainId = character(),
vocabularyId = character(),
conceptClassId = character()
)
# A dynamic load of all available cohort definitions can be performed using the following code
# cohortDefinitionListUrl <- paste(baseUrl, "/cohortdefinition/", sep = "")
# cohortMetadata <- ROhdsiWebApi::getDefinitionsMetadata(baseUrl = baseUrl, category = "cohort")
#
# It may be desireable to limit the time frame of the cohorts you would like to analyze.
# filterDate <- as.Date('2022-09-27')
# selectedCohortDefinitionList <- cohortMetadata[
# (as.Date(cohortMetadata$createdDate) >= filterDate|(as.Date(cohortMetadata$modifiedDate) >= filterDate) & !is.na(cohortMetadata$modifiedDate)),]
# In this case we load a prespecified list of cohort defintions to evaluate
selectedCohortDefinitionList <- readr::read_csv("phenotypes.csv")
totalCohortCount <- nrow(selectedCohortDefinitionList)
currentCohortIndex <- 0
# Evaluate each cohort definition in our selectedCohortDefinitionList and build results
for (cohortDefinitionId in selectedCohortDefinitionList$cohort_definition_id) {
currentCohortIndex <- currentCohortIndex +1
tryCatch({
cohortDefinition <- ROhdsiWebApi::getCohortDefinition(cohortDefinitionId, baseUrl)
cohortDefinitionExpression <- cohortDefinition$expression
conceptsetList <- cohortDefinitionExpression$ConceptSets
# skip any cohort definition that has no concept sets
if (length(conceptsetList) == 0) {
next
}
for (conceptSetIndex in 1:length(conceptsetList)) {
print(paste("processing cohort ", currentCohortIndex, "/", totalCohortCount," - cohort id: ", cohortDefinitionId, " concept set: ", conceptSetIndex, "/", length(conceptsetList)))
baseVocabularySourceKey <- "v20210617"
baseConcepsetResolution <- ROhdsiWebApi::resolveConceptSet(conceptsetList[[conceptSetIndex]],
baseUrl = baseUrl,
vocabularySourceKey = baseVocabularySourceKey)
updateVocabularySourceKey <- "v20220409"
updateConceptsetResolution <- ROhdsiWebApi::resolveConceptSet(conceptsetList[[conceptSetIndex]],
baseUrl = baseUrl,
vocabularySourceKey = updateVocabularySourceKey)
removedConceptIds <- setdiff(baseConcepsetResolution, updateConceptsetResolution)
addedConceptIds <- setdiff(updateConceptsetResolution, baseConcepsetResolution)
if ((length(removedConceptIds) + length(addedConceptIds)) > 0) {
results <- results %>% add_row(
cohort_definition_id = cohortDefinitionId,
concept_set_index = conceptSetIndex,
count_concepts_added = length(addedConceptIds),
count_concepts_removed = length(removedConceptIds)
)
if (length(removedConceptIds)>0) {
removedConcepts <- ROhdsiWebApi::getConcepts(removedConceptIds, baseUrl = baseUrl, vocabularySourceKey = baseVocabularySourceKey)
removedConcepts$cohortDefinitionId <- cohortDefinitionId
removedConcepts$conceptSetIndex <- conceptSetIndex
removedConcepts$action <- "Removed"
conceptSetConceptDetails <- bind_rows(conceptSetConceptDetails,removedConcepts)
}
if (length(addedConceptIds)>0) {
addedConcepts <- ROhdsiWebApi::getConcepts(addedConceptIds, baseUrl = baseUrl, vocabularySourceKey = updateVocabularySourceKey)
addedConcepts$cohortDefinitionId <- cohortDefinitionId
addedConcepts$conceptSetIndex <- conceptSetIndex
addedConcepts$action <- "Added"
conceptSetConceptDetails <- bind_rows(conceptSetConceptDetails, addedConcepts)
}
}
}
}, error=function(err) {
print(err)
print(paste("cohort not found:",cohortDefinitionId))
})
}
# This function can be used to evaluate an individual concept set in a cohort definition
investigateDifference <- function(cohortDefinitionId, conceptSetIndex) {
cohortDefinition <-
ROhdsiWebApi::getCohortDefinition(cohortDefinitionId, baseUrl)
cohortDefinitionExpression <- cohortDefinition$expression
conceptsetList <- cohortDefinitionExpression$ConceptSets
baseVocabularySourceKey <- "v20210617"
baseConcepsetResolution <- ROhdsiWebApi::resolveConceptSet(conceptsetList[[conceptSetIndex]],
baseUrl = baseUrl,
vocabularySourceKey = baseVocabularySourceKey)
updateVocabularySourceKey <- "v20220409"
updateConceptsetResolution <- ROhdsiWebApi::resolveConceptSet(conceptsetList[[conceptSetIndex]],
baseUrl = baseUrl,
vocabularySourceKey = updateVocabularySourceKey)
removedConceptIds <- setdiff(baseConcepsetResolution, updateConceptsetResolution)
addedConceptIds <- setdiff(updateConceptsetResolution, baseConcepsetResolution)
removedConcepts <- ROhdsiWebApi::getConcepts(removedConceptIds, baseUrl = baseUrl, vocabularySourceKey = baseVocabularySourceKey)
addedConcepts <- ROhdsiWebApi::getConcepts(addedConceptIds, baseUrl = baseUrl, vocabularySourceKey = updateVocabularySourceKey)
results <- list(removed=removedConcepts, added=addedConcepts)
return(results)
}
# Sample of investigating a single concept set in a cohort definition
# cohortDefinitionConceptSetDifferences <- investigateDifference(1,1)
# View(cohortDefinitionConceptSetDifferences$removed)
# View(cohortDefinitionConceptSetDifferences$added)
# Create summary results
summarized_results <- results %>%
group_by(results$cohort_definition_id) %>%
summarise(
count_concepts_added = sum(count_concepts_added ),
count_concepts_removed = sum(count_concepts_removed)
)
domain_action_summary <- conceptSetConceptDetails %>%
group_by(action, domainId) %>%
summarise(
count_concepts = n()
)
vocabulary_action_summary <- conceptSetConceptDetails %>%
group_by(action, vocabularyId) %>%
summarise(
count_concepts = n()
)
action_summary <- conceptSetConceptDetails %>%
group_by(action) %>%
summarise(
count_concepts = n()
)
readr::write_csv(vocabulary_action_summary,"vocabulary_action_summary.csv")
readr::write_csv(domain_action_summary,"domain_action_summary.csv")
totalCohortDefinitionsEvaluatedCount <- length(selectedCohortDefinitionList$cohort_definition_id)
impactedCohortDefinitionsCount <- length(unique(results$cohort_definition_id))
writeLines(paste(impactedCohortDefinitionsCount,"of",totalCohortDefinitionsEvaluatedCount,"(",impactedCohortDefinitionsCount/totalCohortDefinitionsEvaluatedCount,")", "cohort defintions were impacted."))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment