Last active
October 4, 2022 00:04
-
-
Save dlebauer/42583d666732a4a526de6bf2bebcef05 to your computer and use it in GitHub Desktop.
Find cohort files in BETYdb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
select | |
site_id, | |
sites.sitename, | |
st_X(sites.geometry) as lon, | |
st_y(sites.geometry) as lat, | |
formats.name as format_name, | |
dbfiles.file_path as file_path, | |
inputs.name as filename, | |
inputs.start_date, | |
inputs.end_date, | |
age(inputs.end_date, inputs.start_date) as duration, | |
inputs.created_at | |
from | |
inputs | |
join formats on inputs.format_id = formats. | |
id join sites on site_id = sites. | |
id | |
left join dbfiles on dbfiles.container_id = inputs.id and dbfiles.container_type = 'Input' | |
where | |
formats.name in ('ED2.cohort', 'ed.met_driver_header files format') | |
and file_path like '%dietzelab%'; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(jsonlite) | |
library(traits) | |
inputs <- traits::betydb_query(table = 'inputs', format_id = 11, | |
associations_mode = 'full_info', | |
key = 'FSWoT20GcwsZpSrRR4BUeYa2UGw3GhQd6MxkdWIp') | |
sites <- traits::betydb_query(table = 'sites', | |
key = 'FSWoT20GcwsZpSrRR4BUeYa2UGw3GhQd6MxkdWIp') | |
sites <- list() | |
i <- 1 | |
x <- 1000 | |
while(x > 0){ | |
time <- Sys.time() | |
tmp <- read_json(paste0('https://www.betydb.org/api/v1/sites?limit=1000&offset=', i*1000), | |
simplifyVector = TRUE) | |
sites[[i]] <- tmp | |
x <- tmp$metadata$count | |
i <- i + 1 | |
print(paste0(Sys.time() - time, ": i = ", i, " and count = ", x)) | |
} | |
s <- lapply(sites, function(x) y <- x$data$site) | |
sites2 <- dplyr::bind_rows(s) %>% | |
select(id, sitename, geometry) | |
dbfiles <- traits::betydb_query(table = 'dbfiles', limit = 10, file_name = 'ED_MET_DRIVER_HEADER',#container_type = 'Inputs', | |
key = 'FSWoT20GcwsZpSrRR4BUeYa2UGw3GhQd6MxkdWIp') | |
dbfiles2 <- dbfiles %>% | |
filter(container_type == 'Input') | |
library(dplyr) | |
z <- inputs %>% | |
left_join(dbfiles2, by = c('id' = 'container_id')) %>% | |
left_join(sites2, by = c('site_id' = 'id'), copy = TRUE) %>% | |
# filter(!is.na(file_name)) %>% | |
select(sitename, geometry, start_date, end_date) | |
library(leaflet) | |
library(rgeos) | |
zz <- z %>% filter(!is.na(geometry) && sitename != 'Jiufeng FP') %>% | |
mutate(m = grepl('MANDIFORE', sitename), | |
col = ifelse(m, 'blue', 'darkorange'), | |
sfc = sf::st_as_sfc(geometry)) %>% | |
sf::st_as_sf() | |
leaflet(zz, options = leafletOptions(preferCanvas = TRUE)) %>% | |
addProviderTiles(providers$Esri.WorldGrayCanvas) %>% | |
leaflet::addCircles(weight = 2, label = zz$sitename, color = zz$col) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
select | |
site_id, | |
sites.sitename, | |
formats.name as format_name, | |
inputs.name as filename, | |
inputs.created_at, | |
users.id as user_id, | |
users.name, | |
users.login as username | |
from | |
inputs | |
join formats on inputs.format_id = formats. | |
id join sites on site_id = sites. | |
id full join users on inputs.user_id = users.id | |
where | |
formats.name = 'ED2.cohort'; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
cohort_met_files <- 'https://gist.githubusercontent.com/dlebauer/66686e60bac92535585ddf7a0dfaeae2/raw/fb3f9c9a625f4a9e79221fe36906e3b7afae5f8c/cohort_met_files.csv' | |
x <- read_csv(cohort_met_files) | |
met <- x %>% filter(format_name == "ed.met_driver_header files format") | |
cohort <- x %>% filter(format_name == "ED2.cohort") | |
dim(met) | |
dim(cohort) | |
x %>% filter(sitename == 'NC07') %>% print() | |
cohort %>% select(-created_at) %>% unique() | |
library(leaflet) | |
met2 <- met %>% | |
mutate(duration = lubridate::as.duration(duration)) %>% | |
filter(duration >= lubridate::as.duration('1 years')) | |
cohort2 <- cohort %>% | |
select(sitename, file_path, filename) %>% | |
group_by(sitename) %>% | |
summarise(cohort_paths = list(file_path), cohort_files = list(filename)) | |
all <- met2 %>% inner_join(cohort2, by = 'sitename') #%>% | |
# filter( !grepl('MANDIFORE', sitename) | is.null(cohort_paths)) | |
leaflet(all ) %>% | |
addProviderTiles(providers$Esri.WorldGrayCanvas) %>% | |
leaflet::addMarkers(lng = ~lon, lat = ~lat, | |
label = all$sitename, | |
popup = paste(all$sitename, | |
'<br>met:', all$file_path, | |
'<br>cohort:', all$cohort_paths, | |
'<br>duration:', all$duration)) | |
New map of sites w/ ED2 configs: https://viz.datascience.arizona.edu/connect/#/apps/6787037c-014d-4bf4-9ad1-60b321766752
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
map of sites w/ ED2 configs: https://viz.datascience.arizona.edu/connect/#/apps/cf6ce95d-80b7-479b-b724-719fe0bbb2bc