Skip to content

Instantly share code, notes, and snippets.

@dlebauer
Last active October 4, 2022 00:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dlebauer/42583d666732a4a526de6bf2bebcef05 to your computer and use it in GitHub Desktop.
Save dlebauer/42583d666732a4a526de6bf2bebcef05 to your computer and use it in GitHub Desktop.
Find cohort files in BETYdb
select
site_id,
sites.sitename,
st_X(sites.geometry) as lon,
st_y(sites.geometry) as lat,
formats.name as format_name,
dbfiles.file_path as file_path,
inputs.name as filename,
inputs.start_date,
inputs.end_date,
age(inputs.end_date, inputs.start_date) as duration,
inputs.created_at
from
inputs
join formats on inputs.format_id = formats.
id join sites on site_id = sites.
id
left join dbfiles on dbfiles.container_id = inputs.id and dbfiles.container_type = 'Input'
where
formats.name in ('ED2.cohort', 'ed.met_driver_header files format')
and file_path like '%dietzelab%';
library(jsonlite)
library(traits)
inputs <- traits::betydb_query(table = 'inputs', format_id = 11,
associations_mode = 'full_info',
key = 'FSWoT20GcwsZpSrRR4BUeYa2UGw3GhQd6MxkdWIp')
sites <- traits::betydb_query(table = 'sites',
key = 'FSWoT20GcwsZpSrRR4BUeYa2UGw3GhQd6MxkdWIp')
sites <- list()
i <- 1
x <- 1000
while(x > 0){
time <- Sys.time()
tmp <- read_json(paste0('https://www.betydb.org/api/v1/sites?limit=1000&offset=', i*1000),
simplifyVector = TRUE)
sites[[i]] <- tmp
x <- tmp$metadata$count
i <- i + 1
print(paste0(Sys.time() - time, ": i = ", i, " and count = ", x))
}
s <- lapply(sites, function(x) y <- x$data$site)
sites2 <- dplyr::bind_rows(s) %>%
select(id, sitename, geometry)
dbfiles <- traits::betydb_query(table = 'dbfiles', limit = 10, file_name = 'ED_MET_DRIVER_HEADER',#container_type = 'Inputs',
key = 'FSWoT20GcwsZpSrRR4BUeYa2UGw3GhQd6MxkdWIp')
dbfiles2 <- dbfiles %>%
filter(container_type == 'Input')
library(dplyr)
z <- inputs %>%
left_join(dbfiles2, by = c('id' = 'container_id')) %>%
left_join(sites2, by = c('site_id' = 'id'), copy = TRUE) %>%
# filter(!is.na(file_name)) %>%
select(sitename, geometry, start_date, end_date)
library(leaflet)
library(rgeos)
zz <- z %>% filter(!is.na(geometry) && sitename != 'Jiufeng FP') %>%
mutate(m = grepl('MANDIFORE', sitename),
col = ifelse(m, 'blue', 'darkorange'),
sfc = sf::st_as_sfc(geometry)) %>%
sf::st_as_sf()
leaflet(zz, options = leafletOptions(preferCanvas = TRUE)) %>%
addProviderTiles(providers$Esri.WorldGrayCanvas) %>%
leaflet::addCircles(weight = 2, label = zz$sitename, color = zz$col)
select
site_id,
sites.sitename,
formats.name as format_name,
inputs.name as filename,
inputs.created_at,
users.id as user_id,
users.name,
users.login as username
from
inputs
join formats on inputs.format_id = formats.
id join sites on site_id = sites.
id full join users on inputs.user_id = users.id
where
formats.name = 'ED2.cohort';
library(tidyverse)
cohort_met_files <- 'https://gist.githubusercontent.com/dlebauer/66686e60bac92535585ddf7a0dfaeae2/raw/fb3f9c9a625f4a9e79221fe36906e3b7afae5f8c/cohort_met_files.csv'
x <- read_csv(cohort_met_files)
met <- x %>% filter(format_name == "ed.met_driver_header files format")
cohort <- x %>% filter(format_name == "ED2.cohort")
dim(met)
dim(cohort)
x %>% filter(sitename == 'NC07') %>% print()
cohort %>% select(-created_at) %>% unique()
library(leaflet)
met2 <- met %>%
mutate(duration = lubridate::as.duration(duration)) %>%
filter(duration >= lubridate::as.duration('1 years'))
cohort2 <- cohort %>%
select(sitename, file_path, filename) %>%
group_by(sitename) %>%
summarise(cohort_paths = list(file_path), cohort_files = list(filename))
all <- met2 %>% inner_join(cohort2, by = 'sitename') #%>%
# filter( !grepl('MANDIFORE', sitename) | is.null(cohort_paths))
leaflet(all ) %>%
addProviderTiles(providers$Esri.WorldGrayCanvas) %>%
leaflet::addMarkers(lng = ~lon, lat = ~lat,
label = all$sitename,
popup = paste(all$sitename,
'<br>met:', all$file_path,
'<br>cohort:', all$cohort_paths,
'<br>duration:', all$duration))
@dlebauer
Copy link
Author

@dlebauer
Copy link
Author

dlebauer commented Oct 3, 2022

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment