Skip to content

Instantly share code, notes, and snippets.

@omsai
Last active March 8, 2017 00:03
Show Gist options
  • Save omsai/0a66e3b3747b4db847e06d3ee5817a54 to your computer and use it in GitHub Desktop.
Save omsai/0a66e3b3747b4db847e06d3ee5817a54 to your computer and use it in GitHub Desktop.
Recent Data Carpentry Genomics workshops
suppressPackageStartupMessages({
library(tidyverse)
library(rvest) # Webscraping
library(stringr)
})
## Download and parse the listing of past Data Carpentry workshops.
## NOTE: despite its name, `workshops_url` holds the parsed HTML document
## returned by `read_html()`, not a URL string.
workshops_url <- read_html("http://www.datacarpentry.org/workshops-past/")
## Select every link cell once and derive both the workshop name and its
## href from that same set of cells. `html_node()` (singular) yields exactly
## one result per cell -- a missing node, hence an NA href, when a cell has
## no anchor, and the first anchor when it has several -- so `name` and
## `url` always have the same length and stay aligned cell for cell.
link_cells <- html_nodes(workshops_url, "td[class='link']")
workshops <- tibble(
  date = html_nodes(workshops_url, "td[class='date']") %>% html_text(),
  name = html_text(link_cells),
  url = link_cells %>% html_node("a") %>% html_attr("href")
)
## Handle missing pages (HTTP 404 error) using `try()`: a failed download
## yields a "try-error" object instead of aborting the whole script.
try_read_html <- function(url) try(read_html(url))
## One entry per workshop: either a parsed page or a "try-error" object.
pages <- lapply(workshops$url, try_read_html)
## TRUE when `x` is the error object produced by a failed `try()`.
is_error <- function(x) inherits(x, "try-error")
workshops$page <- pages
## Drop workshops whose page could not be read. `vapply()` guarantees a
## logical mask of the right type even for an empty table, where `sapply()`
## would return an empty list instead.
workshops <- filter(workshops, !vapply(page, is_error, logical(1)))
## Label official genomics lessons
## Report whether any hyperlink on a parsed page matches `pattern`.
##   x       - parsed HTML document (e.g. the result of `read_html()`).
##   pattern - regular expression tested against the href of every <a> node.
##   ...     - further arguments forwarded to `grepl()`.
## Matching is case-insensitive. Returns a single TRUE or FALSE.
url_grepl <- function(x, pattern, ...) {
  anchors <- html_nodes(x, "a")
  hrefs <- html_attr(anchors, "href")
  hits <- grepl(hrefs, pattern = pattern, ignore.case = TRUE, ...)
  any(hits)
}
## Flag, for every workshop page, which of the official genomics lessons it
## links to. Each flag is TRUE when at least one hyperlink on the page
## matches the lesson's slug (case-insensitively, via `url_grepl()`).
## `vapply(..., logical(1))` pins every flag column to a plain logical
## vector; `sapply()` would return an empty list when no workshops remain,
## turning the flags into list columns.
official <- mutate(
  workshops,
  lesson_intro = vapply(page, url_grepl, logical(1),
                        pattern = "introduction-genomics"),
  lesson_cloud = vapply(page, url_grepl, logical(1),
                        pattern = "cloud-genomics"),
  lesson_shell = vapply(page, url_grepl, logical(1),
                        pattern = "shell-genomics"),
  lesson_wrangling = vapply(page, url_grepl, logical(1),
                            pattern = "wrangling-genomics"),
  lesson_r = vapply(page, url_grepl, logical(1),
                    pattern = "R-genomics")
)
## Tally, per workshop, how many of the lesson_* flags are set.
sums <- as.integer(rowSums(select(official, starts_with("lesson_"))))
official <- mutate(official, lessons = sums)
## Keep only workshops that teach at least one lesson, listing those with
## the most lessons first, and show just the lesson count and the URL.
genomics_official <- official %>%
  filter(lessons > 0) %>%
  arrange(desc(lessons)) %>%
  select(lessons, url)
genomics_official
> genomics_official
# A tibble: 13 × 2
lessons url
<int> <chr>
1 5 https://ryanpeek.github.io/2017-01-22-stanford/
2 5 https://tracykteal.github.io/2016-11-21-genomics-unm/
3 5 https://uio-carpentry.github.io/2016-03-15-Oslo-data-bio/
4 4 https://nwu-eresearch.github.io/2016-09-26-nwu-genomics/
5 4 https://datacarpentry.github.io/2016-05-26-NIH/
6 4 https://iglpdc.github.io/2016-04-11-bu/
7 4 http://www.datacarpentry.org/2016-01-19-sbu/
8 4 http://www.datacarpentry.org/2015-09-22-UCDavis/
9 4 http://www.datacarpentry.org/2015-07-30-ASPB/
10 2 https://vlsci.github.io/datacarpentry_2015-11-23_VLSCI/
11 1 https://markrobinsonuzh.github.io/2016-07-18-zurich/
12 1 https://lmweber.github.io/2016-05-30-Zurich/
13 1 http://www.datacarpentry.org/2015-08-24-ISU/
>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment