Skip to content

Instantly share code, notes, and snippets.

@sinarueeger
Last active March 4, 2019 17:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sinarueeger/02f31d41337e0ab77806d89f1e24ffdf to your computer and use it in GitHub Desktop.
Save sinarueeger/02f31d41337e0ab77806d89f1e24ffdf to your computer and use it in GitHub Desktop.
satRday-paris notes https://paris2019.satrdays.org/
## /////////////////////////////////
## satRday Paris notes
## (untidy)
## /////////////////////////////////
## Lionel Henry: Programming in the tidyverse
## tidyverse (data analysis, data manipulation, data cleaning) <----> r-lib (production programming)
## dplyr, ggplot2, tidyr <----> vctrs, rlang, devtools
## reproducibility by few users <----> reusability by many users
## TIDYVERSE
## https://principles.tidyverse.org/
## data (data masking), domain oriented (e.g. data cleaning), language like (verbs like mutate, arrange)
## rlang
## =tidyeval
## !! and !!! and enquo()
## requires new concepts
## delayed computation: loops or functions
## data masking is not transitive!
## options
## - fixed columns > no problem
## - mapping with purrr or *_all, *_if, *_at
## - tidyeval
## tidyeval:
## 1) pass the dots
## magrittr supplies the %>% pipe, dplyr the data verbs used below.
library(magrittr)
library(dplyr)
## ggplot2 is attached here as well because the plotting examples further
## down run before the later library(ggplot2) call.
library(ggplot2)
## group_by_at
## Count the rows in each group.
##   data : a data frame
##   ...  : column selection handed to vars(), e.g. a character vector of names
## Returns one row per group with the count in column `n`.
my_count_by <- function(data, ...) {
  grouped <- group_by_at(data, vars(...))
  summarize(grouped, n = n())
}
my_count_by(data = fivethirtyeight::bechdel, c("year", "binary"))
## Mean of every column whose name contains one of the given patterns.
##   data : a data frame (e.g. fivethirtyeight::drug_use)
##   ...  : character pattern(s) matched against the column names; when none
##          are supplied the function falls back to "alcohol", which is what
##          it previously used unconditionally
## Returns the summarised data frame produced by summarize_at().
mean_freq <- function(data, ...) {
  patterns <- c(...)
  if (length(patterns) == 0) {
    patterns <- "alcohol"
  }
  ## !! inlines the pattern vector so it can never be mistaken for a column
  data %>% summarize_at(vars(contains(!!patterns)), mean)
}
mean_freq(data = fivethirtyeight::drug_use, c("alcohol"))
## ! select is the only verb that does a selection and not an action (adding a new column)
## selections have special properties: c(), - and :
## group_by also creates actions
## if you want selections at group_by, use group_by_at
## Facet helper: captures its arguments with vars() and builds a facet_wrap()
## whose strip labels are produced by label_both.
my_wrap <- function(...) {
  facet_spec <- vars(...)
  facet_wrap(facets = facet_spec, labeller = label_both)
}
## Scatterplot of budget_2013 against year, coloured by `binary`, with one
## panel per bin of cut_number(intgross_2013, 3).
ggplot(data = fivethirtyeight::bechdel) +
  geom_point(mapping = aes(x = year, y = budget_2013, colour = binary)) +
  my_wrap(Budget = cut_number(intgross_2013, 3)) +
  hrbrthemes::theme_ipsum()
## 2) subsetting .data
## use .data as a pronoun , e.g. %>% group_by(.data$gender)
## Group by one column and average another; both columns are given as strings.
##   .data : a data frame
##   var1  : name (string) of the grouping column
##   var2  : name (string) of the column to average (NAs are dropped)
## Returns one row per group with the average in column `mean`.
group_by_summarise <- function(.data, var1, var2) {
  ## inside the dplyr verbs .data is used as the pronoun and subset by name
  .data %>%
    group_by(.data[[var1]]) %>%
    summarise(mean = mean(.data[[var2]], na.rm = TRUE))
}
## fivethirtyeight is never attached in this script, so qualify the dataset
group_by_summarise(fivethirtyeight::bechdel, "binary", "domgross")
fivethirtyeight::bechdel %>% group_by_summarise("binary", "domgross")
## 3) interpolation = tidyeval
## delay a blueprint with enquo() and insert it back with !!
## e.g. group_by(!!enquo(var1)) %>% summarise(!!enquo(var2))
## example
#
## Group by one column and average another; both columns are given as bare
## (unquoted) names.
##   .data : a data frame
##   var1  : grouping column, captured with enquo() and re-inserted with !!
##   var2  : column to average (NAs are dropped), captured the same way
## Returns one row per group with the average in column `mean`.
group_by_summarise <- function(.data, var1, var2) {
  .data %>%
    group_by(!!enquo(var1)) %>%
    summarise(mean = mean(!!enquo(var2), na.rm = TRUE))
}
## fivethirtyeight is never attached in this script, so qualify the dataset
group_by_summarise(fivethirtyeight::bechdel, binary, domgross)
fivethirtyeight::bechdel %>% group_by_summarise(binary, domgross)
##
## Long-format table: keep `age` plus every column whose name contains "use",
## then stack those columns into (drug, use) pairs.
drug <- fivethirtyeight::drug_use %>%
  select_at(vars("age", contains("use"))) %>%
  tidyr::gather(drug, use, -age)
## One row per drug, flagging whether its maximum `use` value exceeds 20.
heavy <- drug %>%
  group_by(drug) %>%
  summarize(max_larger_20 = max(use, na.rm = TRUE) > 20)
## Attach the flag to every row. `drug` is the only column the two tables
## share, so naming it explicitly gives the same result as the natural join
## without relying on (or printing a message about) guessed keys.
drug <- drug %>% full_join(heavy, by = "drug")
## One line per drug (use against age), panels split by the max_larger_20 flag.
ggplot(data = drug) +
  geom_path(mapping = aes(x = age, y = use, colour = drug, group = drug)) +
  facet_wrap(facets = ~max_larger_20, labeller = label_both)
## One panel per drug, each with its own axis scales.
ggplot(data = drug) +
  geom_path(aes(age, use, color = drug, group = drug)) +
  facet_wrap(~drug, scales = "free", labeller = label_both) +
  ## hrbrthemes is never attached in this script, so call the theme qualified
  hrbrthemes::theme_ipsum()
## https://twitter.com/dreamRs_fr
## etienne sanchez
## @fanny and @victor from dreamRs show the
## shiny application making job search more fun with
## - text mining
## - hierarchical clustering
## - shiny application
## bea: football and graph
## graph
## cypher is a syntax for pattern recognition , not like SQL
## Henrik Bengtsson
## -------------------------------------
## lapply
## parallel : not working for windows
## foreach: not perfect either
## ???
## large data
## actions done
library(future)
## multisession replaces the original plan(multiprocess), which current
## releases of the future package have deprecated.
plan(multisession)
## Explicit futures: create with future(), collect the result with value().
fa <- future(sum(1:50))
fa
fb <- future(sum(51:100))
value(fb)
## Implicit future: the %<-% assignment operator.
fa %<-% prod(1:1e2) ##%<-% ## future syntax
fa
## microbenchmarking
## Time the same future assignment under a sequential and a parallel plan.
plan(sequential)
#system.time(value(future(sum(1:1e2))))
system.time(x %<-% sum(1:1e6))
## multisession replaces the original plan(multiprocess), which current
## releases of the future package have deprecated.
plan(multisession)
system.time(x %<-% sum(1:1e6))
## custom function
## Deliberately slow sum: adds the elements of `x` one at a time and pauses
## for half a second after each addition.
##   x : values to add up
## Returns the running total, starting from 0.
slow_sum <- function(x) {
  total <- 0
  for (value in x) {
    total <- total + value
    Sys.sleep(0.5)
  }
  total
}
library(future)
plan(multicore)
## Start the slow sum as a future; slow_sum() sleeps 0.5 s per element.
fa <- future(slow_sum(1:50))
## value() returns the result of the future.
value(fa)
## resolved() reports whether the future has finished.
resolved(fa)
pryr::ast
## resolved() needs the future to query; the bare call had no argument.
resolved(fa)
## other options
## Bare references to two parallel map-style helpers (nothing is called here).
## NOTE(review): future_lapply is presumably the one from the future.apply
## package; the unqualified name only resolves if that package is attached,
## which this script does not do -- confirm.
future_lapply
## Qualified reference, so it only needs furrr to be installed.
furrr::future_map
#- how to use bash tools within R: wrapper
#- distributing data to nodes
#- using scheduler without script
## Suzanne Baert's theme ---------------------------------------------------------------
# points semitransparent
library(ggplot2)
## nice: theme_linedraw, theme_minimal
## Draw the same scatterplot once per hrbrthemes theme.
x <- c("theme_ipsum", "theme_ipsum_rc")
for (THEME in x) {
  qp <- ggplot(data = fivethirtyeight::bechdel) +
    geom_point(aes(domgross, intgross, color = binary)) +
    ## pass the facet as a named argument (as in the earlier my_wrap() call)
    ## so the strip label reads "budget"; wrapping it in c() hid the name
    my_wrap(budget = cut_number(budget, 3)) +
    ## hrbrthemes is never attached in this script, so look the theme
    ## function up in its namespace by name instead of eval(call(THEME))
    getExportedValue("hrbrthemes", THEME)() +
    labs(
      title = "Seminal ggplot2 scatterplot example",
      subtitle = "A plot that is only useful for demonstration purposes",
      caption = "Brought to you by the letter 'g'"
    )
  print(qp)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment