Skip to content

Instantly share code, notes, and snippets.

@MatthieuStigler
Created November 22, 2020 21:12
Show Gist options
  • Save MatthieuStigler/f2ec6bb11f763658611faac8f12241e8 to your computer and use it in GitHub Desktop.
Save MatthieuStigler/f2ec6bb11f763658611faac8f12241e8 to your computer and use it in GitHub Desktop.
Extracting column names with tidyselect: fastest implementation?
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyselect)

#' Column names matching a prefix, via a direct `starts_with()` call
#'
#' Hands the column names to `starts_with()` through its `vars` argument, so
#' no selection context has to be set up by `eval_select()` or `select()`.
#'
#' @param df Object whose `colnames()` are searched.
#' @param m Prefix (or prefixes) passed on to `starts_with()`.
#' @return The subset of `colnames(df)` picked out by `starts_with()`.
get_cols <- function(df, m) {
  col_names <- colnames(df)
  hits <- starts_with(m, vars = col_names)
  col_names[hits]
}

#' Column names matching a prefix, via `eval_select()`
#'
#' @param df Data frame (or other named data accepted by `eval_select()`).
#' @param m Prefix (or prefixes) passed on to `starts_with()`.
#' @return The elements of `colnames(df)` at the selected positions.
get_cols2 <- function(df, m) {
  # eval_select() is documented to take a defused expression. Passing the bare
  # call `starts_with(m)` only works if the promise happens to be forced after
  # tidyselect has registered the variables, so defuse it explicitly and
  # inline the value of `m` with `!!`.
  selection <- expr(starts_with(!!m))
  pos <- eval_select(selection, df)
  colnames(df)[pos]
}

#' Column names for an arbitrary tidyselect expression
#'
#' @param df Data frame (or other named data accepted by `eval_select()`).
#' @param expr A tidyselect expression, e.g. `starts_with("Sepal")`,
#'   `c(starts_with("Sepal"), contains("Species"))` or a bare column name.
#' @return The elements of `colnames(df)` at the selected positions.
get_cols3 <- function(df, expr) {
  # Capture the argument unevaluated. Forwarding it as a plain promise made
  # selection helpers depend on when tidyselect forces it, and made bare
  # column names (e.g. `Species`) fail with "object not found".
  selection <- enquo(expr)
  pos <- eval_select(selection, df)
  colnames(df)[pos]
}

#' Column names for one or more tidyselect expressions passed through `...`
#'
#' @param df Data frame (or other named data accepted by `eval_select()`).
#' @param ... Tidyselect expressions; they are combined as `c(...)`.
#' @return The elements of `colnames(df)` at the selected positions.
get_cols4 <- function(df, ...) {
  # Build the defused call `c(...)`; the dots are resolved by eval_select()
  # in this function's frame (its default `env`).
  selection <- expr(c(...))
  idx <- eval_select(selection, df)
  colnames(df)[idx]
}


#' Column names obtained by actually running `dplyr::select()`
#'
#' Serves as the baseline: the data frame is subset first and the names of
#' the result are read afterwards.
#'
#' @param df Data frame to select from.
#' @param ... Selection expressions forwarded to `select()`.
#' @return `colnames()` of the selected data.
get_cols_full <- function(df, ...) {
  selected <- select(df, ...)
  colnames(selected)
}

# Prefix passed as a string.
get_cols(df = iris, m = "Sepal")
#> [1] "Sepal.Length" "Sepal.Width"
get_cols2(df = iris, m = "Sepal")
#> [1] "Sepal.Length" "Sepal.Width"
# Selection passed as a tidyselect expression.
get_cols3(df = iris, expr = starts_with("Sepal"))
#> [1] "Sepal.Length" "Sepal.Width"
get_cols3(df = iris, expr = c(starts_with("Sepal"), contains("Species")))
#> [1] "Sepal.Length" "Sepal.Width"  "Species"
# Selection passed through `...`.
get_cols4(df = iris, starts_with("Sepal"))
#> [1] "Sepal.Length" "Sepal.Width"
get_cols_full(df = iris, starts_with("Sepal"))
#> [1] "Sepal.Length" "Sepal.Width"


# Time every implementation on the same selection. `check = "equal"` asks
# microbenchmark to verify that all expressions return equal results.
# The timings recorded after this call predate the `a2` correction below, so
# their `a2` row is a second measurement of get_cols(), not of get_cols2().
microbenchmark::microbenchmark(
  a = get_cols(iris, "Sepal"),
  # `a2` must time get_cols2(); it previously repeated get_cols().
  a2 = get_cols2(iris, "Sepal"),
  a3 = get_cols3(iris, starts_with("Sepal")),
  a4 = get_cols4(iris, starts_with("Sepal")),
  b = get_cols_full(iris, starts_with("Sepal")),
  check = "equal"
) %>%
  summary() %>%
  as.data.frame() %>%
  knitr::kable()
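| expr | min | lq | mean | median | uq | max | neval | cld |
|:-----|---------:|----------:|-----------:|----------:|----------:|---------:|------:|:----|
| a | 37.546 | 47.6365 | 57.24979 | 53.0745 | 58.9625 | 220.526 | 100 | a |
| a2 | 37.685 | 49.7940 | 56.36087 | 53.3735 | 59.7245 | 88.708 | 100 | a |
| a3 | 284.418 | 295.4355 | 351.84057 | 311.8375 | 334.2105 | 1923.586 | 100 | b |
| a4 | 844.777 | 880.8530 | 1019.41088 | 957.0495 | 1069.0475 | 2666.677 | 100 | c |
| b | 1052.824 | 1119.2700 | 1293.29644 | 1209.8315 | 1364.9575 | 2796.941 | 100 | d |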

Created on 2020-11-22 by the reprex package (v0.3.0)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment