Skip to content

Instantly share code, notes, and snippets.

@njtierney
Created April 19, 2023 07:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save njtierney/b308cc698c49d79787bb2bbbd7809aad to your computer and use it in GitHub Desktop.
Save njtierney/b308cc698c49d79787bb2bbbd7809aad to your computer and use it in GitHub Desktop.
library(tidyverse)

# Codes that mark a record for dismissal; "A" does not occur in the toy
# data below, while "B" and "C" do.
cause_for_dismissal <- c("A", "B", "C")

# Toy data set: five rows, each with a single-letter species code.
vic_moz_long <- tibble(
  id = seq_len(5),
  species = c("B", "C", "D", "E", "F")
)

vic_moz_long
#> # A tibble: 5 × 2
#>      id species
#>   <int> <chr>  
#> 1     1 B      
#> 2     2 C      
#> 3     3 D      
#> 4     4 E      
#> 5     5 F

# error! `cause_for_dismissal` has 3 elements but `species` has 5, and
# str_detect() cannot recycle a length-5 string against a length-3 pattern
# (see the message below).
vic_moz_long %>%
  filter(str_detect(species, cause_for_dismissal, negate = TRUE)
  )
#> Error in `filter()`:
#> ℹ In argument: `str_detect(species, cause_for_dismissal, negate =
#>   TRUE)`.
#> Caused by error in `str_detect()`:
#> ! Can't recycle `string` (size 5) to match `pattern` (size 3).
#> Backtrace:
#>      ▆
#>   1. ├─vic_moz_long %>% ...
#>   2. ├─dplyr::filter(., str_detect(species, cause_for_dismissal, negate = TRUE))
#>   3. ├─dplyr:::filter.data.frame(...)
#>   4. │ └─dplyr:::filter_rows(.data, dots, by)
#>   5. │   └─dplyr:::filter_eval(...)
#>   6. │     ├─base::withCallingHandlers(...)
#>   7. │     └─mask$eval_all_filter(dots, env_filter)
#>   8. │       └─dplyr (local) eval()
#>   9. ├─stringr::str_detect(species, cause_for_dismissal, negate = TRUE)
#>  10. │ └─stringr:::check_lengths(string, pattern)
#>  11. │   └─vctrs::vec_size_common(...)
#>  12. └─vctrs::stop_incompatible_size(...)
#>  13.   └─vctrs:::stop_incompatible(...)
#>  14.     └─vctrs:::stop_vctrs(...)
#>  15.       └─rlang::abort(message, class = c(class, "vctrs_error"), ..., call = call)

# So you want to keep only the rows whose species is exactly one of the
# values in cause_for_dismissal (here that is B and C)?
# %in% does whole-value matching and returns one TRUE/FALSE per element.
vic_moz_long$species %in% cause_for_dismissal
#> [1]  TRUE  TRUE FALSE FALSE FALSE

vic_moz_long %>%
  filter(species %in% cause_for_dismissal)
#> # A tibble: 2 × 2
#>      id species
#>   <int> <chr>  
#> 1     1 B      
#> 2     2 C

# or not B and C?

# "not in": TRUE for every element of the left-hand side that has no exact
# match in the right-hand side.
`%nin%` <- function(x, table) {
  !(x %in% table)
}

# Keep only the rows whose species is not listed in cause_for_dismissal.
vic_moz_long %>%
  filter(species %nin% cause_for_dismissal)
#> # A tibble: 3 × 2
#>      id species
#>   <int> <chr>  
#> 1     3 D      
#> 2     4 E      
#> 3     5 F

Created on 2023-04-19 with reprex v2.0.2

Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.2.3 (2023-03-15)
#>  os       macOS Ventura 13.2
#>  system   aarch64, darwin20
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       Australia/Brisbane
#>  date     2023-04-19
#>  pandoc   2.19.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version date (UTC) lib source
#>  cli           3.6.1   2023-03-23 [1] CRAN (R 4.2.0)
#>  colorspace    2.1-0   2023-01-23 [1] CRAN (R 4.2.0)
#>  digest        0.6.31  2022-12-11 [1] CRAN (R 4.2.0)
#>  dplyr       * 1.1.1   2023-03-22 [1] CRAN (R 4.2.0)
#>  evaluate      0.20    2023-01-17 [1] CRAN (R 4.2.0)
#>  fansi         1.0.4   2023-01-22 [1] CRAN (R 4.2.0)
#>  fastmap       1.1.1   2023-02-24 [1] CRAN (R 4.2.0)
#>  forcats     * 1.0.0   2023-01-29 [1] CRAN (R 4.2.0)
#>  fs            1.6.1   2023-02-06 [1] CRAN (R 4.2.0)
#>  generics      0.1.3   2022-07-05 [1] CRAN (R 4.2.0)
#>  ggplot2     * 3.4.2   2023-04-03 [1] CRAN (R 4.2.0)
#>  glue          1.6.2   2022-02-24 [1] CRAN (R 4.2.0)
#>  gtable        0.3.3   2023-03-21 [1] CRAN (R 4.2.0)
#>  hms           1.1.3   2023-03-21 [1] CRAN (R 4.2.0)
#>  htmltools     0.5.5   2023-03-23 [1] CRAN (R 4.2.0)
#>  knitr         1.42    2023-01-25 [1] CRAN (R 4.2.0)
#>  lifecycle     1.0.3   2022-10-07 [1] CRAN (R 4.2.0)
#>  lubridate   * 1.9.2   2023-02-10 [1] CRAN (R 4.2.0)
#>  magrittr      2.0.3   2022-03-30 [1] CRAN (R 4.2.0)
#>  munsell       0.5.0   2018-06-12 [1] CRAN (R 4.2.0)
#>  pillar        1.9.0   2023-03-22 [1] CRAN (R 4.2.0)
#>  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.2.0)
#>  purrr       * 1.0.1   2023-01-10 [1] CRAN (R 4.2.0)
#>  R.cache       0.16.0  2022-07-21 [1] CRAN (R 4.2.0)
#>  R.methodsS3   1.8.2   2022-06-13 [1] CRAN (R 4.2.0)
#>  R.oo          1.25.0  2022-06-12 [1] CRAN (R 4.2.0)
#>  R.utils       2.12.2  2022-11-11 [1] CRAN (R 4.2.0)
#>  R6            2.5.1   2021-08-19 [1] CRAN (R 4.2.0)
#>  readr       * 2.1.4   2023-02-10 [1] CRAN (R 4.2.0)
#>  reprex        2.0.2   2022-08-17 [1] CRAN (R 4.2.0)
#>  rlang         1.1.0   2023-03-14 [1] CRAN (R 4.2.0)
#>  rmarkdown     2.21    2023-03-26 [1] CRAN (R 4.2.0)
#>  rstudioapi    0.14    2022-08-22 [1] CRAN (R 4.2.0)
#>  scales        1.2.1   2022-08-20 [1] CRAN (R 4.2.0)
#>  sessioninfo   1.2.2   2021-12-06 [1] CRAN (R 4.2.0)
#>  stringi       1.7.12  2023-01-11 [1] CRAN (R 4.2.0)
#>  stringr     * 1.5.0   2022-12-02 [1] CRAN (R 4.2.0)
#>  styler        1.9.1   2023-03-04 [1] CRAN (R 4.2.0)
#>  tibble      * 3.2.1   2023-03-20 [1] CRAN (R 4.2.0)
#>  tidyr       * 1.3.0   2023-01-24 [1] CRAN (R 4.2.0)
#>  tidyselect    1.2.0   2022-10-10 [1] CRAN (R 4.2.0)
#>  tidyverse   * 2.0.0   2023-02-22 [1] CRAN (R 4.2.0)
#>  timechange    0.2.0   2023-01-11 [1] CRAN (R 4.2.0)
#>  tzdb          0.3.0   2022-03-28 [1] CRAN (R 4.2.0)
#>  utf8          1.2.3   2023-01-31 [1] CRAN (R 4.2.0)
#>  vctrs         0.6.1   2023-03-22 [1] CRAN (R 4.2.0)
#>  withr         2.5.0   2022-03-03 [1] CRAN (R 4.2.0)
#>  xfun          0.38    2023-03-24 [1] CRAN (R 4.2.0)
#>  yaml          2.3.7   2023-01-23 [1] CRAN (R 4.2.0)
#> 
#>  [1] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────
@dhduncan
Copy link

Thanks @njtierney - I tried this yesterday, but my original problem was trying to detect partial matches in longer strings, and %in% / %nin% (<-- how cool is that btw!) only match whole strings, so I don't think this is my solution.

@dhduncan
Copy link

I see that I also got to experimenting with something like this: str_detect(fruit, paste("^p", "l", sep = "|")), but it seems inelegant because I would have to decompose my original handy vector of incomplete species ids (cause_for_dismissal).

@njtierney
Copy link
Author

the handy vector is still handy though! see this

library(tidyverse)

# Partial species names; any string containing one of these should match.
cause_for_dismissal <- c("Apple", "Banana", "Carrot")

# Toy data: longer strings that contain a keyword at the start, at the end,
# or inside a longer word, plus one string with no keyword ("Doritos").
vic_moz_long <- tibble(
  id = seq_len(5),
  species = c(
    "a Banana",
    "Banana Chilli",
    "one Carrot",
    "Carrots",
    "Doritos"
  )
)

vic_moz_long
#> # A tibble: 5 × 2
#>      id species      
#>   <int> <chr>        
#> 1     1 a Banana     
#> 2     2 Banana Chilli
#> 3     3 one Carrot   
#> 4     4 Carrots      
#> 5     5 Doritos

# Collapse the vector into one regular expression, "Apple|Banana|Carrot",
# where "|" means "or"; str_subset() then returns the strings that contain
# any of the alternatives.
str_subset(
  string = vic_moz_long$species,
  pattern = paste0(cause_for_dismissal, collapse = "|")
)
#> [1] "a Banana"      "Banana Chilli" "one Carrot"    "Carrots"

vic_moz_long
#> # A tibble: 5 × 2
#>      id species      
#>   <int> <chr>        
#> 1     1 a Banana     
#> 2     2 Banana Chilli
#> 3     3 one Carrot   
#> 4     4 Carrots      
#> 5     5 Doritos

# Filter via str_subset(): keep the rows whose species is among the strings
# that matched the collapsed "Apple|Banana|Carrot" pattern.
vic_moz_long %>% 
  filter(
    species %in% str_subset(
      string = species,
      pattern = paste0(cause_for_dismissal, collapse = "|")
    )
  )
#> # A tibble: 4 × 2
#>      id species      
#>   <int> <chr>        
#> 1     1 a Banana     
#> 2     2 Banana Chilli
#> 3     3 one Carrot   
#> 4     4 Carrots

# str_detect() returns one TRUE/FALSE per string rather than the matching
# strings themselves, so the result can be used directly as a row filter.
str_detect(
  string = vic_moz_long$species,
  pattern = paste0(cause_for_dismissal, collapse = "|")
)
#> [1]  TRUE  TRUE  TRUE  TRUE FALSE

# More direct: filter on the logical vector returned by str_detect().
# NOTE: each element of cause_for_dismissal is interpreted as a regular
# expression, so values containing characters such as "." or "(" would need
# escaping to be matched literally.
vic_moz_long %>% 
  filter(
    str_detect(
      string = species,
      pattern = paste0(cause_for_dismissal, collapse = "|")
    )
  )
#> # A tibble: 4 × 2
#>      id species      
#>   <int> <chr>        
#> 1     1 a Banana     
#> 2     2 Banana Chilli
#> 3     3 one Carrot   
#> 4     4 Carrots

Created on 2023-04-20 with reprex v2.0.2

Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.2.3 (2023-03-15)
#>  os       macOS Ventura 13.2
#>  system   aarch64, darwin20
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       Australia/Hobart
#>  date     2023-04-20
#>  pandoc   2.19.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version date (UTC) lib source
#>  cli           3.6.1   2023-03-23 [1] CRAN (R 4.2.0)
#>  colorspace    2.1-0   2023-01-23 [1] CRAN (R 4.2.0)
#>  digest        0.6.31  2022-12-11 [1] CRAN (R 4.2.0)
#>  dplyr       * 1.1.1   2023-03-22 [1] CRAN (R 4.2.0)
#>  evaluate      0.20    2023-01-17 [1] CRAN (R 4.2.0)
#>  fansi         1.0.4   2023-01-22 [1] CRAN (R 4.2.0)
#>  fastmap       1.1.1   2023-02-24 [1] CRAN (R 4.2.0)
#>  forcats     * 1.0.0   2023-01-29 [1] CRAN (R 4.2.0)
#>  fs            1.6.1   2023-02-06 [1] CRAN (R 4.2.0)
#>  generics      0.1.3   2022-07-05 [1] CRAN (R 4.2.0)
#>  ggplot2     * 3.4.2   2023-04-03 [1] CRAN (R 4.2.0)
#>  glue          1.6.2   2022-02-24 [1] CRAN (R 4.2.0)
#>  gtable        0.3.3   2023-03-21 [1] CRAN (R 4.2.0)
#>  hms           1.1.3   2023-03-21 [1] CRAN (R 4.2.0)
#>  htmltools     0.5.5   2023-03-23 [1] CRAN (R 4.2.0)
#>  knitr         1.42    2023-01-25 [1] CRAN (R 4.2.0)
#>  lifecycle     1.0.3   2022-10-07 [1] CRAN (R 4.2.0)
#>  lubridate   * 1.9.2   2023-02-10 [1] CRAN (R 4.2.0)
#>  magrittr      2.0.3   2022-03-30 [1] CRAN (R 4.2.0)
#>  munsell       0.5.0   2018-06-12 [1] CRAN (R 4.2.0)
#>  pillar        1.9.0   2023-03-22 [1] CRAN (R 4.2.0)
#>  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.2.0)
#>  purrr       * 1.0.1   2023-01-10 [1] CRAN (R 4.2.0)
#>  R.cache       0.16.0  2022-07-21 [1] CRAN (R 4.2.0)
#>  R.methodsS3   1.8.2   2022-06-13 [1] CRAN (R 4.2.0)
#>  R.oo          1.25.0  2022-06-12 [1] CRAN (R 4.2.0)
#>  R.utils       2.12.2  2022-11-11 [1] CRAN (R 4.2.0)
#>  R6            2.5.1   2021-08-19 [1] CRAN (R 4.2.0)
#>  readr       * 2.1.4   2023-02-10 [1] CRAN (R 4.2.0)
#>  reprex        2.0.2   2022-08-17 [1] CRAN (R 4.2.0)
#>  rlang         1.1.0   2023-03-14 [1] CRAN (R 4.2.0)
#>  rmarkdown     2.21    2023-03-26 [1] CRAN (R 4.2.0)
#>  rstudioapi    0.14    2022-08-22 [1] CRAN (R 4.2.0)
#>  scales        1.2.1   2022-08-20 [1] CRAN (R 4.2.0)
#>  sessioninfo   1.2.2   2021-12-06 [1] CRAN (R 4.2.0)
#>  stringi       1.7.12  2023-01-11 [1] CRAN (R 4.2.0)
#>  stringr     * 1.5.0   2022-12-02 [1] CRAN (R 4.2.0)
#>  styler        1.9.1   2023-03-04 [1] CRAN (R 4.2.0)
#>  tibble      * 3.2.1   2023-03-20 [1] CRAN (R 4.2.0)
#>  tidyr       * 1.3.0   2023-01-24 [1] CRAN (R 4.2.0)
#>  tidyselect    1.2.0   2022-10-10 [1] CRAN (R 4.2.0)
#>  tidyverse   * 2.0.0   2023-02-22 [1] CRAN (R 4.2.0)
#>  timechange    0.2.0   2023-01-11 [1] CRAN (R 4.2.0)
#>  tzdb          0.3.0   2022-03-28 [1] CRAN (R 4.2.0)
#>  utf8          1.2.3   2023-01-31 [1] CRAN (R 4.2.0)
#>  vctrs         0.6.1   2023-03-22 [1] CRAN (R 4.2.0)
#>  withr         2.5.0   2022-03-03 [1] CRAN (R 4.2.0)
#>  xfun          0.38    2023-03-24 [1] CRAN (R 4.2.0)
#>  yaml          2.3.7   2023-01-23 [1] CRAN (R 4.2.0)
#> 
#>  [1] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────

@dhduncan
Copy link

ahhh collapse is what my attempts were missing! Thanks a big bunch of chilli bananas!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment