Skip to content

Instantly share code, notes, and snippets.

@gadenbuie
Created August 21, 2019 20:20
Show Gist options
  • Save gadenbuie/bda2206902314479a7e39c80af470da7 to your computer and use it in GitHub Desktop.
Save gadenbuie/bda2206902314479a7e39c80af470da7 to your computer and use it in GitHub Desktop.
# Write the built-in iris data to disk so there is a CSV file to read back.
write.csv(x = iris, file = "iris.csv")

#' Read a CSV file in chunks, keeping only the rows that match a filter
#'
#' Every chunk read by `readr::read_csv_chunked()` is filtered with
#' `dplyr::filter()` and then, optionally, reduced to the columns named in
#' `select_vars`. The per-chunk results are collected through a
#' `readr::DataFrameCallback`.
#'
#' @param file Passed unchanged as the first argument of
#'   `readr::read_csv_chunked()`.
#' @param ... Filter conditions forwarded to `dplyr::filter()`. They are
#'   evaluated against the columns as read from the file, so a condition may
#'   reference a column that is not listed in `select_vars`.
#' @param select_vars Optional columns to keep; spliced into
#'   `dplyr::select()`. `NULL` (the default) keeps every column.
#' @param chunk_size Number of rows handed to the callback per chunk.
#' @return The value returned by `readr::read_csv_chunked()` for a
#'   `DataFrameCallback`: the filtered rows of all chunks combined.
read_csv_filtered <- function(file, ..., select_vars = NULL, chunk_size = 10) {
  stopifnot(
    requireNamespace("dplyr", quietly = TRUE),
    requireNamespace("readr", quietly = TRUE)
  )

  # Build the per-chunk callback; `pos` is part of the callback signature
  # expected by readr but is not needed here.
  filter_by_id <- function(..., select_vars) {
    function(.data, pos) {
      # Filter before selecting so that conditions can use columns which are
      # dropped from the final result (e.g. filter on an id, keep the rest).
      kept <- dplyr::filter(.data, ...)
      if (is.null(select_vars)) {
        return(kept)
      }
      dplyr::select(kept, !!!select_vars)
    }
  }

  readr::read_csv_chunked(
    file,
    readr::DataFrameCallback$new(filter_by_id(..., select_vars = select_vars)),
    chunk_size = chunk_size
  )
}


# Keep rows with Sepal.Width > 3 and only the Sepal.Width and Species columns.
read_csv_filtered(
  "iris.csv",
  Sepal.Width > 3,
  select_vars = c("Sepal.Width", "Species")
)
#> Warning: Missing column names filled in: 'X1' [1]
#> Parsed with column specification:
#> cols(
#>   X1 = col_double(),
#>   Sepal.Length = col_double(),
#>   Sepal.Width = col_double(),
#>   Petal.Length = col_double(),
#>   Petal.Width = col_double(),
#>   Species = col_character()
#> )
#> # A tibble: 67 x 2
#>    Sepal.Width Species
#>          <dbl> <chr>  
#>  1         3.5 setosa 
#>  2         3.2 setosa 
#>  3         3.1 setosa 
#>  4         3.6 setosa 
#>  5         3.9 setosa 
#>  6         3.4 setosa 
#>  7         3.4 setosa 
#>  8         3.1 setosa 
#>  9         3.7 setosa 
#> 10         3.4 setosa 
#> # … with 57 more rows

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
# Same filter and column selection applied to the in-memory iris data frame.
wide_sepals <- filter(iris, Sepal.Width > 3)
select(wide_sepals, c("Sepal.Width", "Species"))
#>    Sepal.Width    Species
#> 1          3.5     setosa
#> 2          3.2     setosa
#> 3          3.1     setosa
#> 4          3.6     setosa
#> 5          3.9     setosa
#> 6          3.4     setosa
#> 7          3.4     setosa
#> 8          3.1     setosa
#> 9          3.7     setosa
#> 10         3.4     setosa
#> 11         4.0     setosa
#> 12         4.4     setosa
#> 13         3.9     setosa
#> 14         3.5     setosa
#> 15         3.8     setosa
#> 16         3.8     setosa
#> 17         3.4     setosa
#> 18         3.7     setosa
#> 19         3.6     setosa
#> 20         3.3     setosa
#> 21         3.4     setosa
#> 22         3.4     setosa
#> 23         3.5     setosa
#> 24         3.4     setosa
#> 25         3.2     setosa
#> 26         3.1     setosa
#> 27         3.4     setosa
#> 28         4.1     setosa
#> 29         4.2     setosa
#> 30         3.1     setosa
#> 31         3.2     setosa
#> 32         3.5     setosa
#> 33         3.6     setosa
#> 34         3.4     setosa
#> 35         3.5     setosa
#> 36         3.2     setosa
#> 37         3.5     setosa
#> 38         3.8     setosa
#> 39         3.8     setosa
#> 40         3.2     setosa
#> 41         3.7     setosa
#> 42         3.3     setosa
#> 43         3.2 versicolor
#> 44         3.2 versicolor
#> 45         3.1 versicolor
#> 46         3.3 versicolor
#> 47         3.1 versicolor
#> 48         3.2 versicolor
#> 49         3.4 versicolor
#> 50         3.1 versicolor
#> 51         3.3  virginica
#> 52         3.6  virginica
#> 53         3.2  virginica
#> 54         3.2  virginica
#> 55         3.8  virginica
#> 56         3.2  virginica
#> 57         3.3  virginica
#> 58         3.2  virginica
#> 59         3.8  virginica
#> 60         3.4  virginica
#> 61         3.1  virginica
#> 62         3.1  virginica
#> 63         3.1  virginica
#> 64         3.1  virginica
#> 65         3.2  virginica
#> 66         3.3  virginica
#> 67         3.4  virginica

Created on 2019-08-21 by the reprex package (v0.3.0)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment