Skip to content

Instantly share code, notes, and snippets.

@muschellij2
Last active October 10, 2019 17:12
Show Gist options
  • Save muschellij2/7d219ea11f0c3beb0f939075f3efe06d to your computer and use it in GitHub Desktop.
Save muschellij2/7d219ea11f0c3beb0f939075f3efe06d to your computer and use it in GitHub Desktop.
An alternative R implementation of `file.exists`.
# Vectorised existence check that lists each parent directory once instead of
# testing every path individually.
#
# Arguments:
#   ...  Character vectors (or lists of them) of file paths. All arguments are
#        flattened into one vector, so file_exists(a, b) and
#        file_exists(c(a, b)) are equivalent.
#
# Returns:
#   An unnamed logical vector with one element per path, in input order.
#   TRUE means the path's basename appears in the listing of its dirname.
#   NA paths, and paths whose parent directory yields an empty listing,
#   give FALSE. Zero paths give logical(0).
#
# NOTE(review): names are matched exactly against list.files() output, so
# results may differ from file.exists() for broken symlinks or on
# case-insensitive filesystems -- confirm whether that matters to callers.
file_exists = function(...) {
  # Flatten the collected arguments; unlist(...) would instead pass any extra
  # arguments on as unlist()'s `recursive` / `use.names` parameters.
  x = unname(unlist(list(...)))
  if (length(x) == 0L) {
    return(logical(0))
  }
  x = as.character(x)

  found = logical(length(x))
  usable = which(!is.na(x))
  if (length(usable) == 0L) {
    return(found)
  }

  bn = basename(x[usable])
  dn = dirname(x[usable])
  udn = unique(dn)

  # Group positions by parent directory so each directory is listed once.
  groups = split(
    seq_along(usable),
    factor(match(dn, udn), levels = seq_along(udn))
  )

  for (i in seq_along(udn)) {
    # all.files = TRUE so that dot-files are matched as well; an unlistable
    # directory gives character(0), leaving its members FALSE.
    listing = list.files(
      udn[i],
      recursive = FALSE,
      full.names = FALSE,
      all.files = TRUE
    )
    members = groups[[i]]
    found[usable[members]] = bn[members] %in% listing
  }

  found
}
# Benchmark fixture: a large vector of temp-file path names. Only names are
# generated here; this block does not create any file on disk.
n_files = 3e5
files = vapply(
  seq_len(n_files),
  function(i) tempfile(),
  character(1)
)
# Vectorised existence check that lists each parent directory once instead of
# testing every path individually.
#
# Arguments:
#   ...  Character vectors (or lists of them) of file paths. All arguments are
#        flattened into one vector, so file_exists(a, b) and
#        file_exists(c(a, b)) are equivalent.
#
# Returns:
#   An unnamed logical vector with one element per path, in input order.
#   TRUE means the path's basename appears in the listing of its dirname.
#   NA paths, and paths whose parent directory yields an empty listing,
#   give FALSE. Zero paths give logical(0).
#
# NOTE(review): names are matched exactly against list.files() output, so
# results may differ from file.exists() for broken symlinks or on
# case-insensitive filesystems -- confirm whether that matters to callers.
file_exists = function(...) {
  # Flatten the collected arguments; unlist(...) would instead pass any extra
  # arguments on as unlist()'s `recursive` / `use.names` parameters.
  x = unname(unlist(list(...)))
  if (length(x) == 0L) {
    return(logical(0))
  }
  x = as.character(x)

  found = logical(length(x))
  usable = which(!is.na(x))
  if (length(usable) == 0L) {
    return(found)
  }

  bn = basename(x[usable])
  dn = dirname(x[usable])
  udn = unique(dn)

  # Group positions by parent directory so each directory is listed once.
  groups = split(
    seq_along(usable),
    factor(match(dn, udn), levels = seq_along(udn))
  )

  for (i in seq_along(udn)) {
    # all.files = TRUE so that dot-files are matched as well; an unlistable
    # directory gives character(0), leaving its members FALSE.
    listing = list.files(
      udn[i],
      recursive = FALSE,
      full.names = FALSE,
      all.files = TRUE
    )
    members = groups[[i]]
    found[usable[members]] = bn[members] %in% listing
  }

  found
}

# Timing comparison of base file.exists() against file_exists() defined above.
# Lines starting with `#>` are output recorded by reprex, not live code.

# Small input: the first 100 paths. Base R is faster in the recorded run.
system.time(file.exists(files[1:100]))
#>    user  system elapsed 
#>   0.000   0.001   0.001
system.time(file_exists(files[1:100]))
#>    user  system elapsed 
#>   0.032   0.001   0.044

# Full input: every path in `files`. In the recorded run file_exists() spends
# far less system time but more user time, and is slower in elapsed time.
system.time(file.exists(files))
#>    user  system elapsed 
#>   0.607   2.019   2.754
system.time(file_exists(files))
#>    user  system elapsed 
#>   3.472   0.079   4.086

Created on 2019-10-10 by the reprex package (v0.3.0)

Session info
devtools::session_info()
#> ─ Session info ──────────────────────────────────────────────────────────
#>  setting  value                       
#>  version  R version 3.6.0 (2019-04-26)
#>  os       macOS Mojave 10.14.6        
#>  system   x86_64, darwin15.6.0        
#>  ui       X11                         
#>  language (EN)                        
#>  collate  en_US.UTF-8                 
#>  ctype    en_US.UTF-8                 
#>  tz       America/New_York            
#>  date     2019-10-10                  
#> 
#> ─ Packages ──────────────────────────────────────────────────────────────
#>  package     * version    date       lib
#>  assertthat    0.2.1      2019-03-21 [1]
#>  backports     1.1.4      2019-04-10 [1]
#>  callr         3.3.1      2019-07-18 [1]
#>  cli           1.1.0      2019-03-19 [1]
#>  crayon        1.3.4      2017-09-16 [1]
#>  desc          1.2.0      2019-07-10 [1]
#>  devtools      2.2.0.9000 2019-09-10 [1]
#>  digest        0.6.21     2019-09-20 [1]
#>  DT            0.8        2019-08-07 [1]
#>  ellipsis      0.3.0      2019-09-20 [1]
#>  evaluate      0.14       2019-05-28 [1]
#>  fs            1.3.1      2019-05-06 [1]
#>  glue          1.3.1      2019-03-12 [1]
#>  highr         0.8        2019-03-20 [1]
#>  htmltools     0.3.6      2017-04-28 [1]
#>  htmlwidgets   1.3        2018-09-30 [1]
#>  knitr         1.24.3     2019-08-28 [1]
#>  magrittr      1.5        2014-11-22 [1]
#>  memoise       1.1.0      2017-04-21 [1]
#>  pkgbuild      1.0.5      2019-08-26 [1]
#>  pkgload       1.0.2      2018-10-29 [1]
#>  prettyunits   1.0.2      2015-07-13 [1]
#>  processx      3.4.1      2019-07-18 [1]
#>  ps            1.3.0      2018-12-21 [1]
#>  R6            2.4.0      2019-02-14 [1]
#>  Rcpp          1.0.2      2019-07-25 [1]
#>  remotes       2.1.0      2019-06-24 [1]
#>  rlang         0.4.0      2019-06-25 [1]
#>  rmarkdown     1.15       2019-08-21 [1]
#>  rprojroot     1.3-2      2018-01-03 [1]
#>  sessioninfo   1.1.1      2018-11-05 [1]
#>  stringi       1.4.3      2019-03-12 [1]
#>  stringr       1.4.0      2019-02-10 [1]
#>  testthat      2.2.1      2019-07-25 [1]
#>  usethis       1.5.1.9000 2019-08-15 [1]
#>  withr         2.1.2      2018-03-15 [1]
#>  xfun          0.9        2019-08-21 [1]
#>  yaml          2.2.0      2018-07-25 [1]
#>  source                            
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  Github (muschellij2/desc@b0c374f) 
#>  Github (r-lib/devtools@d7f0915)   
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  Github (muschellij2/knitr@abcea3d)
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  local                             
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#>  CRAN (R 3.6.0)                    
#> 
#> [1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment