Skip to content

Instantly share code, notes, and snippets.

@paleolimbot
Last active October 21, 2022 02:06
Show Gist options
  • Save paleolimbot/630fdab1e204d70fea97633d8fa15ccb to your computer and use it in GitHub Desktop.
Save paleolimbot/630fdab1e204d70fea97633d8fa15ccb to your computer and use it in GitHub Desktop.
# Session setup ----------
# Keep future quiet about random-number use inside parallel workers.
options(future.rng.onMisuse = "ignore")
library(furrr)
# Run the furrr calls below in background R sessions.
plan(multisession)
# Absolute path of the local arrow R package checkout that is installed later.
arrow_r_home <- fs::path_abs("../arrow/r")

# Note: RcisTarget is bioconductor
# Query reverse dependencies ----------
# Hard-coded package lists; going by the variable names, the first holds
# packages that list the package under test in Imports, the second in Suggests.
rev_imports <- c(
  "CDMConnector", "ClickHouseHTTP", "dataversionr", "diffdfs",
  "disk.frame", "gbifdb", "MolgenisArmadillo", "parqr",
  "receptiviti", "sfarrow", "starvz", "strand", "tradestatistics"
)
rev_suggests <- c(
  "analogsea", "arkdb", "duckdb", "mrgsim.parallel", "nflreadr",
  "noctua", "opencpu", "pins", "plumber", "pointblank", "RAthena",
  "raveio", "rio", "sparklyr", "targets"
)
# Every package that gets checked, in alphabetical order.
rev_deps <- sort(c(rev_imports, rev_suggests))

# Start from an empty directory for the cloned package sources.
unlink("rev_deps", recursive = TRUE)
dir.create("rev_deps")
# Fetch reverse dependency sources ----------
# Clone each package from the github.com/cran/<pkg> mirror into
# rev_deps/<pkg>, one clone per parallel task.
future_walk(
  rev_deps,
  function(pkg) {
    clone_cmd <- glue::glue(
      "git clone https://github.com/cran/{pkg} rev_deps/{pkg}"
    )
    system(clone_cmd)
  },
  .progress = TRUE
)
# Install dependencies needed for checking ----------
# Point pak at the cloned sources and resolve everything they depend on
# (dependencies = TRUE is passed so optional dependencies are included too).
rev_deps_pak <- paste0("local::rev_deps/", rev_deps)
deps <- pak::pkg_deps(
  rev_deps_pak,
  upgrade = TRUE,
  dependencies = TRUE
)

# Package references to install. No `lib` is passed to pkg_install() below,
# so they go into pak's default library.
deps_ref <- deps[["ref"]]

# Rmpi not available on MacOS
on_macos <- Sys.info()[["sysname"]] == "Darwin"
if (on_macos) {
  deps_ref <- setdiff(deps_ref, "Rmpi")
}
pak::pkg_install(deps_ref, upgrade = TRUE)

# Install CRAN arrow
pak::pkg_install("arrow")
# Run rcmdcheck ----------

# Check every cloned package in parallel with rcmdcheck.
#
# pkgs:    character vector of package names; sources are read from
#          rev_deps/<pkg>.
# out_dir: directory, relative to the working directory, that receives one
#          check sub-directory per package. It is removed and recreated first
#          so output from an earlier run cannot linger.
#
# Returns a list parallel to `pkgs`. Each element is whatever
# rcmdcheck::rcmdcheck() returned, or a "try-error" object if that call
# raised an error.
run_rev_dep_checks <- function(pkgs, out_dir) {
  unlink(out_dir, recursive = TRUE)
  dir.create(out_dir)
  future_map(pkgs, function(pkg) {
    try(
      rcmdcheck::rcmdcheck(
        file.path("rev_deps", pkg),
        check_dir = file.path(getwd(), out_dir, pkg),
        quiet = TRUE
      )
    )
  }, .progress = TRUE)
}

# Report which checks could not be run at all.
#
# pkgs:    character vector of package names.
# results: list returned by run_rev_dep_checks() for the same `pkgs`.
#
# Emits a message listing a re-run command for every package whose result is
# a "try-error"; says nothing when there are none. Returns (invisibly) the
# logical vector marking those packages.
report_failed_checks <- function(pkgs, results) {
  failed <- vapply(results, inherits, logical(1), "try-error")
  if (any(failed)) {
    message(glue::glue("{sum(failed)} packages failed to check:"))
    message(
      paste0(
        glue::glue("- rcmdcheck::rcmdcheck(\"rev_deps/{pkgs[failed]}\")"),
        collapse = "\n"
      )
    )
  }
  invisible(failed)
}

# First pass: the arrow version installed just before this section.
results <- run_rev_dep_checks(rev_deps, "check_with_cran")
failed <- report_failed_checks(rev_deps, results)

# Install local arrow
pak::pkg_install(paste0("local::", arrow_r_home))

# Run the checks again. The report is built from this run's own results;
# previously it reused the `failed` vector from the first pass.
results2 <- run_rev_dep_checks(rev_deps, "check_with_local")
failed2 <- report_failed_checks(rev_deps, results2)
# Write the package list and both sets of check results to disk so the
# comparison below can be run without repeating the checks.
saveRDS(rev_deps, file = "rev_deps.rds")
saveRDS(results, file = "results_with_cran.rds")
saveRDS(results2, file = "results_with_local.rds")
library(tidyverse)
library(rcmdcheck)
# Read back the package list and the two result sets saved by the check run.
rev_deps <- readRDS(file = "rev_deps.rds")
results_with_cran <- readRDS(file = "results_with_cran.rds")
results_with_local <- readRDS(file = "results_with_local.rds")
# Summarise a list of check results as a tibble with one row per package.
#
# lst:   list of check results, expected to line up with the global
#        `rev_deps`; elements of class "try-error" are treated as having
#        no result.
# label: value stored in the `label` column of every row.
#
# Returns a tibble with columns `label` and `pkg` plus the list-columns
# `notes`, `warnings` and `errors` taken from each result.
check_df <- function(lst, label) {
  # Blank out checks that errored so the field lookups below give NULL.
  errored <- map_lgl(lst, inherits, "try-error")
  lst[errored] <- list(NULL)

  # Extract one named field from every result.
  pull_field <- function(field) map(lst, field)

  tibble(
    label = label,
    pkg = rev_deps,
    notes = pull_field("notes"),
    warnings = pull_field("warnings"),
    errors = pull_field("errors")
  )
}
# One summary tibble per arrow build.
df_with_cran <- check_df(results_with_cran, "CRAN")
df_with_local <- check_df(results_with_local, "local")

# Flag packages whose summary row differs between the two builds. The first
# column (the label) is dropped from the comparison because it always differs.
has_difference <- vapply(
  seq_along(rev_deps),
  function(row) {
    !identical(df_with_cran[row, -1], df_with_local[row, -1])
  },
  logical(1)
)
# Print a Markdown report: for every package flagged above, show the check
# result under CRAN arrow and under local arrow, each in a fenced code block.
cat("# Changes\n\n")
for (idx in which(has_difference)) {
  cat(
    "## ", rev_deps[idx],
    "\n\n### With CRAN arrow\n\n```\n",
    sep = ""
  )
  print(results_with_cran[[idx]])
  cat("\n```\n\n### With local arrow\n\n```\n")
  print(results_with_local[[idx]])
  cat("\n\n```\n\n")
}
@paleolimbot
Copy link
Author

CDMConnector

With CRAN arrow

── R CMD check results ──────────────────────────────────────────────────────── CDMConnector 0.1.0 ────
Duration: 45.4s

❯ checking installed package size ... NOTE
    installed size is  5.1Mb
    sub-directories of 1Mb or more:
      duckdb   3.8Mb

0 errors ✔ | 0 warnings ✔ | 1 note ✖

With local arrow

── R CMD check results ──────────────────────────────────────────────────────── CDMConnector 0.1.0 ────
Duration: 44.2s

❯ checking tests ...
  See below...

❯ checking installed package size ... NOTE
    installed size is  5.1Mb
    sub-directories of 1Mb or more:
      duckdb   3.8Mb

── Test failures ──────────────────────────────────────────────────────────────────────── testthat ────

> # This file is part of the standard setup for testthat.
> # It is recommended that you do not modify it.
> #
> # Where should you do additional test configuration?
> # Learn more about the roles of various files in:
> # * https://r-pkgs.org/tests.html
> # * https://testthat.r-lib.org/reference/test_package.html#special-files
> 
> library(testthat)
> library(CDMConnector)
> 
> test_check("CDMConnector")
[ FAIL 1 | WARN 0 | SKIP 8 | PASS 36 ]

══ Skipped tests ═══════════════════════════════════════════════════════════════
• Sys.getenv("CDM5_POSTGRESQL_USER") == "" is TRUE (2)
• Sys.getenv("CDM5_REDSHIFT_USER") == "" is TRUE (2)
• Sys.getenv("CDM5_SQL_SERVER_USER") == "" is TRUE (2)
• Sys.getenv("LOCAL_POSTGRESQL_USER") == "" is TRUE (2)

══ Failed tests ════════════════════════════════════════════════════════════════
── Error ('test-cdm.R:211'): stow and cdm_from_files works ─────────────────────
Error in `if (single) {
    friendly <- sprintf("a single %s element", classes)
} else {
    friendly <- sprintf("a %s element", classes)
}`: argument is not interpretable as logical
Backtrace:
     ▆
  1. ├─testthat::expect_output(validate_cdm(local_arrow_cdm)) at test-cdm.R:211:2
  2. │ └─testthat:::quasi_capture(...)
  3. │   ├─testthat (local) .capture(...)
  4. │   │ └─testthat::capture_output_lines(code, print, width = width)
  5. │   │   └─testthat:::eval_with_output(code, print = print, width = width)
  6. │   │     ├─withr::with_output_sink(path, withVisible(code))
  7. │   │     │ └─base::force(code)
  8. │   │     └─base::withVisible(code)
  9. │   └─rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
 10. ├─CDMConnector::validate_cdm(local_arrow_cdm)
 11. │ └─CDMConnector:::validate_cdm_rowcounts(cdm)
 12. │   ├─... %>% rlang::set_names(nm)
 13. │   └─purrr::map_dbl(...)
 14. ├─rlang::set_names(., nm)
 15. └─purrr:::stop_bad_element_vector(...)
 16.   └─purrr:::stop_bad_vector(...)
 17.     └─purrr:::friendly_vector_type(x, length(x))
 18.       └─purrr:::friendly_type_of_element(x)

[ FAIL 1 | WARN 0 | SKIP 8 | PASS 36 ]
Error: Test failures
In addition: Warning message:
Connection is garbage-collected, use dbDisconnect() to avoid this. 
Execution halted
Warning messages:
1: Connection is garbage-collected, use dbDisconnect() to avoid this. 
2: Database is garbage-collected, use dbDisconnect(con, shutdown=TRUE) or duckdb::duckdb_shutdown(drv) to avoid this. 
3: Database is garbage-collected, use dbDisconnect(con, shutdown=TRUE) or duckdb::duckdb_shutdown(drv) to avoid this. 

1 error ✖ | 0 warnings ✔ | 1 note ✖


dataversionr

With CRAN arrow

── R CMD check results ──────────────────────────────────────────────────────── dataversionr 0.9.0 ────
Duration: 3m 19.8s

0 errors ✔ | 0 warnings ✔ | 0 notes ✔

With local arrow

── R CMD check results ──────────────────────────────────────────────────────── dataversionr 0.9.0 ────
Duration: 2m 57.5s

❯ checking examples ... ERROR
  Running examples in ‘dataversionr-Ex.R’ failed
  The error most likely occurred in:
  
  > ### Name: get_diff_stats
  > ### Title: Get diff stats
  > ### Aliases: get_diff_stats
  > 
  > ### ** Examples
  > 
  > temp_dir <- tempfile()
  > dir.create(temp_dir, recursive = TRUE)
  > df <- data.frame(a = 1:5, b = letters[1:5])
  > new_df <- data.frame(a = 2:5, b = letters[2:5])
  > diff <- diffdfs::diffdfs(new_df, df)
  > commit_diff(diff, temp_dir)
  [1] TRUE
  > put_diff_stats(temp_dir)
  Error in `.data$operation`:
  ! Column `operation` not found in `.data`.
  Backtrace:
       ▆
    1. ├─dataversionr::put_diff_stats(temp_dir)
    2. │ └─dataversionr::summarise_diffs(destination)
    3. │   └─... %>% as.data.frame()
    4. ├─base::as.data.frame(.)
    5. ├─dplyr::ungroup(.)
    6. ├─dplyr::mutate(.)
    7. ├─tidyr::pivot_wider(., names_from = .data$operation, values_from = .data$count)
    8. ├─tidyr:::pivot_wider.data.frame(...)
    9. │ └─tidyr::build_wider_spec(...)
   10. │   └─tidyselect::eval_select(enquo(names_from), data)
   11. │     └─tidyselect:::eval_select_impl(...)
   12. │       ├─tidyselect:::with_subscript_errors(...)
   13. │       │ └─rlang::try_fetch(...)
   14. │       │   └─base::withCallingHandlers(...)
   15. │       └─tidyselect:::vars_select_eval(...)
   16. │         └─tidyselect:::walk_data_tree(expr, data_mask, context_mask)
   17. │           └─base::eval(expr, data_mask)
   18. │             └─base::eval(expr, data_mask)
   19. │               ├─operation
   20. │               └─rlang:::`$.rlang_data_pronoun`(.data, operation)
   21. │                 └─rlang:::data_pronoun_get(...)
   22. └─rlang:::abort_data_pronoun(x, call = y)
   23.   └─rlang::abort(msg, "rlang_error_data_pronoun_not_found", call = call)
  Execution halted

❯ checking tests ...
  See below...

── Test failures ──────────────────────────────────────────────────────────────────────── testthat ────

> library(testthat)
> library(dataversionr)
> 
> test_check("dataversionr")

Attaching package: 'dplyr'

The following object is masked from 'package:testthat':

    matches

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


Attaching package: 'tidyr'

The following object is masked from 'package:testthat':

    matches

Some features are not enabled in this build of Arrow. Run `arrow_info()` for more information.

Attaching package: 'arrow'

The following object is masked from 'package:testthat':

    matches

The following object is masked from 'package:utils':

    timestamp


Attaching package: 'lubridate'

The following object is masked from 'package:arrow':

    duration

The following objects are masked from 'package:base':

    date, intersect, setdiff, union

Committing diff to dataset...
Verifying diff can be retrieved from dataset...
A parquet file can be read from the target path...
Remote diff is identical to local diff.
Committing diff to dataset...
Verifying diff can be retrieved from dataset...
A parquet file can be read from the target path...
Remote diff is identical to local diff.
Checking that new_df can be diffed...
Diff test passed.
Checking that new_df can be diffed...
Diff test passed.
Checking that new_df can be diffed...
Checking that new_df can be diffed...
Diff test passed.
Checking that new_df can be diffed...
Diff test passed.
destination parameter is a string. Coercing to local FileSystem.
destination parameter is a string. Coercing to local FileSystem.
[ FAIL 3 | WARN 13 | SKIP 0 | PASS 46 ]

══ Failed tests ════════════════════════════════════════════════════════════════
── Error ('test-diff_ops.R:89'): local summarise_diffs  ────────────────────────
<rlang_error_data_pronoun_not_found/rlang_error/error/condition>
Error in `.data$operation`: Column `operation` not found in `.data`.
Backtrace:
     ▆
  1. ├─testthat::expect_equal(...) at test-diff_ops.R:89:2
  2. │ └─testthat::quasi_label(enquo(object), label, arg = "object")
  3. │   └─rlang::eval_bare(expr, quo_get_env(quo))
  4. ├─... %>% arrange(new)
  5. ├─dplyr::arrange(., new)
  6. ├─dplyr::select(., -diff_timestamp)
  7. ├─dataversionr::summarise_diffs(local_prefix)
  8. │ └─... %>% as.data.frame()
  9. ├─base::as.data.frame(.)
 10. ├─dplyr::ungroup(.)
 11. ├─dplyr::mutate(.)
 12. ├─tidyr::pivot_wider(., names_from = .data$operation, values_from = .data$count)
 13. ├─tidyr:::pivot_wider.data.frame(...)
 14. │ └─tidyr::build_wider_spec(...)
 15. │   └─tidyselect::eval_select(enquo(names_from), data)
 16. │     └─tidyselect:::eval_select_impl(...)
 17. │       ├─tidyselect:::with_subscript_errors(...)
 18. │       │ └─rlang::try_fetch(...)
 19. │       │   └─base::withCallingHandlers(...)
 20. │       └─tidyselect:::vars_select_eval(...)
 21. │         └─tidyselect:::walk_data_tree(expr, data_mask, context_mask)
 22. │           └─base::eval(expr, data_mask)
 23. │             └─base::eval(expr, data_mask)
 24. │               ├─operation
 25. │               └─rlang:::`$.rlang_data_pronoun`(.data, operation)
 26. │                 └─rlang:::data_pronoun_get(...)
 27. └─rlang:::abort_data_pronoun(x, call = y)
 28.   └─rlang::abort(msg, "rlang_error_data_pronoun_not_found", call = call)
── Error ('test-diff_ops.R:107'): local put_diff_stats  ────────────────────────
<rlang_error_data_pronoun_not_found/rlang_error/error/condition>
Error in `.data$operation`: Column `operation` not found in `.data`.
Backtrace:
     ▆
  1. ├─testthat::expect_true(put_diff_stats(local_prefix)) at test-diff_ops.R:107:2
  2. │ └─testthat::quasi_label(enquo(object), label, arg = "object")
  3. │   └─rlang::eval_bare(expr, quo_get_env(quo))
  4. ├─dataversionr::put_diff_stats(local_prefix)
  5. │ └─dataversionr::summarise_diffs(destination)
  6. │   └─... %>% as.data.frame()
  7. ├─base::as.data.frame(.)
  8. ├─dplyr::ungroup(.)
  9. ├─dplyr::mutate(.)
 10. ├─tidyr::pivot_wider(., names_from = .data$operation, values_from = .data$count)
 11. ├─tidyr:::pivot_wider.data.frame(...)
 12. │ └─tidyr::build_wider_spec(...)
 13. │   └─tidyselect::eval_select(enquo(names_from), data)
 14. │     └─tidyselect:::eval_select_impl(...)
 15. │       ├─tidyselect:::with_subscript_errors(...)
 16. │       │ └─rlang::try_fetch(...)
 17. │       │   └─base::withCallingHandlers(...)
 18. │       └─tidyselect:::vars_select_eval(...)
 19. │         └─tidyselect:::walk_data_tree(expr, data_mask, context_mask)
 20. │           └─base::eval(expr, data_mask)
 21. │             └─base::eval(expr, data_mask)
 22. │               ├─operation
 23. │               └─rlang:::`$.rlang_data_pronoun`(.data, operation)
 24. │                 └─rlang:::data_pronoun_get(...)
 25. └─rlang:::abort_data_pronoun(x, call = y)
 26.   └─rlang::abort(msg, "rlang_error_data_pronoun_not_found", call = call)
── Error ('test-diff_ops.R:125'): local get_diff_stats ─────────────────────────
Error: IOError: Failed to open local file '/private/var/folders/gt/l87wjg8s7312zs9s7c1fgs900000gn/T/RtmpmmWngV/file144314cc86fd6/diff_stats.csv'
/Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/io/file.cc:105  ::arrow::internal::FileOpenReadable(file_name_)
/Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/io/file.cc:450  file_->OpenReadable(path)
/Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/io/file.cc:616  result->memory_map_->Open(path, mode). Detail: [errno 2] No such file or directory
Backtrace:
     ▆
  1. ├─testthat::expect_equal(...) at test-diff_ops.R:125:2
  2. │ └─testthat::quasi_label(enquo(object), label, arg = "object")
  3. │   └─rlang::eval_bare(expr, quo_get_env(quo))
  4. ├─... %>% arrange(new)
  5. ├─dplyr::arrange(., new)
  6. ├─dplyr::select(., -diff_timestamp)
  7. └─dataversionr::get_diff_stats(local_prefix)
  8.   ├─base::as.data.frame(arrow::read_csv_arrow(get_location))
  9.   ├─arrow::read_csv_arrow(get_location)
 10.   │ └─base::eval.parent(mc)
 11.   │   └─base::eval(expr, p)
 12.   │     └─base::eval(expr, p)
 13.   └─arrow (local) `<fn>`(file = get_location, delim = ",")
 14.     └─arrow:::make_readable_file(file)
 15.       └─arrow::mmap_open(file)
 16.         └─arrow:::io___MemoryMappedFile__Open(path, mode)

[ FAIL 3 | WARN 13 | SKIP 0 | PASS 46 ]
Error: Test failures
Execution halted

2 errors ✖ | 0 warnings ✔ | 0 notes ✔


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment