# djnavarro/readr_threads_issue.R

## readr_threads_issue.R
# fix the RNG seed so the runif() draws made by sample_values() below are
# reproducible from run to run
set.seed(1)

# Build a character vector of length `n` in which each element is "safe",
# except where an independent uniform draw exceeds `p_safe`; those elements
# become "UNSAFE\n" (note the embedded newline).
#
# n      - number of values to generate
# p_safe - threshold compared against runif(n); draws above it are "UNSAFE\n"
#
# Returns the character vector.
sample_values <- function(n, p_safe) {
  labels <- rep("safe", n)
  exceeds <- runif(n) > p_safe
  replace(labels, exceeds, "UNSAFE\n")
}

# the wild-caught data had this structure: column `a` is almost entirely
# "safe", while `b` and `c` are almost entirely "UNSAFE\n", so nearly every
# row carries a newline embedded inside a field
# (p_safe is spelled out in full rather than relying on partial matching of
# `p`, which would silently break if another `p*` argument were ever added)
old_df <- tibble::tibble(
  a = sample_values(1200, p_safe = .99),
  b = sample_values(1200, p_safe = .01),
  c = sample_values(1200, p_safe = .01)
)

# write to temp file (write.csv quotes character fields by default, so the
# embedded newlines end up inside quoted fields)
path <- tempfile(pattern = "quoted_newlines_", fileext = ".csv")
write.csv(old_df, path, row.names = FALSE)

# read data without setting num_threads (throws warning)
new_df <- readr::read_csv(path, lazy = FALSE)

# the weirdness!
waldo::compare(old_df, new_df) # yes, it parses correctly...
readr::problems(new_df)        # ...but problems have been logged

# read data setting num_threads = 1 (no warning)
new_df <- readr::read_csv(path, lazy = FALSE, num_threads = 1)

# weirdness vanishes
waldo::compare(old_df, new_df)
readr::problems(new_df)
	# fix the RNG seed so the runif() draws made by sample_values() below are
	# reproducible from run to run
	set.seed(1)

	# Build a character vector of length `n` in which each element is "safe",
	# except where an independent uniform draw exceeds `p_safe`; those elements
	# become "UNSAFE\n" (note the embedded newline).
	#
	# n      - number of values to generate
	# p_safe - threshold compared against runif(n); draws above it are "UNSAFE\n"
	#
	# Returns the character vector.
	sample_values <- function(n, p_safe) {
	  labels <- rep("safe", n)
	  exceeds <- runif(n) > p_safe
	  replace(labels, exceeds, "UNSAFE\n")
	}

	# the wild-caught data had this structure: column `a` is almost entirely
	# "safe", while `b` and `c` are almost entirely "UNSAFE\n", so nearly every
	# row carries a newline embedded inside a field
	# (p_safe is spelled out in full rather than relying on partial matching of
	# `p`, which would silently break if another `p*` argument were ever added)
	old_df <- tibble::tibble(
	  a = sample_values(1200, p_safe = .99),
	  b = sample_values(1200, p_safe = .01),
	  c = sample_values(1200, p_safe = .01)
	)

	# write to temp file (write.csv quotes character fields by default, so the
	# embedded newlines end up inside quoted fields)
	path <- tempfile(pattern = "quoted_newlines_", fileext = ".csv")
	write.csv(old_df, path, row.names = FALSE)

	# read data without setting num_threads (throws warning)
	new_df <- readr::read_csv(path, lazy = FALSE)

	# the weirdness!
	waldo::compare(old_df, new_df) # yes, it parses correctly...
	readr::problems(new_df) # ...but problems have been logged

	# read data setting num_threads = 1 (no warning)
	new_df <- readr::read_csv(path, lazy = FALSE, num_threads = 1)

	# weirdness vanishes
	waldo::compare(old_df, new_df)
	readr::problems(new_df)