Last active
October 19, 2019 00:02
-
-
Save matt-dray/c3ab424d67887968b5e47becd0e3ff5b to your computer and use it in GitHub Desktop.
Testing comparisons between data.frame and tibble objects with all.equal()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# all.equal() behaves differently when presented with objects of class
# data.frame versus those with tibble class. This is important for
# trying to rewrite the {tidyr} episode of Software Carpentry's
# R for Reproducible Scientific Analysis lesson to include the new
# pivot_wider() and pivot_longer() functions instead of spread() and
# gather(), which have been superseded.

# Seems to be a noted problem:
# https://github.com/tidyverse/tibble/issues/287

# Software Carpentry episode:
# https://github.com/swcarpentry/r-novice-gapminder/blob/master/_episodes_rmd/14-tidyr.Rmd

# {tidyr} 1.0.0 release:
# https://www.tidyverse.org/articles/2019/09/tidyr-1-0-0/
# Load packages -----------------------------------------------------------

# {tibble} supplies as_tibble(), which this script uses to make tibble
# copies of the data frames.
library(tibble)
# Create dataframes -------------------------------------------------------

# Base data frame. stringsAsFactors = TRUE is set explicitly because
# data.frame() stopped converting strings to factors by default in
# R 4.0.0; without it col1 would be character rather than the factor that
# the comment on col1 promises.
df1 <- data.frame(
  col1 = LETTERS[1:5],  # factor
  col2 = 1:5,           # integer
  col3 = 6:10,          # integer
  stringsAsFactors = TRUE
)

# Copy dataframe and change third column to numeric (double). The values
# are unchanged; only the storage type of col3 differs from df1.
df2 <- df1
df2$col3 <- as.numeric(df2$col3)
# Tibble copy of first dataframe (col3 is integer)
df3 <- as_tibble(df1)

# Tibble copy of second dataframe (col3 is numeric)
df4 <- as_tibble(df2)
# See dataframes ----------------------------------------------------------

# Print the structure of each object. The commented lines under each call
# are the output recorded when the script was originally run.

str(df1)
# 'data.frame': 5 obs. of 3 variables:
#  $ col1: Factor w/ 5 levels "A","B","C","D",..: 1 2 3 4 5
#  $ col2: int 1 2 3 4 5
#  $ col3: int 6 7 8 9 10

str(df2)
# 'data.frame': 5 obs. of 3 variables:
#  $ col1: Factor w/ 5 levels "A","B","C","D",..: 1 2 3 4 5
#  $ col2: int 1 2 3 4 5
#  $ col3: num 6 7 8 9 10

str(df3)
# Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 5 obs. of 3 variables:
#  $ col1: Factor w/ 5 levels "A","B","C","D",..: 1 2 3 4 5
#  $ col2: int 1 2 3 4 5
#  $ col3: int 6 7 8 9 10

str(df4)
# Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 5 obs. of 3 variables:
#  $ col1: Factor w/ 5 levels "A","B","C","D",..: 1 2 3 4 5
#  $ col2: int 1 2 3 4 5
#  $ col3: num 6 7 8 9 10
# Compare dataframes ------------------------------------------------------

# all.equal() returns TRUE when it finds no difference, otherwise a
# character vector describing the differences (it does not raise an error).
# The commented lines under each call are the output recorded when the
# script was originally run.

# col3 is int for df1, num for df2
all.equal(df1, df2)
# [1] TRUE

# df3 is a tibble of df1
all.equal(df1, df3)
# [1] "Attributes: < Component “class”: Lengths (1, 3) differ (string compare on first 1) >"
# [2] "Attributes: < Component “class”: 1 string mismatch >"

# df4 is a tibble of df2
all.equal(df2, df4)
# [1] "Attributes: < Component “class”: Lengths (1, 3) differ (string compare on first 1) >"
# [2] "Attributes: < Component “class”: 1 string mismatch >"

# tibble versions of df1 and df2
# NOTE(review): the message below presumably comes from an all.equal()
# method specific to tibbles; this script only attaches {tibble}, so confirm
# which package supplied that method and whether current package versions
# still report the type difference.
all.equal(df3, df4)
# [1] "Incompatible type for column `col3`: x integer, y numeric"

# So comparing the two as dataframes reported no difference, but comparing
# the tibbles was stricter and flagged the integer/numeric type difference.
# Comparing a data.frame to a tibble gave a different message again: a
# mismatch in the class attribute.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment