Skip to content

Instantly share code, notes, and snippets.

@sharlagelfand
Last active February 25, 2020 20:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sharlagelfand/d6b1af70675c9d3ae07276c0612b1167 to your computer and use it in GitHub Desktop.
Save sharlagelfand/d6b1af70675c9d3ae07276c0612b1167 to your computer and use it in GitHub Desktop.
# tidyr is the only package attached at this point; tribble(), complete()
# and %>% used below are all taken from it.
library(tidyr)

# Small sales table with no ("2020-02-02", "Banana") row.
df <- tribble(
  ~Date,        ~Fruit,   ~Sold,
  "2020-02-01", "Apple",  5,
  "2020-02-01", "Banana", 1,
  "2020-02-02", "Apple",  2
)

# Expand to every Date x Fruit combination; the added row has Sold = NA.
df_complete <- complete(df, Date, Fruit)
df_complete
#> # A tibble: 4 x 3
#> Date Fruit Sold
#> <chr> <chr> <dbl>
#> 1 2020-02-01 Apple 5
#> 2 2020-02-01 Banana 1
#> 3 2020-02-02 Apple 2
#> 4 2020-02-02 Banana NA

# Manual follow-up step: overwrite the NA in Sold with an explicit zero.
df_complete$Sold[is.na(df_complete$Sold)] <- 0
df_complete
#> # A tibble: 4 x 3
#> Date Fruit Sold
#> <chr> <chr> <dbl>
#> 1 2020-02-01 Apple 5
#> 2 2020-02-01 Banana 1
#> 3 2020-02-02 Apple 2
#> 4 2020-02-02 Banana 0
# With several value columns the manual approach needs one NA replacement
# per column, which quickly becomes cumbersome.
df_more <- tribble(
  ~Date,        ~Fruit,   ~Sold, ~Ate, ~Garbage,
  "2020-02-01", "Apple",  5,     1,    4,
  "2020-02-01", "Banana", 1,     2,    1,
  "2020-02-02", "Apple",  2,     1,    1
)

# Add the missing Date x Fruit row, then zero out each value column in turn.
df_more_complete <- complete(df_more, Date, Fruit)
df_more_complete$Sold[is.na(df_more_complete$Sold)] <- 0
df_more_complete$Ate[is.na(df_more_complete$Ate)] <- 0
df_more_complete$Garbage[is.na(df_more_complete$Garbage)] <- 0
df_more_complete
#> # A tibble: 4 x 5
#> Date Fruit Sold Ate Garbage
#> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 2020-02-01 Apple 5 1 4
#> 2 2020-02-01 Banana 1 2 1
#> 3 2020-02-02 Apple 2 1 1
#> 4 2020-02-02 Banana 0 0 0
# Alternative: hand the replacement values to complete() through `fill`,
# so the new row is created with zeros instead of NA in a single call.
df_more_complete <- complete(
  df_more,
  Date, Fruit,
  fill = list(Sold = 0, Ate = 0, Garbage = 0)
)
df_more_complete
#> # A tibble: 4 x 5
#> Date Fruit Sold Ate Garbage
#> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 2020-02-01 Apple 5 1 4
#> 2 2020-02-01 Banana 1 2 1
#> 3 2020-02-02 Apple 2 1 1
#> 4 2020-02-02 Banana 0 0 0
# Using `fill` inside complete() lets the whole computation stay in one
# uninterrupted pipeline: complete, group by date, total the sales.
library(dplyr, warn.conflicts = FALSE)

df %>%
  complete(
    Date, Fruit,
    fill = list(Sold = 0)
  ) %>%
  group_by(Date) %>%
  summarise(Total = sum(Sold))
#> # A tibble: 2 x 2
#> Date Total
#> <chr> <dbl>
#> 1 2020-02-01 6
#> 2 2020-02-02 2
# With the is.na() approach the chain has to be interrupted: store the
# completed table, patch the NA values, then start a second pipeline.
df_na_filled <- df %>%
  complete(Date, Fruit)

df_na_filled$Sold[is.na(df_na_filled$Sold)] <- 0

df_na_filled %>%
  group_by(Date) %>%
  summarise(Total = sum(Sold))
#> # A tibble: 2 x 2
#> Date Total
#> <chr> <dbl>
#> 1 2020-02-01 6
#> 2 2020-02-02 2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment