Skip to content

Instantly share code, notes, and snippets.

@andrewheiss
Created October 20, 2020 23:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andrewheiss/a12646011515431d6961533610c2d339 to your computer and use it in GitHub Desktop.
Save andrewheiss/a12646011515431d6961533610c2d339 to your computer and use it in GitHub Desktop.
library(tidyverse)

example <- tribble(
  ~Country, ~speed_100m, ~speed_200m, ~speed_400m, ~speed_800m, ~speed_1500m, ~speed_3000m, ~speed_marathon,
  "Argentina", 11.57, 22.94, 52.50, 2.05, 4.25, 9.19, 150.32,
  "Australia", 11.12, 22.23, 48.63, 1.98, 4.02, 8.63, 143.51,
  "Brazil", 11.17, 22.60, 50.62, 1.97, 4.17, 9.04, 147.41
)

example_tidy <- example %>% 
  # Move all the columns except Country into long format
  pivot_longer(cols = -Country, names_to = "distance", values_to = "value") %>% 
  # Clean up the distance column so it's numeric
  mutate(actual_distance = str_remove(distance, "speed_"),  # Remove speed_ prefix
         actual_distance = recode(actual_distance, "marathon" = "42195m"),  # Replace "marathon" with meters
         actual_distance = parse_number(actual_distance)) %>%   # Convert the numbers like "800m" to actual numbers
  # Convert the times to seconds for all races longer than 800 meters
  mutate(seconds = ifelse(actual_distance >= 800, value * 60, value)) %>% 
  # All the speeds and distances are in just one column, so we just need to do one division equation here
  mutate(speed = actual_distance / seconds) %>% 
  # Get rid of some of the columns
  select(Country, actual_distance, seconds, speed)
example_tidy
#> # A tibble: 21 x 4
#>    Country   actual_distance seconds speed
#>    <chr>               <dbl>   <dbl> <dbl>
#>  1 Argentina             100    11.6  8.64
#>  2 Argentina             200    22.9  8.72
#>  3 Argentina             400    52.5  7.62
#>  4 Argentina             800   123.   6.50
#>  5 Argentina            1500   255    5.88
#>  6 Argentina            3000   551.   5.44
#>  7 Argentina           42195  9019.   4.68
#>  8 Australia             100    11.1  8.99
#>  9 Australia             200    22.2  9.00
#> 10 Australia             400    48.6  8.23
#> # … with 11 more rows

# If you want to make that wider, you can with pivot_wider():
example_tidy %>% 
  # Get rid of the seconds column since we only want the speed column widened
  select(-seconds) %>% 
  pivot_wider(values_from = "speed", names_from = "actual_distance", names_prefix = "speed_")
#> # A tibble: 3 x 8
#>   Country speed_100 speed_200 speed_400 speed_800 speed_1500 speed_3000
#>   <chr>       <dbl>     <dbl>     <dbl>     <dbl>      <dbl>      <dbl>
#> 1 Argent…      8.64      8.72      7.62      6.50       5.88       5.44
#> 2 Austra…      8.99      9.00      8.23      6.73       6.22       5.79
#> 3 Brazil       8.95      8.85      7.90      6.77       6.00       5.53
#> # … with 1 more variable: speed_42195 <dbl>

<sup>Created on 2020-10-20 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0)</sup>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment