Note that this requires the dplyr, readr, and tidyr packages to be installed.
library(dplyr, warn.conflicts = FALSE)
# Fama-French industry-classification sizes to fetch; each value is passed to
# get_ff_ind(), which splices it into the "Siccodes<num>.zip" download URL.
ff_ind_nums <- c(5, 10, 12, 17, 30, 38, 48, 49)
# Download and parse one Fama-French industry-definition file.
#
# Fetches "Siccodes<num>.zip" from Ken French's data library, reads the
# fixed-width text file inside it, and returns one row per SIC-code range.
#
# Args:
#   num: Length-one, non-missing identifier of the classification to fetch
#        (e.g. 48 for the 48-industry definitions). It is pasted into the URL.
#
# Returns:
#   A tibble with columns ff_ind_category ("ff_<num>"), ff_ind,
#   ff_ind_short_desc, sic_min, sic_max, sic_desc and ff_ind_desc.
#   Industries that list no SIC ranges in the file contribute no rows.
#
# Side effects:
#   Performs a network download. All files are written to (and removed from)
#   the session temp directory; the working directory is left untouched.
get_ff_ind <- function(num = 48) {
  stopifnot(length(num) == 1L, !is.na(num))

  # Keep both the archive and its extracted contents in the temp area and
  # remove them on exit, even if download or parsing fails.
  zip_path <- tempfile(fileext = ".zip")
  unzip_dir <- tempfile("ff_ind_")
  dir.create(unzip_dir)
  on.exit(unlink(c(zip_path, unzip_dir), recursive = TRUE), add = TRUE)

  url <- paste0(
    "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/Siccodes",
    num, ".zip"
  )

  # mode = "wb" keeps the zip archive byte-exact on every platform.
  status <- download.file(url, zip_path, mode = "wb")
  if (status != 0L) {
    stop("Failed to download ", url, call. = FALSE)
  }

  # unzip() returns the paths of the extracted files; without `exdir` it
  # would drop them into the current working directory.
  txt_files <- unzip(zip_path, exdir = unzip_dir)
  if (length(txt_files) == 0L) {
    stop("No files could be extracted from ", url, call. = FALSE)
  }

  ff_data <-
    readr::read_fwf(
      txt_files,
      col_positions = readr::fwf_widths(
        c(3, 7, NA),
        c("ff_ind", "ff_ind_short_desc", "sic_range")
      ),
      col_types = "icc"
    ) %>%
    # On an industry header line (short description present) the third field
    # holds the long industry description rather than a SIC range.
    mutate(
      ff_ind_desc = if_else(!is.na(ff_ind_short_desc), sic_range, NA_character_)
    ) %>%
    # Carry the header values down onto the SIC-range lines that follow.
    tidyr::fill(ff_ind, ff_ind_short_desc, ff_ind_desc) %>%
    # Keep only the lines whose third field starts with a digit (SIC ranges).
    filter(grepl("^[0-9]", sic_range)) %>%
    tidyr::extract(
      sic_range,
      into = c("sic_min", "sic_max", "sic_desc"),
      regex = "^([0-9]+)-([0-9]+)(.*)$",
      convert = TRUE
    ) %>%
    mutate(
      sic_desc = trimws(sic_desc),
      ff_ind_category = paste0("ff_", num)
    ) %>%
    select(ff_ind_category, everything())

  ff_data
}
# Get Fama-French 48-industry data: download and parse the 48-industry
# definitions, then auto-print the resulting tibble.
ff48_data <- get_ff_ind(48)
ff48_data
#> # A tibble: 598 x 7
#> ff_ind_category ff_ind ff_ind_short_desc sic_min sic_max sic_desc ff_ind_desc
#> <chr> <int> <chr> <int> <int> <chr> <chr>
#> 1 ff_48 1 Agric 100 199 Agricul… Agriculture
#> 2 ff_48 1 Agric 200 299 Agricul… Agriculture
#> 3 ff_48 1 Agric 700 799 Agricul… Agriculture
#> 4 ff_48 1 Agric 910 919 Commerc… Agriculture
#> 5 ff_48 1 Agric 2048 2048 Prepare… Agriculture
#> 6 ff_48 2 Food 2000 2009 Food an… Food Produ…
#> 7 ff_48 2 Food 2010 2019 Meat pr… Food Produ…
#> 8 ff_48 2 Food 2020 2029 Dairy p… Food Produ…
#> 9 ff_48 2 Food 2030 2039 Canned … Food Produ…
#> 10 ff_48 2 Food 2040 2046 Flour a… Food Produ…
#> # … with 588 more rows
# Fetch every classification listed in ff_ind_nums and stack the results.
# (Caveat: the code does not fully work for the 5-, 10-, 12-, and 38-industry
# data because of the presence of an "Other" category, so those
# classifications come back with one industry fewer than their name implies.
# This could be addressed with a left-join in a later data step.)
ff_industries <-
  lapply(ff_ind_nums, get_ff_ind) %>%
  bind_rows()

# Count the distinct industries recovered per classification, smallest first.
ff_industries %>%
  distinct(ff_ind_category, ff_ind) %>%
  count(ff_ind_category) %>%
  arrange(n)
#> # A tibble: 8 x 2
#> ff_ind_category n
#> <chr> <int>
#> 1 ff_5 4
#> 2 ff_10 9
#> 3 ff_12 11
#> 4 ff_17 17
#> 5 ff_30 30
#> 6 ff_38 37
#> 7 ff_48 48
#> 8 ff_49 49
Created on 2020-10-07 by the reprex package (v0.3.0)