iangow/get_ff_ind.md

## get_ff_ind.md

      
    Raw
  

              get_ff_ind.md
            
          
    Note that this requires the dplyr, readr, and tidyr packages to be installed.
library(dplyr, warn.conflicts = FALSE)
    
# Industry counts of the Fama-French classifications to download; each value
# is passed to get_ff_ind() and pasted into the "Siccodes<num>.zip" URL.
ff_ind_nums <- c(5, 10, 12, 17, 30, 38, 48, 49)

#' Download and parse a Fama-French industry classification
#'
#' Downloads `Siccodes<num>.zip` from Ken French's data library, reads the
#' fixed-width text file it contains, and returns one row per SIC code range.
#' Rows whose third field does not start with a digit (industry header lines)
#' are used only to fill in the industry number and descriptions.
#'
#' @param num Number of industries in the classification (e.g. 48). It is
#'   pasted into the download URL and into the `ff_ind_category` label.
#' @return A tibble with columns `ff_ind_category`, `ff_ind`,
#'   `ff_ind_short_desc`, `sic_min`, `sic_max`, `sic_desc` and `ff_ind_desc`.
get_ff_ind <- function(num = 48) {
    stopifnot(length(num) == 1L, !is.na(num))

    zip_path <- tempfile(fileext = ".zip")
    extract_dir <- tempfile("ff_ind_")
    # Clean up the downloaded archive and the extracted file(s) on exit.
    on.exit(unlink(c(zip_path, extract_dir), recursive = TRUE), add = TRUE)

    url <- paste0("https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/Siccodes", num, ".zip")

    # Binary mode keeps the archive intact on platforms that distinguish
    # text from binary transfers.
    download.file(url, zip_path, mode = "wb")

    # Extract into a private temporary directory rather than the working
    # directory, so the caller's directory is not littered with data files.
    txt_path <- unzip(zip_path, exdir = extract_dir)

    ff_data <-
        readr::read_fwf(txt_path,
                 col_positions = readr::fwf_widths(c(3, 7, NA),
                                            c("ff_ind", "ff_ind_short_desc", "sic_range")),
                col_types = "icc") %>%
        # On industry header lines the third field holds the long description.
        mutate(ff_ind_desc = if_else(!is.na(ff_ind_short_desc), sic_range, NA_character_)) %>%
        # Carry the industry identifiers down onto the SIC range lines.
        tidyr::fill(ff_ind, ff_ind_short_desc, ff_ind_desc) %>%
        # Keep only the lines that start with a SIC code range.
        filter(grepl("^[0-9]", sic_range)) %>%
        tidyr::extract(sic_range,
                into = c("sic_min", "sic_max", "sic_desc"),
                regex = "^([0-9]+)-([0-9]+)(.*)$",
                convert = TRUE) %>%
        mutate(sic_desc = trimws(sic_desc)) %>%
        mutate(ff_ind_category = paste0("ff_", num)) %>%
        select(ff_ind_category, everything())

    ff_data
}

# Fetch the Fama-French 48-industry classification and show it
ff48_data <- get_ff_ind(num = 48)
ff48_data
#> # A tibble: 598 x 7
#>    ff_ind_category ff_ind ff_ind_short_desc sic_min sic_max sic_desc ff_ind_desc
#>    <chr>            <int> <chr>               <int>   <int> <chr>    <chr>      
#>  1 ff_48                1 Agric                 100     199 Agricul… Agriculture
#>  2 ff_48                1 Agric                 200     299 Agricul… Agriculture
#>  3 ff_48                1 Agric                 700     799 Agricul… Agriculture
#>  4 ff_48                1 Agric                 910     919 Commerc… Agriculture
#>  5 ff_48                1 Agric                2048    2048 Prepare… Agriculture
#>  6 ff_48                2 Food                 2000    2009 Food an… Food Produ…
#>  7 ff_48                2 Food                 2010    2019 Meat pr… Food Produ…
#>  8 ff_48                2 Food                 2020    2029 Dairy p… Food Produ…
#>  9 ff_48                2 Food                 2030    2039 Canned … Food Produ…
#> 10 ff_48                2 Food                 2040    2046 Flour a… Food Produ…
#> # … with 588 more rows

# Get all data at once
# (Note: For the 5-, 10-, 12-, and 38-industry data the result omits the "Other"
#  category, so the counts below are one short for those classifications. This
#  could be addressed with a left-join in a later data step.)
ff_industries <- ff_ind_nums %>%
    lapply(get_ff_ind) %>%
    bind_rows()

# Number of distinct industries found in each classification
ff_industries %>%
    distinct(ff_ind_category, ff_ind) %>%
    count(ff_ind_category) %>%
    arrange(n)
#> # A tibble: 8 x 2
#>   ff_ind_category     n
#>   <chr>           <int>
#> 1 ff_5                4
#> 2 ff_10               9
#> 3 ff_12              11
#> 4 ff_17              17
#> 5 ff_30              30
#> 6 ff_38              37
#> 7 ff_48              48
#> 8 ff_49              49
<sup>Created on 2020-10-07 by the reprex package (v0.3.0)</sup>