Skip to content

Instantly share code, notes, and snippets.

@dantonnoriega
Last active March 4, 2024 19:26
Show Gist options
  • Save dantonnoriega/640f7473be99bdf741e73062c12ede3b to your computer and use it in GitHub Desktop.
Save dantonnoriega/640f7473be99bdf741e73062c12ede3b to your computer and use it in GitHub Desktop.
[development] prototype functions to programmatically pull ETF holdings (portfolio) data; likewise which ETFs hold a specific stock and its allocation (data source: etf.com)
# SETUP -----------------
library(httr2)
api_base_url = "https://api-prod.etf.com/private"
hdrs <- list(
`x-limit` = 10000,
`User-Agent` = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3.1 Safari/605.1.15',
`Origin` = 'https://www.etf.com',
`Referer` = 'https://www.etf.com/',
`Accept` = '*/*',
`Sec-Fetch-Dest` = 'empty',
`Sec-Fetch-Mode` = 'cors',
`Sec-Fetch-Site` = 'same-site',
`Host` = 'api-prod.etf.com'
)
# PROTOTYPE -----------
# get etf holdings
etf <- "VOO"
path_fund_holdings <- sprintf('fund/%s/holdings', etf)
req <- request(api_base_url)
resp_fund_holdings <- req |>
req_url_path_append(path_fund_holdings) |>
req_headers(!!!hdrs) |>
req_url_query(`type` = 'securities', `formatValues` = TRUE) |>
req_perform()
#
body_fund_holdings <- resp_fund_holdings |> resp_body_json()
body_fund_holdings
# get ETF overview
etf <- "VOO"
path_display_fund <- sprintf('display/fund/%s', etf)
req <- request(api_base_url)
resp_display_fund <- req |>
req_url_path_append(path_display_fund) |>
req_headers(!!!hdrs) |>
req_url_query(`type` = 'liteDesktopVersion') |>
req_perform()
#
body_display_fund <- resp_display_fund |> resp_body_json()
# get ETFs that hold a given stock
stock <- 'NVDA'
path_stock_holdings <- sprintf('display/holding/%s', stock)
resp_stock_holdings <- req |>
req_url_path_append(path_stock_holdings) |>
req_headers(!!!hdrs) |>
req_url_query(`type` = 'desktopLite') |>
req_perform()
body_stock_holdings <- resp_stock_holdings |> resp_body_json()
# FUNCTIONS ----------------------
parse_resp_fund_holdings <- function(resp) {
body <- resp |> resp_body_json() |> purrr::pluck(1)
holdings <- purrr::map_dfr(body$data, ~.x) |>
janitor::clean_names() |>
dplyr::transmute(
symbol,
name,
weight = readr::parse_number(weight),
shares,
market_value,
as_of = as.Date(lubridate::ymd_hms(as_of)),
)
tibble::tibble(
ticker = body$ticker,
data = list(holdings)
)
}
# get holdings for given ETF
etf_fund_holdings <- function(x) {
paths <- sprintf('fund/%s/holdings', x)
req <- request(api_base_url)
purrr::map_dfr(paths, ~{
resp <- req |>
req_url_path_append(.x) |>
req_headers(!!!hdrs) |>
req_url_query(`type` = 'securities', `formatValues` = TRUE) |>
req_perform()
#
parse_resp_fund_holdings(resp)
})
}
etfs <- c("VOO", "VUG", "MGK", "VTV", "VV")
fund_holdings <- etf_fund_holdings(etfs)
fund_holdings
units_to_integer <- function(x) {
numeric_value <- readr::parse_number(x)
units <- toupper(stringr::str_extract(x, "[A-Za-z]+"))
multiplier <- sapply(units, switch, B = 1e9, M = 1e6, K = 1e3, USE.NAMES = FALSE)
nulls <- sapply(multiplier, is.null)
multiplier[nulls] <- 1
return(numeric_value * unlist(multiplier))
}
#
parse_resp_display_holding_stock <- function(resp) {
body <- resp |> resp_body_json()
fields <- body$data$results$body$groups[[2]]$groups[[1]]$fields
# header is first element of list
tbl_hdr <- purrr::flatten_chr(purrr::pluck(fields, 1)) |>
janitor::make_clean_names()
tbl_dat <- purrr::map_dfr(fields[-1], ~.x) |>
setNames(tbl_hdr) |>
dplyr::transmute(
ticker,
fund_name,
percent_allocation = readr::parse_number(percent_allocation, na = "--"),
market_value = units_to_integer(market_value),
number_of_shares = units_to_integer(number_of_shares),
percent_performance_30_day = readr::parse_number(x30_day, na = "--"),
segment,
strategy
)
tibble::tibble(
symbol = body$data$results$symbol,
data = list(tbl_dat)
)
}
#
stock_display_holding <- function(x) {
paths <- sprintf('display/holding/%s', x)
req <- request(api_base_url)
purrr::map_dfr(paths, ~{
resp <- req |>
req_url_path_append(.x) |>
req_headers(!!!hdrs) |>
req_url_query(`type` = 'desktopLite') |>
req_perform()
#
parse_resp_display_holding_stock(resp)
})
}
display_holding <- stock_display_holding(c("NVDA","MSFT","JPM","FSLR","GS"))
display_holding
parse_resp_display_fund_summary <- function(resp) {
body <- resp |> resp_body_json()
# find summary data
body_groups <- body$data$results$body$groups[[1]]$groups
idx <- which(purrr::map_lgl(body_groups, ~purrr::has_element(.x, 'fundSummaryData')))
smry <-
purrr::map_dfr(body_groups[[idx]]$fields, ~{
kv <-
lapply(.x, as.character) |>
# https://stackoverflow.com/a/74179086/3987905
tibble::enframe() |>
tidyr::unnest("value", keep_empty = TRUE) |>
tidyr::pivot_wider(names_from = "name", values_from = "value") |>
# extract label and raw value then pivot again
dplyr::select(label, rawValue)
tibble::tibble(
key = kv$label,
value = kv$rawValue
)
}) |>
tidyr::pivot_wider(names_from = 'key', values_from = 'value') |>
janitor::clean_names() |>
dplyr::transmute(
ticker = body$data$results$ticker,
issuer,
inception_date = as.Date(lubridate::ymd_hms(inception_date)),
expense_ratio = readr::parse_number(expense_ratio)/100,
assets_under_management = readr::parse_number(assets_under_management),
aum_billions = assets_under_management / 1e9,
index_tracked,
segment_benchmark,
legal_structure,
fund_home_page
)
#
smry
}
etf_fund_summary <- function(x) {
# get ETF overview
paths <- sprintf('display/fund/%s', x)
purrr::map_dfr(paths, ~{
resp <- req |>
req_url_path_append(.x) |>
req_headers(!!!hdrs) |>
req_url_query(`type` = 'liteDesktopVersion') |>
req_perform()
#
parse_resp_display_fund_summary(resp)
})
}
fund_summary <- etf_fund_summary(c("VOO", "VTI", "MGK", "VONG", "VCR"))
fund_summary
parse_resp_display_fund_general <- function(resp) {
body <- resp |> resp_body_json()
# find general data
header_groups <- body$data$results$header$groups
idx <- which(
purrr::map_lgl(header_groups, ~purrr::has_element(.x, 'generalData'))
)
gnrl <-
purrr::map_dfr(header_groups[[idx]]$fields, ~{
kv <-
lapply(.x, as.character) |>
# https://stackoverflow.com/a/74179086/3987905
tibble::enframe() |>
tidyr::unnest("value", keep_empty = TRUE) |>
tidyr::pivot_wider(names_from = "name", values_from = "value") |>
# extract name and value then pivot again
dplyr::select(name, value)
tibble::tibble(
key = kv$name,
value = kv$value
)
}) |>
tidyr::pivot_wider(names_from = 'key', values_from = 'value') |>
janitor::clean_names() |>
dplyr::transmute(
ticker,
fund,
asset_class,
focus,
niche,
region,
segment,
segment_id,
dividend_yield,
letter_grade,
fit_score,
overall_rating_score,
overall_rating_date = lubridate::mdy(overall_rating_date)
)
#
gnrl
}
etf_fund_general <- function(x) {
# get ETF overview
paths <- sprintf('display/fund/%s', x)
purrr::map_dfr(paths, ~{
resp <- req |>
req_url_path_append(.x) |>
req_headers(!!!hdrs) |>
req_url_query(`type` = 'liteDesktopVersion') |>
req_perform()
#
parse_resp_display_fund_general(resp)
})
}
fund_general <- etf_fund_general(etfs)
fund_general
# EXPLORE ----------------------
etfs <- c("VOO", "VUG", "ESGV", "VTV")
fund_holdings <- etf_fund_holdings(etfs)
fund_holdings
# explore weight matrices
hld <- tidyr::unnest(fund_holdings, 'data') |>
dplyr::distinct(ticker, symbol, weight) |>
dplyr::filter(!is.na(symbol)) |>
tidyr::pivot_wider(names_from = 'ticker', values_from = 'weight') |>
dplyr::mutate(
across(where(is.numeric), ~tidyr::replace_na(.x, 0))
)
# distance matrix
mat <- as.matrix(hld[,-1])
cn <- names(hld)[-1]
rn <- hld$symbol
colnames(mat) <- cn
rownames(mat) <- rn
head(mat,25)
# different dists
round(dist(t(head(mat,100))), 2)
round(dist(t(head(mat,100)), method = 'manhattan'), 2)
# correlation dist
round(as.dist((1 - cor(head(mat,100)))/2) * 100, 2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment