Skip to content

Instantly share code, notes, and snippets.

@erikgregorywebb
Created December 19, 2022 06:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erikgregorywebb/233c355e4f7c8ea8ba1c17b00bcf3c87 to your computer and use it in GitHub Desktop.
Save erikgregorywebb/233c355e4f7c8ea8ba1c17b00bcf3c87 to your computer and use it in GitHub Desktop.
# import libraries
library(aws.s3)
library(tidyverse)
library(jsonlite)
library(stringr)
library(ggbump)
library(cowplot)
# configure AWS credentials and region for this R session
# (the credential values below are placeholders; the region is us-east-1)
Sys.setenv(
  AWS_ACCESS_KEY_ID = "AWS_ACCESS_KEY_ID",
  AWS_SECRET_ACCESS_KEY = "AWS_SECRET_ACCESS_KEY",
  AWS_DEFAULT_REGION = "us-east-1"
)
# get all files within specific directory of bucket
all_files <- get_bucket("egw-data-dumps", prefix = "apple-app-store")

# extract the object key of every entry in the listing
# (the original loop read from an undefined `all_objects`; the listing is
# stored in `all_files`)
keys <- vapply(
  seq_along(all_files),
  function(i) all_files[[i]][["Key"]],
  character(1)
)

# the first listing entry is skipped, exactly as the original loop (which
# started at index 2) did
# NOTE(review): presumably the first entry is the prefix "folder" placeholder
# rather than a data file -- confirm against the bucket contents
keys <- keys[-1]

# build full s3:// URIs; guard the empty case because paste0() on a
# zero-length vector would otherwise yield a single bare-prefix path
paths <- if (length(keys) > 0) {
  paste0("s3://egw-data-dumps/", keys)
} else {
  character(0)
}
# get objects, compile dataframe
# one slot per path is preallocated instead of growing the list in the loop
datalist <- vector("list", length(paths))
# seq_along() makes the loop a no-op when `paths` is empty
# (1:length(paths) would iterate over c(1, 0))
for (i in seq_along(paths)) {
  print(paths[i])
  # get_object() returns the object's raw bytes; convert to text for read_csv()
  obj <- get_object(paths[i])
  # remember which file each row came from
  datalist[[i]] <- read_csv(rawToChar(obj)) %>% mutate(path = paths[i])
}
# stack all per-file tibbles; bind_rows() matches columns by name and returns
# an empty tibble (not NULL) when there is nothing to combine
raw <- bind_rows(datalist)
# clean things up a bit: derive rank, app and developer from the chart label
# NOTE(review): the parsing appears to assume labels shaped like
# "<rank>. <app>. <developer>." -- confirm against the scraped data
chart_history <- raw %>%
  mutate(
    # last path component of the link (computed here, dropped by select below)
    app_id = basename(link),
    # first run of digits found in the label
    rank = as.numeric(str_extract(label, "[0-9]+")),
    # strip the first two "<text>. " segments, then one trailing period
    developer = sub(
      "\\.$", "",
      str_remove(str_remove(label, "([^.]+?)\\. "), "([^.]+?)\\. ")
    ),
    # remove the developer text from the label, then the leading "<text>. "
    # segment, trim whitespace and drop one trailing period.
    # fixed() matches the developer name literally: used as a regex, names
    # containing metacharacters (parentheses, "+", ".") could mis-match or
    # raise a pattern error
    app = sub(
      "\\.$", "",
      trimws(str_remove(str_remove(label, fixed(developer)), "([^.]+?)\\. "))
    )
  ) %>%
  select(rank, app, developer, link, scraped_at, label, path)
# check things out (interactive inspection; each call opens a data viewer)
# every row recorded at rank 26
chart_history %>%
  filter(rank == 26) %>%
  View()
# how often each distinct label occurs, most frequent first
# (count(label, ...) gives the same table without leaving it grouped)
chart_history %>%
  count(label, sort = TRUE) %>%
  View()
# rows whose parsed developer is exactly 'Crypto'
chart_history %>%
  filter(developer == 'Crypto') %>%
  View()
# bump chart of rank over scrape time, one colour per app
# geom_bump() comes from ggbump: https://github.com/davidsjoberg/ggbump
rank_threshold <- 10  # only referenced by the commented-out filters/labels below
chart_history %>%
  #filter(rank <= rank_threshold) %>%
  #filter(developer == 'Daily Pay Inc.') %>%
  #filter(developer %in% c('Binance LTD', 'Coinbase Wallet', 'Crypto.Crypto.com')) %>%
  filter(developer %in% c('Albert Corporation', 'Daily Pay Inc.')) %>%
  ggplot(aes(x = scraped_at, y = rank, color = app)) +
  geom_point(size = 2) +
  #geom_text(data = chart_history %>% filter(scraped_at == min(scraped_at) & rank <= rank_threshold), aes(x = scraped_at - .1, label = developer), size = 3, hjust = 1) +
  #geom_text(data = chart_history %>% filter(scraped_at == max(scraped_at) & rank <= rank_threshold), aes(x = scraped_at + .1, label = developer), size = 3, hjust = 0) +
  geom_bump(size = 1, smooth = 15) +
  # reversed y axis so that rank 1 is drawn at the top
  scale_y_reverse() +
  theme_minimal_grid(font_size = 14, line_size = 0) +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank()
  )

# print the cleaned table
chart_history
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment