Skip to content

Instantly share code, notes, and snippets.

@AlbertRapp
Last active May 12, 2022 23:49
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AlbertRapp/37a2e0993acea9b4e36400037b797391 to your computer and use it in GitHub Desktop.
Save AlbertRapp/37a2e0993acea9b4e36400037b797391 to your computer and use it in GitHub Desktop.
Get Dataviz from Twitter into your note-taking system using R
### Change these parts here ----------------------------------------------------
# NOTE: every stop() call in this section is a deliberate placeholder. Each one
# raises an error, so the script cannot run until the call is replaced with a
# real value (or, for the bare stop() below, removed).
# Set to your working directory where there is a template.md file
setwd(here::here())
# Locations
vault_location <- stop('Add Vault Location') # Location of markdown files
attachments_dir <- stop('Add Vault Subdirectory Location') # subdirectory of vault_location for png-files
imap_mail <- stop('Set imap mail client') # mail client
### Sensitive Information
# Remove this stop() once the keyring entries below exist.
stop('Set sensitive mail information')
# Secrets are looked up by entry name via keyring::key_get(); presumably they
# were stored beforehand (e.g. with keyring::key_set()) - confirm on your setup.
bearer_token <- keyring::key_get('twitter-bearer-token')
user_mail <- keyring::key_get('dataviz-mail')
password_mail <- keyring::key_get('dataviz-mail-password')
# Used further down as the search term(s) for the FROM field of inbox mails.
allowed_senders <- keyring::key_get('allowed-senders')
### Dependencies -----------------------------------------------------------
library(purrr) # for pluck and map functions
library(stringr) # for regex matching
library(readr) # for reading and writing files from/to disk
library(tibble) # for easier readable tribble creation
library(tidyr) # for unnesting
library(dplyr) # for binding rows and pipe
library(httr) # for API communication
library(mRpostman) # for email communication
library(base64enc) # for decoding mails from Android
library(rvest) # for decoding mails from Android
### Helper functions -----------------------------------------------------------
# Request a tweet from the Twitter API v2 and download its photos.
#
# Arguments:
#   tweet_url    - URL of a single tweet; must contain "status/<numeric id>".
#   bearer_token - API bearer token (without the leading "Bearer ").
#
# Side effect: every photo attached to the tweet is downloaded into the current
# working directory as "tweet_<user>_<timestamp>_<i>.png".
#
# Returns a list with elements url, text, user, file_names and file_paths.
# Stops with an explicit message if the URL has no tweet id, the HTTP request
# fails, the tweet cannot be found, or the tweet has no photo attached.
request_twitter_data <- function(tweet_url, bearer_token) {
  # Extract tweet id by regex
  tweet_id <- str_match(tweet_url, "status/([0-9]+)")[, 2]
  if (length(tweet_id) != 1 || is.na(tweet_id)) {
    stop("Could not extract a tweet id from URL: ", tweet_url, call. = FALSE)
  }

  auth <- paste("Bearer", bearer_token) # API needs format "Bearer <my_token>"
  API_url <- 'https://api.twitter.com/2/tweets'

  # Make request to API; fail loudly on HTTP errors instead of parsing an
  # error payload as if it were tweet data
  response <- GET(
    API_url,
    add_headers(Authorization = auth),
    query = list(
      ids = tweet_id,
      tweet.fields = 'created_at', # time stamp
      # necessary expansion fields for img_url
      expansions = 'attachments.media_keys,author_id',
      media.fields = 'url' # img_url
    )
  )
  stop_for_status(response, task = "request tweet data from the Twitter API")
  parsed_request <- content(response, 'parsed')

  # Extract necessary information from list-like structure
  tweet_text <- pluck(parsed_request, "data", 1, 'text')
  tweet_user <- pluck(parsed_request, "includes", 'users', 1, 'username')
  created_at <- pluck(parsed_request, "data", 1, 'created_at')
  if (is.null(tweet_text) || is.null(tweet_user) || is.null(created_at)) {
    stop("Twitter API returned no data for tweet: ", tweet_url, call. = FALSE)
  }

  # Make file name unique through time-date combination.
  # An explicit format keeps the time part even at exactly midnight and yields
  # a name without white spaces or colons.
  tweet_date <- created_at %>%
    lubridate::as_datetime() %>%
    format(format = "%Y-%m-%d_%H%M%S", tz = "UTC")

  # Collect photo URLs; tweets without media have no `type`/`url` columns
  media <- parsed_request %>%
    pluck("includes", 'media') %>%
    bind_rows()
  if (all(c("type", "url") %in% names(media))) {
    img_urls <- media %>%
      filter(type == 'photo') %>%
      pull(url)
  } else {
    img_urls <- character(0)
  }
  if (length(img_urls) == 0) {
    stop("Tweet has no photo attached: ", tweet_url, call. = FALSE)
  }

  # Download image - set mode otherwise download is blurry
  img_names <- paste(
    'tweet', tweet_user, tweet_date, seq_along(img_urls),
    sep = "_"
  )
  file_names <- paste0(img_names, '.png')
  walk2(img_urls, file_names, ~ download.file(.x, .y, mode = 'wb'))

  # Return list with information
  list(
    url = tweet_url,
    text = tweet_text,
    user = tweet_user,
    file_names = file_names,
    file_paths = paste0(getwd(), '/', file_names)
  )
}
# Build the markdown embed block for a tweet's images: each file name is
# wrapped as ![[name]] and the embeds are joined with newlines into one string.
md_import_strings <- function(file_names) {
  embeds <- paste0("![[", file_names, "]]")
  paste(embeds, collapse = "\n")
}
# Replace the placeholders in the template
# (the original-mail placeholder is not handled here; it is filled in later)
#
# Arguments:
#   template_name - path of the markdown template file.
#   request       - list with elements file_names, text and url.
#
# Returns the filled-in template as a single string (lines joined by "\n").
replace_template_placeholder <- function(template_name, request) {
  # Placeholder -> replacement. Placeholders are matched literally via fixed(),
  # which also makes the replacement literal: backslashes or "\1" inside a
  # tweet are inserted verbatim instead of being read as regex back-references.
  replacements <- list(
    '![[insert_img_name_here]]' = md_import_strings(request$file_names),
    # '#' is wrapped in parentheses in the inserted tweet text
    'insert_text_here' = str_replace_all(request$text, '#', '(#)'),
    'insert_URL_here' = request$url
  )

  # Apply the replacements one after another and keep only the final result
  filled_lines <- reduce2(
    .x = names(replacements),
    .y = replacements,
    .f = function(lines, placeholder, value) {
      str_replace_all(lines, fixed(placeholder), value)
    },
    .init = read_lines(template_name)
  )

  paste0(filled_lines, collapse = '\n') # Collapse lines into a single string
}
# Write the filled-in template to a markdown file in the current working
# directory.
#
# The file name is derived from the first image name of the request by
# swapping the trailing "_1.png" for ".md".
#
# Returns the full path of the new file, which is needed to move it later on.
write_replaced_text <- function(replaced_text, request) {
  # Use the exact element name (no reliance on `$` partial matching) and an
  # escaped, end-anchored pattern so only the real "_1.png" suffix is replaced.
  file_name <- str_replace(request$file_names[1], '_1\\.png$', '.md')
  write_lines(replaced_text, file_name)
  file.path(getwd(), file_name)
}
# Move the downloaded images and the generated markdown note into the vault.
#
# Arguments:
#   request           - list whose file_paths element holds the image paths.
#   replaced_template - path of the generated markdown file.
#   vault_location    - destination directory for the markdown file.
#   attachments_dir   - subdirectory of vault_location for the images.
#
# Files are copied first and a source file is deleted only after its copy
# succeeded. If any copy fails the function stops, so nothing is lost and the
# caller does not continue as if the files had been moved.
# Returns the source paths invisibly.
move_files <- function(request, replaced_template, vault_location, attachments_dir) {
  image_paths <- request$file_paths

  # From-to dictionary with one row per file
  move_dict <- tibble(
    from = c(image_paths, replaced_template),
    to = c(
      rep(paste0(vault_location, '/', attachments_dir), length(image_paths)),
      vault_location
    )
  )

  # Copy files from current working directory to destination
  copied <- map2_lgl(move_dict$from, move_dict$to, file.copy, overwrite = TRUE)

  # Delete only those files in the working directory that were copied
  walk(move_dict$from[copied], file.remove)

  if (!all(copied)) {
    stop(
      "Could not copy file(s) to the vault: ",
      paste(move_dict$from[!copied], collapse = ", "),
      call. = FALSE
    )
  }

  invisible(move_dict$from)
}
# Extract and decode the base64 part of each encoded mail body.
# Takes the encoded mail bodies and returns a list with one decoded text per
# mail (base64 -> raw -> character -> parsed HTML -> text).
decode_encoded_mails <- function(encoded_mails) {
# Resource: https://stackoverflow.com/questions/71772972/translate-encoding-of-android-mail-in-r
# Find location in each encoded string where actual text starts.
# pluck(., 4) takes the 4th entry of the start/end location matrix of each
# mail; with exactly two 'base64\r\n\r\n' matches that is the end position of
# the second match, so + 1 is the first character after it.
# NOTE(review): presumably each mail carries two base64 parts and the second
# one is the HTML part - confirm against real mails.
start_encoded <- encoded_mails %>%
str_locate_all('base64\r\n\r\n') %>%
map(~pluck(., 4) + 1) %>%
unlist()
# Find location in each encoded string where actual text ends.
# pluck(., 3) takes the 3rd entry of the '----' location matrix; with three or
# more matches that is the start of the third '----', so - 1 is the last
# character before it.
# NOTE(review): this depends on how many '----' separators precede the encoded
# part - confirm against real mails.
end_encoded <- encoded_mails %>%
str_locate_all('----') %>%
map(~pluck(., 3) - 1)%>%
unlist()
# Use str_sub() to extract encoded text
# (the column names string/start/end match the argument names of str_sub)
encoded_text <- tibble(
string = unlist(encoded_mails),
start = start_encoded,
end = end_encoded
) %>%
pmap(str_sub)
# Decode: base64 -> raw -> char -> html -> text
encoded_text %>%
map(base64enc::base64decode) %>%
map(rawToChar) %>%
map(rvest::read_html) %>%
map(rvest::html_text2)
}
# Decode those mail bodies that are base64 encoded and pass the others through.
#
# mail_bodys - list (or vector) of fetched mail texts.
#
# Returns a list of the same length and in the SAME ORDER as mail_bodys, so
# the result stays aligned with the message ids the caller indexes in
# parallel. (Concatenating decoded and plain mails would put all decoded
# mails first and break that alignment.)
decode_all_mails <- function(mail_bodys) {
  # A mail counts as encoded if it carries a transfer-encoding header;
  # `%in% TRUE` turns a possible NA into FALSE.
  is_encoded <- str_detect(mail_bodys, 'Content-Transfer-Encoding') %in% TRUE

  decoded <- as.list(mail_bodys)
  if (any(is_encoded)) {
    # Overwrite encoded entries in place to preserve the original positions
    decoded[is_encoded] <- decode_encoded_mails(mail_bodys[is_encoded])
  }
  decoded
}
### Actual script -----------------------------------------------------------
# Open the connection to the imap server with the configured credentials
con <- configure_imap(url = imap_mail, user = user_mail, password = password_mail)
# Work on the inbox folder
con$select_folder('Inbox')
# Collect the ids of all mails whose FROM field matches an allowed sender.
# Senders without any mail yield NA, which is dropped; as.numeric() strips the
# attributes that na.omit() leaves behind.
mails <- as.numeric(
  na.omit(
    unlist(
      map(
        allowed_senders,
        function(sender) con$search_string(expr = sender, where = 'FROM')
      )
    )
  )
)
# If a new mail is detected, begin processing
if (!is_empty(mails)) {
  # Grab mail texts and URLs
  mail_bodys <- mails %>% con$fetch_text() %>% decode_all_mails()
  urls <- mail_bodys %>% str_extract('https.*')

  # Remove mails from vector in case sth goes wrong and urls cannot be detected
  has_url <- !is.na(urls)
  mail_bodys <- mail_bodys[has_url]
  mails <- mails[has_url]
  urls <- urls[has_url]

  # Only continue if at least one mail contained a URL; otherwise there is
  # nothing to write and no message to move on the server
  if (length(urls) > 0) {
    # For each url request twitter data
    requests <- map(urls, request_twitter_data, bearer_token = bearer_token)

    # Use requested twitter data to insert texts into Markdown template and
    # write to current working directory.
    # - template_name is spelled out instead of relying on partial argument
    #   matching of `template`.
    # - fixed() inserts the mail text literally, so backslashes in a mail are
    #   not interpreted as regex back-references.
    replaced_templates <-
      map(requests, replace_template_placeholder, template_name = 'template.md') %>%
      map2(mail_bodys, ~ str_replace(.x, fixed('insert_mail_here'), .y)) %>%
      map2(requests, ~ write_replaced_text(.x, .y))

    # Move markdown files and extracted pngs to correct place on HDD
    walk2(
      requests,
      replaced_templates,
      move_files,
      vault_location = vault_location,
      attachments_dir = attachments_dir
    )

    # Move emails on imap server to Processed directory
    con$move_msg(mails, to_folder = 'Processed')
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment