Skip to content

Instantly share code, notes, and snippets.

@ryanrosenberg
Created March 25, 2018 21:44
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ryanrosenberg/f0b02e6fadb34faeed6c6e248d90a831 to your computer and use it in GitHub Desktop.
Save ryanrosenberg/f0b02e6fadb34faeed6c6e248d90a831 to your computer and use it in GitHub Desktop.
library(tidyverse)
#### workspace ####
# Deliberately no `rm(list = ls())` here: sourcing this script would silently
# delete every object in the caller's global environment, while still leaving
# attached packages and options untouched, so it never gave a truly clean
# session anyway. Every object this script reads is assigned by the script
# itself before use, so nothing below relies on an empty workspace.
# Restart R if a genuinely fresh session is needed.
#### read in the html file and clean up the data ####
# Read the export line by line and apply the substitutions below to every
# line, strictly in the order listed. Order matters: the tag patterns are
# written in terms of the "__" (space) and "##" (quote) placeholders that the
# first three rules introduce, and the "#>>#" markers they emit are the field
# delimiters used by the splitting step further down.
msgs <- Reduce(
  function(text, rule) gsub(rule[1], rule[2], text),
  list(
    # encode spaces and both quote styles as placeholders
    c(" ", "__"),
    c("\"", "##"),
    c("\'", "##"),
    # turn the structural tags into "#>>#"-prefixed field markers
    c("<div__class=##message_header##>", "#>>#gosshead"),
    c("<div__class=##message##>", "#>>#newgoss"),
    c("<span__class=##user##>", "#>>#gosser"),
    c("<span__class=##meta##>", "#>>#gossdate"),
    c("<ul__class=##meta##>", "#>>#gossreact"),
    # drop the remaining plain wrapper tags
    c("</div>", ""),
    c("<div>", ""),
    c("</p>", ""),
    c("<p>", ""),
    c("</span>", ""),
    c("<span>", ""),
    # decode the two HTML entities handled by this script
    c("&#064;", "@"),
    c("&#039;", "'"),
    # remove any whitespace still present (plain spaces are already "__")
    c("\\s", ""),
    # put a delimiter after the time-zone abbreviation
    # NOTE(review): only "CDT" is matched; a line carrying a different zone
    # abbreviation gets no delimiter here -- confirm against the export.
    c("CDT", "CDT#>>#")
  ),
  readLines("~/Documents/gosschat.html")
)
# Cut the cleaned text at every "#>>#newgoss#>>#gosshead" boundary, flatten the
# per-line results into one list of chunks, then cut each chunk at "#>>#".
# The result is a list with one element per chunk, each itself a list of
# single-string fields.
msg_split <- map(
  flatten(str_split(msgs, "#>>#newgoss#>>#gosshead")),
  function(chunk) flatten(str_split(chunk, "#>>#"))
)
# Regroup the per-chunk field lists by field position, keep positions 2 to 4,
# and name them gosser / gossdatetime / goss before converting to a tibble.
# `as_tibble()` replaces `as.tibble()`, which is deprecated since tibble 2.0.0
# and emits a deprecation warning; the resulting tibble is the same.
msg_reformat <- transpose(msg_split)[2:4] %>%
  set_names("gosser", "gossdatetime", "goss") %>%
  as_tibble()
# Strip the field-marker prefixes and turn the space placeholder back into
# real spaces.
# The cleaning step encodes every space as a double underscore ("__"), so the
# decoding must replace "__", not "_". Replacing single underscores (as this
# block previously did for gossdatetime and goss) produced two spaces for every
# original space and also destroyed literal underscores in message text.
# All three columns now use the same "__" -> " " rule.
msg_reformat_clean <- msg_reformat %>%
  mutate(
    gosser = gsub("gosser", "", gosser),
    gosser = gsub("__", " ", gosser),
    gossdatetime = gsub("__", " ", gossdatetime),
    gossdatetime = gsub("gossdate", "", gossdatetime),
    goss = gsub("__", " ", goss)
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment