Skip to content

Instantly share code, notes, and snippets.

View smc-dta's full-sized avatar

Shannon Callan smc-dta

View GitHub Profile
airline voluntary_denied involuntary_denied enplaned_ct involuntary_db_per_10k year
Hawaiian Airlines 326 49 10824495 0.05 2016
Delta Air Lines 129825 1238 129281098 0.1 2016
Virgin America 2375 94 7945329 0.12 2016
Alaska Airlines 6806 931 23390900 0.4 2016
United Airlines 62895 3765 86836527 0.43 2016
Spirit Airlines 10444 1117 19418650 0.58 2016
Frontier Airlines 2096 851 14666332 0.58 2016
American Airlines 54259 8312 130894653 0.64 2016
Jetblue Airways 1705 3176 34710003 0.92 2016
# Set-up for the debate-transcript analysis: optionally install the required
# packages, attach them, then download the transcript text.

# NOTE(review): wiping the global environment from inside a script is
# discouraged because it silently deletes the caller's objects; retained here
# so the script's existing side effects stay unchanged.
rm(list = ls())

doInstall <- TRUE # Change to FALSE if you don't want packages installed.
toInstall <- c("zoo", "tm", "ggplot2", "Snowball")
if (doInstall) {
  # Install only what is missing, and fetch from CRAN over HTTPS rather than
  # plain HTTP.
  notInstalled <- setdiff(toInstall, rownames(installed.packages()))
  if (length(notInstalled) > 0) {
    install.packages(notInstalled, repos = "https://cran.r-project.org")
  }
}
# Attach every package; invisible() keeps lapply()'s return value from being
# echoed to the console when the script is run at top level.
invisible(lapply(toInstall, library, character.only = TRUE))

# From: http://www.cnn.com/2012/10/03/politics/debate-transcript/index.html
Transcript <- readLines("https://raw.github.com/dsparks/Test_image/master/Denver_Debate_Transcript.txt")
# Preview the first 20 lines of the downloaded transcript.
head(Transcript, 20)
# install.packages(c("rjson", "RCurl"))
library(RCurl)
library(rjson)
get_User_subs_page = function(user, after=NULL, cache=c()){
baseurl = "http://www.reddit.com/user/"
params = "limit=100"
if(!is.null(after)){
params = paste(params, "&after=",after, sep="")
# Code to fetch news streams from 5 live sources, process the streams and text
# and apply a simple sentiment scoring algorithm.
#
# A writeup of the analysis can be found here:
# https://www.linkedin.com/pulse/article/20141109035942-34768479-r-sentiment-scoring-hsbc-w-harvard-general-inquirer
# Define the packages we want to load:
packs = c(
"tm", # Text mining
"tm.plugin.webmining", # Web-source plugin for text mining
set.seed(1234)
dat <- data.frame(
product=c(rep("Mobile",2),
rep(c("Smartphone", "Mobile"),3),
rep(c("Video rental shop"),3),
rep(c("Video rental shop",
"Online movie rental"),2)),
year=c(2008:2010, 2010, 2011, 2011, 2012, 2012,
2008:2010, 2011, 2011, 2012, 2012),
category=c(rep("Phone", 8), rep("Film", 7)),