## [web-page notice, not code] This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
## [web-page notice, not code] Learn more about bidirectional Unicode characters
#' Compute the points of an empirical ROC curve
#'
#' Reorders `labels` by decreasing `scores`, then accumulates the true- and
#' false-positive rates one observation at a time.
#'
#' @param labels Logical vector (or 0/1 values); a true value marks a
#'   positive case.
#' @param scores Vector of classifier scores, the same length as `labels`.
#'   Observations are ranked from the highest score to the lowest.
#' @return A data frame with columns `TPR`, `FPR` and `labels`, one row per
#'   observation, in order of decreasing score.
simple_roc <- function(labels, scores) {
  # A length mismatch would otherwise silently drop or mis-pair observations.
  stopifnot(length(labels) == length(scores))
  labels <- labels[order(scores, decreasing = TRUE)]
  data.frame(
    TPR = cumsum(labels) / sum(labels),
    FPR = cumsum(!labels) / sum(!labels),
    labels
  )
}
## [web-page notice, not code] This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
## [web-page notice, not code] Learn more about bidirectional Unicode characters
## Fix the random seed so the simulated data and the test split are repeatable.
set.seed(1)

#' Simulate widget measurements
#'
#' Draws `x` uniformly on [0, 100], sets `y = 122 - x / 2` plus Gaussian noise,
#' and flags a widget as bad when `y` exceeds 100.
#'
#' @param N Number of widgets to simulate.
#' @param noise Standard deviation of the Gaussian noise added to `y`.
#' @return A data frame with columns `x`, `y` and `bad_widget`, where
#'   `bad_widget` is a factor built from `y > 100`.
sim_widget_data <- function(N, noise = 100) {
  # Keep the runif() -> rnorm() order: it fixes the values drawn for a seed.
  x <- runif(N, min = 0, max = 100)
  y <- 122 - x / 2 + rnorm(N, sd = noise)
  bad_widget <- factor(y > 100)
  data.frame(x, y, bad_widget)
}

widget_data <- sim_widget_data(500, 10)

## Reserve a random quarter of the rows (by index) as the test set.
test_set_idx <- sample(
  seq_len(nrow(widget_data)),
  size = floor(nrow(widget_data) / 4)
)
## [web-page notice, not code] This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
## [web-page notice, not code] Learn more about bidirectional Unicode characters
## Fit a logistic regression of bad_widget on x using training_set.
fit_glm <- glm(
  bad_widget ~ x,
  data = training_set,
  family = binomial(link = "logit")
)

## Score test_set twice: on the link scale and on the response scale.
glm_link_scores <- predict(fit_glm, newdata = test_set, type = "link")
glm_response_scores <- predict(fit_glm, newdata = test_set, type = "response")

## Collect both score types next to the observed outcome.
score_data <- data.frame(
  link = glm_link_scores,
  response = glm_response_scores,
  bad_widget = test_set$bad_widget,
  stringsAsFactors = FALSE
)
## [web-page notice, not code] This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
## [web-page notice, not code] Learn more about bidirectional Unicode characters
library(pROC)

## ROC curve drawn by pROC from the response-scale scores.
plot(
  roc(test_set$bad_widget, glm_response_scores, direction = "<"),
  col = "yellow", lwd = 3, main = "The turtle finds its way"
)
##
## Call:
## roc.default(response = test_set$bad_widget, predictor = glm_response_scores, direction = "<")
##
## Data: glm_response_scores in 59 controls (test_set$bad_widget FALSE) < 66 cases (test_set$bad_widget TRUE).
## Area under the curve: 0.9037

## ROC points from simple_roc() using the link-scale scores; comparing the
## outcome with "TRUE" turns it into the logical vector simple_roc() sums over.
glm_simple_roc <- simple_roc(test_set$bad_widget == "TRUE", glm_link_scores)
## [web-page notice, not code] This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
## [web-page notice, not code] Learn more about bidirectional Unicode characters
## Simulate a rare binary outcome: N draws, each TRUE with probability P.
set.seed(1)
N <- 2000
P <- 0.01
rare_success <- sample(c(TRUE, FALSE), N, replace = TRUE, prob = c(P, 1 - P))

## A constant "prediction" that gives every case the same score of 0.
guess_not <- rep(0, N)
## ROC curve for the constant score; print.auc = TRUE is passed through to
## the plot call.
plot(roc(rare_success, guess_not), print.auc = TRUE)
##
## Call:
## roc.default(response = rare_success, predictor = guess_not)
##
## [web-page notice, not code] This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
## [web-page notice, not code] Learn more about bidirectional Unicode characters
## Plot all Starbucks locations using OpenStreetMap
## Credit: http://www.computerworld.com/article/2893271/business-intelligence/5-data-visualizations-in-5-minutes-each-in-5-lines-or-less-of-r.html
library(checkpoint)
checkpoint("2016-08-22")

## Download the location table straight from its URL.
file <- "https://opendata.socrata.com/api/views/ddym-zvjk/rows.csv"
starbucks <- read.csv(file)

library(leaflet)
library(magrittr)

## One marker per row, positioned by the Latitude/Longitude columns and
## labelled with the Name column.
leaflet() %>%
  addTiles() %>%
  setView(-84.3847, 33.7613, zoom = 16) %>%
  addMarkers(
    data = starbucks, lat = ~ Latitude, lng = ~ Longitude,
    popup = starbucks$Name
  )
## [web-page notice, not code] This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
## [web-page notice, not code] Learn more about bidirectional Unicode characters
## Plot last 6 months of ANTM share price
## Credit: http://www.computerworld.com/article/2893271/business-intelligence/5-data-visualizations-in-5-minutes-each-in-5-lines-or-less-of-r.html
library(checkpoint)
checkpoint("2016-08-22")
library(quantmod)

## With auto.assign = TRUE the downloaded series is expected to be available
## as the object ANTM, which the chart call below reads.
getSymbols("ANTM", auto.assign = TRUE)
barChart(ANTM, subset = "last 6 months")
## [web-page notice, not code] This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
## [web-page notice, not code] Learn more about bidirectional Unicode characters
## Plot Atlanta area unemployment
## Credit: http://www.computerworld.com/article/2893271/business-intelligence/5-data-visualizations-in-5-minutes-each-in-5-lines-or-less-of-r.html
library(checkpoint)
checkpoint("2016-08-22")
library(quantmod)

## Fetch the series from FRED; it is then used as the object ATLA013URN.
getSymbols("ATLA013URN", src = "FRED")

## Rename the single data column to "rate".
names(ATLA013URN) <- "rate"

library(dygraphs)
dygraph(ATLA013URN, main = "Atlanta area unemployment")
## [web-page notice, not code] This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
## [web-page notice, not code] Learn more about bidirectional Unicode characters
## Credit: http://www.computerworld.com/article/2893271/business-intelligence/5-data-visualizations-in-5-minutes-each-in-5-lines-or-less-of-r.html
library(checkpoint)
checkpoint("2016-08-22")

## Correlation plot
## Read the test-score table from its URL, keeping text columns as character.
file <- "https://github.com/smach/NICAR15data/raw/master/testscores.csv"
testdata <- read.csv(file, stringsAsFactors = FALSE)
library(ggvis)
ggvis(testdata, ~ pctpoor, ~ score) %>% | |
layer_points(size := input_slider(10, 310, label = "Point size"), opacity := input_slider(0, 1, label = "Point opacity")) %>% |
## [web-page notice, not code] This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
## [web-page notice, not code] Learn more about bidirectional Unicode characters
# Scrapes CRAN archives to determine the number of packages per release | |
# Create a list of pages to scrape, including both archive and current | |
extract_url <- function() { | |
url <- list( | |
archive = "https://cran-archive.r-project.org/bin/windows/contrib/", | |
active = "https://cran.r-project.org/bin/windows/contrib/" | |
) | |
get_urls <- function(url) { |