Skip to content

Instantly share code, notes, and snippets.

View revodavid's full-sized avatar

David Smith revodavid

View GitHub Profile
@revodavid
revodavid / birthdaysim.R
Last active January 24, 2018 23:39
R function to simulate the Birthday paradox
pbirthdaysim <- function(n, nsims=100000, feb29=TRUE) {
## Using nsims simulations, estimate the probability
## that a room of n people includes a shared birthday
bdays <- 1:366
## Feb 29 represented as day 366
## We'll sample other days 4 times as often
## compared to day 366
probs <- c(rep(4,365),1)
library(doAzureParallel)

# `cluster` is not created in this snippet; per the original note, generate a
# credentials JSON file for it first, then register it as the foreach backend.
registerDoAzureParallel(cluster)

# Run 5 million option pricing simulations:
# 50 parallel tasks x 100000 draws each, concatenated into one vector with c().
closingPrices <- foreach(i = seq_len(50), .combine = "c") %dopar% {
  replicate(n = 100000, expr = getClosingPrice())
}
@revodavid
revodavid / holehiders.R
Created June 2, 2017 14:54
Function to detect when a Minecraft player is standing in a hole. (Requires the miner package.)
findMyId <- function() {
# Requires https://github.com/ROpenSciLabs/miner
# dig a hole one block deep and stand in it to be found
# returns the IDs of players standing in a hole
ids <- getPlayerIds()
holehiders <- NULL
for (id in ids) {
pos <- getPlayerPos(id, tile=TRUE)
surround <- getBlocks(pos[1]-1,pos[2],pos[3]-1, pos[1]+1, pos[2], pos[3]+1)[,1,]
inhole <- all(c(surround[1,2],surround[2,1],surround[2,3], surround[3,2])!=0)
@revodavid
revodavid / CRAN_pkg_history.R
Last active May 1, 2021 12:58 — forked from andrie/CRAN_pkg_history.R
Scrapes CRAN for historical number of packages per release
# Scrapes CRAN archives to determine the number of packages per release
# Create a list of pages to scrape, including both archive and current
extract_url <- function() {
url <- list(
archive = "https://cran-archive.r-project.org/bin/windows/contrib/",
active = "https://cran.r-project.org/bin/windows/contrib/"
)
get_urls <- function(url) {
## Credit: http://www.computerworld.com/article/2893271/business-intelligence/5-data-visualizations-in-5-minutes-each-in-5-lines-or-less-of-r.html
library(checkpoint)
checkpoint("2016-08-22")
## Correlation plot
file <- "https://github.com/smach/NICAR15data/raw/master/testscores.csv"
testdata <- read.csv(file, stringsAsFactors = FALSE)
library(ggvis)
ggvis(testdata, ~ pctpoor, ~ score) %>%
layer_points(size := input_slider(10, 310, label = "Point size"), opacity := input_slider(0, 1, label = "Point opacity")) %>%
## Plot Atlanta area unemployment
## Credit: http://www.computerworld.com/article/2893271/business-intelligence/5-data-visualizations-in-5-minutes-each-in-5-lines-or-less-of-r.html
library(checkpoint)
# Call checkpoint() before attaching the other packages so they are loaded
# from the 2016-08-22 snapshot.
checkpoint("2016-08-22")
library(quantmod)
# Ask for auto-assignment explicitly: the next line relies on the series being
# created in the workspace as `ATLA013URN`, so do not depend on the default.
getSymbols("ATLA013URN", src = "FRED", auto.assign = TRUE)
# Rename the data column to "rate" (the name the chart will show for the series).
names(ATLA013URN) <- "rate"
library(dygraphs)
dygraph(ATLA013URN, main = "Atlanta area unemployment")
## Bar chart of the most recent six months of ANTM share prices
## Credit: http://www.computerworld.com/article/2893271/business-intelligence/5-data-visualizations-in-5-minutes-each-in-5-lines-or-less-of-r.html
library(checkpoint)
checkpoint("2016-08-22")

library(quantmod)
# auto.assign = TRUE creates the object `ANTM` in the workspace, which the
# chart call below then reads.
getSymbols(Symbols = "ANTM", auto.assign = TRUE)
barChart(x = ANTM, subset = "last 6 months")
## Map every Starbucks location on OpenStreetMap tiles
## Credit: http://www.computerworld.com/article/2893271/business-intelligence/5-data-visualizations-in-5-minutes-each-in-5-lines-or-less-of-r.html
library(checkpoint)
checkpoint("2016-08-22")

# Download the store table straight from its CSV export URL.
file <- "https://opendata.socrata.com/api/views/ddym-zvjk/rows.csv"
starbucks <- read.csv(file)

library(leaflet)
library(magrittr)

# One marker per row of `starbucks`, with the store name as the popup text.
# The initial view is centred on lng -84.3847, lat 33.7613 at zoom level 16.
leaflet() %>%
  addTiles() %>%
  setView(lng = -84.3847, lat = 33.7613, zoom = 16) %>%
  addMarkers(
    data = starbucks,
    lng = ~ Longitude,
    lat = ~ Latitude,
    popup = starbucks$Name
  )
# roc() comes from pROC; attach it here so the snippet runs on its own.
library(pROC)

# Fix the seed so the simulated outcomes are reproducible.
set.seed(1)
N <- 2000
P <- 0.01
# N simulated outcomes that are TRUE with probability P (a rare event).
rare_success <- sample(c(TRUE, FALSE), N, replace = TRUE, prob = c(P, 1 - P))
# A "predictor" that gives every case the same score of 0.
guess_not <- rep(0, N)
# Plot the ROC curve for the constant predictor and print its AUC on the plot.
plot(roc(rare_success, guess_not), print.auc = TRUE)
##
## Call:
## roc.default(response = rare_success, predictor = guess_not)
##
library(pROC)

# ROC curve for the GLM response scores on the test set.
# direction = "<" tells roc() that controls (bad_widget FALSE) have lower
# scores than cases (bad_widget TRUE).
plot(
  roc(test_set$bad_widget, glm_response_scores, direction = "<"),
  main = "The turtle finds its way",
  col = "yellow",
  lwd = 3
)
##
## Call:
## roc.default(response = test_set$bad_widget, predictor = glm_response_scores, direction = "<")
##
## Data: glm_response_scores in 59 controls (test_set$bad_widget FALSE) < 66 cases (test_set$bad_widget TRUE).
## Area under the curve: 0.9037
# Compute ROC data for the GLM scores with simple_roc(), which is defined
# outside this excerpt.
# The == "TRUE" comparison turns bad_widget into a logical label vector;
# presumably this lets the call work whether the column is stored as logical,
# character, or factor -- TODO confirm against how test_set is built.
glm_simple_roc <- simple_roc(test_set$bad_widget=="TRUE", glm_link_scores)