Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@ajdamico
ajdamico / convey flowchart.mmd
Created September 13, 2023 17:25
convey flowchart
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@ajdamico
ajdamico / machine learning foundations
Created May 26, 2023 22:47
a hands-on introduction taught by william franz lamberti
# support vector machines #
# load the built-in quakes data and plot it, coloring points by magnitude
# rounded to the nearest whole number
data(quakes)
plot( quakes , col = with( quakes , as.factor( round( mag ) ) ) )

# working copy in which `mag` is the rounded magnitude stored as a factor
this_df <- quakes
this_df$mag <- as.factor( round( this_df$mag ) )
library(e1071)
library(rvest)
# root url of the search results being read
baseurl <- "https://washingtondc.craigslist.org/search/doc/jjj"

# read one parsed html page per value of the `s` query parameter,
# stepping from 0 up to (at most) 1469 in increments of 120
raw_query_list <-
  lapply(
    seq( 0 , 1469 , by = 120 ) ,
    function( this_offset ) xml2::read_html( paste0( baseurl , "?s=" , this_offset ) )
  )
# download the zipped archive to a temporary file and extract it
# into the session's temporary directory
tf <- tempfile()
download.file( "https://www.ssa.gov/oact/babynames/state/namesbystate.zip" , tf , mode = 'wb' )
z <- unzip( tf , exdir = tempdir() )

# keep only the extracted files whose names end in .TXT
state_files <- z[ grepl( "\\.TXT$" , z ) ]

# read each file as a headerless comma-separated table
state_list <-
  lapply(
    state_files ,
    function( this_file ) read.table( this_file , header = FALSE , sep = ',' , stringsAsFactors = FALSE )
  )

# stack every file into a single data.frame and label its five columns
w <-
  setNames(
    do.call( rbind , state_list ) ,
    c( 'stateab' , 'sex' , 'year' , 'name' , 'count' )
  )

# largest single-name count within each year x state x sex cell
maxes <- tapply( w$count , list( w$year , w$stateab , w$sex ) , max )

# flatten the three-dimensional array into one row per cell
max_df <-
  setNames(
    reshape2::melt( maxes ) ,
    c( 'year' , 'stateab' , 'sex' , 'count' )
  )
@ajdamico
ajdamico / survey design subsetting.R
Created February 28, 2019 17:11
incorrect standard errors when not including the full design information
library(survey)
# load the `api` example data; presumably this is what provides `apistrat` below
data(api)
# work on a copy named `x`
x <- apistrat
# restrict the data to rows where comp.imp equals 'Yes' *before* any design object exists
x <- subset( x , comp.imp == 'Yes' )
# NOTE(review): this design is constructed from the already-subsetted data.frame,
# so it never sees the dropped rows.  presumably this is the "before" case that the
# gist description calls incorrect -- confirm against the rest of the script
dstrat_before<-svydesign(id=~1,strata=~stype, weights=~pw, data=x, fpc=~fpc)
library(rvest)
# two temporary file paths: `tf` receives the .bz2 download,
# `tf2` receives the decompressed output
tf <- tempfile()
tf2 <- tempfile()
# download and unzip
# mode = 'wb' so the compressed file is written in binary mode
download.file( "https://dumps.wikimedia.org/other/pagecounts-ez/merged/2018/2018-05/pagecounts-2018-05-27.bz2" , tf , mode = 'wb' )
# decompress `tf` into `tf2`; remove = FALSE presumably leaves the
# compressed input in place -- confirm against the R.utils documentation
R.utils::bunzip2( tf , tf2 , remove = FALSE )
# import and restrict to english
library(survey)
library(lodown)
# build the "cpsasec" catalog, with the session's temporary directory as output_dir
cpsasec_cat <- get_catalog( "cpsasec" , output_dir = tempdir() )
# pass only the catalog rows where year equals 2017 to lodown()
# (presumably this downloads and imports those files -- confirm against the lodown documentation)
lodown( "cpsasec" , subset( cpsasec_cat , year == 2017 ) )
plot(
c( 15 , 75 ) ,
x <-
structure(list(name = c("George Washington", "John Adams", "Thomas Jefferson",
"James Madison", "James Monroe", "John Quincy Adams", "Andrew Jackson",
"Martin Van Buren", "William Henry Harrison", "John Tyler", "James K. Polk",
"Zachary Taylor", "Millard Fillmore", "Franklin Pierce", "James Buchanan",
"Abraham Lincoln", "Andrew Johnson", "Ulysses S. Grant", "Rutherford B. Hayes",
"James A. Garfield", "Chester Arthur", "Grover Cleveland", "Benjamin Harrison",
"William McKinley", "Theodore Roosevelt", "William Howard Taft",
"Woodrow Wilson", "Warren G. Harding", "Calvin Coolidge", "Herbert Hoover",
@ajdamico
ajdamico / top 25 words weighted by download.R
Created March 11, 2018 18:08
twenty-five most common words in CRAN title + description fields, weighted by downloads
# devtools::install_github( "ajdamico/lodown" )
library(tm)
library(tidyverse)
library(rvest)
# read and parse the cran-logs landing page
cranlogs_html <- read_html( "http://cran-logs.rstudio.com/" )
# pull the href attribute from every <a> node on that page
# NOTE(review): no filtering happens here, so this holds every link on the page,
# not only .gz files -- confirm whether later code narrows it down
gz_files <- html_attr( html_nodes( cranlogs_html , "a" ) , "href" )