Skip to content

Instantly share code, notes, and snippets.

# download the state-level baby names archive into a temporary file
tf <- tempfile()
download.file(
	"https://www.ssa.gov/oact/babynames/state/namesbystate.zip" ,
	destfile = tf ,
	mode = 'wb'
)

# extract the archive into the session's temporary directory, keeping the extracted file paths
z <- unzip( zipfile = tf , exdir = tempdir() )

# keep only the extracted files whose names end in .TXT
state_files <- z[ grepl( "\\.TXT$" , z ) ]

# read each of those headerless, comma-separated files into its own data.frame
state_list <-
	lapply(
		state_files ,
		function( this_file ) read.table( this_file , header = FALSE , sep = ',' , stringsAsFactors = FALSE )
	)

# stack every file into a single data.frame and label its five columns
w <- do.call( rbind , state_list )
names( w ) <- c( 'stateab' , 'sex' , 'year' , 'name' , 'count' )

# largest count within each year x state x sex cell, as a three-dimensional array
maxes <- tapply( w$count , list( w$year , w$stateab , w$sex ) , max )

# reshape that array into one row per cell
max_df <- reshape2::melt( maxes )
names( max_df ) <- c( 'year' , 'stateab' , 'sex' , 'count' )
library(rvest)
# address of the search results that every page request is built from
baseurl <- "https://washingtondc.craigslist.org/search/doc/jjj"

# read each page of results: the `s` query parameter runs from 0 up to 1469 in steps of 120
raw_query_list <-
	lapply(
		paste0( baseurl , "?s=" , seq( from = 0 , to = 1469 , by = 120 ) ) ,
		function( this_url ) xml2::read_html( this_url )
	)
@ajdamico
ajdamico / download all files from an FTP site.R
Created September 19, 2011 22:45
download fifty years of National Health Interview Survey documentation PDFs
# install RCurl on your version of R if you don't already have it
# (just run this once)
# install.packages("RCurl")
# program start
# load the RCurl package
library(RCurl)
# set your output folder - this is where the pdfs will get saved
# NOTE(review): hard-coded path - setwd() stops with an error if this folder does not exist on the current machine
setwd("R:/National Health Interview Survey/documentation")
@ajdamico
ajdamico / download the medical expenditure panel survey.R
Created November 9, 2010 19:36
download all publicly-available data files for every year of the Medical Expenditure Panel Survey
# load the RCurl package
library(RCurl)
# make the MEPS data folder the working directory
# NOTE(review): hard-coded path - setwd() stops with an error if this folder does not exist on the current machine
setwd("R:\\Medical Expenditure Panel Survey\\Data")
# input all available MEPS public use file numbers
# every vector below has one entry per element of `year`, in the same order
# NOTE(review): NA presumably marks a year whose file number was not yet available - confirm

# survey years covered
year <- 1996:2008

# numeric file numbers
consolidated <- c( 12 , 20 , 28 , 38 , 50 , 60 , 70 , 79 , 89 , 97 , 105 , 113 , NA )

# file numbers stored as character strings, because the first entry is not purely numeric
conditions <- c( "06r" , "18" , "27" , "37" , "52" , "61" , "69" , "78" , "87" , "96" , "104" , "112" , NA )

# file numbers stored as character strings, because the first entry carries a leading zero
jobs <- c( "07" , "19" , "25" , "32" , "40" , "56" , "63" , "74" , "83" , "91" , "100" , "108" , "116" )

# numeric file numbers (the value 47 is repeated for four consecutive years)
prpf <- c( 24 , 47 , 47 , 47 , 47 , 57 , 66 , 76 , 88 , 95 , 103 , 111 , 119 )
@ajdamico
ajdamico / NHIS download matrix.R
Created September 30, 2011 14:38
download every file for every year of the National Health Interview Survey and convert them all to csv and stata files
# load necessary libraries
library(stringr)
library(foreign)
library(survey)
library(RCurl)
# set the temporary directory to download all files to!
# NOTE(review): hard-coded path - setwd() stops with an error if this folder does not exist on the current machine
setwd("s:/temp")
@ajdamico
ajdamico / survey design subsetting.R
Created February 28, 2019 17:11
incorrect standard errors when not including the full design information
library(survey)
# load the api example datasets
data( api )

# keep only the stratified-sample records where comp.imp is 'Yes'
# note that this subsetting happens before any survey design has been declared
x <- subset( apistrat , comp.imp == 'Yes' )

# declare a stratified design on the already-subsetted records
dstrat_before <-
	svydesign(
		id = ~ 1 ,
		strata = ~ stype ,
		weights = ~ pw ,
		data = x ,
		fpc = ~ fpc
	)
@ajdamico
ajdamico / machine learning foundations
Created May 26, 2023 22:47
a hands-on introduction taught by william franz lamberti
# support vector machines #

# load the quakes dataset
data( quakes )

# plot every column of quakes, coloring each point by its magnitude rounded to a whole number
plot( quakes , col = as.factor( round( quakes[ , 'mag' ] ) ) )

# working copy of quakes in which magnitude is a factor of rounded values rather than a number
this_df <- quakes
this_df$mag <- as.factor( round( quakes$mag ) )
library(e1071)
@ajdamico
ajdamico / convey flowchart.mmd
Created September 13, 2023 17:25
convey flowchart
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@ajdamico
ajdamico / using R to replicate the NHIS Multiple Imputation technique.R
Created February 12, 2012 00:13
replicate the national health interview survey's multiply imputed income technique using R instead of SUDAAN
#page 118 of the NHIS document
#ftp://ftp.cdc.gov/pub/health_statistics/nchs/dataset_documentation/nhis/2010/srvydesc.pdf
#displays the R code to load the persons file into R as a survey object
#the code below creates a slightly different survey object, one that includes appropriately-imputed income.
#this R code:
# reads the year 2000 personsx file into R
# reads in all five imputed income files
# merges the 2000 personsx file with the five imputed income files