Skip to content

Instantly share code, notes, and snippets.

View district of columbia craigslist job postings by day in march.R
library(rvest)
# Search URL that every results page is requested from.
baseurl <- "https://washingtondc.craigslist.org/search/doc/jjj"

# Request one page per value of the `s` query parameter (0 up to 1469 in
# steps of 120 -- presumably a result offset; confirm against the site) and
# parse each response with xml2. The result is a list of parsed documents.
raw_query_list <-
  lapply(
    seq(0, 1469, 120),
    function(offset) xml2::read_html(paste0(baseurl, "?s=", offset))
  )
View baby name crown years and places.R
# Download the state-level baby name archive into a temporary file.
tf <- tempfile()
download.file(
  url = "https://www.ssa.gov/oact/babynames/state/namesbystate.zip",
  destfile = tf,
  mode = "wb"
)

# Extract the archive into the session temp directory; `z` holds the
# paths of the extracted files.
z <- unzip(zipfile = tf, exdir = tempdir())

# Keep only the extracted paths that end in ".TXT".
state_files <- grep(pattern = "\\.TXT$", x = z, value = TRUE)

# Read each of those files as headerless comma-separated text.
state_list <- lapply(
  state_files,
  function(path) {
    read.table(path, header = FALSE, sep = ",", stringsAsFactors = FALSE)
  }
)

# Stack the per-file tables into one data frame and label its columns.
w <- setNames(
  do.call(rbind, state_list),
  c("stateab", "sex", "year", "name", "count")
)

# Largest `count` within every year x state x sex cell (a 3-d array).
maxes <- tapply(
  w[["count"]],
  list(w[["year"]], w[["stateab"]], w[["sex"]]),
  max
)

# Flatten the array to long format, one row per cell, and name the columns.
max_df <- setNames(
  reshape2::melt(maxes),
  c("year", "stateab", "sex", "count")
)
View maximal legal time difference.R
@ajdamico
ajdamico / survey design subsetting.R
Created Feb 28, 2019
incorrect standard errors when not including the full design information
View survey design subsetting.R
library(survey)
# Load the "api" example data; `apistrat` is used below and presumably
# comes from this call.
data(api)

# Keep only the rows where comp.imp equals "Yes". The filtering happens on
# the raw data frame, before any survey design object has been built.
# NOTE(review): going by the gist description, this ordering looks like a
# deliberate illustration of the resulting standard-error problem -- confirm
# before "fixing" it.
x <- subset(apistrat, comp.imp == "Yes")

# Survey design built from the already-subsetted rows: no clustering
# formula (id = ~1), strata from `stype`, weights from `pw`, finite
# population correction from `fpc`.
dstrat_before <-
  svydesign(
    id = ~1,
    strata = ~stype,
    weights = ~pw,
    data = x,
    fpc = ~fpc
  )
View wikipedia random articles are indeed random.R
library(rvest)
# Two scratch paths: `tf` receives the compressed download and `tf2` the
# decompressed output.
tf <- tempfile()
tf2 <- tempfile()

# download and unzip
download.file(
  url = "https://dumps.wikimedia.org/other/pagecounts-ez/merged/2018/2018-05/pagecounts-2018-05-27.bz2",
  destfile = tf,
  mode = "wb"
)

# Decompress `tf` into `tf2`; remove = FALSE asks bunzip2 to leave the
# compressed input file in place.
R.utils::bunzip2(
  tf,
  tf2,
  remove = FALSE
)
# import and restrict to english
View earned income by linear age.R
library(survey)
library(lodown)
# Fetch the lodown catalog for "cpsasec", with the session temp directory
# given as the output location.
cpsasec_cat <-
  get_catalog(
    "cpsasec",
    output_dir = tempdir()
  )

# Run lodown on only the catalog rows whose `year` is 2017.
lodown(
  "cpsasec",
  subset(cpsasec_cat, year == 2017)
)
plot(
c( 15 , 75 ) ,
View living former presidents.R
x <-
structure(list(name = c("George Washington", "John Adams", "Thomas Jefferson",
"James Madison", "James Monroe", "John Quincy Adams", "Andrew Jackson",
"Martin Van Buren", "William Henry Harrison", "John Tyler", "James K. Polk",
"Zachary Taylor", "Millard Fillmore", "Franklin Pierce", "James Buchanan",
"Abraham Lincoln", "Andrew Johnson", "Ulysses S. Grant", "Rutherford B. Hayes",
"James A. Garfield", "Chester Arthur", "Grover Cleveland", "Benjamin Harrison",
"William McKinley", "Theodore Roosevelt", "William Howard Taft",
"Woodrow Wilson", "Warren G. Harding", "Calvin Coolidge", "Herbert Hoover",
@ajdamico
ajdamico / top 25 words weighted by download.R
Created Mar 11, 2018
twenty-five most common words in CRAN title + description fields, weighted by downloads
View top 25 words weighted by download.R
# devtools::install_github( "ajdamico/lodown" )
library(tm)
library(tidyverse)
library(rvest)
# Parse the index page served at the cran-logs address.
cranlogs_html <- read_html("http://cran-logs.rstudio.com/")

# Collect the href attribute of every anchor element on that page.
# No filtering is applied here, so every link is included (presumably
# mostly .gz log files, given the variable name -- verify downstream).
gz_files <-
  cranlogs_html %>%
  html_nodes("a") %>%
  html_attr("href")
View ncvs_bughunt.R
library(downloader)
# install.packages( c("MonetDB.R", "MonetDBLite" , "survey" , "SAScii" , "descr" , "downloader" , "digest" , "stringr" , "R.utils" , "RCurl" ) , repos=c("http://dev.monetdb.org/Assets/R/", "http://cran.rstudio.com/"))
library(SAScii) # load the SAScii package (imports ascii data with a SAS script)
library(RCurl) # load RCurl package (downloads https files)
library(stringr) # load stringr package (manipulates character strings easily)
library(downloader) # downloads and then runs the source() function on scripts from github
library(MonetDB.R) # load the MonetDB.R package (connects r to a monet database)
View La_rs_bughunt.R
# # # # # # # # # # # # # # # # #
# # set the working directory # #
# # # # # # # # # # # # # # # # #
# setwd( "C:/My Directory/SWMAP/" )
# # # # # # # # # # # # # # # #
# # example survey data set # #
# # # # # # # # # # # # # # # #
You can’t perform that action at this time.