
@CerebralMastication
Created November 9, 2010 15:59
# Thanks to @greghirson who totally knocked this out of the park!
# Based on his work, here's a simple script to pull monthly prices from the USDA NASS website.
library(RCurl)
library(XML)
x <- postForm(uri = "http://quickstats.nass.usda.gov/uuid/encode",
              source_desc = "SURVEY",
              sector_desc = "CROPS",
              group_desc = "FIELD CROPS",
              commodity_desc = "CORN",
              statisticcat_desc = "PRICE RECEIVED",
              short_desc = "CORN, GRAIN - PRICE RECEIVED, MEASURED IN $ / BU",
              agg_level_desc = "NATIONAL",
              state_name = "US TOTAL",
              # postForm() accepts repeated field names, so each selected
              # year (and, below, each month) goes in as its own form field:
              year = "1977",
              year = "1978",
              year = "1979",
              year = "1980",
              year = "1981",
              year = "1982",
              year = "1983",
              year = "1984",
              year = "1985",
              year = "1986",
              year = "1987",
              year = "1988",
              year = "1989",
              year = "1990",
              year = "1991",
              year = "1992",
              year = "1993",
              year = "1994",
              year = "1995",
              year = "1996",
              year = "1997",
              year = "1998",
              year = "1999",
              year = "2000",
              year = "2001",
              year = "2002",
              year = "2003",
              year = "2004",
              year = "2005",
              year = "2006",
              year = "2007",
              year = "2008",
              year = "2009",
              year = "2010",
              freq_desc = "MONTHLY",
              reference_period_desc = "JAN",
              reference_period_desc = "FEB",
              reference_period_desc = "MAR",
              reference_period_desc = "APR",
              reference_period_desc = "MAY",
              reference_period_desc = "JUN",
              reference_period_desc = "JUL",
              reference_period_desc = "AUG",
              reference_period_desc = "SEP",
              reference_period_desc = "OCT",
              reference_period_desc = "NOV",
              reference_period_desc = "DEC",
              breadcrumb = "reference_period_desc")
# The encode endpoint returns the report UUID wrapped in double quotes;
# strip them, fetch the results page, and grab the first link's href.
x2 <- gsub("\"", "", x)
y <- getURL(paste("http://quickstats.nass.usda.gov/results/", x2, sep = ""))
yparse <- htmlParse(y, asText = TRUE)
path <- xpathSApply(yparse, "//a", xmlAttrs)[[1]]["href"]
# The third path segment of that href, minus its ".url" extension, is the
# id of the downloadable spreadsheet.
datapath <- gsub(".url", "", strsplit(path, "/", fixed = TRUE)[[1]][3], fixed = TRUE)
usdaData <- read.csv(paste("http://quickstats.nass.usda.gov/data/spreadsheet/", datapath, ".csv", sep = ""))
# I turned the year and month lists into, well, lists.
# now it's a lot shorter
years <- 1977:2010  # same span as the long-hand request above
yearList <- list(year = as.character(years))
months <- c("JAN","FEB","MAR","APR","MAY","JUN","JUL","AUG","SEP","OCT","NOV","DEC")
monthList <- list(reference_period_desc = months)
# monthList already selects every month, so there's no need for a single
# reference_period_desc entry here.
myParams <- list(source_desc = "SURVEY",
                 sector_desc = "CROPS",
                 group_desc = "FIELD CROPS",
                 commodity_desc = "CORN",
                 statisticcat_desc = "PRICE RECEIVED",
                 short_desc = "CORN, GRAIN - PRICE RECEIVED, MEASURED IN $ / BU",
                 agg_level_desc = "NATIONAL",
                 state_name = "US TOTAL",
                 freq_desc = "MONTHLY",
                 breadcrumb = "reference_period_desc")
myParams <- c(myParams, yearList, monthList)
x <- postForm(uri = "http://quickstats.nass.usda.gov/uuid/encode",
              .params = myParams)
# Same extraction pipeline as above.
x2 <- gsub("\"", "", x)
y <- getURL(paste("http://quickstats.nass.usda.gov/results/", x2, sep = ""))
yparse <- htmlParse(y, asText = TRUE)
path <- xpathSApply(yparse, "//a", xmlAttrs)[[1]]["href"]
datapath <- gsub(".url", "", strsplit(path, "/", fixed = TRUE)[[1]][3], fixed = TRUE)
usdaData <- read.csv(paste("http://quickstats.nass.usda.gov/data/spreadsheet/", datapath, ".csv", sep = ""))
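# A quick sanity check on what came down. This is just a sketch: the column
# names used here (Value, year, reference_period_desc) are assumptions about
# the Quick Stats CSV layout, so adjust them to whatever the file actually has.
usdaData$Value <- as.numeric(gsub(",", "", usdaData$Value))  # drop thousands separators
ord <- order(usdaData$year, match(usdaData$reference_period_desc, months))
corn <- usdaData[ord, ]
plot(corn$Value, type = "l", xlab = "months since Jan 1977", ylab = "$ / BU",
     main = "CORN, GRAIN - PRICE RECEIVED, MONTHLY")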
@CerebralMastication (Author) commented:

This was my original challenge, which I accidentally deleted:

What I want to do is pull the data down from this site and parse the result into a CSV.

Here's the URL: http://quickstats.nass.usda.gov/#A8B7B992-BA0C-3ADB-895B-425DC85DB425

The page before submitting should look like this: http://ow.ly/i/5jET/original

When you submit, it sends the following POST to USDA:

16:42:49.188[291ms][total 291ms] Status: 200[OK]
POST http://quickstats.nass.usda.gov/uuid/encode Load Flags[LOAD_BYPASS_CACHE LOAD_BACKGROUND ] Content Size[38] Mime Type[text/html]
Request Headers:
Host[quickstats.nass.usda.gov]
User-Agent[Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/10.10 (maverick) Firefox/3.6.12 GTB7.1]
Accept[text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8]
Accept-Language[en-us,en;q=0.5]
Accept-Encoding[gzip,deflate]
Accept-Charset[ISO-8859-1,utf-8;q=0.7,*;q=0.7]
Keep-Alive[115]
Connection[keep-alive]
Content-Type[application/x-www-form-urlencoded; charset=UTF-8]
X-Requested-With[XMLHttpRequest]
Referer[http://quickstats.nass.usda.gov/]
Content-Length[1181]
Cookie[filters={"group_desc":{"visible":true,"name":"group_desc","index":3,"selectedItems":["FIELD CROPS"],"label":"Group","sortOrder":"asc","busy":false},"agg_level_desc":{"visible":true,"name":"agg_level_desc","children":{"watershed_desc":{"index":false,"visible":true,"selectedItems":[],"name":"watershed_desc","sortOrder":"asc","label":"Watershed","children":false,"busy":false},"state_name":{"index":8,"visible":true,"selectedItems":["US TOTAL"],"name":"state_name","sortOrder":"asc","label":"State","children":{"asd_desc":{"index":false,"visible":true,"selectedItems":[],"name":"asd_desc","sortOrder":"asc","label":"Ag. District","children":false,"busy":false},"county_name":{"index":false,"visible":true,"selectedItems":[],"name":"county_name","sortOrder":"asc","label":"County","children":false,"busy":false},"zip_5":{"index":false,"visible":true,"selectedItems":[],"name":"zip_5","sortOrder":"asc","label":"Zip Code","children":false,"busy":false}},"busy":false},"region_desc":{"index":false,"visible":true,"selectedItems":[],"name":"region_desc","sortOrder":"asc","label":"Region","children":false,"busy":false}},"index":5,"selectedItems":["NATIONAL"],"sortOrder":"asc","label":"Locale","busy":false},"sector_desc":{"visible":true,"name":"sector_desc","index":1,"selectedItems":["CROPS"],"label":"Sector","sortOrder":"asc","busy":false},"commodity_desc":{"visible":true,"name":"commodity_desc","children":{"statisticcat_desc":{"index":6,"visible":true,"selectedItems":["PRICE RECEIVED"],"name":"statisticcat_desc","sortOrder":"asc","label":"Category","children":false,"busy":false},"short_desc":{"index":7,"visible":true,"selectedItems":["CORN, GRAIN - PRICE RECEIVED, MEASURED IN $ / BU"],"name":"short_desc","sortOrder":"asc","label":"Data Item","children":{"domain_desc":{"index":false,"visible":true,"selectedItems":[],"name":"domain_desc","sortOrder":"asc","label":"Domain","children":false,"busy":false}},"busy":false}},"index":4,"selectedItems":["CORN"],"sortOrder":"asc","label":"Commodity","busy":false},"source_desc":{"visible":true,"name":"source_desc","index":2,"selectedItems":["SURVEY"],"label":"Program","sortOrder":"asc","busy":false},"year":{"visible":true,"name":"year","children":{"freq_desc":{"index":10,"visible":true,"selectedItems":["MONTHLY"],"name":"freq_desc","sortOrder":"asc","label":"Frequency","children":{"reference_period_desc":{"index":11,"visible":true,"selectedItems":["JAN","FEB","MAR","APR","MAY","JUN","JUL","AUG","SEP","OCT","NOV","DEC"],"name":"reference_period_desc","sortOrder":"asc","label":"Refernece Period","children":false,"busy":false}},"busy":false}},"index":9,"selectedItems":["2010","2009","2008","2007","2006","2005","2004","2003","2002","2001","2000","1999","1998","1997","1996","1995","1994","1993","1992","1991","1990","1989","1988","1987","1986","1985","1984","1983","1982","1981","1980","1979","1978","1977"],"sortOrder":"desc","label":"Year","busy":false}}]
Pragma[no-cache]
Cache-Control[no-cache]
Post Data:
source_desc[SURVEY]
sector_desc[CROPS]
group_desc[FIELD%20CROPS]
commodity_desc[CORN]
statisticcat_desc[PRICE%20RECEIVED]
short_desc[CORN%2C%20GRAIN%20-%20PRICE%20RECEIVED%2C%20MEASURED%20IN%20%24%20%2F%20BU]
agg_level_desc[NATIONAL]
state_name[US%20TOTAL]
year[1977]
freq_desc[MONTHLY]
reference_period_desc[DEC]
breadcrumb[reference_period_desc]
Response Headers:
Date[Mon, 08 Nov 2010 22:41:23 GMT]
Server[Apache]
Content-Length[38]
Keep-Alive[timeout=5, max=99]
Connection[Keep-Alive]
Content-Type[text/html; charset=utf-8]

What I want to capture is the resulting table, which looks like this: http://ow.ly/i/5jF0/original

What I want is the CSV you would get by clicking the "spreadsheet" link in the upper right-hand corner.
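For reference, here's how the captured Post Data above maps back to an RCurl call, replaying just the single-year, single-month request from the dump. This is a sketch, not a definitive client: it assumes /uuid/encode still returns the report UUID as a double-quoted string, which is why the scripts above strip the quotes before building the results URL.

library(RCurl)
# Replay the Post Data fields captured above; the response body should be
# the report UUID, wrapped in double quotes.
uuid <- postForm("http://quickstats.nass.usda.gov/uuid/encode",
                 source_desc = "SURVEY", sector_desc = "CROPS",
                 group_desc = "FIELD CROPS", commodity_desc = "CORN",
                 statisticcat_desc = "PRICE RECEIVED",
                 short_desc = "CORN, GRAIN - PRICE RECEIVED, MEASURED IN $ / BU",
                 agg_level_desc = "NATIONAL", state_name = "US TOTAL",
                 year = "1977", freq_desc = "MONTHLY",
                 reference_period_desc = "DEC",
                 breadcrumb = "reference_period_desc")
resultsURL <- paste("http://quickstats.nass.usda.gov/results/",
                    gsub("\"", "", uuid), sep = "")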
