Skip to content

Instantly share code, notes, and snippets.

@sdgilley
Last active September 19, 2016 18:43
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save sdgilley/15ebf67c5b01d12224f4b103c7065625 to your computer and use it in GitHub Desktop.
library(pdftools)
# Read the list of licensed dispensaries from the published PDF and massage
# the extracted text into a ";"-separated form that read.delim() can parse.
# The result of this section is the data.frame `d`.
url <- "http://www.idfpr.com/Forms/MC/ListofLicensedDispensaries.pdf"
text <- pdf_text(url)
t <- text
# Strip out info about start date, end date, etc... it is not consistently
# formatted. Example of what is removed:
# 13 8/24/2015 8/24/2016 280.000\r\n
t <- gsub("\\d+\\s+\\d+/\\d+/\\d{4}\\s+\\d+/\\d+/\\d{4}\\s+\\d+.\\d+\\s+", "", t)
# Turn \r\n into separators.
# NOTE(review): only CRLF line endings are handled here -- presumably the
# script was written against pdf_text() output that uses "\r\n"; confirm
# before relying on it on another platform.
t <- gsub("\r\n", ";", t, fixed = TRUE)
# Turn 2 or more whitespace characters into separators.
t <- gsub("\\s{2,}", ";", t)
# Get rid of multiple consecutive separators.
t <- gsub(";{2,}", ";", t)
# Also remove empty records.
# NOTE(review): the replacement is "" (not ";"), so the fields on either side
# of a whitespace-only record end up joined -- confirm this is intended.
t <- gsub(";\\s+;", "", t)
# Finally clean up some messiness in the data: a few names are split across
# two fields, so drop the stray fragment and put it back on the name.
t <- gsub(";Dispensary;", ";", t)
t <- gsub("Salveo Health & Wellness", "Salveo Health & Wellness Dispensary", t)
t <- gsub(";Illinois;", ";", t)
t <- gsub("Healthway Services of West", "Healthway Services of West Illinois", t)
t <- gsub(";Centers;", ";", t)
t <- gsub("Trinity Compassionate Care", "Trinity Compassionate Care Centers", t)
t <- gsub("Maribis of Chicago Chicago", "Maribis of Chicago; Chicago", t)
# Some records need ";" in between zip and phone number as of 8/22/16.
t <- gsub("(\\d{5})\\s+\\(", "\\1;(", t)
t <- unlist(t)
# Remove the text at the top of the pdf (updated on 8/22/16).
# Note that `top` is applied as a regular expression, not as a fixed string.
top <- ";Illinois Department of Financial and Professional Regulation;Division of Professional Regulation;BRUCE RAUNER;DANIEL KELBER;Governor;Acting Director;The Illinois Department of Financial and Professional Regulation, Division of;Professional Regulation has licensed the following medical cannabis dispensaries under the Illinois Compassion;Cannabis Pilot program Act, 410 ILCS 130/1 et seq., and the regulations adopted pursuant ther;PATIENTS: You must select a dispensary with the Illinois Department of Public Health. BEFORE YOUR FIRST VIS;and ask when it is open for business.;IDFPR - LICENSED MEDICAL CANNABIS DISPENSARIES;Medical;License;License;Cred;Name;Address & Phone Number;Cannabis;Expiration;Issue Date;Nu;District;Date"
t <- gsub(top, "", t)
# Now read the cleaned text as a data.frame. The text connection is opened
# here, so it is closed here as well -- even if parsing fails.
cleaned_con <- textConnection(t)
d <- tryCatch(
  read.delim(cleaned_con, header = FALSE, sep = ";",
             blank.lines.skip = TRUE, stringsAsFactors = FALSE),
  finally = close(cleaned_con)
)
# Reshape the wide data.frame `d` into one row per dispensary.
# Each set of 4 vars starting with V2 is a new record, in the order
# addr, name, citystatezip, phone.
record_fields <- c("addr", "name", "citystatezip", "phone")
# V1 is not part of any record, so only ncol(d) - 1 columns hold record data.
# Counting V1 would index past the last column whenever ncol(d) is a
# multiple of 4.
nloops <- max(0, floor((ncol(d) - 1) / length(record_fields)))
# Slice out every complete group of 4 columns; seq_len() yields no iterations
# when there are no complete records.
record_chunks <- lapply(seq_len(nloops), function(i) {
  first_col <- 2 + (i - 1) * 4
  chunk <- d[, first_col:(first_col + 3), drop = FALSE]
  names(chunk) <- record_fields
  chunk
})
if (length(record_chunks) > 0) {
  # Bind all groups in one call instead of growing the result in a loop.
  all <- do.call(rbind, record_chunks)
} else {
  all <- data.frame(addr = character(), name = character(),
                    citystatezip = character(), phone = character(),
                    stringsAsFactors = FALSE)
}
# Get rid of all blank rows: treat empty strings as missing, then keep only
# the rows in which every field is present.
all[all == ""] <- NA
all <- all[complete.cases(all), ]
# Create the address used for geocoding.
all$address <- paste(all$addr, all$citystatezip, sep = ", ")
# now get lat, lon values from the address
# thanks to http://stackoverflow.com/questions/22887833/r-how-to-geocode-a-simple-address-using-data-science-toolbox
require(RDSTK)
# Geocode a single address with the Data Science Toolkit geocoding endpoint.
#
# Args:
#   addr: the address to look up (a single string).
#
# Returns:
#   A named character vector with elements `address`, `long` and `lat`.
#   `long` and `lat` are NA when the request or parsing fails, or when the
#   response contains no usable result, so that the vectors returned for
#   several addresses always have the same shape and can be combined with
#   rbind(). A warning is raised when the request or parsing fails.
geo.dsk <- function(addr) {
  url <- "http://www.datasciencetoolkit.org/maps/api/geocode/json"
  not_found <- c(address = addr, long = NA_character_, lat = NA_character_)

  loc <- tryCatch(
    {
      response <- httr::GET(url, query = list(sensor = "FALSE", address = addr))
      # Ask for the body as text and state the encoding explicitly.
      body <- httr::content(response, as = "text", encoding = "UTF-8")
      json <- rjson::fromJSON(body)
      results <- json[["results"]]
      # An empty `results` list means the service found no match; indexing
      # into it would fail with "subscript out of bounds".
      if (length(results) == 0) NULL else results[[1]]$geometry$location
    },
    error = function(e) {
      warning("Geocoding failed for '", addr, "': ", conditionMessage(e),
              call. = FALSE)
      NULL
    }
  )

  if (is.null(loc) || is.null(loc$lng) || is.null(loc$lat)) {
    return(not_found)
  }
  c(address = addr, long = loc$lng, lat = loc$lat)
}
# Geocode every address; each call returns c(address, long, lat), and the
# rows are stacked in the same order as the rows of `all`.
result <- do.call(rbind, lapply(all$address, geo.dsk))
result <- data.frame(result, stringsAsFactors = FALSE)
# Now plot it on a map.
# Create id and LatLong for the googleVis map: `id` is the text attached to
# each marker, `LatLong` is the "lat:long" location string.
all$id <- paste(all$name, all$address, all$phone, sep = "; ")
all$LatLong <- paste(result$lat, result$long, sep = ":")
# Only rows that actually have coordinates can be placed on the map;
# a missing value would otherwise be pasted into LatLong as "NA:NA".
located <- !is.na(result$lat) & !is.na(result$long)
if (any(!located)) {
  message(sum(!located), " address(es) could not be geocoded and are left off the map.")
}
# Now plot with googleVis. library() stops with an error if the package is
# missing, instead of returning FALSE and failing later at gvisMap().
library(googleVis)
g1 <- gvisMap(
  all[located, , drop = FALSE], "LatLong", "id",
  options = list(
    showTip = TRUE,
    showLine = TRUE,
    enableScrollWheel = TRUE,
    mapType = "normal",
    width = 400, height = 400
  )
)
# This opens a browser with the plot.
plot(g1)
# Write the chart HTML that will be used on my website.
cat(g1$html$chart, file = "dispensariesIL.html")
@JiawenQi98
Copy link

Hi,
I tried to follow your code, but I found an error when I ran this line:
result <- do.call(rbind,lapply(all$address,geo.dsk))

And the error is,
No encoding supplied: defaulting to UTF-8.
Error in json["results"][[1]][[1]] : subscript out of bounds

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment