Created
February 5, 2018 04:39
-
-
Save ZipporahPolinskyNagel/d6fe5726451a621e3a11f62757e02139 to your computer and use it in GitHub Desktop.
Zipporah's Shiny Project
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2)
library(dplyr)
library(data.table)
library(tidyr)

# Load the raw charges export; keep text columns as character, not factor.
charges <- read.csv("charges.csv", stringsAsFactors = FALSE)
charges.df <- as.data.frame(charges)

# Lower-case the first 23 column names. The index is capped at the real column
# count so a narrower CSV does not make the `names<-` assignment fail.
lower_cols <- seq_len(min(23L, ncol(charges.df)))
names(charges.df)[lower_cols] <- tolower(names(charges.df)[lower_cols])

#extract month and year
charges.df$violation.date <- as.Date(charges.df$violation.date,
                                     format = "%m/%d/%Y")
charges.df$month <- as.numeric(month(charges.df$violation.date))
charges.df$year <- as.numeric(year(charges.df$violation.date))

#extract industry no.
# fill = "right" pads rows without a " - " separator with NA, which is what the
# default does as well, only without emitting a warning.
charges.df <- separate(
  data = charges.df,
  col = industry,
  into = c("industry.name", "industry.id"),
  sep = " \\- ",
  fill = "right"
)
#TODO remove blank industry id at 70 locations for Other (industry_name)

#extract charge type
# extra = "merge" splits on the first " -" only, so a description that itself
# contains " -" is kept whole instead of being cut at the second separator.
charges.df <- separate(
  data = charges.df,
  col = charge,
  into = c("charge.id", "charge.desc"),
  sep = " \\-",
  extra = "merge",
  fill = "right"
)

# Derive a charge category from the charge id: drop everything up to and
# including "§ ", then remove any parenthesised part.
charges.df$charge.cat <- gsub("..+§ ", "", charges.df$charge.id)
charges.df$charge.cat <- gsub(" *\\(.*?\\) *", "", charges.df$charge.cat)
#fix boro casing
# Upper-case the first character of every element of `s`; the rest of each
# string is left untouched (only the first letter of the whole string is
# capitalised, not of every word).
#
# s: character vector (other types are coerced with as.character()).
# Returns a character vector of the same length; NA elements stay NA.
capFirst <- function(s) {
  s <- as.character(s)
  capped <- paste0(toupper(substring(s, 1, 1)), substring(s, 2))
  # paste0() would turn a missing value into text, so restore the NAs.
  capped[is.na(s)] <- NA_character_
  capped
}
# Normalise borough casing: lower-case everything, then capitalise the first
# letter.
charges.df$borough <- capFirst(tolower(charges.df$borough))

# Drop rows whose borough contains "Outside" and rows with a blank borough.
charges.df <- dplyr::filter(
  charges.df,
  !grepl("Outside", borough),
  borough != ""
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(shiny)
# The text-mining / word-cloud packages are attached at load time. They used to
# be attached inside resetChargeData(), which runs only on the first render,
# while repeatable(wordcloud) below needs `wordcloud` as soon as a session
# starts. `tm` supplies Corpus(), tm_map(), TermDocumentMatrix() and friends.
library(tm)
library(wordcloud)
library(devtools)
library(SnowballC)

#server code
shinyServer(function(input, output) {
  # Shared result for all three outputs. As a reactive it is recomputed when
  # either slider changes and reused by every output, instead of each render
  # function running the whole aggregation and text-mining pipeline itself.
  charge_data <- reactive({
    resetChargeData(input$charge_number, input$industry_number)
  })

  # Horizontal bar chart of charge counts for the selected drill-down.
  output$bar <- renderPlot({
    req(input$drilldown)
    tables <- charge_data()

    # For each drill-down: the summary table, its label column, the axis title.
    drilldown <- switch(
      input$drilldown,
      "Borough" = list(
        data = tables$charge_by_boro,
        column = "borough",
        xlabel = "Borough"
      ),
      "Industry" = list(
        data = tables$charge_by_industry,
        column = "industry.name",
        xlabel = "Industry"
      ),
      "Charge type" = list(
        data = tables$charge_by_chargeid,
        column = "charge.id",
        xlabel = "Charge ID"
      )
    )
    req(drilldown)

    # Put label and count side by side so both aesthetics come from the plot
    # data rather than from variables captured from the enclosing scope.
    plot_data <- data.frame(
      label = drilldown$data[[drilldown$column]],
      count = drilldown$data$count,
      stringsAsFactors = FALSE
    )

    ggplot(
      data = plot_data,
      aes(x = reorder(label, count), y = count, fill = count)
    ) +
      geom_bar(stat = "identity") +
      labs(x = drilldown$xlabel, y = "Number of charges") +
      theme_classic() +
      coord_flip()
  })

  wordcloud_rep <- repeatable(wordcloud)

  # Word cloud of the terms found in the selected charges' descriptions.
  output$words <- renderPlot({
    d <- charge_data()$d
    validate(
      need(nrow(d) > 0, "No charge descriptions match the current selection.")
    )
    wordcloud_rep(
      words = d$word, freq = d$freq, min.freq = 2, scale = c(2.5, .25),
      max.words = 500, random.order = FALSE, rot.per = 0.15,
      colors = brewer.pal(8, "Dark2")
    )
  })

  # Table of the selected charge types with their counts and descriptions.
  output$charges <- renderTable({
    charge_data()$charge_by_chargeid
  })
})

# Build the summary tables and word frequencies for the current selection.
#
# Reads the global `charges.df` table.
#
# charge_count:   number of most frequent charge ids to keep.
# industry_count: number of most frequent industries to keep.
#
# Returns a named list:
#   charge_by_boro     - summed charge.count per borough
#   charge_by_industry - summed charge.count per industry
#   charge_by_chargeid - summed charge.count per charge id, with a description
#   d                  - word/freq table built from the charge descriptions
#
# The function previously only assigned these as local variables, so nothing
# reached the render functions; callers must now read them from the list.
resetChargeData <- function(charge_count, industry_count) {
  # Rank industries and charge ids by number of rows. `wt = count` is explicit
  # because top_n() otherwise ranks by whatever the last column is. top_n()
  # keeps ties, so more than n rows can come back.
  top_industries <- charges.df %>%
    group_by(industry.name) %>%
    summarize(count = n()) %>%
    arrange(desc(count)) %>%
    top_n(industry_count, wt = count)

  top_chargeids <- charges.df %>%
    group_by(charge.id) %>%
    summarize(count = n()) %>%
    arrange(desc(count)) %>%
    top_n(charge_count, wt = count)

  # Keep only rows belonging to both a top industry and a top charge id.
  charges.df.filtered <- charges.df %>%
    semi_join(top_industries, by = "industry.name") %>%
    semi_join(top_chargeids, by = "charge.id")

  charge_by_boro <- charges.df.filtered %>%
    group_by(borough) %>%
    summarize(count = sum(charge.count)) %>%
    arrange(desc(count))

  charge_by_industry <- charges.df.filtered %>%
    group_by(industry.name) %>%
    summarize(count = sum(charge.count)) %>%
    arrange(desc(count)) %>%
    top_n(industry_count, wt = count)

  # Without `wt` the last column here is `description`, so the table was being
  # cut by alphabetical order of the description instead of by count.
  charge_by_chargeid <- charges.df.filtered %>%
    group_by(charge.id) %>%
    summarize(
      count = sum(charge.count),
      description = dplyr::first(charge.desc)
    ) %>%
    arrange(desc(count)) %>%
    top_n(charge_count, wt = count)

  # word cloud
  # The visible preprocessing only puts a `charge.desc` column on this table;
  # the earlier code read `description`, which nothing visible creates here.
  # NOTE(review): confirm the CSV has no separate `description` column that was
  # meant instead.
  d <- buildWordFrequencies(charges.df.filtered$charge.desc)

  list(
    charge_by_boro = charge_by_boro,
    charge_by_industry = charge_by_industry,
    charge_by_chargeid = charge_by_chargeid,
    d = d
  )
}

# Turn a vector of free-text descriptions into a word-frequency table.
#
# descriptions: character vector; NA entries are ignored.
#
# Returns a data frame with columns `word` and `freq`, sorted by decreasing
# frequency. It has zero rows when no terms are left after cleaning.
buildWordFrequencies <- function(descriptions) {
  descriptions <- as.character(descriptions)
  descriptions <- descriptions[!is.na(descriptions)]
  entire_desc_text <- paste(descriptions, collapse = " ")

  docs <- Corpus(VectorSource(entire_desc_text))
  toSpace <- content_transformer(function(x, pattern) gsub(pattern, " ", x))
  docs <- tm_map(docs, toSpace, "/")
  docs <- tm_map(docs, toSpace, "@")
  docs <- tm_map(docs, toSpace, "\\|")
  # Convert the text to lower case
  docs <- tm_map(docs, content_transformer(tolower))
  # Remove numbers
  docs <- tm_map(docs, removeNumbers)
  # Remove english common stopwords
  docs <- tm_map(docs, removeWords, stopwords("english"))
  # Remove project-specific stop words
  docs <- tm_map(docs, removeWords, c("near", "required", "upon", "use"))
  # Remove punctuations
  docs <- tm_map(docs, removePunctuation)
  # Eliminate extra white spaces
  docs <- tm_map(docs, stripWhitespace)
  #docs <- tm_map(docs, stemDocument)

  dtm <- TermDocumentMatrix(docs)
  v <- sort(rowSums(as.matrix(dtm)), decreasing = TRUE)
  # as.character() keeps the `word` column present even when `v` is empty.
  data.frame(
    word = as.character(names(v)),
    freq = as.numeric(v),
    stringsAsFactors = FALSE
  )
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# This is the user-interface definition of a Shiny web application. You can
# run the application by clicking 'Run App' above.
library(shiny)

# Define UI for application that draws bar graph
#
# Inputs:  drilldown (radio buttons), industry_number and charge_number
#          (sliders, 1-50, default 20).
# Outputs: bar (plot), words (plot), charges (table), one per tab.
shinyUI(fluidPage(
  theme = "bootstrap.css",

  # Application title
  headerPanel("Department of Consumer Affairs charges for 2016-2017"),

  sidebarPanel(
    # choose the drilldown
    radioButtons(
      "drilldown",
      label = "Drill down by ",
      choices = c("Borough", "Industry", "Charge type"),
      selected = "Borough"
    ),
    #checkboxGroupInput("year", label = "Year",
    #                   choices = c("2016","2017"),
    #                   selected = "2016"),

    # How many of the most frequent industries to include.
    sliderInput(
      "industry_number",
      label = "No. Industries",
      min = 1, max = 50,
      value = 20, step = 1,
      pre = "", sep = "",
      animate = TRUE
    ),

    # How many of the most frequent charge types to include.
    sliderInput(
      "charge_number",
      label = "No. Charge types",
      min = 1, max = 50,
      value = 20, step = 1,
      pre = "", sep = "",
      animate = TRUE
    )
  ),

  # Show a bar graph
  mainPanel(
    tabsetPanel(
      tabPanel("Plots", plotOutput("bar", width = "100%")),
      tabPanel("Word cloud", plotOutput("words", width = "100%")),
      #tabPanel("Over time", tableOutput("table")),
      tabPanel("Charge types", tableOutput("charges"))
    )
  )
))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment