Skip to content

Instantly share code, notes, and snippets.

@pierrelafortune
Last active October 23, 2021 15:26
Show Gist options
  • Save pierrelafortune/c1c201675918f3981feeffe4403b29d9 to your computer and use it in GitHub Desktop.
Save pierrelafortune/c1c201675918f3981feeffe4403b29d9 to your computer and use it in GitHub Desktop.
nyc restaurant violations
# data https://data.cityofnewyork.us/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/43nn-pn8j
# Which cuisines are most likely to be cited for critical violations such as live mice?
library(tidyverse)
library(scales)
# Load the inspection results export; the path is relative to the current
# working directory.
nyc_restaurants <- read_csv("Downloads/nyc_restaurants.csv")

# Gradable inspections
# A row is kept when ANY one of the three rules below holds. In R `&` binds
# tighter than `|`, so every rule is wrapped in its own parentheses to make
# the grouping explicit instead of relying on operator precedence.
ny <- nyc_restaurants %>%
  filter(
    # Rule 1: any re-inspection.
    (`INSPECTION TYPE` %in% c(
      "Cycle Inspection / Re-inspection",
      "Pre-permit (Operational) / Re-inspection"
    )) |
      # Rule 2: an initial inspection with a score of 13 or less.
      (
        `INSPECTION TYPE` %in% c(
          "Cycle Inspection / Initial Inspection",
          "Pre-permit (Operational) / Initial Inspection"
        ) &
          SCORE <= 13
      ) |
      # Rule 3: a reopening inspection carrying one of the listed grades.
      (
        `INSPECTION TYPE` %in% c(
          "Pre-permit (Operational) / Reopening Inspection",
          "Cycle Inspection / Reopening Inspection"
        ) &
          GRADE %in% c("A", "B", "C", "P", "Z")
      )
  )
# cuisine
# Lookup of restaurant id (CAMIS) -> cuisine. count() is used purely to
# collapse repeated rows into one row per (CAMIS, cuisine) pair; the tally
# column `n` it produces is not needed and is dropped.
cuisines <- ny %>%
  count(CAMIS, CUISINE = `CUISINE DESCRIPTION`) %>%
  select(-n)
# rodent violations
# One row per restaurant (CAMIS) with:
#   inspections               - number of rows in `ny` for that restaurant
#   critical_flag_inspections - 1 if any row is flagged 'Critical', else 0
#                               (NA if a CRITICAL FLAG value is missing)
#   mice_rats                 - 1 if any row carries one of the two rodent
#                               violation descriptions, else 0
# Logical tests are converted to 0/1 doubles and reduced with max(), so the
# result is a per-restaurant indicator rather than a count.
v <- ny %>%
  group_by(CAMIS) %>%
  summarise(
    inspections = n(),
    critical_flag_inspections = max(
      as.numeric(`CRITICAL FLAG` == "Critical")
    ),
    mice_rats = max(
      as.numeric(
        `VIOLATION DESCRIPTION` %in% c(
          "Evidence of mice or live mice present in facility's food and/or non-food areas.",
          "Evidence of rats or live rats present in facility's food and/or non-food areas."
        )
      )
    )
  )
# Join to cuisines
# Attach each restaurant's cuisine to its per-restaurant violation flags.
# The key is named explicitly: CAMIS is the only column the two tables share,
# and stating it avoids the natural-join message and guards against the key
# silently changing if either table later gains a same-named column.
violations <- v %>%
  inner_join(cuisines, by = "CAMIS")
# Per-cuisine summary:
#   restaurants      - number of restaurants of that cuisine
#   with_rodents     - how many of them have the mice/rats indicator set
#   pct_with_rodents - with_rodents as a fraction (0-1) of restaurants
# Only cuisines with at least 200 restaurants are kept (inclusive, matching
# the "200+" wording used in the plot caption), sorted from the highest
# rodent share to the lowest.
agg <- violations %>%
  group_by(CUISINE) %>%
  summarise(
    restaurants = n(),
    with_rodents = sum(mice_rats),
    pct_with_rodents = with_rodents / restaurants
  ) %>%
  filter(restaurants >= 200) %>% # At least 200 restaurants inspected
  arrange(desc(pct_with_rodents))
# Average
# Overall share of restaurants with a rodent violation, across all cuisines.
# Computed from `v`, the one-row-per-restaurant table that holds the
# `mice_rats` indicator; the result is printed, not stored.
v %>%
  summarise(
    restaurants = n(),
    with_rodents = sum(mice_rats),
    pct_with_rodents = with_rodents / restaurants
  )
# Plot
# Print the cuisine names present in the data (handy when choosing which long
# labels to shorten in the recode() below).
sort(unique(cuisines$CUISINE))

# Overall rodent-violation rate over every restaurant in `v` (mice_rats is a
# 0/1 indicator, so its mean is the share). It positions the dashed reference
# line and builds its label, so both follow the data instead of a typed-in
# constant.
avg_rodent_rate <- mean(v$mice_rats)

# Horizontal bar chart of the rodent-violation share per cuisine, highest at
# the bottom after coord_flip(), coloured from green (low) to red (high).
agg %>%
  # Shorten the two longest cuisine labels so they fit on the axis.
  mutate(CUISINE = recode(
    CUISINE,
    `Bakery Products/Desserts` = "Bakery/Desserts",
    `Juice, Smoothies, Fruit Salads` = "Juice, Smoothie"
  )) %>%
  ggplot(aes(
    reorder(CUISINE, -pct_with_rodents),
    pct_with_rodents,
    fill = pct_with_rodents,
    label = round(pct_with_rodents * 100, 1)
  )) +
  geom_col(alpha = 0.8) +
  # Value labels sit just past the end of each bar.
  geom_text(hjust = -0.2, size = 3) +
  geom_hline(
    yintercept = avg_rodent_rate,
    linetype = "dashed",
    color = "grey50"
  ) +
  scale_fill_gradient(low = "dark green", high = "dark red") +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  # Label for the average line, placed on the "Donuts" row and nudged to the
  # right of the line so the two do not overlap.
  annotate(
    "text",
    x = "Donuts",
    y = avg_rodent_rate + 0.03,
    label = paste0("Avg: ", percent(avg_rodent_rate, accuracy = 1)),
    size = 3,
    color = "grey20"
  ) +
  guides(fill = "none") +
  coord_flip() +
  theme_classic() +
  labs(
    x = NULL,
    y = "Percent of restaurants with a rodent violation",
    title = "Which NYC restaurants are most likely to have mice or rat violations?",
    subtitle = "NYC restaurant inspections with 'Evidence of mice..' or 'Evidence of rats..' violations",
    caption = "NYC OpenData. Cuisines with 200+ restaurants in NYC",
    fill = NULL
  )
> agg %>% arrange(-restaurants) %>% print(n=50)
# A tibble: 26 × 4
CUISINE restaurants with_rodents pct_with_rodents
<chr> <int> <dbl> <dbl>
1 American 5006 1585 0.317
2 Chinese 2211 954 0.431
3 Coffee/Tea 1758 404 0.230
4 Pizza 1540 612 0.397
5 Italian 941 353 0.375
6 Japanese 835 303 0.363
7 Latin American 808 341 0.422
8 Mexican 770 294 0.382
9 Bakery Products/Desserts 739 283 0.383
10 Caribbean 678 356 0.525
11 Sandwiches 626 209 0.334
12 Chicken 619 215 0.347
13 Donuts 584 91 0.156
14 Spanish 578 248 0.429
15 Hamburgers 504 100 0.198
16 Juice, Smoothies, Fruit Salads 404 84 0.208
17 Asian/Asian Fusion 388 128 0.330
18 Tex-Mex 369 85 0.230
19 Frozen Desserts 334 101 0.302
20 Jewish/Kosher 302 128 0.424
21 Indian 301 150 0.498
22 French 298 111 0.372
23 Thai 294 127 0.432
24 Korean 288 84 0.292
25 Mediterranean 261 99 0.379
26 Seafood 202 58 0.287
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment