Last active
October 23, 2021 15:26
-
-
Save pierrelafortune/c1c201675918f3981feeffe4403b29d9 to your computer and use it in GitHub Desktop.
nyc restaurant violations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Data: https://data.cityofnewyork.us/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/43nn-pn8j
# Question: which cuisines are most likely to be cited for critical
# violations such as live mice?
library(tidyverse)
library(scales)

# Raw inspection results. The path is relative to the current working
# directory, so run the script from the folder that contains `Downloads/`.
nyc_restaurants <- read_csv("Downloads/nyc_restaurants.csv")
# Gradable inspections ----
# A row is kept when any one of the following holds:
#   1. it is a re-inspection (cycle or pre-permit);
#   2. it is an initial inspection (cycle or pre-permit) with SCORE <= 13;
#   3. it is a reopening inspection whose GRADE is A, B, C, P or Z.
# In R `&` binds tighter than `|`, so the GRADE test constrains only the
# reopening branch; the parentheses below spell that grouping out.
# NOTE(review): if GRADE was meant to constrain all three branches, wrap the
# whole `|` chain in parentheses before `& GRADE %in% ...` -- confirm against
# the dataset's documentation before changing, as it alters every result.
reinspection_types <- c(
  "Cycle Inspection / Re-inspection",
  "Pre-permit (Operational) / Re-inspection"
)
initial_types <- c(
  "Cycle Inspection / Initial Inspection",
  "Pre-permit (Operational) / Initial Inspection"
)
reopening_types <- c(
  "Pre-permit (Operational) / Reopening Inspection",
  "Cycle Inspection / Reopening Inspection"
)

ny <- nyc_restaurants %>%
  filter(
    `INSPECTION TYPE` %in% reinspection_types |
      (`INSPECTION TYPE` %in% initial_types & SCORE <= 13) |
      (`INSPECTION TYPE` %in% reopening_types &
        GRADE %in% c("A", "B", "C", "P", "Z"))
  )
# Cuisine ----
# One row per distinct (CAMIS, cuisine) pair, with the cuisine column renamed
# to CUISINE. A CAMIS that appears with more than one cuisine description
# yields more than one row here.
cuisines <- ny %>%
  distinct(CAMIS, CUISINE = `CUISINE DESCRIPTION`)
# Rodent violations ----
# Exact violation descriptions that count as a rodent violation.
rodent_violations <- c(
  "Evidence of mice or live mice present in facility's food and/or non-food areas.",
  "Evidence of rats or live rats present in facility's food and/or non-food areas."
)

# One row per restaurant (CAMIS) with 0/1 indicator columns.
v <- ny %>%
  group_by(CAMIS) %>%
  summarise(
    # Number of rows of `ny` for this CAMIS.
    # NOTE(review): if each row is one cited violation rather than one
    # inspection, this is a row count, not an inspection count -- confirm.
    inspections = n(),
    # 1 if any row is flagged 'Critical'. A missing CRITICAL FLAG in the group
    # propagates as NA through ifelse() and max().
    critical_flag_inspections = max(
      ifelse(`CRITICAL FLAG` == "Critical", 1, 0)
    ),
    # 1 if any row carries a mice or rats violation; %in% never returns NA.
    mice_rats = max(
      ifelse(`VIOLATION DESCRIPTION` %in% rodent_violations, 1, 0)
    )
  )
# Join to cuisines ----
# CAMIS is the only column shared by `v` and `cuisines`; naming it explicitly
# avoids the implicit natural join and its "Joining with `by = ...`" message.
violations <- v %>%
  inner_join(cuisines, by = "CAMIS")

# Per-cuisine share of restaurants with at least one rodent violation.
agg <- violations %>%
  group_by(CUISINE) %>%
  summarise(
    restaurants = n(),
    with_rodents = sum(mice_rats),
    pct_with_rodents = with_rodents / restaurants
  ) %>%
  filter(restaurants > 200) %>% # Strictly more than 200 restaurants inspected
  arrange(desc(pct_with_rodents))
# Average ----
# Overall share of restaurants with a rodent violation, across all cuisines.
# Uses `v`, the per-restaurant table that carries `mice_rats`; the previous
# reference to `v2` pointed at an object this script never defines.
v %>%
  summarise(
    restaurants = n(),
    with_rodents = sum(mice_rats),
    pct_with_rodents = with_rodents / restaurants
  )
# Plot ----
# Print the cuisine names, e.g. to pick labels worth shortening below.
sort(unique(cuisines$CUISINE))

agg %>%
  # Shorten two long cuisine labels.
  mutate(CUISINE = recode(CUISINE,
    `Bakery Products/Desserts` = "Bakery/Desserts",
    `Juice, Smoothies, Fruit Salads` = "Juice, Smoothie"
  )) %>%
  ggplot(aes(
    reorder(CUISINE, -pct_with_rodents),
    pct_with_rodents,
    fill = pct_with_rodents,
    label = round(pct_with_rodents * 100, 1)
  )) +
  geom_col(alpha = 0.8) +
  geom_text(hjust = -0.2, size = 3) +
  # Reference line for the overall rate. The value is hard-coded.
  # NOTE(review): presumably copied from the "Average" step -- update it, and
  # the "Avg: 35%" label below, whenever the data changes.
  geom_hline(yintercept = 0.349, linetype = "dashed", color = "grey50") +
  scale_fill_gradient(low = "dark green", high = "dark red") +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  # The label is anchored at the "Donuts" bar, so that cuisine must be present
  # in `agg` for the annotation to land where intended.
  annotate("text", x = "Donuts", y = 0.38, label = "Avg: 35%",
           size = 3, color = "grey20") +
  guides(fill = "none") +
  coord_flip() +
  theme_classic() +
  labs(
    x = NULL,
    y = "Percent of restaurants with a rodent violation",
    title = "Which NYC restaurants are most likely to have mice or rat violations?",
    subtitle = "NYC restaurant inspections with 'Evidence of mice..' or 'Evidence of rats..' violations",
    caption = "NYC OpenData. Cuisines with 200+ restaurants in NYC",
    fill = NULL
  )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> agg %>% arrange(-restaurants) %>% print(n=50) | |
# A tibble: 26 × 4 | |
CUISINE restaurants with_rodents pct_with_rodents | |
<chr> <int> <dbl> <dbl> | |
1 American 5006 1585 0.317 | |
2 Chinese 2211 954 0.431 | |
3 Coffee/Tea 1758 404 0.230 | |
4 Pizza 1540 612 0.397 | |
5 Italian 941 353 0.375 | |
6 Japanese 835 303 0.363 | |
7 Latin American 808 341 0.422 | |
8 Mexican 770 294 0.382 | |
9 Bakery Products/Desserts 739 283 0.383 | |
10 Caribbean 678 356 0.525 | |
11 Sandwiches 626 209 0.334 | |
12 Chicken 619 215 0.347 | |
13 Donuts 584 91 0.156 | |
14 Spanish 578 248 0.429 | |
15 Hamburgers 504 100 0.198 | |
16 Juice, Smoothies, Fruit Salads 404 84 0.208 | |
17 Asian/Asian Fusion 388 128 0.330 | |
18 Tex-Mex 369 85 0.230 | |
19 Frozen Desserts 334 101 0.302 | |
20 Jewish/Kosher 302 128 0.424 | |
21 Indian 301 150 0.498 | |
22 French 298 111 0.372 | |
23 Thai 294 127 0.432 | |
24 Korean 288 84 0.292 | |
25 Mediterranean 261 99 0.379 | |
26 Seafood 202 58 0.287 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment