Skip to content

Instantly share code, notes, and snippets.

@dgootman
Created February 20, 2024 17:18
Show Gist options
  • Save dgootman/6339257b99b18f6faec2dc5878026617 to your computer and use it in GitHub Desktop.
Save dgootman/6339257b99b18f6faec2dc5878026617 to your computer and use it in GitHub Desktop.
Streamlit visualization of Vancouver Crime Data
from io import BytesIO
from zipfile import ZipFile
import pandas as pd
import requests
import streamlit as st
from colorhash import ColorHash
from pyproj import Proj
# Name of the column that holds the timestamp assembled from the
# separate date/time component columns.
DATE_COLUMN = "date"

# Page heading shown at the top of the Streamlit app.
st.title("Vancouver Crime Data")
@st.cache_data
def load_data():
    """Download the crime-data zip archive and return its CSV as a DataFrame.

    The archive is fetched over HTTP, kept in memory, and the CSV member
    inside it is parsed with pandas. The function is wrapped in
    ``st.cache_data``.

    Returns:
        pandas.DataFrame: the parsed contents of the CSV inside the archive.

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
        requests.Timeout: if connecting or reading exceeds the timeout.
    """
    url = "https://geodash.vpd.ca/opendata/crimedata_download/AllNeighbourhoods_AllYears/crimedata_csv_AllNeighbourhoods_AllYears.zip"
    # (connect, read) timeouts in seconds, so a stalled server cannot hang
    # the app forever. No stream=True: the whole body is read via .content
    # right away, so streaming would gain nothing.
    with requests.get(url, timeout=(10, 120)) as r:
        # Fail with a clear HTTP error rather than passing an error page to
        # ZipFile, which would surface as a confusing BadZipFile.
        r.raise_for_status()
        with ZipFile(BytesIO(r.content)) as z:
            with z.open("crimedata_csv_AllNeighbourhoods_AllYears.csv") as f:
                return pd.read_csv(f)
# Placeholder text element; its message is replaced once loading is done.
data_load_state = st.text("Loading data...")
data = load_data()

# Assemble one timestamp per row from the separate component columns.
data[DATE_COLUMN] = pd.to_datetime(data[["YEAR", "MONTH", "DAY", "HOUR", "MINUTE"]])

# Projection used to turn the X/Y columns into longitude/latitude
# (UTM zone 10, WGS84, metres, per the definition string).
p = Proj("+proj=utm +zone=10 +datum=WGS84 +units=m +no_defs +type=crs")

# Treat a coordinate of 0 as missing. The masked column is assigned back
# explicitly: calling an in-place method on `data.X` is chained assignment,
# which pandas warns about and which has no effect under Copy-on-Write.
data["X"] = data["X"].mask(data["X"] == 0)
data["Y"] = data["Y"].mask(data["Y"] == 0)

# inverse=True converts projected X/Y back to (lon, lat).
data["lon"], data["lat"] = p(data["X"], data["Y"], inverse=True, errcheck=True)

data_load_state.text("Done! (using st.cache_data)")
# Optional raw-table view, rendered only while the checkbox is ticked.
show_raw = st.checkbox("Show raw data")
if show_raw:
    st.subheader("Raw data")
    st.write(data)

# The slider's bounds and its initial selection both span every year present.
first_year = data["YEAR"].min()
last_year = data["YEAR"].max()
year_range = st.slider("Year range", first_year, last_year, (first_year, last_year))

crime_types = st.multiselect("Types of crimes", data["TYPE"].unique())

# Keep rows whose year lies in the selected range; when at least one crime
# type has been picked, additionally require the row's type to be among them.
# An empty selection therefore means "all types".
row_mask = data["YEAR"].between(*year_range)
if crime_types:
    row_mask = row_mask & data["TYPE"].isin(crime_types)
filtered_data = data[row_mask]
# Number of filtered incidents for each value of the HOUR column.
st.subheader("Crimes by hour")
hourly_counts = filtered_data["HOUR"].value_counts()
st.bar_chart(hourly_counts)

# Number of filtered incidents for each value of the YEAR column.
st.subheader("Crimes by year")
yearly_counts = filtered_data["YEAR"].value_counts()
st.bar_chart(yearly_counts)

# Each distinct value of the colour column gets the hex string that
# ColorHash produces for it; the same mapping is reused by later charts.
COLOR_COLUMN = "TYPE"
COLOR_MAP = {
    label: ColorHash(label).hex for label in filtered_data[COLOR_COLUMN].unique()
}

# Per-type counts, with a colour column looked up from the index labels.
# y="count" refers to the column that value_counts().to_frame() yields.
st.subheader("Crimes by type")
type_data = (
    filtered_data["TYPE"]
    .value_counts()
    .to_frame()
    .assign(color=lambda counts: counts.index.map(COLOR_MAP))
)
st.bar_chart(type_data, y="count", color="color")
# Only rows with both coordinates present can be drawn on the map.
has_coords = filtered_data["lat"].notna() & filtered_data["lon"].notna()
located_rows = filtered_data[has_coords]

# Build the colour column with .assign(), which returns a new DataFrame.
# Writing `map_data["color"] = ...` into a boolean-indexed slice of
# `filtered_data` triggers pandas' SettingWithCopyWarning.
map_data = located_rows.assign(color=located_rows[COLOR_COLUMN].map(COLOR_MAP))

st.subheader("Map of crimes")
# Collapse to one row per distinct (lat, lon, color) combination; the
# resulting "count" column is used as the marker size.
st.map(
    map_data.value_counts(["lat", "lon", "color"]).to_frame().reset_index(),
    size="count",
    color="color",
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment