Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from itertools import count, takewhile
def frange(start, stop, step):
    """Lazily count from ``start`` towards ``stop`` in increments of ``step``.

    A ``range``-like helper that also accepts non-integer arguments. Each
    value is the previous one plus ``step``; iteration ends at the first
    value that is not strictly below ``stop``.

    Args:
        start: First value produced (only if it is below ``stop``).
        stop: Exclusive upper bound.
        step: Increment between consecutive values.

    Returns:
        An iterator over the generated values. It is empty when ``start``
        is not below ``stop``.

    Raises:
        ValueError: If ``start < stop`` and ``step <= 0``. The values would
            never reach ``stop``, so the iterator would be infinite.
    """
    # Fail fast instead of handing the caller an iterator that never ends.
    if start < stop and step <= 0:
        raise ValueError(
            "step must be positive when start < stop, got {!r}".format(step)
        )
    return takewhile(lambda x: x < stop, count(start, step))
def show_avg_hist(df, column, buckets=40, value_column="crimes", min_count=20):
    """Print a text histogram of ``column`` with a per-bucket mean.

    The bucket range runs from the smallest to the largest value of
    ``column`` that occurs more than ``min_count`` times, so rare values do
    not stretch the range. That range is split into ``buckets`` equal-width
    intervals. Every interval is half-open ``[from, to)`` except the last,
    which also includes its upper edge, so each row of ``df`` inside the
    range is counted exactly once.

    Output goes to stdout: the column name, a header line, then one
    tab-separated line per bucket with ``from-to``, the mean of
    ``value_column`` and the number of rows. Every ``.`` in a bucket line is
    replaced by ``,`` (decimal comma).

    Args:
        df: DataFrame containing ``column`` and ``value_column``.
        column: Name of the numeric column to bucket.
        buckets: Number of equal-width buckets (positive integer).
        value_column: Name of the column averaged within each bucket.
        min_count: A value of ``column`` must occur more than this many
            times to take part in determining the bucket range.

    Raises:
        ValueError: If ``buckets`` is less than 1.
    """
    if buckets < 1:
        raise ValueError("buckets must be a positive integer")

    values = df[column]
    counts = values.value_counts()
    counts = counts[counts > min_count]  # Drop rare values

    print(column, "from-to\tmean\tdays", sep="\n")
    if counts.empty:
        # No value is frequent enough to define a range; header only.
        return

    min_val = counts.index.min()
    max_val = counts.index.max()
    step = (max_val - min_val) / buckets
    if step == 0:
        # Only one distinct frequent value: a single degenerate bucket
        # [min_val, max_val] instead of many empty zero-width ones.
        buckets = 1

    for i in range(buckets):
        # Multiply rather than accumulate so float error does not build up.
        lower = min_val + i * step
        if i == buckets - 1:
            # Close the last bucket on the right so max_val itself is counted.
            upper = max_val
            chosen = (values >= lower) & (values <= upper)
        else:
            upper = min_val + (i + 1) * step
            chosen = (values >= lower) & (values < upper)
        days = int(chosen.sum())
        mean = df.loc[chosen, value_column].mean()
        line = "{_from:.1f}-{_to:.1f}\t{mean:.1f}\t{days:d}".format(
            _from=lower, _to=upper, mean=mean, days=days
        )
        print(line.replace(".", ","))
def add_day_month_year(df):
    """Add date-part columns derived from the ``Date`` column, in place.

    The first ten characters of each ``Date`` entry are taken as the date
    text and split on ``/``. Field 0 becomes ``month``, field 1 becomes
    ``day_of_month`` and field 2 becomes ``year`` (all kept as strings).
    The ten-character date text itself is stored in ``date``.

    Args:
        df: DataFrame with a string ``Date`` column; it is modified in place.

    Returns:
        The same ``df`` object, with the four columns added.
    """
    date_text = df["Date"].map(lambda stamp: stamp[:10])
    # Split every entry once and reuse the pieces for all three columns.
    pieces = date_text.map(lambda text: text.split("/"))
    for position, name in ((1, "day_of_month"), (0, "month"), (2, "year")):
        df[name] = pieces.map(lambda fields, position=position: fields[position])
    df["date"] = date_text
    return df
from pandas import read_csv
# Load the crime records and derive the day/month/year/date string columns.
crimes_df = read_csv("CrimeData.csv")
crimes_df = add_day_month_year(crimes_df)
# "year" holds strings, so this is a lexicographic comparison; it matches
# numeric order as long as every year has four digits.
crimes_df = crimes_df[crimes_df.year <= "2019"]

weather_df = read_csv("Weather.csv")
# Re-key weather rows as "MM/DD/YYYY" to match the crime "date" column.
# Assumes DATE starts with "YYYY-MM-DD" (TODO confirm against the file).
# The day is exactly two characters (8:10); a wider slice would pick up any
# trailing character and break the join.
weather_df["date"] = weather_df["DATE"].map(
    lambda x: x[5:7] + "/" + x[8:10] + "/" + x[0:4]
)

# One row per calendar date with the number of crime records on that date.
crimes_count_df = (
    crimes_df
    .groupby(["date"])
    .size()
    .to_frame("crimes")
    .reset_index()
)

# Left join keeps every crime date even when no weather row matches it.
df = crimes_count_df.merge(weather_df, on="date", how='left')
show_avg_hist(df, column="TMAX", buckets=20)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment