# MarcinMoskala/by_temp.py

## by_temp.py
from itertools import count, takewhile


def frange(start, stop, step):
    """Return a lazy arithmetic progression, like ``range`` for any numbers.

    Produces ``start``, ``start + step``, ``start + 2 * step``, ... for as
    long as the value lies strictly before ``stop`` in the direction of
    ``step``.

    Args:
        start: First value of the progression.
        stop: Exclusive bound of the progression.
        step: Non-zero increment; a negative step counts downwards.

    Returns:
        A lazy iterator over the progression.

    Raises:
        ValueError: If ``step`` is zero, because the progression would never
            reach ``stop``.
    """
    if step == 0:
        raise ValueError("frange() step must not be zero")

    if step > 0:
        def before_stop(value):
            return value < stop
    else:
        def before_stop(value):
            return value > stop

    # Multiply rather than accumulate: repeated float addition drifts (ten
    # additions of 0.1 give 0.9999999999999999), which can add a spurious
    # extra element just below ``stop``.
    return takewhile(before_stop, (start + index * step for index in count()))


def show_avg_hist(df, column, buckets=40):
    """Print the mean of ``df['crimes']`` per equal-width bucket of *column*.

    The bucketed range runs from the smallest to the largest value of
    *column* that occurs more than 20 times, so rare values do not stretch
    it. After a two-line header, one tab-separated row (``from-to``, mean of
    ``crimes``, number of matching rows) is printed per bucket; every ``.``
    in a row is replaced by ``,``.

    Buckets are half-open ``[from, to)`` except the last one, which also
    includes its upper edge, so each row inside the range is counted exactly
    once.

    Args:
        df: DataFrame containing *column* and a ``crimes`` column.
        column: Name of the numeric column to bucket by.
        buckets: Number of equal-width buckets to print.

    Raises:
        ValueError: If *buckets* is smaller than 1.
    """
    if buckets < 1:
        raise ValueError("buckets must be at least 1")

    values = df[column]
    counts = values.value_counts()
    counts = counts[counts > 20]  # Drop rare values

    print(column, "from-to\tmean\tdays", sep="\n")

    if counts.empty:
        # No value occurs often enough to define a range; header only.
        return

    low = float(counts.index.min())
    high = float(counts.index.max())

    if low == high:
        # A zero-width range cannot be subdivided; report one bucket instead
        # of stepping by zero forever.
        edges = [low, high]
    else:
        width = (high - low) / buckets
        # Edges are computed by multiplication to avoid accumulated float
        # drift; the final edge is pinned to the exact maximum.
        edges = [low + index * width for index in range(buckets)] + [high]

    last = len(edges) - 2
    for index, (lower, upper) in enumerate(zip(edges, edges[1:])):
        if index == last:
            chosen = (values >= lower) & (values <= upper)
        else:
            chosen = (values >= lower) & (values < upper)
        days = int(chosen.sum())
        mean = df.loc[chosen, "crimes"].mean()
        row = "{_from:.1f}-{_to:.1f}\t{mean:.1f}\t{days:d}".format(
            _from=lower, _to=upper, mean=mean, days=days
        )
        print(row.replace(".", ","))


def add_day_month_year(df):
    """Add date-part columns derived from the ``Date`` column, in place.

    The first ten characters of every ``Date`` value are split on ``/``;
    field 0 is stored as ``month``, field 1 as ``day_of_month`` and field 2
    as ``year`` (all kept as strings). The ten-character prefix itself is
    stored as ``date``.

    Args:
        df: DataFrame with a string ``Date`` column.

    Returns:
        The same DataFrame object, with the four columns added.
    """
    date_text = df["Date"].map(lambda raw: raw[:10])
    pieces = date_text.map(lambda text: text.split("/"))

    # Column order matters to callers inspecting df.columns, so the parts are
    # assigned in this fixed sequence.
    for target, position in (("day_of_month", 1), ("month", 0), ("year", 2)):
        df[target] = pieces.map(lambda fields, position=position: fields[position])

    df["date"] = date_text
    return df


from pandas import read_csv

# Load the crime records and derive the date-part columns from "Date".
crimes_df = read_csv("CrimeData.csv")
crimes_df = add_day_month_year(crimes_df)
# "year" is a string column, so this is a lexicographic comparison; it matches
# numeric order as long as every year has four digits.
crimes_df = crimes_df[crimes_df.year <= "2019"]

weather_df = read_csv("Weather.csv")
# Rebuild the weather date as MM/DD/YYYY so it matches the crime "date" key.
# The slices assume DATE starts with YYYY-MM-DD; the day is exactly the two
# characters at positions 8-9, so anything after it is not included.
weather_df["date"] = weather_df["DATE"].map(lambda x: x[5:7] + "/" + x[8:10] + "/" + x[0:4])

# Number of crime rows per calendar date.
crimes_count_df = crimes_df \
    .groupby(["date"]) \
    .size() \
    .to_frame("crimes") \
    .reset_index()

# Left join keeps every date that has crimes, even without weather data.
df = crimes_count_df.merge(weather_df, on="date", how='left')

show_avg_hist(df, column="TMAX", buckets=20)
	from itertools import count, takewhile


	def frange(start, stop, step):
	    """Return a lazy arithmetic progression, like ``range`` for any numbers.

	    Produces ``start``, ``start + step``, ``start + 2 * step``, ... for as
	    long as the value lies strictly before ``stop`` in the direction of
	    ``step``.

	    Args:
	        start: First value of the progression.
	        stop: Exclusive bound of the progression.
	        step: Non-zero increment; a negative step counts downwards.

	    Returns:
	        A lazy iterator over the progression.

	    Raises:
	        ValueError: If ``step`` is zero, because the progression would
	            never reach ``stop``.
	    """
	    if step == 0:
	        raise ValueError("frange() step must not be zero")

	    if step > 0:
	        def before_stop(value):
	            return value < stop
	    else:
	        def before_stop(value):
	            return value > stop

	    # Multiply rather than accumulate: repeated float addition drifts and
	    # can add a spurious extra element just below ``stop``.
	    return takewhile(before_stop, (start + index * step for index in count()))


	def show_avg_hist(df, column, buckets=40):
	    """Print the mean of ``df['crimes']`` per equal-width bucket of *column*.

	    The bucketed range runs from the smallest to the largest value of
	    *column* that occurs more than 20 times. After a two-line header, one
	    tab-separated row (``from-to``, mean of ``crimes``, number of matching
	    rows) is printed per bucket; every ``.`` in a row is replaced by ``,``.

	    Buckets are half-open ``[from, to)`` except the last one, which also
	    includes its upper edge, so each row inside the range is counted
	    exactly once.

	    Args:
	        df: DataFrame containing *column* and a ``crimes`` column.
	        column: Name of the numeric column to bucket by.
	        buckets: Number of equal-width buckets to print.

	    Raises:
	        ValueError: If *buckets* is smaller than 1.
	    """
	    if buckets < 1:
	        raise ValueError("buckets must be at least 1")

	    values = df[column]
	    counts = values.value_counts()
	    counts = counts[counts > 20]  # Drop rare values

	    print(column, "from-to\tmean\tdays", sep="\n")

	    if counts.empty:
	        # No value occurs often enough to define a range; header only.
	        return

	    low = float(counts.index.min())
	    high = float(counts.index.max())

	    if low == high:
	        # A zero-width range cannot be subdivided; report one bucket.
	        edges = [low, high]
	    else:
	        width = (high - low) / buckets
	        # The final edge is pinned to the exact maximum.
	        edges = [low + index * width for index in range(buckets)] + [high]

	    last = len(edges) - 2
	    for index, (lower, upper) in enumerate(zip(edges, edges[1:])):
	        if index == last:
	            chosen = (values >= lower) & (values <= upper)
	        else:
	            chosen = (values >= lower) & (values < upper)
	        days = int(chosen.sum())
	        mean = df.loc[chosen, "crimes"].mean()
	        row = "{_from:.1f}-{_to:.1f}\t{mean:.1f}\t{days:d}".format(
	            _from=lower, _to=upper, mean=mean, days=days
	        )
	        print(row.replace(".", ","))


	def add_day_month_year(df):
	    """Add date-part columns derived from the ``Date`` column, in place.

	    The first ten characters of every ``Date`` value are split on ``/``;
	    field 0 is stored as ``month``, field 1 as ``day_of_month`` and field 2
	    as ``year`` (all kept as strings). The ten-character prefix itself is
	    stored as ``date``.

	    Args:
	        df: DataFrame with a string ``Date`` column.

	    Returns:
	        The same DataFrame object, with the four columns added.
	    """
	    dates = df["Date"].map(lambda x: x[:10])
	    df["day_of_month"] = dates.map(lambda x: x.split("/")[1])
	    df["month"] = dates.map(lambda x: x.split("/")[0])
	    df["year"] = dates.map(lambda x: x.split("/")[2])
	    df["date"] = dates
	    return df


	from pandas import read_csv

	# Load the crime records and derive the date-part columns from "Date".
	crimes_df = read_csv("CrimeData.csv")
	crimes_df = add_day_month_year(crimes_df)
	# "year" is a string column, so this is a lexicographic comparison; it
	# matches numeric order as long as every year has four digits.
	crimes_df = crimes_df[crimes_df.year <= "2019"]

	weather_df = read_csv("Weather.csv")
	# Rebuild the weather date as MM/DD/YYYY so it matches the crime "date"
	# key. The slices assume DATE starts with YYYY-MM-DD; the day is exactly
	# the two characters at positions 8-9.
	weather_df["date"] = weather_df["DATE"].map(lambda x: x[5:7] + "/" + x[8:10] + "/" + x[0:4])

	# Number of crime rows per calendar date.
	crimes_count_df = crimes_df \
	    .groupby(["date"]) \
	    .size() \
	    .to_frame("crimes") \
	    .reset_index()

	# Left join keeps every date that has crimes, even without weather data.
	df = crimes_count_df.merge(weather_df, on="date", how='left')

	show_avg_hist(df, column="TMAX", buckets=20)