Skip to content

Instantly share code, notes, and snippets.

@aagnone3
Created October 30, 2020 00:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aagnone3/f654ff127a7319972f90423b6375daa0 to your computer and use it in GitHub Desktop.
Save aagnone3/f654ff127a7319972f90423b6375daa0 to your computer and use it in GitHub Desktop.
sf_crime_5.py
# Flag addresses written as a block ("... Block of ...") and addresses written
# with a "/" separator (presumably an intersection of two roads).
# Both checks are literal substring tests, so regex matching is disabled;
# na=False maps a missing address to False so the columns stay boolean.
train['IsOnBlock'] = train['Address'].str.contains(
    'block', case=False, regex=False, na=False)
train['IsAtIntersection'] = train['Address'].str.contains(
    '/', regex=False, na=False)
def clean_road(text: str) -> str:
    """Strip the "<number> Block of " prefix from an address string.

    Every occurrence of one or more digits followed by " Block of " or
    " block of " is removed; the rest of ``text`` is returned unchanged.

    Args:
        text: Address (or one part of a split address) to clean.

    Returns:
        ``text`` with each matched prefix removed.
    """
    return re.sub(r"[0-9]+ [bB]lock of ", "", text)
def make_counts(values):
    """Count how often each road name appears across address strings.

    Each value is split on " / " and every resulting part is passed through
    ``clean_road`` before being counted.

    Args:
        values: Iterable of address strings.

    Returns:
        A ``Counter`` mapping each cleaned road name to its number of
        occurrences.
    """
    counts = Counter()
    for value in values:
        # Counter.update consumes any iterable, so no intermediate list is
        # needed.
        counts.update(clean_road(part) for part in value.split(" / "))
    return counts
# Compute road counts, in preparation for the log road probability feature.
counts = make_counts(train["Address"])
common_roads = pd.Series(dict(counts.most_common(20)))

# Have a look at the most common roads in the data.
plt.figure(figsize=(10, 10))
with sns.axes_style("whitegrid"):
    # x and y are passed by keyword: from seaborn 0.12 onwards `data` is the
    # only argument accepted positionally.
    # Bar length is each road's share, in percent, of the top-20 total.
    ax = sns.barplot(
        x=(common_roads / common_roads.sum()) * 100,
        y=common_roads.index,
        orient='h',
        palette="Blues_r")
plt.title('Most Common Roads', fontdict={'fontsize': 16})
plt.xlabel('P(x)')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment