Skip to content

Instantly share code, notes, and snippets.

Created October 30, 2020 00:15
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save aagnone3/f654ff127a7319972f90423b6375daa0 to your computer and use it in GitHub Desktop.
# Derive two flag columns from the raw address text: whether it contains
# "block" (matched case-insensitively) and whether it contains a "/".
train['IsOnBlock'], train['IsAtIntersection'] = (
    train['Address'].str.contains(needle, case=False)
    for needle in ('block', '/')
)
def clean_road(text: str) -> str:
    """Remove every "<digits> Block of " prefix from an address string.

    Both "Block" and "block" spellings are matched; any other text is
    returned unchanged.

    Args:
        text: Address text, e.g. ``"800 Block of BRYANT ST"``.

    Returns:
        The text with each "<digits> [Bb]lock of " occurrence deleted,
        e.g. ``"BRYANT ST"``.
    """
    return re.sub(r"[0-9]+ [bB]lock of ", "", text)
def make_counts(values):
    """Count how often each road name appears across address strings.

    Each address is split on " / " (the separator between the two roads of
    an intersection), every part is passed through ``clean_road``, and the
    resulting names are tallied.

    Args:
        values: Iterable of address strings.

    Returns:
        A ``Counter`` mapping cleaned road name to number of occurrences.
    """
    counts = Counter()
    for value in values:
        # Tally the cleaned names; without this update the counter would
        # be returned empty no matter what was passed in.
        counts.update(clean_road(road) for road in value.split(" / "))
    return counts
# compute road counts, in preparation of the log road probability feature
counts = make_counts(train["Address"])
common_roads = pd.Series(dict(counts.most_common(20)))

# have a look at the most common roads in the data
plt.figure(figsize=(10, 10))
with sns.axes_style("whitegrid"):
    # Bar length is each road's share (in percent) of the occurrences
    # among the 20 most common roads, not among all roads.
    # NOTE(review): the arguments after the first were missing from the
    # source; the road-name axis and horizontal orientation below are a
    # reconstruction -- confirm against the intended figure.
    ax = sns.barplot(
        x=(common_roads / common_roads.sum()) * 100,
        y=common_roads.index,
        orient="h",
    )
plt.title('Most Common Roads', fontdict={'fontsize': 16})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment