Skip to content

Instantly share code, notes, and snippets.

@danielsuo
Last active November 6, 2020 16:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danielsuo/d68be7aec460738f4d182bc7e5982456 to your computer and use it in GitHub Desktop.
Save danielsuo/d68be7aec460738f4d182bc7e5982456 to your computer and use it in GitHub Desktop.
"""
Predicts each candidate's final vote total from the current votes, the % of votes counted, and the current margin.
Two pro-Trump biases:
1. Does not factor in mail-in bias.
2. We strip off the ">" in ">98%", i.e. treat it as exactly 98%. Many more of the small counties (which are R) have reported.
"""
import bs4
import requests
import pandas as pd
def _parse_county_row(row, state, hi, lo):
    """Extract one county's figures from a ``<tr>`` of the county table.

    Returns a dict with keys ``state``, ``county``, ``lead``, ``margin``
    (fraction), ``reported`` (fraction) and ``current`` (int votes counted).

    Raises:
        IndexError: an expected cell (name, margin, est-pct, total) is missing.
        ValueError: a cell's text does not have the expected shape, e.g. the
            margin is not "<leader> <sign><number>" or a number fails to parse.
    """
    county = row.find_all("th", class_="e-name")[0].decode_contents().strip()

    margin_cell = row.find_all("td", class_="e-margin")[0]
    # Cell text is split into exactly two tokens: leader name and a margin
    # whose first character (the sign) is dropped before parsing.
    lead, margin_text = margin_cell.find_all("span")[0].decode_contents().split()
    margin = float(margin_text[1:]) / 100

    reported_text = (
        row.find_all("td", class_="e-est-pct")[0].find_all("div")[0].decode_contents()
    )
    if ">" in reported_text:
        # e.g. ">98%": the exact share is unknown, fall back to the upper bound.
        reported = hi
    elif "<" in reported_text:
        # e.g. "<1%": the exact share is unknown, fall back to the lower bound.
        reported = lo
    else:
        reported = float(reported_text.replace("%", "")) / 100

    total_text = row.find_all("td", class_="e-total")[0].decode_contents()
    current = int(total_text.strip().replace(",", ""))

    return {
        "state": state,
        "county": county,
        "lead": lead,
        "margin": margin,
        "reported": reported,
        "current": current,
    }


def predict(state: str, hi: float = 0.98, lo: float = 0.01):
    """Project final Trump/Biden vote totals for ``state`` from county results.

    Downloads the NYT 2020 presidential results page for ``state``, reads the
    county table, scales each county's current vote count up by its reported
    share, and splits the projected total between the two candidates using
    the county's current margin.

    Args:
        state: State slug interpolated into the results URL.
        hi: Reported fraction to assume when the page shows a ">" value.
        lo: Reported fraction to assume when the page shows a "<" value.

    Returns:
        ``(trump, biden, margin)`` where ``trump`` and ``biden`` are projected
        vote totals (ints) and ``margin`` is ``(biden - trump) / (biden + trump)``
        (``0.0`` when no votes could be projected).

    Raises:
        requests.HTTPError: the results page returned an error status.
        IndexError: the page contains no county table.
    """
    url = (
        "https://www.nytimes.com/interactive/2020/11/03/us/elections/"
        f"results-{state}-president.html"
    )
    # A timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    soup = bs4.BeautifulSoup(response.content.decode("utf-8"), "html.parser")

    tables = soup.find_all("table", class_="e-county-table")
    if not tables:
        raise IndexError(f"no county table found on results page for {state!r}")

    res = []
    for row in tables[0].find_all("tr"):
        # Header rows carry no <td> cells.
        if not row.find_all("td"):
            continue
        try:
            record = _parse_county_row(row, state, hi, lo)
        except (IndexError, ValueError):
            # Best-effort scrape: skip rows that do not match the expected layout.
            continue
        # Nothing can be extrapolated from a county with no reported share
        # (and dividing by it would produce inf/NaN below).
        if record["reported"] <= 0:
            continue
        res.append(record)

    if not res:
        return 0, 0, 0.0

    df = pd.DataFrame(res)
    # Truncate toward zero, as int() would, so projections stay whole votes.
    df["remaining"] = (df["current"] / df["reported"] - df["current"]).astype(int)
    df["final"] = df["current"] + df["remaining"]
    # Float columns: the projected shares assigned below are fractional.
    df["Trump"] = 0.0
    df["Biden"] = 0.0

    # A lead of m means the leader holds (1 + m) / 2 of the two-way vote.
    # Rows whose leader is neither name keep zero for both candidates.
    for leader, trailer in (("Trump", "Biden"), ("Biden", "Trump")):
        mask = df["lead"] == leader
        share = (1 + df.loc[mask, "margin"]) / 2
        df.loc[mask, leader] = df.loc[mask, "final"] * share
        df.loc[mask, trailer] = df.loc[mask, "final"] * (1 - share)

    trump = int(df["Trump"].sum())
    biden = int(df["Biden"].sum())
    total = biden + trump
    margin = (biden - trump) / total if total else 0.0
    return trump, biden, margin
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment