Last active
November 6, 2020 16:20
-
-
Save danielsuo/d68be7aec460738f4d182bc7e5982456 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Predicts final votes by taking current votes, % counted, and current margin. | |
Two pro-Trump biases: | |
1. Does not factor in mail-in bias | |
2. We strip off the ">" in ">98%". Many more of the small counties (which are R) have reported | |
""" | |
import bs4 | |
import requests | |
import pandas as pd | |
def _parse_county_row(row, state, hi, lo):
    """Extract one county's figures from a results-table ``<tr>`` element.

    Args:
        row: A BeautifulSoup ``<tr>`` tag from the county table.
        state: State slug, copied into the result unchanged.
        hi: Fraction reported to assume when the cell text contains ``>``.
        lo: Fraction reported to assume when the cell text contains ``<``.

    Returns:
        A dict with the keys ``state``, ``county``, ``lead``, ``margin``,
        ``reported`` and ``current``.

    Raises:
        IndexError: An expected cell (name, margin, est-pct, total) is missing.
        ValueError: A cell's text does not have the expected shape or does not
            convert to a number.
    """
    county = row.find_all("th", class_="e-name")[0].decode_contents().strip()

    margin_cell = row.find_all("td", class_="e-margin")[0]
    # The span text must split into exactly two tokens: leader and margin.
    lead, margin_text = margin_cell.find_all("span")[0].decode_contents().split()
    # Drop the token's first character, then turn the percentage into a fraction.
    margin = float(margin_text[1:]) / 100

    reported_cell = row.find_all("td", class_="e-est-pct")[0]
    reported_text = reported_cell.find_all("div")[0].decode_contents()
    if ">" in reported_text:
        reported = hi
    elif "<" in reported_text:
        reported = lo
    else:
        reported = float(reported_text.replace("%", "")) / 100

    total_text = row.find_all("td", class_="e-total")[0].decode_contents()
    current = int(total_text.strip().replace(",", ""))

    return {
        "state": state,
        "county": county,
        "lead": lead,
        "margin": margin,
        "reported": reported,
        "current": current,
    }


def predict(state, hi=0.98, lo=0.01):
    """Project final Trump and Biden vote totals for ``state``.

    Downloads the NYT county results page for the state, scales every county's
    current vote count up by its reported fraction, and splits the projected
    total between the two candidates using the county's current margin.

    Args:
        state: State slug inserted into the results URL.
        hi: Fraction reported to assume for cells containing ``>``.
        lo: Fraction reported to assume for cells containing ``<``.

    Returns:
        A tuple ``(trump, biden, margin)``: the two projected totals as ints
        and ``(biden - trump) / (biden + trump)``, or ``0.0`` for the margin
        when both totals are zero.

    Raises:
        IndexError: The page contains no ``e-county-table`` table.
        ValueError: The table was found but no county row could be parsed.
    """
    url = f"https://www.nytimes.com/interactive/2020/11/03/us/elections/results-{state}-president.html"
    # Without a timeout, requests can wait on an unresponsive server forever.
    response = requests.get(url, timeout=30)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = bs4.BeautifulSoup(response.content.decode("utf-8"), "html.parser")
    table = soup.find_all("table", class_="e-county-table")[0]

    counties = []
    for row in table.find_all("tr"):
        # Rows without any <td> cell carry no county data.
        if not row.find_all("td"):
            continue
        try:
            counties.append(_parse_county_row(row, state, hi, lo))
        except (IndexError, ValueError):
            # Best effort: skip rows that lack a cell or do not parse.
            continue

    if not counties:
        raise ValueError(f"no county rows could be parsed for state {state!r}")

    df = pd.DataFrame(counties)

    # A county with nothing reported cannot be extrapolated; dividing by its
    # zero fraction would give inf/NaN and break the integer cast. Using 1.0
    # there leaves the county at its current count.
    reported = df["reported"].where(df["reported"] > 0, 1.0)
    df["remaining"] = (df["current"] / reported - df["current"]).astype(int)
    df["final"] = df["current"] + df["remaining"]

    # A lead of `margin` means the leader holds (1 + margin) / 2 of the votes.
    leader_share = (1 + df["margin"]) / 2
    leader_votes = df["final"] * leader_share
    trailer_votes = df["final"] * (1 - leader_share)

    trump_leads = df["lead"] == "Trump"
    biden_leads = df["lead"] == "Biden"
    # Rows led by neither candidate contribute nothing to either total.
    df["Trump"] = leader_votes.where(trump_leads, trailer_votes.where(biden_leads, 0.0))
    df["Biden"] = leader_votes.where(biden_leads, trailer_votes.where(trump_leads, 0.0))

    trump = int(df["Trump"].sum())
    biden = int(df["Biden"].sum())
    total = biden + trump
    margin = (biden - trump) / total if total else 0.0
    return trump, biden, margin
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment