# danielsuo/nyt_vote.py

## nyt_vote.py
"""
Predicts final votes by taking current votes, % counted, and current margin.

Two pro-Trump biases:

1. Does not factor in mail-in bias
2. We strip off the ">" in ">98%", counting those counties as exactly 98% reported. Many more of the small counties (which are R) have reported.
"""
import bs4
import requests
import pandas as pd

def _parse_county_row(row, state, hi, lo):
    """Extract one county's figures from a ``<tr>`` of the county table.

    Returns a dict holding the county name, the current leader, the margin and
    the reported share (both as fractions) and the current vote total, or
    ``None`` when the row lacks an expected cell or holds unparseable text.
    """
    try:
        county = row.find_all("th", class_="e-name")[0].decode_contents().strip()

        margin_cell = row.find_all("td", class_="e-margin")[0]
        # Must split into exactly two tokens: the leader and the margin.
        lead, margin_text = margin_cell.find_all("span")[0].decode_contents().split()
        # Drop the margin's first character (presumably a "+" sign).
        margin = float(margin_text[1:]) / 100

        reported_cell = row.find_all("td", class_="e-est-pct")[0]
        reported_text = reported_cell.find_all("div")[0].decode_contents()
        # decode_contents() returns markup, so ">" and "<" arrive entity-escaped.
        if "&gt;" in reported_text:
            reported = hi
        elif "&lt;" in reported_text:
            reported = lo
        else:
            reported = float(reported_text.replace("%", "")) / 100

        total_cell = row.find_all("td", class_="e-total")[0]
        current = int(total_cell.decode_contents().strip().replace(",", ""))
    except (IndexError, ValueError):
        # Missing cell, unexpected token count, or non-numeric text: skip row.
        return None

    return {
        "state": state,
        "county": county,
        "lead": lead,
        "margin": margin,
        "reported": reported,
        "current": current,
    }


def _project_totals(records):
    """Scale each county to a full count and sum projected votes per candidate."""
    df = pd.DataFrame(records)

    reported = df["reported"]
    # A county with nothing reported gives no basis for scaling; keep its
    # current count rather than dividing by zero.
    projected = (df["current"] / reported.where(reported > 0)).fillna(df["current"])
    df["remaining"] = (projected - df["current"]).astype(int)
    df["final"] = df["current"] + df["remaining"]

    # With a two-way margin m, the leader holds (1 + m) / 2 of the vote.
    leader_share = (1 + df["margin"]) / 2
    leader_votes = df["final"] * leader_share
    trailer_votes = df["final"] * (1 - leader_share)

    trump_leads = df["lead"] == "Trump"
    biden_leads = df["lead"] == "Biden"
    # Rows led by anyone else contribute nothing to either candidate.
    df["Trump"] = leader_votes.where(trump_leads, trailer_votes.where(biden_leads, 0.0))
    df["Biden"] = leader_votes.where(biden_leads, trailer_votes.where(trump_leads, 0.0))

    return int(df["Trump"].sum()), int(df["Biden"].sum())


def predict(state, hi=0.98, lo=0.01, timeout=30):
    """Project final Trump/Biden vote totals for a state from NYT county results.

    Downloads the New York Times 2020 presidential results page for ``state``,
    reads each row of its county table, scales every county's current vote
    count up by the share reported so far, and splits the projected total
    between the two candidates according to the county's current margin.

    Args:
        state: State identifier as it appears in the results page URL.
        hi: Reported fraction to assume when the page shows a ">" value.
        lo: Reported fraction to assume when the page shows a "<" value.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        A ``(trump, biden, margin)`` tuple: the projected vote totals as ints
        and ``(biden - trump) / (biden + trump)``, so a positive margin means
        Biden is projected ahead. The margin is 0.0 when no votes are projected.

    Raises:
        IndexError: If the page has no table with class ``e-county-table``.
        ValueError: If no county row could be parsed.
    """
    url = (
        "https://www.nytimes.com/interactive/2020/11/03/us/elections/"
        f"results-{state}-president.html"
    )
    page = requests.get(url, timeout=timeout).content.decode("utf-8")
    # Name the parser explicitly so results do not depend on what is installed.
    soup = bs4.BeautifulSoup(page, "html.parser")
    table = soup.find_all("table", class_="e-county-table")[0]

    records = []
    for row in table.find_all("tr"):
        # Rows without any <td> (e.g. header rows) carry no county data.
        if not row.find_all("td"):
            continue
        record = _parse_county_row(row, state, hi, lo)
        if record is not None:
            records.append(record)

    if not records:
        raise ValueError(f"no county results could be parsed for state {state!r}")

    trump, biden = _project_totals(records)
    total = biden + trump
    margin = (biden - trump) / total if total else 0.0

    return trump, biden, margin
	"""
	Predicts final votes by taking current votes, % counted, and current margin.

	Two pro-Trump biases:

	1. Does not factor in mail-in bias
	2. We strip off the ">" in ">98%". Many more of the small counties (which are R) have reported
	"""
	import bs4
	import requests
	import pandas as pd

	def _parse_county_row(row, state, hi, lo):
		"""Extract one county's figures from a ``<tr>`` of the county table.

		Returns a dict holding the county name, the current leader, the margin and
		the reported share (both as fractions) and the current vote total, or
		``None`` when the row lacks an expected cell or holds unparseable text.
		"""
		try:
			county = row.find_all("th", class_="e-name")[0].decode_contents().strip()

			margin_cell = row.find_all("td", class_="e-margin")[0]
			# Must split into exactly two tokens: the leader and the margin.
			lead, margin_text = margin_cell.find_all("span")[0].decode_contents().split()
			# Drop the margin's first character (presumably a "+" sign).
			margin = float(margin_text[1:]) / 100

			reported_cell = row.find_all("td", class_="e-est-pct")[0]
			reported_text = reported_cell.find_all("div")[0].decode_contents()
			# decode_contents() returns markup, so ">" and "<" arrive
			# entity-escaped; testing for the bare characters never matches.
			if "&gt;" in reported_text:
				reported = hi
			elif "&lt;" in reported_text:
				reported = lo
			else:
				reported = float(reported_text.replace("%", "")) / 100

			total_cell = row.find_all("td", class_="e-total")[0]
			current = int(total_cell.decode_contents().strip().replace(",", ""))
		except (IndexError, ValueError):
			# Missing cell, unexpected token count, or non-numeric text: skip row.
			return None

		return {
			"state": state,
			"county": county,
			"lead": lead,
			"margin": margin,
			"reported": reported,
			"current": current,
		}


	def _project_totals(records):
		"""Scale each county to a full count and sum projected votes per candidate."""
		df = pd.DataFrame(records)

		reported = df["reported"]
		# A county with nothing reported gives no basis for scaling; keep its
		# current count rather than dividing by zero.
		projected = (df["current"] / reported.where(reported > 0)).fillna(df["current"])
		df["remaining"] = (projected - df["current"]).astype(int)
		df["final"] = df["current"] + df["remaining"]

		# With a two-way margin m, the leader holds (1 + m) / 2 of the vote.
		leader_share = (1 + df["margin"]) / 2
		leader_votes = df["final"] * leader_share
		trailer_votes = df["final"] * (1 - leader_share)

		trump_leads = df["lead"] == "Trump"
		biden_leads = df["lead"] == "Biden"
		# Rows led by anyone else contribute nothing to either candidate.
		df["Trump"] = leader_votes.where(trump_leads, trailer_votes.where(biden_leads, 0.0))
		df["Biden"] = leader_votes.where(biden_leads, trailer_votes.where(trump_leads, 0.0))

		return int(df["Trump"].sum()), int(df["Biden"].sum())


	def predict(state, hi=0.98, lo=0.01, timeout=30):
		"""Project final Trump/Biden vote totals for a state from NYT county results.

		Downloads the New York Times 2020 presidential results page for ``state``,
		reads each row of its county table, scales every county's current vote
		count up by the share reported so far, and splits the projected total
		between the two candidates according to the county's current margin.

		Args:
			state: State identifier as it appears in the results page URL.
			hi: Reported fraction to assume when the page shows a ">" value.
			lo: Reported fraction to assume when the page shows a "<" value.
			timeout: Seconds to wait for the HTTP response.

		Returns:
			A ``(trump, biden, margin)`` tuple: the projected vote totals as ints
			and ``(biden - trump) / (biden + trump)``, so a positive margin means
			Biden is projected ahead. The margin is 0.0 when no votes are projected.

		Raises:
			IndexError: If the page has no table with class ``e-county-table``.
			ValueError: If no county row could be parsed.
		"""
		url = (
			"https://www.nytimes.com/interactive/2020/11/03/us/elections/"
			f"results-{state}-president.html"
		)
		page = requests.get(url, timeout=timeout).content.decode("utf-8")
		# Name the parser explicitly so results do not depend on what is installed.
		soup = bs4.BeautifulSoup(page, "html.parser")
		table = soup.find_all("table", class_="e-county-table")[0]

		records = []
		for row in table.find_all("tr"):
			# Rows without any <td> (e.g. header rows) carry no county data.
			if not row.find_all("td"):
				continue
			record = _parse_county_row(row, state, hi, lo)
			if record is not None:
				records.append(record)

		if not records:
			raise ValueError(f"no county results could be parsed for state {state!r}")

		trump, biden = _project_totals(records)
		total = biden + trump
		margin = (biden - trump) / total if total else 0.0

		return trump, biden, margin