# Source: GitHub gist XertroV/c3dc349dd86fa22c136661ab721a4ede
# Author: @XertroV -- created November 14, 2020 00:21
# (GitHub page navigation text from the web scrape has been dropped; it is not part of the script.)
import json
from collections import Counter, defaultdict
import requests
import sys
import matplotlib.pyplot as plt
import csv
import io
def loads_and_parse(nums):
    """Decode a JSON array of number strings into a list of ints.

    Each element is expected to be a string that may contain comma
    thousands separators (e.g. ``"1,234"``); commas are stripped before the
    string is converted with ``int``.
    """
    return [int(text.replace(',', '')) for text in json.loads(nums)]
def get_milwaukee_2020():
    """Download the Milwaukee 2020 results CSV and return a ``csv.reader`` over it.

    Returns:
        A ``csv.reader`` that yields one list of strings per CSV row.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    url = "https://gist.githubusercontent.com/XertroV/4c4376121e2d280c897adf2ca6298a8c/raw/1232d55807b872d6f7c00ba6a4461192168779b4/milwaukee-2020.csv"
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly rather than feeding an HTML error page to the CSV parser.
    response.raise_for_status()
    return csv.reader(io.StringIO(response.text))
def get_milwaukee_2016():
    """Download the Milwaukee 2016 by-ward text file and return its first two columns.

    Returns:
        A list of ``(first, second)`` string tuples, one per non-blank line,
        built from the first two whitespace-separated fields of that line.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not respond within the timeout.
        IndexError: if a non-blank line has fewer than two fields.
    """
    url = "https://gist.githubusercontent.com/XertroV/c52587b005402c4eb6172ac481be4925/raw/75be031711d8f70ade405cdb1c74479939ce3c26/milwaukee-2016-by-ward.txt"
    response = requests.get(url, timeout=30)
    # Fail loudly rather than parsing an HTML error page as ward data.
    response.raise_for_status()

    pairs = []
    for line in response.text.splitlines():
        # split() with no argument collapses runs of whitespace, so repeated
        # spaces no longer produce empty fields.
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline) carry no data.
            continue
        pairs.append((fields[0], fields[1]))
    return pairs
# Keep columns 2 and 3 of every CSV row, dropping the first and the last row
# of the file.
mil_data = [record[2:4] for record in list(get_milwaukee_2020())[1:-1]]
#mil_data = get_milwaukee_2016()

# Split the two retained columns into per-candidate lists:
# column 0 -> 'bidenj', column 1 -> 'trumpd'.
mil_dict = defaultdict(list)
for i, cand in enumerate(('bidenj', 'trumpd')):
    for row in mil_data:
        mil_dict[cand].append(row[i])
def get_data(state_name):
    """Fetch the NYT 2020 state-page JSON and collect per-county results by candidate.

    Args:
        state_name: State slug interpolated into the NYT URL
            (e.g. ``'new-york'``).

    Returns:
        A ``defaultdict(list)`` mapping each key found in a county's
        ``'results'`` mapping to the list of its values, one entry per county
        in which that key appears. Only the first race (``races[0]``) is used.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    url = f"https://static01.nyt.com/elections-assets/2020/data/api/2020-11-03/state-page/{state_name}.json"
    response = requests.get(url, timeout=30)
    # Surface HTTP errors (e.g. an unknown state slug) as HTTPError instead of
    # an opaque JSON decoding or KeyError failure further down.
    response.raise_for_status()
    counties = response.json()['data']['races'][0]['counties']

    ret = defaultdict(list)
    for county in counties:
        for cand, votes in county['results'].items():
            ret[cand].append(votes)
    return ret
def first_digits_kvs(kvs):
    """Map a ``(key, values)`` pair to ``(key, leading_digits)``.

    Every value is passed through ``str`` and its first character is parsed
    as an int, so values may be ints or digit strings.
    """
    key, values = kvs
    # Alternative kept from the original: wrap the digit list in a Counter to
    # get frequencies instead of the raw digits.
    leading = [int(str(value)[0]) for value in values]
    return (key, leading)
def all_but_first_kvs(kvs):
    """Map a ``(key, values)`` pair to ``(key, digits)`` without leading digits.

    Every value is passed through ``str``; all characters after the first are
    parsed as ints and appended, in order, to one flat list across all values.
    """
    key, values = kvs
    trailing = []
    for value in values:
        # Skip the leading character; single-character values contribute nothing.
        trailing.extend(int(ch) for ch in str(value)[1:])
    return (key, trailing)
# from https://realpython.com/python-histograms/
def ascii_histogram(seq) -> None:
    """Print a horizontal text histogram of *seq*.

    *seq* is indexed by its own (integer) keys; rows are printed in sorted key
    order, each showing the key right-aligned in five columns followed by one
    ``+`` per unit of ``seq[key]``.
    """
    row_template = '{0:5d} {1}'
    for key in sorted(seq):
        bar = '+' * seq[key]
        print(row_template.format(key, bar))
def heading(h):
    """Print a section banner: two blank lines followed by ``"<h> histogram"``."""
    banner = f"\n\n{h} histogram"
    print(banner)
def histogram_state(title, fname, counts, only_two=True, xlabel='leading_digit', bins=None):
    """Plot a grouped, density-normalised histogram of digit samples and save it as a PNG.

    Args:
        title: Figure title; also echoed in the ``done ...`` message.
        fname: Output path without extension; ``.png`` is appended.
        counts: Mapping of series label -> iterable of digits.
        only_two: When true, plot only the ``'bidenj'`` and ``'trumpd'`` series.
        xlabel: Label for the x axis.
        bins: If given, an integer ``n`` producing unit-width bin edges
            ``0, 1, ..., n``; if ``None``, matplotlib chooses the bins.
    """
    if only_two:
        counts = dict(bidenj=counts['bidenj'], trumpd=counts['trumpd'])

    labels = list(counts.keys())
    samples = [list(values) for values in counts.values()]
    bin_edges = None if bins is None else range(bins + 1)
    # plt.hist requires exactly one colour per series. Keep the blue/red pair
    # for the two-series case and fall back to matplotlib's default colour
    # cycle otherwise, instead of raising ValueError.
    colors = ['blue', 'red'] if len(samples) == 2 else None

    fig = plt.figure(figsize=(16, 9))
    try:
        plt.hist(samples, bins=bin_edges, density=True, histtype='bar', label=labels, color=colors)
        plt.legend(prop=dict(size=10))
        # plt.grid(axis='y', alpha=0.75)
        plt.xlabel(xlabel)
        plt.ylabel('frequency')
        plt.title(title)
        plt.savefig(f"{fname}.png")
    finally:
        # Release the figure; pyplot otherwise keeps every figure alive, which
        # accumulates memory when this is called once per state.
        plt.close(fig)
    print(f"done {title}")
def print_graph(h, seq):
    """Print the banner for *h*, then the ASCII histogram of *seq*."""
    heading(h=h)
    ascii_histogram(seq=seq)
# --- Milwaukee 2020: leading-digit and remaining-digit histograms ---
state = 'milwaukee-2020'
pairs = dict(map(first_digits_kvs, mil_dict.items()))
histogram_state("Milwaukee 2020 - Distribution of leading digits (Benford's Law)", f"{state}-by-precinct", pairs, bins=10, only_two=False)
all_but_first_digits = dict(map(all_but_first_kvs, mil_dict.items()))
histogram_state("Milwaukee 2020 - Distribution of all digits excluding leading", f"{state}-digit-freq-excluding-first-digit", all_but_first_digits, xlabel='digit', bins=10, only_two=False)

# NOTE(review): this unconditional exit makes the per-state loop below
# unreachable. It looks like a deliberate switch to run only the Milwaukee
# analysis; remove it to generate the per-state histograms as well.
sys.exit()

# --- Per-state leading-digit histograms from NYT county data ---
for state in ['illinois', 'nevada', 'wisconsin', 'washington', 'new-york', 'florida', 'arizona', 'georgia', 'wyoming', 'california', 'pennsylvania', 'ohio']:
    try:
        print(f"\n\n ## {state} ##\n")
        cand_votes = get_data(state)
        pairs = dict(map(first_digits_kvs, cand_votes.items()))
        # histogram_state needs (title, fname, counts). The previous call
        # passed only two arguments and would have raised TypeError.
        # bins=10 gives one unit-width bin per digit, matching the Milwaukee plots.
        histogram_state(state, f"{state}-by-county", pairs, bins=10)
        # for k, v in pairs.items():
        #     print_graph(k, v)
    except Exception:
        print(f'exception during state: {state}')
        # Bare raise re-raises the active exception with its traceback intact.
        raise
# End of gist (GitHub page footer text from the web scrape has been dropped; it is not part of the script).