Skip to content

Instantly share code, notes, and snippets.

@RhodiumToad
Created May 7, 2021 19:44
Show Gist options
  • Save RhodiumToad/646a3ef3e400710e9edafbbd9b50ff33 to your computer and use it in GitHub Desktop.
Save RhodiumToad/646a3ef3e400710e9edafbbd9b50ff33 to your computer and use it in GitHub Desktop.
2020 US census apportionment
--
create table states_orig
(
state text primary key,
pop integer not null,
reps integer,
change integer
);
comment on table states_orig is 'Original data from census.gov';
-- data from https://www.census.gov/data/tables/2020/dec/2020-apportionment-data.html
-- specifically, table 1, exported from Excel to CSV and cleaned up
copy states_orig from stdin csv;
"Alabama",5030053,7,0
"Alaska",736081,1,0
"Arizona",7158923,9,0
"Arkansas",3013756,4,0
"California",39576757,52,-1
"Colorado",5782171,8,1
"Connecticut",3608298,5,0
"Delaware",990837,1,0
"Florida",21570527,28,1
"Georgia",10725274,14,0
"Hawaii",1460137,2,0
"Idaho",1841377,2,0
"Illinois",12822739,17,-1
"Indiana",6790280,9,0
"Iowa",3192406,4,0
"Kansas",2940865,4,0
"Kentucky",4509342,6,0
"Louisiana",4661468,6,0
"Maine",1363582,2,0
"Maryland",6185278,8,0
"Massachusetts",7033469,9,0
"Michigan",10084442,13,-1
"Minnesota",5709752,8,0
"Mississippi",2963914,4,0
"Missouri",6160281,8,0
"Montana",1085407,2,1
"Nebraska",1963333,3,0
"Nevada",3108462,4,0
"New Hampshire",1379089,2,0
"New Jersey",9294493,12,0
"New Mexico",2120220,3,0
"New York",20215751,26,-1
"North Carolina",10453948,14,1
"North Dakota",779702,1,0
"Ohio",11808848,15,-1
"Oklahoma",3963516,5,0
"Oregon",4241500,6,1
"Pennsylvania",13011844,17,-1
"Rhode Island",1098163,2,0
"South Carolina",5124712,7,0
"South Dakota",887770,1,0
"Tennessee",6916897,9,0
"Texas",29183290,38,2
"Utah",3275252,4,0
"Vermont",643503,1,0
"Virginia",8654542,11,0
"Washington",7715946,10,0
"West Virginia",1795045,2,-1
"Wisconsin",5897473,8,0
"Wyoming",577719,1,0
\.
select sum(pop) = 331108434 from states_orig;
create or replace function d(n integer)
returns double precision
language sql
as $$
select 1/sqrt(n*(n-1)); -- resolves to sqrt(double)
$$;
create table states
(
state text primary key,
pop integer not null
);
insert into states select state, pop from states_orig;
-- if you want to try out the possibilities of DC statehood:
-- insert into states values ('DC', 689545+1988);
-- (that's the resident + overseas population)
-- parameters
-- this is the maximum number of seats one state can have, so that we
-- can set an upper bound on the generated series of priority
-- multipliers. With the current populations, this needs to be about
-- 1/8th of the total seats, but we can set it to a larger value. If
-- you want to play about with House sizes over about 10000, increase
-- this.
create view maxseats(value) as values (1250);
-- interior subquery for generating raw seat assignment sequence
-- diff_minus is the number of people to subtract from population in
-- order to swap with the following row; diff_plus is the number to
-- add to swap with the previous row.
create view raw_seat_assignments as
select (select count(*) from states)
+ row_number() over w
as seat_no,
state,
maxseats,
ceil(pop - (lead(priority) over w)/factor) as diff_minus,
ceil((lag(priority) over w)/factor - pop) as diff_plus
from (select state,
pop,
d(n) as factor,
pop * d(n) as priority,
maxseats.value as maxseats
from states,
maxseats,
generate_series(2, maxseats.value+2) n /* +2 is just slop */
) as s1
window w as (order by priority desc);
-- This view generates the seat assignments. Once one state reaches
-- the maximum the results are no longer valid, so we cut off the
-- computation there.
create view seat_assignments(seat_no, state, diff_minus, diff_plus) as
select row_number() over (order by pop desc),
state,
null,
null
from states
union all
select seat_no,
state,
diff_minus,
diff_plus
from (select seat_no,
state,
diff_minus,
diff_plus,
every(accum_seats <= maxseats) over (order by seat_no)
as valid_data
from (select seat_no,
state,
maxseats,
diff_minus,
diff_plus,
1 + count(*) over (partition by state order by seat_no)
as accum_seats
from raw_seat_assignments
) s1
) s2
where valid_data;
-- end
-- results with original data
census=# select * from seat_assignments where seat_no between 430 and 450;
seat_no | state | diff_minus | diff_plus
---------+----------------+------------+-----------
430 | North Carolina | 6878 | 16943
431 | Oregon | 9385 | 2793
432 | Colorado | 31117 | 12822
433 | California | 52440 | 214136
434 | Montana | 6366 | 1441
435 | Minnesota | 26 | 33682
436 | New York | 19514 | 89
437 | Ohio | 64856 | 11410
438 | Texas | 42130 | 161163
439 | Florida | 67260 | 31185
440 | Arizona | 7017 | 22393
441 | California | 31288 | 38826
442 | Virginia | 17719 | 6848
443 | Idaho | 10363 | 3778
444 | Michigan | 45243 | 57072
445 | New Jersey | 3965 | 41887
446 | Pennsylvania | 42768 | 5553
447 | Massachusetts | 12130 | 23195
448 | Georgia | 4775 | 18528
449 | California | 48582 | 17625
450 | Texas | 125999 | 35868
(21 rows)
-- temporarily add 89 people to NY and try again
census=# begin; update states set pop = pop + 89 where state = 'New York';
BEGIN
UPDATE 1
census=*# select * from seat_assignments where seat_no between 434 and 436;
seat_no | state | diff_minus | diff_plus
---------+-----------+------------+-----------
434 | Montana | 6366 | 1441
435 | New York | 1 | 119254
436 | Minnesota | 5537 | 1
(3 rows)
census=*# rollback;
ROLLBACK
-- temporarily subtract 26 people from MN instead:
census=# begin; update states set pop = pop - 26 where state = 'Minnesota';
BEGIN
UPDATE 1
census=*# select * from seat_assignments where seat_no between 434 and 436;
seat_no | state | diff_minus | diff_plus
---------+-----------+------------+-----------
434 | Montana | 6371 | 1441
435 | New York | 4 | 119343
436 | Minnesota | 5511 | 1
(3 rows)
census=*# rollback;
ROLLBACK
census=# \q
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment